{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 6093, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "logps_train/chosen": -64.59663391113281, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -128.38131713867188, "rewards_train/accuracies": 0.5625, "rewards_train/chosen": -0.0010213018395006657, "rewards_train/margins": -0.0004862666828557849, "rewards_train/rejected": -0.0005350351566448808, "step": 0 }, { "epoch": 0.0, "learning_rate": 3.2786885245901638e-09, "loss": 0.6935, "step": 1 }, { "epoch": 0.0, "logps_train/chosen": -67.33647155761719, "logps_train/ref_chosen": -67.3125, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -127.46062469482422, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.0013723314041271806, "rewards_train/margins": -0.00023180840071290731, "rewards_train/rejected": -0.0011405230034142733, "step": 1 }, { "epoch": 0.0, "learning_rate": 6.5573770491803275e-09, "loss": 0.6933, "step": 2 }, { "epoch": 0.0, "logps_train/chosen": -62.90361785888672, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -128.62429809570312, "rewards_train/accuracies": 0.5625, "rewards_train/chosen": -0.00032281281892210245, "rewards_train/margins": 0.000682014157064259, "rewards_train/rejected": -0.0010048269759863615, "step": 2 }, { "epoch": 0.0, "learning_rate": 9.836065573770492e-09, "loss": 0.6927, "step": 3 }, { "epoch": 0.0, "logps_train/chosen": -65.55517578125, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -125.59049224853516, "rewards_train/accuracies": 0.40625, "rewards_train/chosen": 9.777545346878469e-05, "rewards_train/margins": -0.0010581313690636307, "rewards_train/rejected": 0.0011559068225324154, "step": 3 }, { "epoch": 0.0, "learning_rate": 1.3114754098360655e-08, "loss": 0.6937, "step": 4 }, { "epoch": 0.0, "logps_train/chosen": -63.94681167602539, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -126.8414306640625, "rewards_train/accuracies": 0.4375, "rewards_train/chosen": -3.552457201294601e-06, "rewards_train/margins": -0.0010162116232095286, "rewards_train/rejected": 0.001012659166008234, "step": 4 }, { "epoch": 0.0, "learning_rate": 1.639344262295082e-08, "loss": 0.6937, "step": 5 }, { "epoch": 0.0, "logps_train/chosen": -67.93047332763672, "logps_train/ref_chosen": -67.9375, "logps_train/ref_rejected": -132.5, "logps_train/rejected": -132.5218505859375, "rewards_train/accuracies": 0.5625, "rewards_train/chosen": 0.0007512212032452226, "rewards_train/margins": 0.001081669412087649, "rewards_train/rejected": -0.00033044820884242654, "step": 5 }, { "epoch": 0.0, "learning_rate": 1.9672131147540984e-08, "loss": 0.6926, "step": 6 }, { "epoch": 0.0, "logps_train/chosen": -67.55049896240234, "logps_train/ref_chosen": -67.5625, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -129.5150604248047, "rewards_train/accuracies": 0.546875, "rewards_train/chosen": 0.0008094430668279529, "rewards_train/margins": 0.0009483695612289011, "rewards_train/rejected": -0.00013892649440094829, "step": 6 }, { "epoch": 0.0, "learning_rate": 2.2950819672131146e-08, "loss": 0.6928, "step": 7 }, { "epoch": 0.0, "logps_train/chosen": -65.09929656982422, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -127.65293884277344, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.000164031982421875, "rewards_train/margins": -0.00015324953710660338, "rewards_train/rejected": -1.0782445315271616e-05, "step": 7 }, { "epoch": 0.0, "learning_rate": 2.622950819672131e-08, "loss": 0.6932, "step": 8 }, { "epoch": 0.0, "logps_train/chosen": -65.85258483886719, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -126.20240783691406, "rewards_train/accuracies": 0.40625, "rewards_train/chosen": 0.00024010540801100433, "rewards_train/margins": -2.726903767324984e-05, "rewards_train/rejected": 0.00026737444568425417, "step": 8 }, { "epoch": 0.0, "learning_rate": 2.9508196721311475e-08, "loss": 0.6932, "step": 9 }, { "epoch": 0.0, "logps_train/chosen": -62.38636779785156, "logps_train/ref_chosen": -62.40625, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -124.41600799560547, "rewards_train/accuracies": 0.578125, "rewards_train/chosen": 0.0003036498965229839, "rewards_train/margins": 0.002353733725612983, "rewards_train/rejected": -0.002050083829089999, "step": 9 }, { "epoch": 0.0, "learning_rate": 3.278688524590164e-08, "loss": 0.692, "step": 10 }, { "epoch": 0.0, "logps_train/chosen": -64.4576416015625, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -123.43441009521484, "rewards_train/accuracies": 0.5625, "rewards_train/chosen": 0.0007199704414233565, "rewards_train/margins": 0.0011920749384444207, "rewards_train/rejected": -0.00047210449702106416, "step": 10 }, { "epoch": 0.01, "learning_rate": 3.606557377049181e-08, "loss": 0.6925, "step": 11 }, { "epoch": 0.01, "logps_train/chosen": -64.82386779785156, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -127.50677490234375, "rewards_train/accuracies": 0.515625, "rewards_train/chosen": 0.00018238418851979077, "rewards_train/margins": 0.0005185842455830425, "rewards_train/rejected": -0.00033620005706325173, "step": 11 }, { "epoch": 0.01, "learning_rate": 3.934426229508197e-08, "loss": 0.6929, "step": 12 }, { "epoch": 0.01, "logps_train/chosen": -62.60801696777344, "logps_train/ref_chosen": -62.59375, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -121.01045989990234, "rewards_train/accuracies": 0.53125, "rewards_train/chosen": -0.0008410692680627108, "rewards_train/margins": 0.0004975318443030119, "rewards_train/rejected": -0.0013386011123657227, "step": 12 }, { "epoch": 0.01, "learning_rate": 4.262295081967213e-08, "loss": 0.6929, "step": 13 }, { "epoch": 0.01, "logps_train/chosen": -65.30391693115234, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -126.61194610595703, "rewards_train/accuracies": 0.46875, "rewards_train/chosen": -0.00016741750005166978, "rewards_train/margins": -7.986949640326202e-06, "rewards_train/rejected": -0.00015943055041134357, "step": 13 }, { "epoch": 0.01, "learning_rate": 4.590163934426229e-08, "loss": 0.6931, "step": 14 }, { "epoch": 0.01, "logps_train/chosen": -62.33094787597656, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -126.17008972167969, "rewards_train/accuracies": 0.5, "rewards_train/chosen": 0.00030329235596582294, "rewards_train/margins": 0.00027114158729091287, "rewards_train/rejected": 3.215076867491007e-05, "step": 14 }, { "epoch": 0.01, "learning_rate": 4.918032786885246e-08, "loss": 0.693, "step": 15 }, { "epoch": 0.01, "logps_train/chosen": -64.22964477539062, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -123.1875, "logps_train/rejected": -123.19700622558594, "rewards_train/accuracies": 0.59375, "rewards_train/chosen": 0.0006691694143228233, "rewards_train/margins": 0.0009363294229842722, "rewards_train/rejected": -0.00026716000866144896, "step": 15 }, { "epoch": 0.01, "learning_rate": 5.245901639344262e-08, "loss": 0.6927, "step": 16 }, { "epoch": 0.01, "logps_train/chosen": -64.48715209960938, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -127.39070129394531, "rewards_train/accuracies": 0.65625, "rewards_train/chosen": 0.0020656674169003963, "rewards_train/margins": 0.0020249991794116795, "rewards_train/rejected": 4.066823748871684e-05, "step": 16 }, { "epoch": 0.01, "learning_rate": 5.573770491803279e-08, "loss": 0.6922, "step": 17 }, { "epoch": 0.01, "logps_train/chosen": -64.76792907714844, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -126.71294403076172, "rewards_train/accuracies": 0.578125, "rewards_train/chosen": 0.001185062574222684, "rewards_train/margins": 0.0021178097813390195, "rewards_train/rejected": -0.0009327472071163356, "step": 17 }, { "epoch": 0.01, "learning_rate": 5.901639344262295e-08, "loss": 0.6921, "step": 18 }, { "epoch": 0.01, "logps_train/chosen": -64.34992218017578, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -129.23016357421875, "rewards_train/accuracies": 0.65625, "rewards_train/chosen": 0.0022152005694806576, "rewards_train/margins": 0.0026737391599453986, "rewards_train/rejected": -0.000458538590464741, "step": 18 }, { "epoch": 0.01, "learning_rate": 6.229508196721311e-08, "loss": 0.6919, "step": 19 }, { "epoch": 0.01, "logps_train/chosen": -64.82118225097656, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -126.23577117919922, "rewards_train/accuracies": 0.71875, "rewards_train/chosen": 0.0004496514447964728, "rewards_train/margins": 0.004300111730117351, "rewards_train/rejected": -0.003850460285320878, "step": 19 }, { "epoch": 0.01, "learning_rate": 6.557377049180328e-08, "loss": 0.6911, "step": 20 }, { "epoch": 0.01, "logps_train/chosen": -64.47505950927734, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -127.48822021484375, "rewards_train/accuracies": 0.640625, "rewards_train/chosen": 0.0027379454113543034, "rewards_train/margins": 0.002048796508461237, "rewards_train/rejected": 0.0006891489028930664, "step": 20 }, { "epoch": 0.01, "learning_rate": 6.885245901639345e-08, "loss": 0.6922, "step": 21 }, { "epoch": 0.01, "logps_train/chosen": -63.73783874511719, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -125.39576721191406, "rewards_train/accuracies": 0.59375, "rewards_train/chosen": 0.0007276833057403564, "rewards_train/margins": 0.0031946361996233463, "rewards_train/rejected": -0.00246695289388299, "step": 21 }, { "epoch": 0.01, "learning_rate": 7.213114754098361e-08, "loss": 0.6916, "step": 22 }, { "epoch": 0.01, "logps_train/chosen": -62.331642150878906, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -123.24612426757812, "rewards_train/accuracies": 0.65625, "rewards_train/chosen": 0.002236324595287442, "rewards_train/margins": 0.002678531571291387, "rewards_train/rejected": -0.0004422069760039449, "step": 22 }, { "epoch": 0.01, "learning_rate": 7.540983606557377e-08, "loss": 0.6919, "step": 23 }, { "epoch": 0.01, "logps_train/chosen": -64.98505401611328, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -131.5, "logps_train/rejected": -131.47683715820312, "rewards_train/accuracies": 0.734375, "rewards_train/chosen": 0.002959185978397727, "rewards_train/margins": 0.003962594433687627, "rewards_train/rejected": -0.0010034084552899003, "step": 23 }, { "epoch": 0.01, "learning_rate": 7.868852459016394e-08, "loss": 0.6911, "step": 24 }, { "epoch": 0.01, "logps_train/chosen": -68.1093978881836, "logps_train/ref_chosen": -68.125, "logps_train/ref_rejected": -131.5, "logps_train/rejected": -131.5288848876953, "rewards_train/accuracies": 0.84375, "rewards_train/chosen": 0.001853138324804604, "rewards_train/margins": 0.0063035666244104505, "rewards_train/rejected": -0.004450428299605846, "step": 24 }, { "epoch": 0.01, "learning_rate": 8.196721311475409e-08, "loss": 0.69, "step": 25 }, { "epoch": 0.01, "logps_train/chosen": -63.11719512939453, "logps_train/ref_chosen": -63.15625, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -126.91737365722656, "rewards_train/accuracies": 0.71875, "rewards_train/chosen": 0.002879923675209284, "rewards_train/margins": 0.004578417749144137, "rewards_train/rejected": -0.001698494073934853, "step": 25 }, { "epoch": 0.01, "learning_rate": 8.524590163934426e-08, "loss": 0.6908, "step": 26 }, { "epoch": 0.01, "logps_train/chosen": -64.52815246582031, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -126.28927612304688, "rewards_train/accuracies": 0.84375, "rewards_train/chosen": 0.002824121853336692, "rewards_train/margins": 0.006605094764381647, "rewards_train/rejected": -0.0037809729110449553, "step": 26 }, { "epoch": 0.01, "learning_rate": 8.852459016393441e-08, "loss": 0.6899, "step": 27 }, { "epoch": 0.01, "logps_train/chosen": -63.955177307128906, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -124.40907287597656, "rewards_train/accuracies": 0.78125, "rewards_train/chosen": 0.0029685436747968197, "rewards_train/margins": 0.006278329994529486, "rewards_train/rejected": -0.003309786319732666, "step": 27 }, { "epoch": 0.01, "learning_rate": 9.180327868852458e-08, "loss": 0.6899, "step": 28 }, { "epoch": 0.01, "logps_train/chosen": -65.93914794921875, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -129.80020141601562, "rewards_train/accuracies": 0.90625, "rewards_train/chosen": 0.004571252968162298, "rewards_train/margins": 0.008665061090141535, "rewards_train/rejected": -0.004093808121979237, "step": 28 }, { "epoch": 0.01, "learning_rate": 9.508196721311475e-08, "loss": 0.6889, "step": 29 }, { "epoch": 0.01, "logps_train/chosen": -62.24956130981445, "logps_train/ref_chosen": -62.28125, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -125.84774017333984, "rewards_train/accuracies": 0.9375, "rewards_train/chosen": 0.0038037183694541454, "rewards_train/margins": 0.00845074700191617, "rewards_train/rejected": -0.004647028632462025, "step": 29 }, { "epoch": 0.01, "learning_rate": 9.836065573770492e-08, "loss": 0.6888, "step": 30 }, { "epoch": 0.01, "logps_train/chosen": -62.0523681640625, "logps_train/ref_chosen": -62.09375, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -122.44671630859375, "rewards_train/accuracies": 0.859375, "rewards_train/chosen": 0.0042359353974461555, "rewards_train/margins": 0.009747058153152466, "rewards_train/rejected": -0.00551112275570631, "step": 30 }, { "epoch": 0.02, "learning_rate": 1.0163934426229507e-07, "loss": 0.6883, "step": 31 }, { "epoch": 0.02, "logps_train/chosen": -66.82209777832031, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -130.54248046875, "rewards_train/accuracies": 0.921875, "rewards_train/chosen": 0.004607212729752064, "rewards_train/margins": 0.011101079173386097, "rewards_train/rejected": -0.006493866443634033, "step": 31 }, { "epoch": 0.02, "learning_rate": 1.0491803278688524e-07, "loss": 0.6875, "step": 32 }, { "epoch": 0.02, "logps_train/chosen": -63.941871643066406, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -121.8125, "logps_train/rejected": -121.89007568359375, "rewards_train/accuracies": 0.96875, "rewards_train/chosen": 0.0056663574650883675, "rewards_train/margins": 0.011861699633300304, "rewards_train/rejected": -0.006195342168211937, "step": 32 }, { "epoch": 0.02, "learning_rate": 1.0819672131147541e-07, "loss": 0.6873, "step": 33 }, { "epoch": 0.02, "logps_train/chosen": -62.36212921142578, "logps_train/ref_chosen": -62.4375, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -124.72809600830078, "rewards_train/accuracies": 0.921875, "rewards_train/chosen": 0.006023597903549671, "rewards_train/margins": 0.012817692942917347, "rewards_train/rejected": -0.006794095039367676, "step": 33 }, { "epoch": 0.02, "learning_rate": 1.1147540983606558e-07, "loss": 0.6867, "step": 34 }, { "epoch": 0.02, "logps_train/chosen": -63.473628997802734, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -124.68644714355469, "rewards_train/accuracies": 0.953125, "rewards_train/chosen": 0.005884135141968727, "rewards_train/margins": 0.013688803184777498, "rewards_train/rejected": -0.007804668042808771, "step": 34 }, { "epoch": 0.02, "learning_rate": 1.1475409836065573e-07, "loss": 0.6863, "step": 35 }, { "epoch": 0.02, "logps_train/chosen": -62.0738525390625, "logps_train/ref_chosen": -62.15625, "logps_train/ref_rejected": -120.75, "logps_train/rejected": -120.81611633300781, "rewards_train/accuracies": 0.953125, "rewards_train/chosen": 0.00872812606394291, "rewards_train/margins": 0.015828714706003666, "rewards_train/rejected": -0.007100588642060757, "step": 35 }, { "epoch": 0.02, "learning_rate": 1.180327868852459e-07, "loss": 0.6853, "step": 36 }, { "epoch": 0.02, "logps_train/chosen": -67.28268432617188, "logps_train/ref_chosen": -67.375, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -128.84474182128906, "rewards_train/accuracies": 0.984375, "rewards_train/chosen": 0.008597028441727161, "rewards_train/margins": 0.017435860820114613, "rewards_train/rejected": -0.008838832378387451, "step": 36 }, { "epoch": 0.02, "learning_rate": 1.180327868852459e-07, "loss": 0.6845, "step": 37 }, { "epoch": 0.02, "logps_train/chosen": -65.83193969726562, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -129.0691375732422, "rewards_train/accuracies": 0.96875, "rewards_train/chosen": 0.007846370339393616, "rewards_train/margins": 0.01700635254383087, "rewards_train/rejected": -0.009159982204437256, "step": 37 }, { "epoch": 0.02, "learning_rate": 1.2131147540983607e-07, "loss": 0.6848, "step": 38 }, { "epoch": 0.02, "logps_train/chosen": -60.71224594116211, "logps_train/ref_chosen": -60.78125, "logps_train/ref_rejected": -118.1875, "logps_train/rejected": -118.3202133178711, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.008511948399245739, "rewards_train/margins": 0.018268340267241, "rewards_train/rejected": -0.009756391867995262, "step": 38 }, { "epoch": 0.02, "learning_rate": 1.2459016393442622e-07, "loss": 0.684, "step": 39 }, { "epoch": 0.02, "logps_train/chosen": -63.943302154541016, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -129.84637451171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.010064816102385521, "rewards_train/margins": 0.01935986801981926, "rewards_train/rejected": -0.009295051917433739, "step": 39 }, { "epoch": 0.02, "learning_rate": 1.2786885245901638e-07, "loss": 0.6835, "step": 40 }, { "epoch": 0.02, "logps_train/chosen": -65.22454833984375, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -124.29137420654297, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.010992337018251419, "rewards_train/margins": 0.02050084061920643, "rewards_train/rejected": -0.00950850360095501, "step": 40 }, { "epoch": 0.02, "learning_rate": 1.3114754098360656e-07, "loss": 0.683, "step": 41 }, { "epoch": 0.02, "logps_train/chosen": -63.03190231323242, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -124.53550720214844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.01067686639726162, "rewards_train/margins": 0.02096604648977518, "rewards_train/rejected": -0.010289180092513561, "step": 41 }, { "epoch": 0.02, "learning_rate": 1.344262295081967e-07, "loss": 0.6827, "step": 42 }, { "epoch": 0.02, "logps_train/chosen": -65.16941833496094, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -122.66033935546875, "rewards_train/accuracies": 0.984375, "rewards_train/chosen": 0.012257713824510574, "rewards_train/margins": 0.024385607801377773, "rewards_train/rejected": -0.012127893976867199, "step": 42 }, { "epoch": 0.02, "learning_rate": 1.377049180327869e-07, "loss": 0.6811, "step": 43 }, { "epoch": 0.02, "logps_train/chosen": -63.05071258544922, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -122.8788070678711, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.012213820591568947, "rewards_train/margins": 0.024654794484376907, "rewards_train/rejected": -0.01244097389280796, "step": 43 }, { "epoch": 0.02, "learning_rate": 1.4098360655737705e-07, "loss": 0.6809, "step": 44 }, { "epoch": 0.02, "logps_train/chosen": -61.39311981201172, "logps_train/ref_chosen": -61.53125, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -122.74891662597656, "rewards_train/accuracies": 0.984375, "rewards_train/chosen": 0.012543672695755959, "rewards_train/margins": 0.02845139056444168, "rewards_train/rejected": -0.015907717868685722, "step": 44 }, { "epoch": 0.02, "learning_rate": 1.4426229508196723e-07, "loss": 0.679, "step": 45 }, { "epoch": 0.02, "logps_train/chosen": -62.946022033691406, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -123.51640319824219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.013942349702119827, "rewards_train/margins": 0.02925484348088503, "rewards_train/rejected": -0.015312493778765202, "step": 45 }, { "epoch": 0.02, "learning_rate": 1.4754098360655736e-07, "loss": 0.6786, "step": 46 }, { "epoch": 0.02, "logps_train/chosen": -64.3432388305664, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -130.71456909179688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.014797544106841087, "rewards_train/margins": 0.0332277063280344, "rewards_train/rejected": -0.018430162221193314, "step": 46 }, { "epoch": 0.02, "learning_rate": 1.5081967213114754e-07, "loss": 0.6767, "step": 47 }, { "epoch": 0.02, "logps_train/chosen": -64.61681365966797, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -123.96551513671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.014783364720642567, "rewards_train/margins": 0.034528386779129505, "rewards_train/rejected": -0.01974502205848694, "step": 47 }, { "epoch": 0.02, "learning_rate": 1.540983606557377e-07, "loss": 0.676, "step": 48 }, { "epoch": 0.02, "logps_train/chosen": -62.093414306640625, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -127.5722885131836, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.015487855300307274, "rewards_train/margins": 0.03536299988627434, "rewards_train/rejected": -0.019875144585967064, "step": 48 }, { "epoch": 0.02, "learning_rate": 1.5737704918032787e-07, "loss": 0.6756, "step": 49 }, { "epoch": 0.02, "logps_train/chosen": -64.47523498535156, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -124.82014465332031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.017320197075605392, "rewards_train/margins": 0.037616459652781487, "rewards_train/rejected": -0.020296262577176094, "step": 49 }, { "epoch": 0.02, "learning_rate": 1.6065573770491803e-07, "loss": 0.6745, "step": 50 }, { "epoch": 0.02, "logps_train/chosen": -63.89207077026367, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -125.61920166015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.017848648130893707, "rewards_train/margins": 0.04197574965655804, "rewards_train/rejected": -0.02412710152566433, "step": 50 }, { "epoch": 0.03, "learning_rate": 1.6393442622950818e-07, "loss": 0.6724, "step": 51 }, { "epoch": 0.03, "logps_train/chosen": -63.50442886352539, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -122.23120880126953, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.01996750570833683, "rewards_train/margins": 0.04338151030242443, "rewards_train/rejected": -0.0234140045940876, "step": 51 }, { "epoch": 0.03, "learning_rate": 1.6721311475409836e-07, "loss": 0.6717, "step": 52 }, { "epoch": 0.03, "logps_train/chosen": -67.43924713134766, "logps_train/ref_chosen": -67.6875, "logps_train/ref_rejected": -135.125, "logps_train/rejected": -135.4314422607422, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.023507047444581985, "rewards_train/margins": 0.054445089772343636, "rewards_train/rejected": -0.03093804232776165, "step": 52 }, { "epoch": 0.03, "learning_rate": 1.7049180327868852e-07, "loss": 0.6664, "step": 53 }, { "epoch": 0.03, "logps_train/chosen": -63.61198425292969, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -122.0625, "logps_train/rejected": -122.35955047607422, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.020540256053209305, "rewards_train/margins": 0.05058647692203522, "rewards_train/rejected": -0.030046220868825912, "step": 53 }, { "epoch": 0.03, "learning_rate": 1.7377049180327867e-07, "loss": 0.6681, "step": 54 }, { "epoch": 0.03, "logps_train/chosen": -66.45802307128906, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -128.3617401123047, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.02416844293475151, "rewards_train/margins": 0.055069612339138985, "rewards_train/rejected": -0.030901169404387474, "step": 54 }, { "epoch": 0.03, "learning_rate": 1.7704918032786883e-07, "loss": 0.666, "step": 55 }, { "epoch": 0.03, "logps_train/chosen": -63.108314514160156, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -125.30570983886719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.026887936517596245, "rewards_train/margins": 0.057850008830428123, "rewards_train/rejected": -0.03096207231283188, "step": 55 }, { "epoch": 0.03, "learning_rate": 1.80327868852459e-07, "loss": 0.6648, "step": 56 }, { "epoch": 0.03, "logps_train/chosen": -62.885711669921875, "logps_train/ref_chosen": -63.15625, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -123.08444213867188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.02803032472729683, "rewards_train/margins": 0.05898487754166126, "rewards_train/rejected": -0.030954552814364433, "step": 56 }, { "epoch": 0.03, "learning_rate": 1.8360655737704916e-07, "loss": 0.6641, "step": 57 }, { "epoch": 0.03, "logps_train/chosen": -64.72036743164062, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -122.6875, "logps_train/rejected": -122.98512268066406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.031087923794984818, "rewards_train/margins": 0.06329134106636047, "rewards_train/rejected": -0.032203417271375656, "step": 57 }, { "epoch": 0.03, "learning_rate": 1.8688524590163935e-07, "loss": 0.6621, "step": 58 }, { "epoch": 0.03, "logps_train/chosen": -65.73363494873047, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -130.25758361816406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.03371688723564148, "rewards_train/margins": 0.07392808794975281, "rewards_train/rejected": -0.04021120071411133, "step": 58 }, { "epoch": 0.03, "learning_rate": 1.901639344262295e-07, "loss": 0.6569, "step": 59 }, { "epoch": 0.03, "logps_train/chosen": -61.42835235595703, "logps_train/ref_chosen": -61.75, "logps_train/ref_rejected": -118.75, "logps_train/rejected": -119.13124084472656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.031237037852406502, "rewards_train/margins": 0.07155847735702991, "rewards_train/rejected": -0.04032143950462341, "step": 59 }, { "epoch": 0.03, "learning_rate": 1.9344262295081968e-07, "loss": 0.658, "step": 60 }, { "epoch": 0.03, "logps_train/chosen": -64.21273040771484, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -126.64735412597656, "rewards_train/accuracies": 0.984375, "rewards_train/chosen": 0.03487902879714966, "rewards_train/margins": 0.07783712446689606, "rewards_train/rejected": -0.0429580956697464, "step": 60 }, { "epoch": 0.03, "learning_rate": 1.9672131147540984e-07, "loss": 0.6551, "step": 61 }, { "epoch": 0.03, "logps_train/chosen": -63.598060607910156, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -124.98638153076172, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.03545745462179184, "rewards_train/margins": 0.07882211357355118, "rewards_train/rejected": -0.04336465895175934, "step": 61 }, { "epoch": 0.03, "learning_rate": 2e-07, "loss": 0.6546, "step": 62 }, { "epoch": 0.03, "logps_train/chosen": -63.42018127441406, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -127.9691162109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.03620467334985733, "rewards_train/margins": 0.08751058205962181, "rewards_train/rejected": -0.05130590870976448, "step": 62 }, { "epoch": 0.03, "learning_rate": 2.0327868852459015e-07, "loss": 0.6506, "step": 63 }, { "epoch": 0.03, "logps_train/chosen": -62.471168518066406, "logps_train/ref_chosen": -62.875, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -124.26791381835938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.038918137550354004, "rewards_train/margins": 0.0875844843685627, "rewards_train/rejected": -0.048666346818208694, "step": 63 }, { "epoch": 0.03, "learning_rate": 2.0655737704918033e-07, "loss": 0.6503, "step": 64 }, { "epoch": 0.03, "logps_train/chosen": -63.08351135253906, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -124.74774169921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.043797675520181656, "rewards_train/margins": 0.09586642310023308, "rewards_train/rejected": -0.05206874758005142, "step": 64 }, { "epoch": 0.03, "learning_rate": 2.0983606557377048e-07, "loss": 0.6464, "step": 65 }, { "epoch": 0.03, "logps_train/chosen": -62.74395751953125, "logps_train/ref_chosen": -63.15625, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -122.3123779296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.04196147620677948, "rewards_train/margins": 0.09487981349229813, "rewards_train/rejected": -0.052918337285518646, "step": 65 }, { "epoch": 0.03, "learning_rate": 2.1311475409836064e-07, "loss": 0.647, "step": 66 }, { "epoch": 0.03, "logps_train/chosen": -64.20704650878906, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -123.3125, "logps_train/rejected": -123.93177032470703, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0451153889298439, "rewards_train/margins": 0.10694460570812225, "rewards_train/rejected": -0.06182921677827835, "step": 66 }, { "epoch": 0.03, "learning_rate": 2.1311475409836064e-07, "loss": 0.6412, "step": 67 }, { "epoch": 0.03, "logps_train/chosen": -63.9333610534668, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -125.66487121582031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.04509183019399643, "rewards_train/margins": 0.10391362011432648, "rewards_train/rejected": -0.05882178992033005, "step": 67 }, { "epoch": 0.03, "learning_rate": 2.1639344262295082e-07, "loss": 0.6426, "step": 68 }, { "epoch": 0.03, "logps_train/chosen": -64.95674133300781, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -129.14761352539062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.048807986080646515, "rewards_train/margins": 0.1090768501162529, "rewards_train/rejected": -0.060268864035606384, "step": 68 }, { "epoch": 0.03, "learning_rate": 2.1967213114754097e-07, "loss": 0.6401, "step": 69 }, { "epoch": 0.03, "logps_train/chosen": -65.28659057617188, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -126.88087463378906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.04634123295545578, "rewards_train/margins": 0.11269930750131607, "rewards_train/rejected": -0.06635807454586029, "step": 69 }, { "epoch": 0.03, "learning_rate": 2.2295081967213115e-07, "loss": 0.6386, "step": 70 }, { "epoch": 0.03, "logps_train/chosen": -63.22409439086914, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -126.70697021484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.050393037497997284, "rewards_train/margins": 0.11244773864746094, "rewards_train/rejected": -0.062054701149463654, "step": 70 }, { "epoch": 0.03, "learning_rate": 2.2622950819672128e-07, "loss": 0.6387, "step": 71 }, { "epoch": 0.03, "logps_train/chosen": -63.40625, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -130.3692626953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.05368667095899582, "rewards_train/margins": 0.12381624430418015, "rewards_train/rejected": -0.07012957334518433, "step": 71 }, { "epoch": 0.04, "learning_rate": 2.2950819672131146e-07, "loss": 0.6334, "step": 72 }, { "epoch": 0.04, "logps_train/chosen": -64.24984741210938, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -126.5763168334961, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.05587412044405937, "rewards_train/margins": 0.12566424533724785, "rewards_train/rejected": -0.06979012489318848, "step": 72 }, { "epoch": 0.04, "learning_rate": 2.3278688524590162e-07, "loss": 0.6325, "step": 73 }, { "epoch": 0.04, "logps_train/chosen": -63.25598907470703, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -123.13363647460938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.057018350809812546, "rewards_train/margins": 0.12980606779456139, "rewards_train/rejected": -0.07278771698474884, "step": 73 }, { "epoch": 0.04, "learning_rate": 2.360655737704918e-07, "loss": 0.6306, "step": 74 }, { "epoch": 0.04, "logps_train/chosen": -65.38823699951172, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -129.83251953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0624946653842926, "rewards_train/margins": 0.14164523780345917, "rewards_train/rejected": -0.07915057241916656, "step": 74 }, { "epoch": 0.04, "learning_rate": 2.3934426229508195e-07, "loss": 0.6251, "step": 75 }, { "epoch": 0.04, "logps_train/chosen": -60.91940689086914, "logps_train/ref_chosen": -61.5, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -126.0242919921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.05818133056163788, "rewards_train/margins": 0.1442044898867607, "rewards_train/rejected": -0.08602315932512283, "step": 75 }, { "epoch": 0.04, "learning_rate": 2.4262295081967213e-07, "loss": 0.6239, "step": 76 }, { "epoch": 0.04, "logps_train/chosen": -66.33562469482422, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -126.35101318359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.06873220205307007, "rewards_train/margins": 0.14807144552469254, "rewards_train/rejected": -0.07933924347162247, "step": 76 }, { "epoch": 0.04, "learning_rate": 2.4590163934426226e-07, "loss": 0.6221, "step": 77 }, { "epoch": 0.04, "logps_train/chosen": -66.22549438476562, "logps_train/ref_chosen": -66.9375, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -128.81787109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.06924712657928467, "rewards_train/margins": 0.15747913718223572, "rewards_train/rejected": -0.08823201060295105, "step": 77 }, { "epoch": 0.04, "learning_rate": 2.4918032786885244e-07, "loss": 0.6178, "step": 78 }, { "epoch": 0.04, "logps_train/chosen": -63.583900451660156, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -124.27510833740234, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0694907084107399, "rewards_train/margins": 0.1557418406009674, "rewards_train/rejected": -0.08625113219022751, "step": 78 }, { "epoch": 0.04, "learning_rate": 2.524590163934426e-07, "loss": 0.6186, "step": 79 }, { "epoch": 0.04, "logps_train/chosen": -63.01662826538086, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -132.02529907226562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.06904043257236481, "rewards_train/margins": 0.17528194189071655, "rewards_train/rejected": -0.10624150931835175, "step": 79 }, { "epoch": 0.04, "learning_rate": 2.5573770491803275e-07, "loss": 0.6096, "step": 80 }, { "epoch": 0.04, "logps_train/chosen": -65.25668334960938, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -130.92625427246094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.07481985539197922, "rewards_train/margins": 0.18121536821126938, "rewards_train/rejected": -0.10639551281929016, "step": 80 }, { "epoch": 0.04, "learning_rate": 2.5901639344262293e-07, "loss": 0.607, "step": 81 }, { "epoch": 0.04, "logps_train/chosen": -66.00372314453125, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -132.0, "logps_train/rejected": -133.135009765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.07846082746982574, "rewards_train/margins": 0.18893572688102722, "rewards_train/rejected": -0.11047489941120148, "step": 81 }, { "epoch": 0.04, "learning_rate": 2.622950819672131e-07, "loss": 0.6034, "step": 82 }, { "epoch": 0.04, "logps_train/chosen": -61.344825744628906, "logps_train/ref_chosen": -62.09375, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -122.72518920898438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.07662573456764221, "rewards_train/margins": 0.1852778196334839, "rewards_train/rejected": -0.10865208506584167, "step": 82 }, { "epoch": 0.04, "learning_rate": 2.6557377049180324e-07, "loss": 0.6053, "step": 83 }, { "epoch": 0.04, "logps_train/chosen": -61.66175079345703, "logps_train/ref_chosen": -62.4375, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -124.23655700683594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.07620778679847717, "rewards_train/margins": 0.19239258766174316, "rewards_train/rejected": -0.11618480086326599, "step": 83 }, { "epoch": 0.04, "learning_rate": 2.688524590163934e-07, "loss": 0.602, "step": 84 }, { "epoch": 0.04, "logps_train/chosen": -61.96930694580078, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -124.79708099365234, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.07816673815250397, "rewards_train/margins": 0.19420339167118073, "rewards_train/rejected": -0.11603665351867676, "step": 84 }, { "epoch": 0.04, "learning_rate": 2.721311475409836e-07, "loss": 0.6012, "step": 85 }, { "epoch": 0.04, "logps_train/chosen": -63.99660110473633, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -132.375, "logps_train/rejected": -133.6781463623047, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.08471494168043137, "rewards_train/margins": 0.2216697558760643, "rewards_train/rejected": -0.13695481419563293, "step": 85 }, { "epoch": 0.04, "learning_rate": 2.754098360655738e-07, "loss": 0.5891, "step": 86 }, { "epoch": 0.04, "logps_train/chosen": -61.06770324707031, "logps_train/ref_chosen": -61.96875, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -124.89954376220703, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.08863998204469681, "rewards_train/margins": 0.22410235553979874, "rewards_train/rejected": -0.13546237349510193, "step": 86 }, { "epoch": 0.04, "learning_rate": 2.786885245901639e-07, "loss": 0.5879, "step": 87 }, { "epoch": 0.04, "logps_train/chosen": -64.30404663085938, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -122.85972595214844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.08697780966758728, "rewards_train/margins": 0.21220825612545013, "rewards_train/rejected": -0.12523044645786285, "step": 87 }, { "epoch": 0.04, "learning_rate": 2.819672131147541e-07, "loss": 0.593, "step": 88 }, { "epoch": 0.04, "logps_train/chosen": -65.06315612792969, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -131.125, "logps_train/rejected": -132.510498046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0948314517736435, "rewards_train/margins": 0.23763000965118408, "rewards_train/rejected": -0.1427985578775406, "step": 88 }, { "epoch": 0.04, "learning_rate": 2.852459016393443e-07, "loss": 0.582, "step": 89 }, { "epoch": 0.04, "logps_train/chosen": -60.27250671386719, "logps_train/ref_chosen": -61.15625, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -123.10338592529297, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.08747114986181259, "rewards_train/margins": 0.23277019709348679, "rewards_train/rejected": -0.1452990472316742, "step": 89 }, { "epoch": 0.04, "learning_rate": 2.8852459016393446e-07, "loss": 0.5841, "step": 90 }, { "epoch": 0.04, "logps_train/chosen": -62.97294616699219, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -127.3897933959961, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.10006894171237946, "rewards_train/margins": 0.24793444573879242, "rewards_train/rejected": -0.14786550402641296, "step": 90 }, { "epoch": 0.04, "learning_rate": 2.9180327868852453e-07, "loss": 0.5777, "step": 91 }, { "epoch": 0.04, "logps_train/chosen": -62.77810287475586, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -125.62494659423828, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.10431845486164093, "rewards_train/margins": 0.2509438097476959, "rewards_train/rejected": -0.146625354886055, "step": 91 }, { "epoch": 0.05, "learning_rate": 2.950819672131147e-07, "loss": 0.5762, "step": 92 }, { "epoch": 0.05, "logps_train/chosen": -66.30656433105469, "logps_train/ref_chosen": -67.375, "logps_train/ref_rejected": -134.5, "logps_train/rejected": -136.2699737548828, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.10977328568696976, "rewards_train/margins": 0.2829626277089119, "rewards_train/rejected": -0.17318934202194214, "step": 92 }, { "epoch": 0.05, "learning_rate": 2.983606557377049e-07, "loss": 0.5625, "step": 93 }, { "epoch": 0.05, "logps_train/chosen": -63.957794189453125, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -125.82077026367188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.11261902004480362, "rewards_train/margins": 0.265936903655529, "rewards_train/rejected": -0.1533178836107254, "step": 93 }, { "epoch": 0.05, "learning_rate": 3.016393442622951e-07, "loss": 0.5699, "step": 94 }, { "epoch": 0.05, "logps_train/chosen": -65.67841339111328, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -128.1560516357422, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.11790034174919128, "rewards_train/margins": 0.2816988676786423, "rewards_train/rejected": -0.163798525929451, "step": 94 }, { "epoch": 0.05, "learning_rate": 3.049180327868852e-07, "loss": 0.563, "step": 95 }, { "epoch": 0.05, "logps_train/chosen": -65.01817321777344, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -130.42587280273438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.11634644865989685, "rewards_train/margins": 0.28881630301475525, "rewards_train/rejected": -0.1724698543548584, "step": 95 }, { "epoch": 0.05, "learning_rate": 3.081967213114754e-07, "loss": 0.5598, "step": 96 }, { "epoch": 0.05, "logps_train/chosen": -62.550418853759766, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -125.51197814941406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.11082708835601807, "rewards_train/margins": 0.2839001268148422, "rewards_train/rejected": -0.17307303845882416, "step": 96 }, { "epoch": 0.05, "learning_rate": 3.1147540983606557e-07, "loss": 0.5622, "step": 97 }, { "epoch": 0.05, "logps_train/chosen": -61.79655075073242, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -127.05672454833984, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.12097954750061035, "rewards_train/margins": 0.29852724075317383, "rewards_train/rejected": -0.17754769325256348, "step": 97 }, { "epoch": 0.05, "learning_rate": 3.1475409836065575e-07, "loss": 0.5559, "step": 98 }, { "epoch": 0.05, "logps_train/chosen": -62.465492248535156, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -121.375, "logps_train/rejected": -123.17567443847656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.12166354060173035, "rewards_train/margins": 0.3039773851633072, "rewards_train/rejected": -0.18231384456157684, "step": 98 }, { "epoch": 0.05, "learning_rate": 3.180327868852459e-07, "loss": 0.5537, "step": 99 }, { "epoch": 0.05, "logps_train/chosen": -65.00228881835938, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -130.53765869140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.13370639085769653, "rewards_train/margins": 0.32877951860427856, "rewards_train/rejected": -0.19507312774658203, "step": 99 }, { "epoch": 0.05, "learning_rate": 3.2131147540983606e-07, "loss": 0.5435, "step": 100 }, { "epoch": 0.05, "logps_train/chosen": -64.7651596069336, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -130.421630859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.13080787658691406, "rewards_train/margins": 0.3421591967344284, "rewards_train/rejected": -0.21135132014751434, "step": 100 }, { "epoch": 0.05, "learning_rate": 3.2459016393442624e-07, "loss": 0.5378, "step": 101 }, { "epoch": 0.05, "logps_train/chosen": -63.31742858886719, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -128.1177978515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.12460477650165558, "rewards_train/margins": 0.33355292677879333, "rewards_train/rejected": -0.20894815027713776, "step": 101 }, { "epoch": 0.05, "learning_rate": 3.2786885245901637e-07, "loss": 0.5416, "step": 102 }, { "epoch": 0.05, "logps_train/chosen": -61.7027702331543, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -121.1875, "logps_train/rejected": -123.21151733398438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.12352187931537628, "rewards_train/margins": 0.32563070952892303, "rewards_train/rejected": -0.20210883021354675, "step": 102 }, { "epoch": 0.05, "learning_rate": 3.3114754098360655e-07, "loss": 0.5445, "step": 103 }, { "epoch": 0.05, "logps_train/chosen": -64.14910888671875, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -128.47665405273438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.13196386396884918, "rewards_train/margins": 0.3509680777788162, "rewards_train/rejected": -0.21900421380996704, "step": 103 }, { "epoch": 0.05, "learning_rate": 3.3442622950819673e-07, "loss": 0.5347, "step": 104 }, { "epoch": 0.05, "logps_train/chosen": -64.14933013916016, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -131.23622131347656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.13575077056884766, "rewards_train/margins": 0.35932543873786926, "rewards_train/rejected": -0.2235746681690216, "step": 104 }, { "epoch": 0.05, "learning_rate": 3.377049180327869e-07, "loss": 0.5306, "step": 105 }, { "epoch": 0.05, "logps_train/chosen": -66.38592529296875, "logps_train/ref_chosen": -67.8125, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -131.41250610351562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.14016708731651306, "rewards_train/margins": 0.346945196390152, "rewards_train/rejected": -0.20677810907363892, "step": 105 }, { "epoch": 0.05, "learning_rate": 3.4098360655737704e-07, "loss": 0.5356, "step": 106 }, { "epoch": 0.05, "logps_train/chosen": -63.558738708496094, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -128.43080139160156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.13580140471458435, "rewards_train/margins": 0.36965295672416687, "rewards_train/rejected": -0.23385155200958252, "step": 106 }, { "epoch": 0.05, "learning_rate": 3.4426229508196717e-07, "loss": 0.5271, "step": 107 }, { "epoch": 0.05, "logps_train/chosen": -60.90179443359375, "logps_train/ref_chosen": -62.28125, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -124.79090881347656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1386779546737671, "rewards_train/margins": 0.3788035809993744, "rewards_train/rejected": -0.2401256263256073, "step": 107 }, { "epoch": 0.05, "learning_rate": 3.4754098360655735e-07, "loss": 0.523, "step": 108 }, { "epoch": 0.05, "logps_train/chosen": -64.716064453125, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -128.64712524414062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.14279800653457642, "rewards_train/margins": 0.3730389475822449, "rewards_train/rejected": -0.23024094104766846, "step": 108 }, { "epoch": 0.05, "learning_rate": 3.5081967213114753e-07, "loss": 0.5251, "step": 109 }, { "epoch": 0.05, "logps_train/chosen": -62.5989875793457, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -131.518798828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.15338236093521118, "rewards_train/margins": 0.40008702874183655, "rewards_train/rejected": -0.24670466780662537, "step": 109 }, { "epoch": 0.05, "learning_rate": 3.5409836065573766e-07, "loss": 0.515, "step": 110 }, { "epoch": 0.05, "logps_train/chosen": -62.41417694091797, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -122.4375, "logps_train/rejected": -124.83014678955078, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.15589697659015656, "rewards_train/margins": 0.3972124308347702, "rewards_train/rejected": -0.24131545424461365, "step": 110 }, { "epoch": 0.05, "learning_rate": 3.5737704918032784e-07, "loss": 0.5155, "step": 111 }, { "epoch": 0.05, "logps_train/chosen": -62.0150146484375, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -125.43077087402344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.16011960804462433, "rewards_train/margins": 0.40309955179691315, "rewards_train/rejected": -0.24297994375228882, "step": 111 }, { "epoch": 0.06, "learning_rate": 3.60655737704918e-07, "loss": 0.5137, "step": 112 }, { "epoch": 0.06, "logps_train/chosen": -64.14067840576172, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -131.7200927734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.17084430158138275, "rewards_train/margins": 0.4449053257703781, "rewards_train/rejected": -0.27406102418899536, "step": 112 }, { "epoch": 0.06, "learning_rate": 3.639344262295082e-07, "loss": 0.4968, "step": 113 }, { "epoch": 0.06, "logps_train/chosen": -61.6588020324707, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -127.21343231201172, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.16463738679885864, "rewards_train/margins": 0.44447630643844604, "rewards_train/rejected": -0.2798389196395874, "step": 113 }, { "epoch": 0.06, "learning_rate": 3.6721311475409833e-07, "loss": 0.4972, "step": 114 }, { "epoch": 0.06, "logps_train/chosen": -62.95560073852539, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -126.89263916015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.17333634197711945, "rewards_train/margins": 0.43232716619968414, "rewards_train/rejected": -0.2589908242225647, "step": 114 }, { "epoch": 0.06, "learning_rate": 3.704918032786885e-07, "loss": 0.5022, "step": 115 }, { "epoch": 0.06, "logps_train/chosen": -63.009521484375, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -129.8420867919922, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.17829583585262299, "rewards_train/margins": 0.4586946815252304, "rewards_train/rejected": -0.2803988456726074, "step": 115 }, { "epoch": 0.06, "learning_rate": 3.737704918032787e-07, "loss": 0.4917, "step": 116 }, { "epoch": 0.06, "logps_train/chosen": -61.54109191894531, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -124.17607116699219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.17362532019615173, "rewards_train/margins": 0.4461154043674469, "rewards_train/rejected": -0.27249008417129517, "step": 116 }, { "epoch": 0.06, "learning_rate": 3.770491803278688e-07, "loss": 0.4965, "step": 117 }, { "epoch": 0.06, "logps_train/chosen": -60.26380157470703, "logps_train/ref_chosen": -62.0, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -127.4689712524414, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.17171521484851837, "rewards_train/margins": 0.4840415269136429, "rewards_train/rejected": -0.3123263120651245, "step": 117 }, { "epoch": 0.06, "learning_rate": 3.80327868852459e-07, "loss": 0.4828, "step": 118 }, { "epoch": 0.06, "logps_train/chosen": -60.61859893798828, "logps_train/ref_chosen": -62.28125, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -127.41474151611328, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.16743668913841248, "rewards_train/margins": 0.4711674153804779, "rewards_train/rejected": -0.30373072624206543, "step": 118 }, { "epoch": 0.06, "learning_rate": 3.836065573770492e-07, "loss": 0.4873, "step": 119 }, { "epoch": 0.06, "logps_train/chosen": -63.19395446777344, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -130.77284240722656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.19339747726917267, "rewards_train/margins": 0.5032994598150253, "rewards_train/rejected": -0.30990198254585266, "step": 119 }, { "epoch": 0.06, "learning_rate": 3.8688524590163936e-07, "loss": 0.475, "step": 120 }, { "epoch": 0.06, "logps_train/chosen": -59.19920349121094, "logps_train/ref_chosen": -60.9375, "logps_train/ref_rejected": -122.3125, "logps_train/rejected": -125.5928955078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.17329247295856476, "rewards_train/margins": 0.5002573281526566, "rewards_train/rejected": -0.3269648551940918, "step": 120 }, { "epoch": 0.06, "learning_rate": 3.901639344262295e-07, "loss": 0.4761, "step": 121 }, { "epoch": 0.06, "logps_train/chosen": -63.47742462158203, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -131.37155151367188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.19625157117843628, "rewards_train/margins": 0.5327227711677551, "rewards_train/rejected": -0.33647119998931885, "step": 121 }, { "epoch": 0.06, "learning_rate": 3.9344262295081967e-07, "loss": 0.4647, "step": 122 }, { "epoch": 0.06, "logps_train/chosen": -65.52777099609375, "logps_train/ref_chosen": -67.5625, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -134.42083740234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2046443521976471, "rewards_train/margins": 0.5435058176517487, "rewards_train/rejected": -0.33886146545410156, "step": 122 }, { "epoch": 0.06, "learning_rate": 3.967213114754098e-07, "loss": 0.4619, "step": 123 }, { "epoch": 0.06, "logps_train/chosen": -62.84169006347656, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -130.788818359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1908797174692154, "rewards_train/margins": 0.5202502757310867, "rewards_train/rejected": -0.32937055826187134, "step": 123 }, { "epoch": 0.06, "learning_rate": 4e-07, "loss": 0.4692, "step": 124 }, { "epoch": 0.06, "logps_train/chosen": -59.15007781982422, "logps_train/ref_chosen": -61.03125, "logps_train/ref_rejected": -119.6875, "logps_train/rejected": -122.93257141113281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.18677449226379395, "rewards_train/margins": 0.511330246925354, "rewards_train/rejected": -0.32455575466156006, "step": 124 }, { "epoch": 0.06, "learning_rate": 4.032786885245901e-07, "loss": 0.4726, "step": 125 }, { "epoch": 0.06, "logps_train/chosen": -63.71189880371094, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -133.20425415039062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.20488396286964417, "rewards_train/margins": 0.5599777400493622, "rewards_train/rejected": -0.355093777179718, "step": 125 }, { "epoch": 0.06, "learning_rate": 4.065573770491803e-07, "loss": 0.4548, "step": 126 }, { "epoch": 0.06, "logps_train/chosen": -64.43939208984375, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -132.75, "logps_train/rejected": -136.6338653564453, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2106015980243683, "rewards_train/margins": 0.6024063527584076, "rewards_train/rejected": -0.3918047547340393, "step": 126 }, { "epoch": 0.06, "learning_rate": 4.0983606557377047e-07, "loss": 0.4403, "step": 127 }, { "epoch": 0.06, "logps_train/chosen": -63.77727127075195, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -126.63351440429688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2027905285358429, "rewards_train/margins": 0.5555954277515411, "rewards_train/rejected": -0.35280489921569824, "step": 127 }, { "epoch": 0.06, "learning_rate": 4.1311475409836065e-07, "loss": 0.4585, "step": 128 }, { "epoch": 0.06, "logps_train/chosen": -61.004798889160156, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -129.8292694091797, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.21333879232406616, "rewards_train/margins": 0.6032969057559967, "rewards_train/rejected": -0.38995811343193054, "step": 128 }, { "epoch": 0.06, "learning_rate": 4.163934426229508e-07, "loss": 0.4401, "step": 129 }, { "epoch": 0.06, "logps_train/chosen": -62.82853698730469, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -130.4072265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.22198057174682617, "rewards_train/margins": 0.5988348126411438, "rewards_train/rejected": -0.3768542408943176, "step": 129 }, { "epoch": 0.06, "learning_rate": 4.1967213114754096e-07, "loss": 0.4409, "step": 130 }, { "epoch": 0.06, "logps_train/chosen": -59.804931640625, "logps_train/ref_chosen": -61.9375, "logps_train/ref_rejected": -119.1875, "logps_train/rejected": -122.82754516601562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2149657905101776, "rewards_train/margins": 0.5779942870140076, "rewards_train/rejected": -0.36302849650382996, "step": 130 }, { "epoch": 0.06, "learning_rate": 4.2295081967213114e-07, "loss": 0.4473, "step": 131 }, { "epoch": 0.06, "logps_train/chosen": -60.97021484375, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -126.44784545898438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.21660149097442627, "rewards_train/margins": 0.6342374086380005, "rewards_train/rejected": -0.4176359176635742, "step": 131 }, { "epoch": 0.06, "learning_rate": 4.2622950819672127e-07, "loss": 0.4288, "step": 132 }, { "epoch": 0.06, "logps_train/chosen": -63.455631256103516, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -132.19427490234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.23768912255764008, "rewards_train/margins": 0.6406131237745285, "rewards_train/rejected": -0.4029240012168884, "step": 132 }, { "epoch": 0.07, "learning_rate": 4.2950819672131145e-07, "loss": 0.4268, "step": 133 }, { "epoch": 0.07, "logps_train/chosen": -57.43610382080078, "logps_train/ref_chosen": -59.6875, "logps_train/ref_rejected": -120.125, "logps_train/rejected": -124.15904998779297, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.22679999470710754, "rewards_train/margins": 0.6332326829433441, "rewards_train/rejected": -0.4064326882362366, "step": 133 }, { "epoch": 0.07, "learning_rate": 4.3278688524590163e-07, "loss": 0.4301, "step": 134 }, { "epoch": 0.07, "logps_train/chosen": -61.57699203491211, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -132.34307861328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.22071871161460876, "rewards_train/margins": 0.6840307712554932, "rewards_train/rejected": -0.4633120596408844, "step": 134 }, { "epoch": 0.07, "learning_rate": 4.360655737704918e-07, "loss": 0.4127, "step": 135 }, { "epoch": 0.07, "logps_train/chosen": -61.22221374511719, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -126.94097900390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.220698744058609, "rewards_train/margins": 0.6511738002300262, "rewards_train/rejected": -0.43047505617141724, "step": 135 }, { "epoch": 0.07, "learning_rate": 4.3934426229508194e-07, "loss": 0.4236, "step": 136 }, { "epoch": 0.07, "logps_train/chosen": -61.064754486083984, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -130.7154541015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.22262617945671082, "rewards_train/margins": 0.6362612545490265, "rewards_train/rejected": -0.4136350750923157, "step": 136 }, { "epoch": 0.07, "learning_rate": 4.426229508196721e-07, "loss": 0.4285, "step": 137 }, { "epoch": 0.07, "logps_train/chosen": -62.347984313964844, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -127.89393615722656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2338048666715622, "rewards_train/margins": 0.6886280626058578, "rewards_train/rejected": -0.45482319593429565, "step": 137 }, { "epoch": 0.07, "learning_rate": 4.459016393442623e-07, "loss": 0.4121, "step": 138 }, { "epoch": 0.07, "logps_train/chosen": -63.419029235839844, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -132.68898010253906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.25026026368141174, "rewards_train/margins": 0.6762377321720123, "rewards_train/rejected": -0.4259774684906006, "step": 138 }, { "epoch": 0.07, "learning_rate": 4.4918032786885243e-07, "loss": 0.4157, "step": 139 }, { "epoch": 0.07, "logps_train/chosen": -62.462554931640625, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -132.0875701904297, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.25887104868888855, "rewards_train/margins": 0.7388683259487152, "rewards_train/rejected": -0.47999727725982666, "step": 139 }, { "epoch": 0.07, "learning_rate": 4.5245901639344256e-07, "loss": 0.3962, "step": 140 }, { "epoch": 0.07, "logps_train/chosen": -65.52338409423828, "logps_train/ref_chosen": -68.1875, "logps_train/ref_rejected": -132.125, "logps_train/rejected": -137.13497924804688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2646541893482208, "rewards_train/margins": 0.7628208994865417, "rewards_train/rejected": -0.4981667101383209, "step": 140 }, { "epoch": 0.07, "learning_rate": 4.5573770491803274e-07, "loss": 0.3868, "step": 141 }, { "epoch": 0.07, "logps_train/chosen": -63.493377685546875, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -132.59169006347656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.262967050075531, "rewards_train/margins": 0.7030926644802094, "rewards_train/rejected": -0.44012561440467834, "step": 141 }, { "epoch": 0.07, "learning_rate": 4.590163934426229e-07, "loss": 0.4066, "step": 142 }, { "epoch": 0.07, "logps_train/chosen": -63.748680114746094, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -130.375, "logps_train/rejected": -135.55299377441406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2552589178085327, "rewards_train/margins": 0.7747175693511963, "rewards_train/rejected": -0.5194586515426636, "step": 142 }, { "epoch": 0.07, "learning_rate": 4.622950819672131e-07, "loss": 0.3851, "step": 143 }, { "epoch": 0.07, "logps_train/chosen": -61.135311126708984, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -129.34034729003906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.25922247767448425, "rewards_train/margins": 0.759468287229538, "rewards_train/rejected": -0.5002458095550537, "step": 143 }, { "epoch": 0.07, "learning_rate": 4.6557377049180323e-07, "loss": 0.3896, "step": 144 }, { "epoch": 0.07, "logps_train/chosen": -61.27588653564453, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -133.9249267578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2748525142669678, "rewards_train/margins": 0.8201776742935181, "rewards_train/rejected": -0.5453251600265503, "step": 144 }, { "epoch": 0.07, "learning_rate": 4.688524590163934e-07, "loss": 0.3696, "step": 145 }, { "epoch": 0.07, "logps_train/chosen": -59.475364685058594, "logps_train/ref_chosen": -61.9375, "logps_train/ref_rejected": -119.5, "logps_train/rejected": -124.83975219726562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.24540789425373077, "rewards_train/margins": 0.7772349864244461, "rewards_train/rejected": -0.5318270921707153, "step": 145 }, { "epoch": 0.07, "learning_rate": 4.721311475409836e-07, "loss": 0.3837, "step": 146 }, { "epoch": 0.07, "logps_train/chosen": -61.849029541015625, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -130.96702575683594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2688570022583008, "rewards_train/margins": 0.8031574487686157, "rewards_train/rejected": -0.5343004465103149, "step": 146 }, { "epoch": 0.07, "learning_rate": 4.754098360655737e-07, "loss": 0.3758, "step": 147 }, { "epoch": 0.07, "logps_train/chosen": -58.79374694824219, "logps_train/ref_chosen": -61.5, "logps_train/ref_rejected": -119.9375, "logps_train/rejected": -125.0686264038086, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2713088393211365, "rewards_train/margins": 0.7850079536437988, "rewards_train/rejected": -0.5136991143226624, "step": 147 }, { "epoch": 0.07, "learning_rate": 4.786885245901639e-07, "loss": 0.3815, "step": 148 }, { "epoch": 0.07, "logps_train/chosen": -59.62568283081055, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -123.1875, "logps_train/rejected": -128.80560302734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2627245783805847, "rewards_train/margins": 0.8219950795173645, "rewards_train/rejected": -0.5592705011367798, "step": 148 }, { "epoch": 0.07, "learning_rate": 4.819672131147541e-07, "loss": 0.3738, "step": 149 }, { "epoch": 0.07, "logps_train/chosen": -63.67113494873047, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -130.5947723388672, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.27209553122520447, "rewards_train/margins": 0.8225882947444916, "rewards_train/rejected": -0.5504927635192871, "step": 149 }, { "epoch": 0.07, "learning_rate": 4.852459016393443e-07, "loss": 0.3696, "step": 150 }, { "epoch": 0.07, "logps_train/chosen": -59.42032241821289, "logps_train/ref_chosen": -62.03125, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -130.28512573242188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2620205581188202, "rewards_train/margins": 0.8582593500614166, "rewards_train/rejected": -0.5962387919425964, "step": 150 }, { "epoch": 0.07, "learning_rate": 4.885245901639343e-07, "loss": 0.3589, "step": 151 }, { "epoch": 0.07, "logps_train/chosen": -60.88227081298828, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -132.51034545898438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.28350162506103516, "rewards_train/margins": 0.8816553354263306, "rewards_train/rejected": -0.5981537103652954, "step": 151 }, { "epoch": 0.07, "learning_rate": 4.918032786885245e-07, "loss": 0.3555, "step": 152 }, { "epoch": 0.07, "logps_train/chosen": -60.629703521728516, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -131.53573608398438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2768245339393616, "rewards_train/margins": 0.8671175837516785, "rewards_train/rejected": -0.5902930498123169, "step": 152 }, { "epoch": 0.08, "learning_rate": 4.950819672131147e-07, "loss": 0.3568, "step": 153 }, { "epoch": 0.08, "logps_train/chosen": -59.14410400390625, "logps_train/ref_chosen": -61.9375, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -128.529541015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.27821630239486694, "rewards_train/margins": 0.8757008910179138, "rewards_train/rejected": -0.5974845886230469, "step": 153 }, { "epoch": 0.08, "learning_rate": 4.983606557377049e-07, "loss": 0.3546, "step": 154 }, { "epoch": 0.08, "logps_train/chosen": -58.03562545776367, "logps_train/ref_chosen": -60.65625, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -130.35699462890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2611836791038513, "rewards_train/margins": 0.9049389958381653, "rewards_train/rejected": -0.643755316734314, "step": 154 }, { "epoch": 0.08, "learning_rate": 5.016393442622951e-07, "loss": 0.348, "step": 155 }, { "epoch": 0.08, "logps_train/chosen": -61.28636169433594, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -122.125, "logps_train/rejected": -128.075927734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.30378586053848267, "rewards_train/margins": 0.9010765552520752, "rewards_train/rejected": -0.5972906947135925, "step": 155 }, { "epoch": 0.08, "learning_rate": 5.049180327868852e-07, "loss": 0.3482, "step": 156 }, { "epoch": 0.08, "logps_train/chosen": -61.082786560058594, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -132.8736572265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3161351978778839, "rewards_train/margins": 0.94573774933815, "rewards_train/rejected": -0.6296025514602661, "step": 156 }, { "epoch": 0.08, "learning_rate": 5.081967213114754e-07, "loss": 0.3374, "step": 157 }, { "epoch": 0.08, "logps_train/chosen": -61.470359802246094, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -133.82318115234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2873879075050354, "rewards_train/margins": 0.9396767020225525, "rewards_train/rejected": -0.6522887945175171, "step": 157 }, { "epoch": 0.08, "learning_rate": 5.114754098360655e-07, "loss": 0.3374, "step": 158 }, { "epoch": 0.08, "logps_train/chosen": -61.883628845214844, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -134.77896118164062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.31815582513809204, "rewards_train/margins": 0.9327709078788757, "rewards_train/rejected": -0.6146150827407837, "step": 158 }, { "epoch": 0.08, "learning_rate": 5.147540983606558e-07, "loss": 0.3416, "step": 159 }, { "epoch": 0.08, "logps_train/chosen": -63.24497604370117, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -133.75, "logps_train/rejected": -141.14422607421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3055804967880249, "rewards_train/margins": 1.047151267528534, "rewards_train/rejected": -0.741570770740509, "step": 159 }, { "epoch": 0.08, "learning_rate": 5.180327868852459e-07, "loss": 0.3112, "step": 160 }, { "epoch": 0.08, "logps_train/chosen": -58.686492919921875, "logps_train/ref_chosen": -61.75, "logps_train/ref_rejected": -121.25, "logps_train/rejected": -127.37984466552734, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.30630195140838623, "rewards_train/margins": 0.9190417528152466, "rewards_train/rejected": -0.6127398014068604, "step": 160 }, { "epoch": 0.08, "learning_rate": 5.21311475409836e-07, "loss": 0.3433, "step": 161 }, { "epoch": 0.08, "logps_train/chosen": -63.15380096435547, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -131.25, "logps_train/rejected": -138.19027709960938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3266611099243164, "rewards_train/margins": 1.0190284848213196, "rewards_train/rejected": -0.6923673748970032, "step": 161 }, { "epoch": 0.08, "learning_rate": 5.245901639344262e-07, "loss": 0.3149, "step": 162 }, { "epoch": 0.08, "logps_train/chosen": -60.944488525390625, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -134.0224151611328, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.32080987095832825, "rewards_train/margins": 1.039163738489151, "rewards_train/rejected": -0.7183538675308228, "step": 162 }, { "epoch": 0.08, "learning_rate": 5.278688524590163e-07, "loss": 0.312, "step": 163 }, { "epoch": 0.08, "logps_train/chosen": -59.879722595214844, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -134.17918395996094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3350256085395813, "rewards_train/margins": 1.0556292533874512, "rewards_train/rejected": -0.7206036448478699, "step": 163 }, { "epoch": 0.08, "learning_rate": 5.311475409836065e-07, "loss": 0.3085, "step": 164 }, { "epoch": 0.08, "logps_train/chosen": -59.4306526184082, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -132.12509155273438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3082040846347809, "rewards_train/margins": 1.0654393136501312, "rewards_train/rejected": -0.7572352290153503, "step": 164 }, { "epoch": 0.08, "learning_rate": 5.344262295081967e-07, "loss": 0.3049, "step": 165 }, { "epoch": 0.08, "logps_train/chosen": -60.98137664794922, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -130.81964111328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.332599401473999, "rewards_train/margins": 1.0241342186927795, "rewards_train/rejected": -0.6915348172187805, "step": 165 }, { "epoch": 0.08, "learning_rate": 5.377049180327868e-07, "loss": 0.3152, "step": 166 }, { "epoch": 0.08, "logps_train/chosen": -63.46160125732422, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -135.6849365234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3669748306274414, "rewards_train/margins": 1.103779375553131, "rewards_train/rejected": -0.7368045449256897, "step": 166 }, { "epoch": 0.08, "learning_rate": 5.40983606557377e-07, "loss": 0.2942, "step": 167 }, { "epoch": 0.08, "logps_train/chosen": -58.224464416503906, "logps_train/ref_chosen": -61.34375, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -129.75430297851562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.31241682171821594, "rewards_train/margins": 1.0480027496814728, "rewards_train/rejected": -0.7355859279632568, "step": 167 }, { "epoch": 0.08, "learning_rate": 5.442622950819672e-07, "loss": 0.3102, "step": 168 }, { "epoch": 0.08, "logps_train/chosen": -60.172752380371094, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -135.9460906982422, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.33306649327278137, "rewards_train/margins": 1.1461330354213715, "rewards_train/rejected": -0.8130665421485901, "step": 168 }, { "epoch": 0.08, "learning_rate": 5.475409836065573e-07, "loss": 0.2855, "step": 169 }, { "epoch": 0.08, "logps_train/chosen": -61.20618438720703, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -129.2274169921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.33626627922058105, "rewards_train/margins": 1.0332263708114624, "rewards_train/rejected": -0.6969600915908813, "step": 169 }, { "epoch": 0.08, "learning_rate": 5.508196721311476e-07, "loss": 0.32, "step": 170 }, { "epoch": 0.08, "logps_train/chosen": -61.349891662597656, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -138.8094482421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3543909192085266, "rewards_train/margins": 1.1836256980895996, "rewards_train/rejected": -0.829234778881073, "step": 170 }, { "epoch": 0.08, "learning_rate": 5.540983606557376e-07, "loss": 0.2771, "step": 171 }, { "epoch": 0.08, "logps_train/chosen": -60.17649841308594, "logps_train/ref_chosen": -63.59375, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -135.50439453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.34255552291870117, "rewards_train/margins": 1.1536402702331543, "rewards_train/rejected": -0.8110847473144531, "step": 171 }, { "epoch": 0.08, "learning_rate": 5.573770491803278e-07, "loss": 0.2817, "step": 172 }, { "epoch": 0.08, "logps_train/chosen": -60.04340362548828, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -122.3125, "logps_train/rejected": -130.3034210205078, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3521538972854614, "rewards_train/margins": 1.1521737575531006, "rewards_train/rejected": -0.8000198602676392, "step": 172 }, { "epoch": 0.09, "learning_rate": 5.60655737704918e-07, "loss": 0.2829, "step": 173 }, { "epoch": 0.09, "logps_train/chosen": -59.901390075683594, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -136.36471557617188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.37687787413597107, "rewards_train/margins": 1.1795617043972015, "rewards_train/rejected": -0.8026838302612305, "step": 173 }, { "epoch": 0.09, "learning_rate": 5.639344262295082e-07, "loss": 0.2804, "step": 174 }, { "epoch": 0.09, "logps_train/chosen": -60.22603988647461, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -138.61044311523438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3841589689254761, "rewards_train/margins": 1.2528215646743774, "rewards_train/rejected": -0.8686625957489014, "step": 174 }, { "epoch": 0.09, "learning_rate": 5.672131147540984e-07, "loss": 0.2608, "step": 175 }, { "epoch": 0.09, "logps_train/chosen": -63.26665496826172, "logps_train/ref_chosen": -67.25, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -137.66778564453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.39921337366104126, "rewards_train/margins": 1.2417237758636475, "rewards_train/rejected": -0.8425104022026062, "step": 175 }, { "epoch": 0.09, "learning_rate": 5.704918032786886e-07, "loss": 0.2665, "step": 176 }, { "epoch": 0.09, "logps_train/chosen": -57.234840393066406, "logps_train/ref_chosen": -60.6875, "logps_train/ref_rejected": -119.9375, "logps_train/rejected": -127.85697937011719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.34492406249046326, "rewards_train/margins": 1.135798066854477, "rewards_train/rejected": -0.7908740043640137, "step": 176 }, { "epoch": 0.09, "learning_rate": 5.737704918032786e-07, "loss": 0.2906, "step": 177 }, { "epoch": 0.09, "logps_train/chosen": -59.84308624267578, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -133.80154418945312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3817558288574219, "rewards_train/margins": 1.2973094582557678, "rewards_train/rejected": -0.915553629398346, "step": 177 }, { "epoch": 0.09, "learning_rate": 5.770491803278689e-07, "loss": 0.2515, "step": 178 }, { "epoch": 0.09, "logps_train/chosen": -62.92674255371094, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -137.56414794921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.39685171842575073, "rewards_train/margins": 1.2897403240203857, "rewards_train/rejected": -0.892888605594635, "step": 178 }, { "epoch": 0.09, "learning_rate": 5.80327868852459e-07, "loss": 0.2557, "step": 179 }, { "epoch": 0.09, "logps_train/chosen": -60.548912048339844, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -134.5934600830078, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3982335031032562, "rewards_train/margins": 1.2708611190319061, "rewards_train/rejected": -0.8726276159286499, "step": 179 }, { "epoch": 0.09, "learning_rate": 5.836065573770491e-07, "loss": 0.2568, "step": 180 }, { "epoch": 0.09, "logps_train/chosen": -59.43360900878906, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -121.9375, "logps_train/rejected": -130.13546752929688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.36782050132751465, "rewards_train/margins": 1.189423382282257, "rewards_train/rejected": -0.8216028809547424, "step": 180 }, { "epoch": 0.09, "learning_rate": 5.868852459016394e-07, "loss": 0.2765, "step": 181 }, { "epoch": 0.09, "logps_train/chosen": -58.882293701171875, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -132.98365783691406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3817414343357086, "rewards_train/margins": 1.2714159190654755, "rewards_train/rejected": -0.8896744847297668, "step": 181 }, { "epoch": 0.09, "learning_rate": 5.901639344262294e-07, "loss": 0.2602, "step": 182 }, { "epoch": 0.09, "logps_train/chosen": -58.99748229980469, "logps_train/ref_chosen": -62.84375, "logps_train/ref_rejected": -121.375, "logps_train/rejected": -130.05984497070312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.38474899530410767, "rewards_train/margins": 1.254600167274475, "rewards_train/rejected": -0.8698511719703674, "step": 182 }, { "epoch": 0.09, "learning_rate": 5.934426229508196e-07, "loss": 0.2627, "step": 183 }, { "epoch": 0.09, "logps_train/chosen": -59.102317810058594, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -132.236328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.38984161615371704, "rewards_train/margins": 1.365623116493225, "rewards_train/rejected": -0.9757815003395081, "step": 183 }, { "epoch": 0.09, "learning_rate": 5.967213114754098e-07, "loss": 0.2395, "step": 184 }, { "epoch": 0.09, "logps_train/chosen": -59.18778991699219, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -121.125, "logps_train/rejected": -129.9237060546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3953324258327484, "rewards_train/margins": 1.2783280313014984, "rewards_train/rejected": -0.88299560546875, "step": 184 }, { "epoch": 0.09, "learning_rate": 6e-07, "loss": 0.2588, "step": 185 }, { "epoch": 0.09, "logps_train/chosen": -62.241844177246094, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -137.05186462402344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.41685545444488525, "rewards_train/margins": 1.404756784439087, "rewards_train/rejected": -0.9879013299942017, "step": 185 }, { "epoch": 0.09, "learning_rate": 6.032786885245902e-07, "loss": 0.2325, "step": 186 }, { "epoch": 0.09, "logps_train/chosen": -61.59782791137695, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -135.12933349609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3664869964122772, "rewards_train/margins": 1.380884736776352, "rewards_train/rejected": -1.0143977403640747, "step": 186 }, { "epoch": 0.09, "learning_rate": 6.065573770491803e-07, "loss": 0.2375, "step": 187 }, { "epoch": 0.09, "logps_train/chosen": -58.401885986328125, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -116.9375, "logps_train/rejected": -125.95051574707031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3827604353427887, "rewards_train/margins": 1.2845498621463776, "rewards_train/rejected": -0.9017894268035889, "step": 187 }, { "epoch": 0.09, "learning_rate": 6.098360655737704e-07, "loss": 0.2567, "step": 188 }, { "epoch": 0.09, "logps_train/chosen": -63.44580841064453, "logps_train/ref_chosen": -67.5625, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -137.27444458007812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.410155713558197, "rewards_train/margins": 1.396681010723114, "rewards_train/rejected": -0.986525297164917, "step": 188 }, { "epoch": 0.09, "learning_rate": 6.131147540983607e-07, "loss": 0.2342, "step": 189 }, { "epoch": 0.09, "logps_train/chosen": -62.610145568847656, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -138.80677795410156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4131063222885132, "rewards_train/margins": 1.4817718267440796, "rewards_train/rejected": -1.0686655044555664, "step": 189 }, { "epoch": 0.09, "learning_rate": 6.163934426229508e-07, "loss": 0.2149, "step": 190 }, { "epoch": 0.09, "logps_train/chosen": -61.45061111450195, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -134.447021484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4189036786556244, "rewards_train/margins": 1.4919738471508026, "rewards_train/rejected": -1.0730701684951782, "step": 190 }, { "epoch": 0.09, "learning_rate": 6.19672131147541e-07, "loss": 0.2159, "step": 191 }, { "epoch": 0.09, "logps_train/chosen": -59.77513122558594, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -138.8997344970703, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.43532872200012207, "rewards_train/margins": 1.5398527383804321, "rewards_train/rejected": -1.10452401638031, "step": 191 }, { "epoch": 0.09, "learning_rate": 6.229508196721311e-07, "loss": 0.2133, "step": 192 }, { "epoch": 0.09, "logps_train/chosen": -57.62287139892578, "logps_train/ref_chosen": -61.53125, "logps_train/ref_rejected": -118.3125, "logps_train/rejected": -128.75991821289062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3907400369644165, "rewards_train/margins": 1.4366542100906372, "rewards_train/rejected": -1.0459141731262207, "step": 192 }, { "epoch": 0.1, "learning_rate": 6.262295081967213e-07, "loss": 0.2265, "step": 193 }, { "epoch": 0.1, "logps_train/chosen": -57.999168395996094, "logps_train/ref_chosen": -62.1875, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -134.33226013183594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4170754551887512, "rewards_train/margins": 1.4699304699897766, "rewards_train/rejected": -1.0528550148010254, "step": 193 }, { "epoch": 0.1, "learning_rate": 6.295081967213115e-07, "loss": 0.2211, "step": 194 }, { "epoch": 0.1, "logps_train/chosen": -57.850440979003906, "logps_train/ref_chosen": -62.28125, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -132.69403076171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.44354504346847534, "rewards_train/margins": 1.4881429076194763, "rewards_train/rejected": -1.044597864151001, "step": 194 }, { "epoch": 0.1, "learning_rate": 6.327868852459016e-07, "loss": 0.2189, "step": 195 }, { "epoch": 0.1, "logps_train/chosen": -60.62010955810547, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -137.42410278320312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.44633883237838745, "rewards_train/margins": 1.4868947863578796, "rewards_train/rejected": -1.0405559539794922, "step": 195 }, { "epoch": 0.1, "learning_rate": 6.360655737704918e-07, "loss": 0.2214, "step": 196 }, { "epoch": 0.1, "logps_train/chosen": -57.99637985229492, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -136.94049072265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.42711982131004333, "rewards_train/margins": 1.5198019444942474, "rewards_train/rejected": -1.092682123184204, "step": 196 }, { "epoch": 0.1, "learning_rate": 6.393442622950819e-07, "loss": 0.2088, "step": 197 }, { "epoch": 0.1, "logps_train/chosen": -60.3707275390625, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -135.99325561523438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.46151095628738403, "rewards_train/margins": 1.6020477414131165, "rewards_train/rejected": -1.1405367851257324, "step": 197 }, { "epoch": 0.1, "learning_rate": 6.426229508196721e-07, "loss": 0.1978, "step": 198 }, { "epoch": 0.1, "logps_train/chosen": -58.15861511230469, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -120.0, "logps_train/rejected": -129.9851837158203, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.43369901180267334, "rewards_train/margins": 1.4315824508666992, "rewards_train/rejected": -0.9978834390640259, "step": 198 }, { "epoch": 0.1, "learning_rate": 6.459016393442622e-07, "loss": 0.2313, "step": 199 }, { "epoch": 0.1, "logps_train/chosen": -63.331504821777344, "logps_train/ref_chosen": -67.875, "logps_train/ref_rejected": -133.875, "logps_train/rejected": -147.07363891601562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4580605626106262, "rewards_train/margins": 1.7761653065681458, "rewards_train/rejected": -1.3181047439575195, "step": 199 }, { "epoch": 0.1, "learning_rate": 6.491803278688525e-07, "loss": 0.1726, "step": 200 }, { "epoch": 0.1, "logps_train/chosen": -60.355072021484375, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -136.2931671142578, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4496246874332428, "rewards_train/margins": 1.6061383783817291, "rewards_train/rejected": -1.1565136909484863, "step": 200 }, { "epoch": 0.1, "learning_rate": 6.524590163934426e-07, "loss": 0.1985, "step": 201 }, { "epoch": 0.1, "logps_train/chosen": -62.078102111816406, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -132.875, "logps_train/rejected": -145.19137573242188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4669945240020752, "rewards_train/margins": 1.7030274868011475, "rewards_train/rejected": -1.2360329627990723, "step": 201 }, { "epoch": 0.1, "learning_rate": 6.557377049180327e-07, "loss": 0.181, "step": 202 }, { "epoch": 0.1, "logps_train/chosen": -59.58051300048828, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -137.28500366210938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4371393322944641, "rewards_train/margins": 1.6458153128623962, "rewards_train/rejected": -1.2086759805679321, "step": 202 }, { "epoch": 0.1, "learning_rate": 6.590163934426229e-07, "loss": 0.1938, "step": 203 }, { "epoch": 0.1, "logps_train/chosen": -61.704708099365234, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -141.654296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4789921045303345, "rewards_train/margins": 1.7907112836837769, "rewards_train/rejected": -1.3117191791534424, "step": 203 }, { "epoch": 0.1, "learning_rate": 6.622950819672131e-07, "loss": 0.1705, "step": 204 }, { "epoch": 0.1, "logps_train/chosen": -60.5462760925293, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -137.70242309570312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.45843392610549927, "rewards_train/margins": 1.7296517491340637, "rewards_train/rejected": -1.2712178230285645, "step": 204 }, { "epoch": 0.1, "learning_rate": 6.655737704918033e-07, "loss": 0.1768, "step": 205 }, { "epoch": 0.1, "logps_train/chosen": -59.67721176147461, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -143.4280242919922, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5152620673179626, "rewards_train/margins": 1.8073808550834656, "rewards_train/rejected": -1.292118787765503, "step": 205 }, { "epoch": 0.1, "learning_rate": 6.688524590163935e-07, "loss": 0.169, "step": 206 }, { "epoch": 0.1, "logps_train/chosen": -60.006141662597656, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -137.00852966308594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4466511011123657, "rewards_train/margins": 1.7595646381378174, "rewards_train/rejected": -1.3129135370254517, "step": 206 }, { "epoch": 0.1, "learning_rate": 6.721311475409835e-07, "loss": 0.1739, "step": 207 }, { "epoch": 0.1, "logps_train/chosen": -63.28627014160156, "logps_train/ref_chosen": -68.3125, "logps_train/ref_rejected": -131.125, "logps_train/rejected": -144.56976318359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5033069252967834, "rewards_train/margins": 1.850077211856842, "rewards_train/rejected": -1.3467702865600586, "step": 207 }, { "epoch": 0.1, "learning_rate": 6.754098360655738e-07, "loss": 0.1669, "step": 208 }, { "epoch": 0.1, "logps_train/chosen": -58.86178207397461, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -122.1875, "logps_train/rejected": -134.79844665527344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4201205372810364, "rewards_train/margins": 1.68263179063797, "rewards_train/rejected": -1.2625112533569336, "step": 208 }, { "epoch": 0.1, "learning_rate": 6.786885245901639e-07, "loss": 0.1881, "step": 209 }, { "epoch": 0.1, "logps_train/chosen": -58.88423538208008, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -134.55682373046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.421927809715271, "rewards_train/margins": 1.7367407083511353, "rewards_train/rejected": -1.3148128986358643, "step": 209 }, { "epoch": 0.1, "learning_rate": 6.819672131147541e-07, "loss": 0.1815, "step": 210 }, { "epoch": 0.1, "logps_train/chosen": -60.68842315673828, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -140.0259552001953, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4285699427127838, "rewards_train/margins": 1.8137824833393097, "rewards_train/rejected": -1.3852125406265259, "step": 210 }, { "epoch": 0.1, "learning_rate": 6.852459016393443e-07, "loss": 0.1724, "step": 211 }, { "epoch": 0.1, "logps_train/chosen": -63.897945404052734, "logps_train/ref_chosen": -69.25, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -144.83123779296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5332034230232239, "rewards_train/margins": 1.943719208240509, "rewards_train/rejected": -1.4105157852172852, "step": 211 }, { "epoch": 0.1, "learning_rate": 6.885245901639343e-07, "loss": 0.1497, "step": 212 }, { "epoch": 0.1, "logps_train/chosen": -59.53406524658203, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -137.85235595703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4820426106452942, "rewards_train/margins": 1.9052178263664246, "rewards_train/rejected": -1.4231752157211304, "step": 212 }, { "epoch": 0.1, "learning_rate": 6.918032786885245e-07, "loss": 0.1524, "step": 213 }, { "epoch": 0.1, "logps_train/chosen": -60.21894454956055, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -127.5625, "logps_train/rejected": -142.62588500976562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.491191565990448, "rewards_train/margins": 1.9983598589897156, "rewards_train/rejected": -1.5071682929992676, "step": 213 }, { "epoch": 0.11, "learning_rate": 6.950819672131147e-07, "loss": 0.1446, "step": 214 }, { "epoch": 0.11, "logps_train/chosen": -59.61597442626953, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -137.16078186035156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4231681227684021, "rewards_train/margins": 1.8810915350914001, "rewards_train/rejected": -1.457923412322998, "step": 214 }, { "epoch": 0.11, "learning_rate": 6.983606557377049e-07, "loss": 0.1601, "step": 215 }, { "epoch": 0.11, "logps_train/chosen": -60.36482238769531, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -138.2681121826172, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5073165893554688, "rewards_train/margins": 1.9211399555206299, "rewards_train/rejected": -1.4138233661651611, "step": 215 }, { "epoch": 0.11, "learning_rate": 7.016393442622951e-07, "loss": 0.157, "step": 216 }, { "epoch": 0.11, "logps_train/chosen": -60.97888946533203, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -137.96035766601562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5039665102958679, "rewards_train/margins": 1.9072793126106262, "rewards_train/rejected": -1.4033128023147583, "step": 216 }, { "epoch": 0.11, "learning_rate": 7.049180327868852e-07, "loss": 0.1551, "step": 217 }, { "epoch": 0.11, "logps_train/chosen": -59.649879455566406, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -140.91595458984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5304710865020752, "rewards_train/margins": 2.0126423835754395, "rewards_train/rejected": -1.4821712970733643, "step": 217 }, { "epoch": 0.11, "learning_rate": 7.081967213114753e-07, "loss": 0.1417, "step": 218 }, { "epoch": 0.11, "logps_train/chosen": -60.007503509521484, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -140.789794921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4577459394931793, "rewards_train/margins": 1.9147534668445587, "rewards_train/rejected": -1.4570075273513794, "step": 218 }, { "epoch": 0.11, "learning_rate": 7.114754098360656e-07, "loss": 0.1584, "step": 219 }, { "epoch": 0.11, "logps_train/chosen": -58.884803771972656, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -141.16958618164062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.500923752784729, "rewards_train/margins": 2.022959589958191, "rewards_train/rejected": -1.522035837173462, "step": 219 }, { "epoch": 0.11, "learning_rate": 7.147540983606557e-07, "loss": 0.1456, "step": 220 }, { "epoch": 0.11, "logps_train/chosen": -59.41728973388672, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -139.6460418701172, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5134470462799072, "rewards_train/margins": 2.0578854084014893, "rewards_train/rejected": -1.544438362121582, "step": 220 }, { "epoch": 0.11, "learning_rate": 7.180327868852459e-07, "loss": 0.1321, "step": 221 }, { "epoch": 0.11, "logps_train/chosen": -61.435218811035156, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -143.9536590576172, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4695885181427002, "rewards_train/margins": 2.1298961639404297, "rewards_train/rejected": -1.6603076457977295, "step": 221 }, { "epoch": 0.11, "learning_rate": 7.21311475409836e-07, "loss": 0.1312, "step": 222 }, { "epoch": 0.11, "logps_train/chosen": -58.306610107421875, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -135.89501953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.46899697184562683, "rewards_train/margins": 1.9136750400066376, "rewards_train/rejected": -1.4446780681610107, "step": 222 }, { "epoch": 0.11, "learning_rate": 7.245901639344262e-07, "loss": 0.1571, "step": 223 }, { "epoch": 0.11, "logps_train/chosen": -56.38285827636719, "logps_train/ref_chosen": -60.625, "logps_train/ref_rejected": -121.125, "logps_train/rejected": -136.37203979492188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.42335933446884155, "rewards_train/margins": 1.946354329586029, "rewards_train/rejected": -1.5229949951171875, "step": 223 }, { "epoch": 0.11, "learning_rate": 7.278688524590164e-07, "loss": 0.162, "step": 224 }, { "epoch": 0.11, "logps_train/chosen": -59.47881317138672, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -142.0341796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4350775480270386, "rewards_train/margins": 2.1207698583602905, "rewards_train/rejected": -1.685692310333252, "step": 224 }, { "epoch": 0.11, "learning_rate": 7.311475409836066e-07, "loss": 0.1237, "step": 225 }, { "epoch": 0.11, "logps_train/chosen": -58.22025680541992, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -137.96395874023438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.48949748277664185, "rewards_train/margins": 2.0868690609931946, "rewards_train/rejected": -1.5973715782165527, "step": 225 }, { "epoch": 0.11, "learning_rate": 7.344262295081967e-07, "loss": 0.1368, "step": 226 }, { "epoch": 0.11, "logps_train/chosen": -57.50926208496094, "logps_train/ref_chosen": -62.46875, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -139.74957275390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4945329427719116, "rewards_train/margins": 2.117536187171936, "rewards_train/rejected": -1.6230032444000244, "step": 226 }, { "epoch": 0.11, "learning_rate": 7.377049180327869e-07, "loss": 0.1345, "step": 227 }, { "epoch": 0.11, "logps_train/chosen": -58.34858322143555, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -144.84034729003906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4904347360134125, "rewards_train/margins": 2.2464906871318817, "rewards_train/rejected": -1.7560559511184692, "step": 227 }, { "epoch": 0.11, "learning_rate": 7.40983606557377e-07, "loss": 0.1139, "step": 228 }, { "epoch": 0.11, "logps_train/chosen": -56.29745864868164, "logps_train/ref_chosen": -60.75, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -139.79205322265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.44305694103240967, "rewards_train/margins": 2.0997031927108765, "rewards_train/rejected": -1.6566462516784668, "step": 228 }, { "epoch": 0.11, "learning_rate": 7.442622950819671e-07, "loss": 0.1331, "step": 229 }, { "epoch": 0.11, "logps_train/chosen": -59.9169921875, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -141.60540771484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5438476800918579, "rewards_train/margins": 2.1854430437088013, "rewards_train/rejected": -1.6415953636169434, "step": 229 }, { "epoch": 0.11, "learning_rate": 7.475409836065574e-07, "loss": 0.1245, "step": 230 }, { "epoch": 0.11, "logps_train/chosen": -62.18707275390625, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -146.38348388671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.48251357674598694, "rewards_train/margins": 2.2573348581790924, "rewards_train/rejected": -1.7748212814331055, "step": 230 }, { "epoch": 0.11, "learning_rate": 7.508196721311475e-07, "loss": 0.1183, "step": 231 }, { "epoch": 0.11, "logps_train/chosen": -59.653564453125, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -144.8587646484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4365965723991394, "rewards_train/margins": 2.2423954606056213, "rewards_train/rejected": -1.805798888206482, "step": 231 }, { "epoch": 0.11, "learning_rate": 7.540983606557376e-07, "loss": 0.1195, "step": 232 }, { "epoch": 0.11, "logps_train/chosen": -58.49479293823242, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -142.13897705078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4727862477302551, "rewards_train/margins": 2.2671528458595276, "rewards_train/rejected": -1.7943665981292725, "step": 232 }, { "epoch": 0.11, "learning_rate": 7.573770491803278e-07, "loss": 0.115, "step": 233 }, { "epoch": 0.11, "logps_train/chosen": -57.51796340942383, "logps_train/ref_chosen": -61.96875, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -142.41156005859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.44615286588668823, "rewards_train/margins": 2.307817757129669, "rewards_train/rejected": -1.861664891242981, "step": 233 }, { "epoch": 0.12, "learning_rate": 7.60655737704918e-07, "loss": 0.1106, "step": 234 }, { "epoch": 0.12, "logps_train/chosen": -60.98767852783203, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -131.25, "logps_train/rejected": -150.54312133789062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.47076353430747986, "rewards_train/margins": 2.3969496190547943, "rewards_train/rejected": -1.9261860847473145, "step": 234 }, { "epoch": 0.12, "learning_rate": 7.639344262295082e-07, "loss": 0.1109, "step": 235 }, { "epoch": 0.12, "logps_train/chosen": -58.148345947265625, "logps_train/ref_chosen": -62.59375, "logps_train/ref_rejected": -118.4375, "logps_train/rejected": -136.06092834472656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.44327104091644287, "rewards_train/margins": 2.206395745277405, "rewards_train/rejected": -1.763124704360962, "step": 235 }, { "epoch": 0.12, "learning_rate": 7.672131147540984e-07, "loss": 0.1317, "step": 236 }, { "epoch": 0.12, "logps_train/chosen": -59.105926513671875, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -139.8078155517578, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4838896691799164, "rewards_train/margins": 2.2631095349788666, "rewards_train/rejected": -1.7792198657989502, "step": 236 }, { "epoch": 0.12, "learning_rate": 7.704918032786884e-07, "loss": 0.1184, "step": 237 }, { "epoch": 0.12, "logps_train/chosen": -55.6690673828125, "logps_train/ref_chosen": -60.75, "logps_train/ref_rejected": -119.8125, "logps_train/rejected": -138.00021362304688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5083861947059631, "rewards_train/margins": 2.324277698993683, "rewards_train/rejected": -1.8158915042877197, "step": 237 }, { "epoch": 0.12, "learning_rate": 7.737704918032787e-07, "loss": 0.1157, "step": 238 }, { "epoch": 0.12, "logps_train/chosen": -59.70110321044922, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -143.87249755859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4888741374015808, "rewards_train/margins": 2.273390471935272, "rewards_train/rejected": -1.7845163345336914, "step": 238 }, { "epoch": 0.12, "learning_rate": 7.770491803278688e-07, "loss": 0.1165, "step": 239 }, { "epoch": 0.12, "logps_train/chosen": -57.44194030761719, "logps_train/ref_chosen": -62.28125, "logps_train/ref_rejected": -120.0625, "logps_train/rejected": -138.90753173828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.48437026143074036, "rewards_train/margins": 2.36799493432045, "rewards_train/rejected": -1.8836246728897095, "step": 239 }, { "epoch": 0.12, "learning_rate": 7.80327868852459e-07, "loss": 0.1073, "step": 240 }, { "epoch": 0.12, "logps_train/chosen": -60.46063995361328, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -146.39556884765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5404597520828247, "rewards_train/margins": 2.400818347930908, "rewards_train/rejected": -1.8603585958480835, "step": 240 }, { "epoch": 0.12, "learning_rate": 7.836065573770492e-07, "loss": 0.1083, "step": 241 }, { "epoch": 0.12, "logps_train/chosen": -58.46056365966797, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -144.77935791015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5558480024337769, "rewards_train/margins": 2.4548768997192383, "rewards_train/rejected": -1.8990288972854614, "step": 241 }, { "epoch": 0.12, "learning_rate": 7.868852459016393e-07, "loss": 0.1028, "step": 242 }, { "epoch": 0.12, "logps_train/chosen": -56.86390686035156, "logps_train/ref_chosen": -61.78125, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -144.45484924316406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.490806519985199, "rewards_train/margins": 2.4284788966178894, "rewards_train/rejected": -1.9376723766326904, "step": 242 }, { "epoch": 0.12, "learning_rate": 7.901639344262294e-07, "loss": 0.1102, "step": 243 }, { "epoch": 0.12, "logps_train/chosen": -58.287784576416016, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -144.35443115234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5495905876159668, "rewards_train/margins": 2.524683117866516, "rewards_train/rejected": -1.9750925302505493, "step": 243 }, { "epoch": 0.12, "learning_rate": 7.934426229508196e-07, "loss": 0.0949, "step": 244 }, { "epoch": 0.12, "logps_train/chosen": -55.01891326904297, "logps_train/ref_chosen": -59.75, "logps_train/ref_rejected": -116.3125, "logps_train/rejected": -135.23797607421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4722786545753479, "rewards_train/margins": 2.363604962825775, "rewards_train/rejected": -1.8913263082504272, "step": 244 }, { "epoch": 0.12, "learning_rate": 7.967213114754098e-07, "loss": 0.1131, "step": 245 }, { "epoch": 0.12, "logps_train/chosen": -59.452388763427734, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -146.47718811035156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5064702033996582, "rewards_train/margins": 2.4726457595825195, "rewards_train/rejected": -1.9661755561828613, "step": 245 }, { "epoch": 0.12, "learning_rate": 8e-07, "loss": 0.0986, "step": 246 }, { "epoch": 0.12, "logps_train/chosen": -62.57575225830078, "logps_train/ref_chosen": -67.9375, "logps_train/ref_rejected": -132.125, "logps_train/rejected": -153.4149169921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.535320520401001, "rewards_train/margins": 2.6626029014587402, "rewards_train/rejected": -2.1272823810577393, "step": 246 }, { "epoch": 0.12, "learning_rate": 8.032786885245901e-07, "loss": 0.0836, "step": 247 }, { "epoch": 0.12, "logps_train/chosen": -60.31376647949219, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -150.8427734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.528144896030426, "rewards_train/margins": 2.6607614159584045, "rewards_train/rejected": -2.1326165199279785, "step": 247 }, { "epoch": 0.12, "learning_rate": 8.065573770491802e-07, "loss": 0.0812, "step": 248 }, { "epoch": 0.12, "logps_train/chosen": -59.67120361328125, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -147.36141967773438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5726746916770935, "rewards_train/margins": 2.6016885638237, "rewards_train/rejected": -2.0290138721466064, "step": 248 }, { "epoch": 0.12, "learning_rate": 8.098360655737705e-07, "loss": 0.088, "step": 249 }, { "epoch": 0.12, "logps_train/chosen": -58.83757019042969, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -146.45303344726562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5128251910209656, "rewards_train/margins": 2.6291250586509705, "rewards_train/rejected": -2.116299867630005, "step": 249 }, { "epoch": 0.12, "learning_rate": 8.131147540983606e-07, "loss": 0.0845, "step": 250 }, { "epoch": 0.12, "logps_train/chosen": -60.09439468383789, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -147.79937744140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5456384420394897, "rewards_train/margins": 2.668594241142273, "rewards_train/rejected": -2.122955799102783, "step": 250 }, { "epoch": 0.12, "learning_rate": 8.163934426229508e-07, "loss": 0.0922, "step": 251 }, { "epoch": 0.12, "logps_train/chosen": -60.7847900390625, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -145.46221923828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5296753644943237, "rewards_train/margins": 2.6698416471481323, "rewards_train/rejected": -2.1401662826538086, "step": 251 }, { "epoch": 0.12, "learning_rate": 8.196721311475409e-07, "loss": 0.0826, "step": 252 }, { "epoch": 0.12, "logps_train/chosen": -59.15700912475586, "logps_train/ref_chosen": -62.875, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -148.71365356445312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3730199337005615, "rewards_train/margins": 2.5672364234924316, "rewards_train/rejected": -2.19421648979187, "step": 252 }, { "epoch": 0.12, "learning_rate": 8.229508196721311e-07, "loss": 0.0988, "step": 253 }, { "epoch": 0.12, "logps_train/chosen": -60.381072998046875, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -146.02362060546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5532013177871704, "rewards_train/margins": 2.67245876789093, "rewards_train/rejected": -2.1192574501037598, "step": 253 }, { "epoch": 0.13, "learning_rate": 8.262295081967213e-07, "loss": 0.0833, "step": 254 }, { "epoch": 0.13, "logps_train/chosen": -58.39197540283203, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -146.19874572753906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6108999252319336, "rewards_train/margins": 2.7062623500823975, "rewards_train/rejected": -2.095362424850464, "step": 254 }, { "epoch": 0.13, "learning_rate": 8.295081967213115e-07, "loss": 0.0783, "step": 255 }, { "epoch": 0.13, "logps_train/chosen": -57.53927230834961, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -149.2295379638672, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.47058454155921936, "rewards_train/margins": 2.6324054300785065, "rewards_train/rejected": -2.161820888519287, "step": 255 }, { "epoch": 0.13, "learning_rate": 8.327868852459016e-07, "loss": 0.0867, "step": 256 }, { "epoch": 0.13, "logps_train/chosen": -59.48403549194336, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -149.52716064453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5893406867980957, "rewards_train/margins": 2.826286792755127, "rewards_train/rejected": -2.2369461059570312, "step": 256 }, { "epoch": 0.13, "learning_rate": 8.360655737704919e-07, "loss": 0.0819, "step": 257 }, { "epoch": 0.13, "logps_train/chosen": -56.41729736328125, "logps_train/ref_chosen": -61.34375, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -146.55050659179688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.49171754717826843, "rewards_train/margins": 2.89354607462883, "rewards_train/rejected": -2.4018285274505615, "step": 257 }, { "epoch": 0.13, "learning_rate": 8.393442622950819e-07, "loss": 0.0734, "step": 258 }, { "epoch": 0.13, "logps_train/chosen": -59.41978454589844, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -145.0747528076172, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5428362488746643, "rewards_train/margins": 2.6854687333106995, "rewards_train/rejected": -2.142632484436035, "step": 258 }, { "epoch": 0.13, "learning_rate": 8.42622950819672e-07, "loss": 0.0893, "step": 259 }, { "epoch": 0.13, "logps_train/chosen": -57.404090881347656, "logps_train/ref_chosen": -62.65625, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -146.72515869140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5241905450820923, "rewards_train/margins": 2.7594019174575806, "rewards_train/rejected": -2.2352113723754883, "step": 259 }, { "epoch": 0.13, "learning_rate": 8.459016393442623e-07, "loss": 0.0781, "step": 260 }, { "epoch": 0.13, "logps_train/chosen": -58.79511642456055, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -149.07278442382812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5229296684265137, "rewards_train/margins": 2.8912441730499268, "rewards_train/rejected": -2.368314504623413, "step": 260 }, { "epoch": 0.13, "learning_rate": 8.491803278688524e-07, "loss": 0.0763, "step": 261 }, { "epoch": 0.13, "logps_train/chosen": -58.521331787109375, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -151.60183715820312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5160306692123413, "rewards_train/margins": 2.977289319038391, "rewards_train/rejected": -2.46125864982605, "step": 261 }, { "epoch": 0.13, "learning_rate": 8.524590163934425e-07, "loss": 0.0633, "step": 262 }, { "epoch": 0.13, "logps_train/chosen": -59.13655090332031, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -152.97433471679688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6346358060836792, "rewards_train/margins": 3.0631242990493774, "rewards_train/rejected": -2.4284884929656982, "step": 262 }, { "epoch": 0.13, "learning_rate": 8.557377049180327e-07, "loss": 0.0559, "step": 263 }, { "epoch": 0.13, "logps_train/chosen": -60.543495178222656, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -152.87265014648438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5551720857620239, "rewards_train/margins": 3.0564500093460083, "rewards_train/rejected": -2.5012779235839844, "step": 263 }, { "epoch": 0.13, "learning_rate": 8.590163934426229e-07, "loss": 0.0704, "step": 264 }, { "epoch": 0.13, "logps_train/chosen": -58.84012222290039, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -148.9395751953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5949429273605347, "rewards_train/margins": 2.8892911672592163, "rewards_train/rejected": -2.2943482398986816, "step": 264 }, { "epoch": 0.13, "learning_rate": 8.622950819672131e-07, "loss": 0.0773, "step": 265 }, { "epoch": 0.13, "logps_train/chosen": -57.90045928955078, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -119.625, "logps_train/rejected": -143.55027770996094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5159111618995667, "rewards_train/margins": 2.910001575946808, "rewards_train/rejected": -2.394090414047241, "step": 265 }, { "epoch": 0.13, "learning_rate": 8.655737704918033e-07, "loss": 0.0722, "step": 266 }, { "epoch": 0.13, "logps_train/chosen": -59.34675598144531, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -149.32130432128906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5487719774246216, "rewards_train/margins": 2.951214909553528, "rewards_train/rejected": -2.4024429321289062, "step": 266 }, { "epoch": 0.13, "learning_rate": 8.688524590163933e-07, "loss": 0.0664, "step": 267 }, { "epoch": 0.13, "logps_train/chosen": -59.147682189941406, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -152.15499877929688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4966579079627991, "rewards_train/margins": 2.93867164850235, "rewards_train/rejected": -2.442013740539551, "step": 267 }, { "epoch": 0.13, "learning_rate": 8.721311475409836e-07, "loss": 0.0754, "step": 268 }, { "epoch": 0.13, "logps_train/chosen": -57.07971954345703, "logps_train/ref_chosen": -62.03125, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -149.21417236328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.49583643674850464, "rewards_train/margins": 2.935123383998871, "rewards_train/rejected": -2.439286947250366, "step": 268 }, { "epoch": 0.13, "learning_rate": 8.754098360655737e-07, "loss": 0.0699, "step": 269 }, { "epoch": 0.13, "logps_train/chosen": -55.12225341796875, "logps_train/ref_chosen": -59.8125, "logps_train/ref_rejected": -116.8125, "logps_train/rejected": -141.21731567382812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.46856075525283813, "rewards_train/margins": 2.9074788689613342, "rewards_train/rejected": -2.438918113708496, "step": 269 }, { "epoch": 0.13, "learning_rate": 8.786885245901639e-07, "loss": 0.0649, "step": 270 }, { "epoch": 0.13, "logps_train/chosen": -60.43303680419922, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -150.6695098876953, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5159733891487122, "rewards_train/margins": 2.994544804096222, "rewards_train/rejected": -2.4785714149475098, "step": 270 }, { "epoch": 0.13, "learning_rate": 8.819672131147541e-07, "loss": 0.0627, "step": 271 }, { "epoch": 0.13, "logps_train/chosen": -59.94463348388672, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -149.5438690185547, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5669134855270386, "rewards_train/margins": 3.0189565420150757, "rewards_train/rejected": -2.452043056488037, "step": 271 }, { "epoch": 0.13, "learning_rate": 8.852459016393443e-07, "loss": 0.0692, "step": 272 }, { "epoch": 0.13, "logps_train/chosen": -60.13496398925781, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -152.7018280029297, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5109666585922241, "rewards_train/margins": 3.0986303091049194, "rewards_train/rejected": -2.5876636505126953, "step": 272 }, { "epoch": 0.13, "learning_rate": 8.885245901639344e-07, "loss": 0.0579, "step": 273 }, { "epoch": 0.13, "logps_train/chosen": -59.71062469482422, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -154.0432891845703, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.48699426651000977, "rewards_train/margins": 3.2946934700012207, "rewards_train/rejected": -2.807699203491211, "step": 273 }, { "epoch": 0.13, "learning_rate": 8.918032786885246e-07, "loss": 0.0494, "step": 274 }, { "epoch": 0.13, "logps_train/chosen": -59.59662628173828, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -131.125, "logps_train/rejected": -160.3369140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5416067242622375, "rewards_train/margins": 3.459867775440216, "rewards_train/rejected": -2.9182610511779785, "step": 274 }, { "epoch": 0.14, "learning_rate": 8.950819672131147e-07, "loss": 0.0432, "step": 275 }, { "epoch": 0.14, "logps_train/chosen": -58.89588165283203, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -153.2655029296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.542785108089447, "rewards_train/margins": 3.2063960433006287, "rewards_train/rejected": -2.6636109352111816, "step": 275 }, { "epoch": 0.14, "learning_rate": 8.983606557377049e-07, "loss": 0.0536, "step": 276 }, { "epoch": 0.14, "logps_train/chosen": -63.60285568237305, "logps_train/ref_chosen": -69.125, "logps_train/ref_rejected": -131.25, "logps_train/rejected": -158.65269470214844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5521655082702637, "rewards_train/margins": 3.2911159992218018, "rewards_train/rejected": -2.738950490951538, "step": 276 }, { "epoch": 0.14, "learning_rate": 9.01639344262295e-07, "loss": 0.0591, "step": 277 }, { "epoch": 0.14, "logps_train/chosen": -61.43206787109375, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -156.83465576171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5000057220458984, "rewards_train/margins": 3.2795169353485107, "rewards_train/rejected": -2.7795112133026123, "step": 277 }, { "epoch": 0.14, "learning_rate": 9.049180327868851e-07, "loss": 0.0549, "step": 278 }, { "epoch": 0.14, "logps_train/chosen": -56.540985107421875, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -150.38328552246094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6067902445793152, "rewards_train/margins": 3.234327495098114, "rewards_train/rejected": -2.627537250518799, "step": 278 }, { "epoch": 0.14, "learning_rate": 9.081967213114754e-07, "loss": 0.0502, "step": 279 }, { "epoch": 0.14, "logps_train/chosen": -57.36023712158203, "logps_train/ref_chosen": -62.21875, "logps_train/ref_rejected": -120.625, "logps_train/rejected": -146.05416870117188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4847280979156494, "rewards_train/margins": 3.0290114879608154, "rewards_train/rejected": -2.544283390045166, "step": 279 }, { "epoch": 0.14, "learning_rate": 9.114754098360655e-07, "loss": 0.0742, "step": 280 }, { "epoch": 0.14, "logps_train/chosen": -58.4703369140625, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -121.0625, "logps_train/rejected": -147.65109252929688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4251832962036133, "rewards_train/margins": 3.0829193592071533, "rewards_train/rejected": -2.65773606300354, "step": 280 }, { "epoch": 0.14, "learning_rate": 9.147540983606557e-07, "loss": 0.0695, "step": 281 }, { "epoch": 0.14, "logps_train/chosen": -56.87605285644531, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -121.5, "logps_train/rejected": -147.75259399414062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5496994256973267, "rewards_train/margins": 3.1762274503707886, "rewards_train/rejected": -2.626528024673462, "step": 281 }, { "epoch": 0.14, "learning_rate": 9.180327868852458e-07, "loss": 0.0581, "step": 282 }, { "epoch": 0.14, "logps_train/chosen": -58.60188293457031, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -123.1875, "logps_train/rejected": -153.1268310546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.493522584438324, "rewards_train/margins": 3.488628327846527, "rewards_train/rejected": -2.995105743408203, "step": 282 }, { "epoch": 0.14, "learning_rate": 9.21311475409836e-07, "loss": 0.0446, "step": 283 }, { "epoch": 0.14, "logps_train/chosen": -61.96510314941406, "logps_train/ref_chosen": -67.75, "logps_train/ref_rejected": -130.625, "logps_train/rejected": -159.54074096679688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5790024995803833, "rewards_train/margins": 3.4739471673965454, "rewards_train/rejected": -2.894944667816162, "step": 283 }, { "epoch": 0.14, "learning_rate": 9.245901639344262e-07, "loss": 0.0471, "step": 284 }, { "epoch": 0.14, "logps_train/chosen": -62.6416015625, "logps_train/ref_chosen": -67.6875, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -155.02700805664062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.506640613079071, "rewards_train/margins": 3.410414159297943, "rewards_train/rejected": -2.903773546218872, "step": 284 }, { "epoch": 0.14, "learning_rate": 9.278688524590164e-07, "loss": 0.0489, "step": 285 }, { "epoch": 0.14, "logps_train/chosen": -61.769874572753906, "logps_train/ref_chosen": -67.75, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -157.20950317382812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5997215509414673, "rewards_train/margins": 3.4284359216690063, "rewards_train/rejected": -2.828714370727539, "step": 285 }, { "epoch": 0.14, "learning_rate": 9.311475409836065e-07, "loss": 0.0443, "step": 286 }, { "epoch": 0.14, "logps_train/chosen": -58.99978256225586, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -122.4375, "logps_train/rejected": -149.60784912109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5467504262924194, "rewards_train/margins": 3.261637568473816, "rewards_train/rejected": -2.7148871421813965, "step": 286 }, { "epoch": 0.14, "learning_rate": 9.344262295081968e-07, "loss": 0.0616, "step": 287 }, { "epoch": 0.14, "logps_train/chosen": -59.04656219482422, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -152.47525024414062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.48201388120651245, "rewards_train/margins": 3.314647614955902, "rewards_train/rejected": -2.8326337337493896, "step": 287 }, { "epoch": 0.14, "learning_rate": 9.377049180327868e-07, "loss": 0.0528, "step": 288 }, { "epoch": 0.14, "logps_train/chosen": -58.31793975830078, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -122.6875, "logps_train/rejected": -151.97715759277344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5188407301902771, "rewards_train/margins": 3.4484896063804626, "rewards_train/rejected": -2.9296488761901855, "step": 288 }, { "epoch": 0.14, "learning_rate": 9.40983606557377e-07, "loss": 0.0536, "step": 289 }, { "epoch": 0.14, "logps_train/chosen": -56.303916931152344, "logps_train/ref_chosen": -60.125, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -154.003173828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3812046945095062, "rewards_train/margins": 3.4809370934963226, "rewards_train/rejected": -3.0997323989868164, "step": 289 }, { "epoch": 0.14, "learning_rate": 9.442622950819672e-07, "loss": 0.0555, "step": 290 }, { "epoch": 0.14, "logps_train/chosen": -56.152732849121094, "logps_train/ref_chosen": -61.09375, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -150.14697265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4933445453643799, "rewards_train/margins": 3.4052586555480957, "rewards_train/rejected": -2.911914110183716, "step": 290 }, { "epoch": 0.14, "learning_rate": 9.475409836065573e-07, "loss": 0.053, "step": 291 }, { "epoch": 0.14, "logps_train/chosen": -59.17628479003906, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -127.5625, "logps_train/rejected": -155.68917846679688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5318830609321594, "rewards_train/margins": 3.3446486592292786, "rewards_train/rejected": -2.812765598297119, "step": 291 }, { "epoch": 0.14, "learning_rate": 9.508196721311474e-07, "loss": 0.0535, "step": 292 }, { "epoch": 0.14, "logps_train/chosen": -58.88260269165039, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -154.13092041015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5564663410186768, "rewards_train/margins": 3.5945093631744385, "rewards_train/rejected": -3.0380430221557617, "step": 292 }, { "epoch": 0.14, "learning_rate": 9.540983606557376e-07, "loss": 0.0431, "step": 293 }, { "epoch": 0.14, "logps_train/chosen": -60.46954345703125, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -156.64413452148438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.47570210695266724, "rewards_train/margins": 3.496951401233673, "rewards_train/rejected": -3.021249294281006, "step": 293 }, { "epoch": 0.14, "learning_rate": 9.573770491803278e-07, "loss": 0.044, "step": 294 }, { "epoch": 0.14, "logps_train/chosen": -57.470947265625, "logps_train/ref_chosen": -61.875, "logps_train/ref_rejected": -121.25, "logps_train/rejected": -150.74444580078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.44035661220550537, "rewards_train/margins": 3.386480689048767, "rewards_train/rejected": -2.9461240768432617, "step": 294 }, { "epoch": 0.15, "learning_rate": 9.60655737704918e-07, "loss": 0.0522, "step": 295 }, { "epoch": 0.15, "logps_train/chosen": -58.805946350097656, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -157.98147583007812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5777062177658081, "rewards_train/margins": 3.7589110136032104, "rewards_train/rejected": -3.1812047958374023, "step": 295 }, { "epoch": 0.15, "learning_rate": 9.639344262295082e-07, "loss": 0.0332, "step": 296 }, { "epoch": 0.15, "logps_train/chosen": -59.789710998535156, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -161.53968811035156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5105799436569214, "rewards_train/margins": 3.8601545095443726, "rewards_train/rejected": -3.349574565887451, "step": 296 }, { "epoch": 0.15, "learning_rate": 9.672131147540984e-07, "loss": 0.0302, "step": 297 }, { "epoch": 0.15, "logps_train/chosen": -55.727943420410156, "logps_train/ref_chosen": -61.3125, "logps_train/ref_rejected": -122.1875, "logps_train/rejected": -152.74911499023438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5603599548339844, "rewards_train/margins": 3.6158363819122314, "rewards_train/rejected": -3.055476427078247, "step": 297 }, { "epoch": 0.15, "learning_rate": 9.704918032786885e-07, "loss": 0.0431, "step": 298 }, { "epoch": 0.15, "logps_train/chosen": -59.015281677246094, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -154.5108184814453, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5183204412460327, "rewards_train/margins": 3.6329761743545532, "rewards_train/rejected": -3.1146557331085205, "step": 298 }, { "epoch": 0.15, "learning_rate": 9.737704918032787e-07, "loss": 0.045, "step": 299 }, { "epoch": 0.15, "logps_train/chosen": -60.01873016357422, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -159.5787353515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5553538203239441, "rewards_train/margins": 3.6373483538627625, "rewards_train/rejected": -3.0819945335388184, "step": 299 }, { "epoch": 0.15, "learning_rate": 9.770491803278687e-07, "loss": 0.0474, "step": 300 }, { "epoch": 0.15, "logps_train/chosen": -57.4302864074707, "logps_train/ref_chosen": -61.65625, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -159.84490966796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4233287572860718, "rewards_train/margins": 3.7535237073898315, "rewards_train/rejected": -3.3301949501037598, "step": 300 }, { "epoch": 0.15, "learning_rate": 9.80327868852459e-07, "loss": 0.038, "step": 301 }, { "epoch": 0.15, "logps_train/chosen": -57.060054779052734, "logps_train/ref_chosen": -61.0625, "logps_train/ref_rejected": -121.875, "logps_train/rejected": -154.0571746826172, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4018559455871582, "rewards_train/margins": 3.6235408782958984, "rewards_train/rejected": -3.2216849327087402, "step": 301 }, { "epoch": 0.15, "learning_rate": 9.83606557377049e-07, "loss": 0.0436, "step": 302 }, { "epoch": 0.15, "logps_train/chosen": -59.479774475097656, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -163.8692626953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.48957112431526184, "rewards_train/margins": 3.88050177693367, "rewards_train/rejected": -3.390930652618408, "step": 302 }, { "epoch": 0.15, "learning_rate": 9.868852459016394e-07, "loss": 0.0363, "step": 303 }, { "epoch": 0.15, "logps_train/chosen": -57.39988327026367, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -119.375, "logps_train/rejected": -151.19837951660156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5496111512184143, "rewards_train/margins": 3.7292147278785706, "rewards_train/rejected": -3.1796035766601562, "step": 303 }, { "epoch": 0.15, "learning_rate": 9.901639344262294e-07, "loss": 0.0388, "step": 304 }, { "epoch": 0.15, "logps_train/chosen": -56.19861602783203, "logps_train/ref_chosen": -60.59375, "logps_train/ref_rejected": -119.3125, "logps_train/rejected": -150.4505615234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.44048991799354553, "rewards_train/margins": 3.555516391992569, "rewards_train/rejected": -3.1150264739990234, "step": 304 }, { "epoch": 0.15, "learning_rate": 9.934426229508196e-07, "loss": 0.0466, "step": 305 }, { "epoch": 0.15, "logps_train/chosen": -57.839447021484375, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -122.0625, "logps_train/rejected": -153.72158813476562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5997465252876282, "rewards_train/margins": 3.7671201825141907, "rewards_train/rejected": -3.1673736572265625, "step": 305 }, { "epoch": 0.15, "learning_rate": 9.967213114754098e-07, "loss": 0.0448, "step": 306 }, { "epoch": 0.15, "logps_train/chosen": -59.672000885009766, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -162.45462036132812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.42315611243247986, "rewards_train/margins": 3.9613917768001556, "rewards_train/rejected": -3.538235664367676, "step": 306 }, { "epoch": 0.15, "learning_rate": 1e-06, "loss": 0.0343, "step": 307 }, { "epoch": 0.15, "logps_train/chosen": -60.040470123291016, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -163.39405822753906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5852110385894775, "rewards_train/margins": 3.945026397705078, "rewards_train/rejected": -3.3598153591156006, "step": 307 }, { "epoch": 0.15, "learning_rate": 9.99999926348313e-07, "loss": 0.036, "step": 308 }, { "epoch": 0.15, "logps_train/chosen": -57.36831283569336, "logps_train/ref_chosen": -62.0625, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -155.46505737304688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4678073525428772, "rewards_train/margins": 3.718365252017975, "rewards_train/rejected": -3.2505578994750977, "step": 308 }, { "epoch": 0.15, "learning_rate": 9.999997053932738e-07, "loss": 0.0434, "step": 309 }, { "epoch": 0.15, "logps_train/chosen": -56.9918212890625, "logps_train/ref_chosen": -62.0, "logps_train/ref_rejected": -120.5, "logps_train/rejected": -152.72793579101562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5016967058181763, "rewards_train/margins": 3.7214642763137817, "rewards_train/rejected": -3.2197675704956055, "step": 309 }, { "epoch": 0.15, "learning_rate": 9.999993371349476e-07, "loss": 0.0436, "step": 310 }, { "epoch": 0.15, "logps_train/chosen": -58.09757995605469, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -161.0369110107422, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5673907995223999, "rewards_train/margins": 4.014929413795471, "rewards_train/rejected": -3.4475386142730713, "step": 310 }, { "epoch": 0.15, "learning_rate": 9.99998821573443e-07, "loss": 0.0291, "step": 311 }, { "epoch": 0.15, "logps_train/chosen": -59.427955627441406, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -155.76727294921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.46218442916870117, "rewards_train/margins": 3.7483861446380615, "rewards_train/rejected": -3.2862017154693604, "step": 311 }, { "epoch": 0.15, "learning_rate": 9.999981587089114e-07, "loss": 0.0418, "step": 312 }, { "epoch": 0.15, "logps_train/chosen": -60.519554138183594, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -163.22219848632812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4551249146461487, "rewards_train/margins": 4.058791220188141, "rewards_train/rejected": -3.603666305541992, "step": 312 }, { "epoch": 0.15, "learning_rate": 9.999973485415485e-07, "loss": 0.0332, "step": 313 }, { "epoch": 0.15, "logps_train/chosen": -59.135902404785156, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -156.88211059570312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5324058532714844, "rewards_train/margins": 3.957141399383545, "rewards_train/rejected": -3.4247355461120605, "step": 313 }, { "epoch": 0.15, "learning_rate": 9.999963910715928e-07, "loss": 0.0282, "step": 314 }, { "epoch": 0.15, "logps_train/chosen": -57.575592041015625, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -156.83169555664062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5991300344467163, "rewards_train/margins": 4.105296492576599, "rewards_train/rejected": -3.506166458129883, "step": 314 }, { "epoch": 0.16, "learning_rate": 9.999952862993264e-07, "loss": 0.0321, "step": 315 }, { "epoch": 0.16, "logps_train/chosen": -57.97962951660156, "logps_train/ref_chosen": -62.34375, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -160.7740478515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4349474310874939, "rewards_train/margins": 4.07343715429306, "rewards_train/rejected": -3.6384897232055664, "step": 315 }, { "epoch": 0.16, "learning_rate": 9.999940342250748e-07, "loss": 0.0296, "step": 316 }, { "epoch": 0.16, "logps_train/chosen": -59.862281799316406, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -160.38986206054688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5206076502799988, "rewards_train/margins": 4.023362576961517, "rewards_train/rejected": -3.5027549266815186, "step": 316 }, { "epoch": 0.16, "learning_rate": 9.99992634849207e-07, "loss": 0.0264, "step": 317 }, { "epoch": 0.16, "logps_train/chosen": -60.61077880859375, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -166.1708526611328, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.43784773349761963, "rewards_train/margins": 4.080617547035217, "rewards_train/rejected": -3.6427698135375977, "step": 317 }, { "epoch": 0.16, "learning_rate": 9.999910881721351e-07, "loss": 0.0307, "step": 318 }, { "epoch": 0.16, "logps_train/chosen": -58.02320098876953, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -156.8447265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5609121322631836, "rewards_train/margins": 4.0210676193237305, "rewards_train/rejected": -3.460155487060547, "step": 318 }, { "epoch": 0.16, "learning_rate": 9.999893941943147e-07, "loss": 0.0361, "step": 319 }, { "epoch": 0.16, "logps_train/chosen": -60.41305160522461, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -165.1475830078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4980504512786865, "rewards_train/margins": 4.268522024154663, "rewards_train/rejected": -3.7704715728759766, "step": 319 }, { "epoch": 0.16, "learning_rate": 9.999875529162452e-07, "loss": 0.0275, "step": 320 }, { "epoch": 0.16, "logps_train/chosen": -58.218833923339844, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -121.1875, "logps_train/rejected": -156.2013397216797, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5250405669212341, "rewards_train/margins": 4.025448501110077, "rewards_train/rejected": -3.5004079341888428, "step": 320 }, { "epoch": 0.16, "learning_rate": 9.999855643384685e-07, "loss": 0.0293, "step": 321 }, { "epoch": 0.16, "logps_train/chosen": -59.87950134277344, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -163.18023681640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5194962024688721, "rewards_train/margins": 4.224335670471191, "rewards_train/rejected": -3.7048394680023193, "step": 321 }, { "epoch": 0.16, "learning_rate": 9.999834284615709e-07, "loss": 0.023, "step": 322 }, { "epoch": 0.16, "logps_train/chosen": -61.18673324584961, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -161.932373046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4429965615272522, "rewards_train/margins": 4.177444517612457, "rewards_train/rejected": -3.734447956085205, "step": 322 }, { "epoch": 0.16, "learning_rate": 9.999811452861816e-07, "loss": 0.0343, "step": 323 }, { "epoch": 0.16, "logps_train/chosen": -59.81873321533203, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -120.6875, "logps_train/rejected": -157.61866760253906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4914175868034363, "rewards_train/margins": 4.181799113750458, "rewards_train/rejected": -3.6903815269470215, "step": 323 }, { "epoch": 0.16, "learning_rate": 9.99978714812973e-07, "loss": 0.0285, "step": 324 }, { "epoch": 0.16, "logps_train/chosen": -58.78413772583008, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -161.89764404296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5048381686210632, "rewards_train/margins": 4.267356097698212, "rewards_train/rejected": -3.7625179290771484, "step": 324 }, { "epoch": 0.16, "learning_rate": 9.999761370426613e-07, "loss": 0.0234, "step": 325 }, { "epoch": 0.16, "logps_train/chosen": -60.599571228027344, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -162.13766479492188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5184118747711182, "rewards_train/margins": 4.2331554889678955, "rewards_train/rejected": -3.7147436141967773, "step": 325 }, { "epoch": 0.16, "learning_rate": 9.99973411976006e-07, "loss": 0.0258, "step": 326 }, { "epoch": 0.16, "logps_train/chosen": -61.264198303222656, "logps_train/ref_chosen": -67.375, "logps_train/ref_rejected": -131.125, "logps_train/rejected": -169.88336181640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6139606833457947, "rewards_train/margins": 4.482861936092377, "rewards_train/rejected": -3.868901252746582, "step": 326 }, { "epoch": 0.16, "learning_rate": 9.999705396138098e-07, "loss": 0.0162, "step": 327 }, { "epoch": 0.16, "logps_train/chosen": -57.51762771606445, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -164.74075317382812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5876904726028442, "rewards_train/margins": 4.363181948661804, "rewards_train/rejected": -3.77549147605896, "step": 327 }, { "epoch": 0.16, "learning_rate": 9.99967519956919e-07, "loss": 0.0214, "step": 328 }, { "epoch": 0.16, "logps_train/chosen": -58.24337387084961, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -167.51882934570312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6181430816650391, "rewards_train/margins": 4.361334323883057, "rewards_train/rejected": -3.7431912422180176, "step": 328 }, { "epoch": 0.16, "learning_rate": 9.99964353006223e-07, "loss": 0.0274, "step": 329 }, { "epoch": 0.16, "logps_train/chosen": -59.63223648071289, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -160.77044677734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5167078971862793, "rewards_train/margins": 4.344192266464233, "rewards_train/rejected": -3.827484369277954, "step": 329 }, { "epoch": 0.16, "learning_rate": 9.999610387626552e-07, "loss": 0.0254, "step": 330 }, { "epoch": 0.16, "logps_train/chosen": -61.34794616699219, "logps_train/ref_chosen": -67.5, "logps_train/ref_rejected": -130.625, "logps_train/rejected": -168.4917755126953, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6144243478775024, "rewards_train/margins": 4.400613903999329, "rewards_train/rejected": -3.786189556121826, "step": 330 }, { "epoch": 0.16, "learning_rate": 9.999575772271915e-07, "loss": 0.0247, "step": 331 }, { "epoch": 0.16, "logps_train/chosen": -61.40451431274414, "logps_train/ref_chosen": -67.1875, "logps_train/ref_rejected": -132.875, "logps_train/rejected": -171.71434020996094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.580105185508728, "rewards_train/margins": 4.463844180107117, "rewards_train/rejected": -3.8837389945983887, "step": 331 }, { "epoch": 0.16, "learning_rate": 9.99953968400852e-07, "loss": 0.0209, "step": 332 }, { "epoch": 0.16, "logps_train/chosen": -57.949859619140625, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -163.01974487304688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4772312343120575, "rewards_train/margins": 4.329890459775925, "rewards_train/rejected": -3.852659225463867, "step": 332 }, { "epoch": 0.16, "learning_rate": 9.999502122847e-07, "loss": 0.0258, "step": 333 }, { "epoch": 0.16, "logps_train/chosen": -57.367218017578125, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -165.74508666992188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5873260498046875, "rewards_train/margins": 4.507927417755127, "rewards_train/rejected": -3.9206013679504395, "step": 333 }, { "epoch": 0.16, "learning_rate": 9.99946308879842e-07, "loss": 0.0269, "step": 334 }, { "epoch": 0.16, "logps_train/chosen": -62.67935562133789, "logps_train/ref_chosen": -67.3125, "logps_train/ref_rejected": -135.625, "logps_train/rejected": -175.53842163085938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.46321678161621094, "rewards_train/margins": 4.455144882202148, "rewards_train/rejected": -3.9919281005859375, "step": 334 }, { "epoch": 0.16, "learning_rate": 9.999422581874276e-07, "loss": 0.021, "step": 335 }, { "epoch": 0.16, "logps_train/chosen": -59.87214660644531, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -123.1875, "logps_train/rejected": -160.15646362304688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5619063973426819, "rewards_train/margins": 4.260071337223053, "rewards_train/rejected": -3.698164939880371, "step": 335 }, { "epoch": 0.17, "learning_rate": 9.999380602086506e-07, "loss": 0.0303, "step": 336 }, { "epoch": 0.17, "logps_train/chosen": -58.91260528564453, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -168.123779296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5585929155349731, "rewards_train/margins": 4.581907391548157, "rewards_train/rejected": -4.023314476013184, "step": 336 }, { "epoch": 0.17, "learning_rate": 9.999337149447476e-07, "loss": 0.0172, "step": 337 }, { "epoch": 0.17, "logps_train/chosen": -59.2064208984375, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -164.0636444091797, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5066039562225342, "rewards_train/margins": 4.388066530227661, "rewards_train/rejected": -3.881462574005127, "step": 337 }, { "epoch": 0.17, "learning_rate": 9.999292223969987e-07, "loss": 0.028, "step": 338 }, { "epoch": 0.17, "logps_train/chosen": -63.773590087890625, "logps_train/ref_chosen": -67.5625, "logps_train/ref_rejected": -132.375, "logps_train/rejected": -175.23606872558594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3777679204940796, "rewards_train/margins": 4.662214636802673, "rewards_train/rejected": -4.284446716308594, "step": 338 }, { "epoch": 0.17, "learning_rate": 9.999245825667275e-07, "loss": 0.0194, "step": 339 }, { "epoch": 0.17, "logps_train/chosen": -58.79774475097656, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -120.4375, "logps_train/rejected": -158.4930419921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5347761511802673, "rewards_train/margins": 4.3409640192985535, "rewards_train/rejected": -3.806187868118286, "step": 339 }, { "epoch": 0.17, "learning_rate": 9.999197954553008e-07, "loss": 0.0253, "step": 340 }, { "epoch": 0.17, "logps_train/chosen": -58.34477996826172, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -167.36383056640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6132269501686096, "rewards_train/margins": 4.556641161441803, "rewards_train/rejected": -3.9434142112731934, "step": 340 }, { "epoch": 0.17, "learning_rate": 9.99914861064129e-07, "loss": 0.0192, "step": 341 }, { "epoch": 0.17, "logps_train/chosen": -59.466148376464844, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -168.81790161132812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.44152000546455383, "rewards_train/margins": 4.590499192476273, "rewards_train/rejected": -4.148979187011719, "step": 341 }, { "epoch": 0.17, "learning_rate": 9.99909779394666e-07, "loss": 0.0221, "step": 342 }, { "epoch": 0.17, "logps_train/chosen": -56.529510498046875, "logps_train/ref_chosen": -61.75, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -163.97906494140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5200474262237549, "rewards_train/margins": 4.49695897102356, "rewards_train/rejected": -3.9769115447998047, "step": 342 }, { "epoch": 0.17, "learning_rate": 9.999045504484088e-07, "loss": 0.0217, "step": 343 }, { "epoch": 0.17, "logps_train/chosen": -59.27508544921875, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -165.33230590820312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5632144808769226, "rewards_train/margins": 4.475155532360077, "rewards_train/rejected": -3.9119410514831543, "step": 343 }, { "epoch": 0.17, "learning_rate": 9.998991742268976e-07, "loss": 0.0199, "step": 344 }, { "epoch": 0.17, "logps_train/chosen": -57.81425476074219, "logps_train/ref_chosen": -61.875, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -160.79974365234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4051467180252075, "rewards_train/margins": 4.322229027748108, "rewards_train/rejected": -3.9170823097229004, "step": 344 }, { "epoch": 0.17, "learning_rate": 9.998936507317164e-07, "loss": 0.0314, "step": 345 }, { "epoch": 0.17, "logps_train/chosen": -59.27015686035156, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -166.3813018798828, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6104352474212646, "rewards_train/margins": 4.593488454818726, "rewards_train/rejected": -3.983053207397461, "step": 345 }, { "epoch": 0.17, "learning_rate": 9.998879799644928e-07, "loss": 0.0192, "step": 346 }, { "epoch": 0.17, "logps_train/chosen": -59.09739685058594, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -165.03884887695312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6307387351989746, "rewards_train/margins": 4.711235523223877, "rewards_train/rejected": -4.080496788024902, "step": 346 }, { "epoch": 0.17, "learning_rate": 9.998821619268969e-07, "loss": 0.0197, "step": 347 }, { "epoch": 0.17, "logps_train/chosen": -59.24101257324219, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -167.82278442382812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4998733401298523, "rewards_train/margins": 4.628344237804413, "rewards_train/rejected": -4.1284708976745605, "step": 347 }, { "epoch": 0.17, "learning_rate": 9.99876196620643e-07, "loss": 0.0199, "step": 348 }, { "epoch": 0.17, "logps_train/chosen": -58.05125427246094, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -168.51455688476562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4850110411643982, "rewards_train/margins": 4.742326200008392, "rewards_train/rejected": -4.257315158843994, "step": 348 }, { "epoch": 0.17, "learning_rate": 9.998700840474887e-07, "loss": 0.0208, "step": 349 }, { "epoch": 0.17, "logps_train/chosen": -61.06053924560547, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -170.64007568359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.555786669254303, "rewards_train/margins": 4.765447676181793, "rewards_train/rejected": -4.20966100692749, "step": 349 }, { "epoch": 0.17, "learning_rate": 9.998638242092347e-07, "loss": 0.0189, "step": 350 }, { "epoch": 0.17, "logps_train/chosen": -61.10081481933594, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -163.62060546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.33020204305648804, "rewards_train/margins": 4.270094811916351, "rewards_train/rejected": -3.9398927688598633, "step": 350 }, { "epoch": 0.17, "learning_rate": 9.998574171077252e-07, "loss": 0.031, "step": 351 }, { "epoch": 0.17, "logps_train/chosen": -61.22496032714844, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -122.4375, "logps_train/rejected": -165.241455078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4288221299648285, "rewards_train/margins": 4.706189721822739, "rewards_train/rejected": -4.27736759185791, "step": 351 }, { "epoch": 0.17, "learning_rate": 9.998508627448475e-07, "loss": 0.0164, "step": 352 }, { "epoch": 0.17, "logps_train/chosen": -61.243534088134766, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -171.3759765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5164667963981628, "rewards_train/margins": 4.805772483348846, "rewards_train/rejected": -4.289305686950684, "step": 352 }, { "epoch": 0.17, "learning_rate": 9.998441611225328e-07, "loss": 0.0208, "step": 353 }, { "epoch": 0.17, "logps_train/chosen": -62.151737213134766, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -175.0322265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.391124963760376, "rewards_train/margins": 4.823743104934692, "rewards_train/rejected": -4.432618141174316, "step": 353 }, { "epoch": 0.17, "learning_rate": 9.998373122427552e-07, "loss": 0.0184, "step": 354 }, { "epoch": 0.17, "logps_train/chosen": -59.716339111328125, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -167.06320190429688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5042939186096191, "rewards_train/margins": 4.64733362197876, "rewards_train/rejected": -4.143039703369141, "step": 354 }, { "epoch": 0.17, "learning_rate": 9.99830316107533e-07, "loss": 0.022, "step": 355 }, { "epoch": 0.17, "logps_train/chosen": -58.44542694091797, "logps_train/ref_chosen": -62.8125, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -166.66839599609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.43748849630355835, "rewards_train/margins": 4.632111489772797, "rewards_train/rejected": -4.194622993469238, "step": 355 }, { "epoch": 0.18, "learning_rate": 9.998231727189268e-07, "loss": 0.017, "step": 356 }, { "epoch": 0.18, "logps_train/chosen": -62.832523345947266, "logps_train/ref_chosen": -67.625, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -169.13735961914062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.477782666683197, "rewards_train/margins": 4.756752908229828, "rewards_train/rejected": -4.278970241546631, "step": 356 }, { "epoch": 0.18, "learning_rate": 9.99815882079041e-07, "loss": 0.0204, "step": 357 }, { "epoch": 0.18, "logps_train/chosen": -58.812591552734375, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -169.9441680908203, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4124905467033386, "rewards_train/margins": 4.569993197917938, "rewards_train/rejected": -4.1575026512146, "step": 357 }, { "epoch": 0.18, "learning_rate": 9.99808444190024e-07, "loss": 0.022, "step": 358 }, { "epoch": 0.18, "logps_train/chosen": -57.64533996582031, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -167.10025024414062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5109542608261108, "rewards_train/margins": 4.745442271232605, "rewards_train/rejected": -4.234488010406494, "step": 358 }, { "epoch": 0.18, "learning_rate": 9.998008590540667e-07, "loss": 0.0163, "step": 359 }, { "epoch": 0.18, "logps_train/chosen": -59.800682067871094, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -170.4676513671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5541354417800903, "rewards_train/margins": 4.881125569343567, "rewards_train/rejected": -4.326990127563477, "step": 359 }, { "epoch": 0.18, "learning_rate": 9.997931266734037e-07, "loss": 0.0193, "step": 360 }, { "epoch": 0.18, "logps_train/chosen": -60.556949615478516, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -174.03851318359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5210140347480774, "rewards_train/margins": 4.938390552997589, "rewards_train/rejected": -4.417376518249512, "step": 360 }, { "epoch": 0.18, "learning_rate": 9.997852470503132e-07, "loss": 0.0192, "step": 361 }, { "epoch": 0.18, "logps_train/chosen": -57.85865020751953, "logps_train/ref_chosen": -61.53125, "logps_train/ref_rejected": -119.375, "logps_train/rejected": -159.8441619873047, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3670157790184021, "rewards_train/margins": 4.416030824184418, "rewards_train/rejected": -4.049015045166016, "step": 361 }, { "epoch": 0.18, "learning_rate": 9.997772201871165e-07, "loss": 0.0336, "step": 362 }, { "epoch": 0.18, "logps_train/chosen": -61.59839630126953, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -174.7193603515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.438304603099823, "rewards_train/margins": 5.02068966627121, "rewards_train/rejected": -4.582385063171387, "step": 362 }, { "epoch": 0.18, "learning_rate": 9.99769046086178e-07, "loss": 0.0147, "step": 363 }, { "epoch": 0.18, "logps_train/chosen": -57.92763137817383, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -167.49009704589844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5080180764198303, "rewards_train/margins": 4.778560698032379, "rewards_train/rejected": -4.270542621612549, "step": 363 }, { "epoch": 0.18, "learning_rate": 9.997607247499066e-07, "loss": 0.0234, "step": 364 }, { "epoch": 0.18, "logps_train/chosen": -58.50259017944336, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -166.712646484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5115087032318115, "rewards_train/margins": 4.908945322036743, "rewards_train/rejected": -4.397436618804932, "step": 364 }, { "epoch": 0.18, "learning_rate": 9.997522561807534e-07, "loss": 0.0161, "step": 365 }, { "epoch": 0.18, "logps_train/chosen": -58.963687896728516, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -120.125, "logps_train/rejected": -162.35894775390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5126156806945801, "rewards_train/margins": 4.735766887664795, "rewards_train/rejected": -4.223151206970215, "step": 365 }, { "epoch": 0.18, "learning_rate": 9.997436403812132e-07, "loss": 0.0298, "step": 366 }, { "epoch": 0.18, "logps_train/chosen": -59.64659881591797, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -172.1352081298828, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4999399781227112, "rewards_train/margins": 5.164145052433014, "rewards_train/rejected": -4.664205074310303, "step": 366 }, { "epoch": 0.18, "learning_rate": 9.997348773538244e-07, "loss": 0.0115, "step": 367 }, { "epoch": 0.18, "logps_train/chosen": -57.86760330200195, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -170.6768341064453, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5592843890190125, "rewards_train/margins": 5.065346419811249, "rewards_train/rejected": -4.506062030792236, "step": 367 }, { "epoch": 0.18, "learning_rate": 9.997259671011686e-07, "loss": 0.0121, "step": 368 }, { "epoch": 0.18, "logps_train/chosen": -60.3140983581543, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -172.189697265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4355335235595703, "rewards_train/margins": 4.90255069732666, "rewards_train/rejected": -4.46701717376709, "step": 368 }, { "epoch": 0.18, "learning_rate": 9.99716909625871e-07, "loss": 0.0232, "step": 369 }, { "epoch": 0.18, "logps_train/chosen": -57.792259216308594, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -168.5296630859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.48268836736679077, "rewards_train/margins": 4.979355752468109, "rewards_train/rejected": -4.496667385101318, "step": 369 }, { "epoch": 0.18, "learning_rate": 9.997077049305997e-07, "loss": 0.0176, "step": 370 }, { "epoch": 0.18, "logps_train/chosen": -61.53008270263672, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -173.79351806640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5247259736061096, "rewards_train/margins": 5.070631563663483, "rewards_train/rejected": -4.545905590057373, "step": 370 }, { "epoch": 0.18, "learning_rate": 9.996983530180668e-07, "loss": 0.0135, "step": 371 }, { "epoch": 0.18, "logps_train/chosen": -58.94226837158203, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -167.07420349121094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5411738753318787, "rewards_train/margins": 4.836631715297699, "rewards_train/rejected": -4.29545783996582, "step": 371 }, { "epoch": 0.18, "learning_rate": 9.99688853891027e-07, "loss": 0.0247, "step": 372 }, { "epoch": 0.18, "logps_train/chosen": -61.09361267089844, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -167.65435791015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3756482005119324, "rewards_train/margins": 4.720672786235809, "rewards_train/rejected": -4.345024585723877, "step": 372 }, { "epoch": 0.18, "learning_rate": 9.996792075522792e-07, "loss": 0.0207, "step": 373 }, { "epoch": 0.18, "logps_train/chosen": -59.4468879699707, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -169.01788330078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6008189916610718, "rewards_train/margins": 5.148311972618103, "rewards_train/rejected": -4.547492980957031, "step": 373 }, { "epoch": 0.18, "learning_rate": 9.996694140046652e-07, "loss": 0.0107, "step": 374 }, { "epoch": 0.18, "logps_train/chosen": -58.200355529785156, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -168.37091064453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.39134150743484497, "rewards_train/margins": 4.826479256153107, "rewards_train/rejected": -4.435137748718262, "step": 374 }, { "epoch": 0.18, "learning_rate": 9.9965947325107e-07, "loss": 0.0205, "step": 375 }, { "epoch": 0.18, "logps_train/chosen": -59.558387756347656, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -170.75558471679688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5554407238960266, "rewards_train/margins": 4.941057503223419, "rewards_train/rejected": -4.385616779327393, "step": 375 }, { "epoch": 0.19, "learning_rate": 9.996493852944225e-07, "loss": 0.0223, "step": 376 }, { "epoch": 0.19, "logps_train/chosen": -58.821075439453125, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -170.06353759765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.499093234539032, "rewards_train/margins": 5.025465190410614, "rewards_train/rejected": -4.526371955871582, "step": 376 }, { "epoch": 0.19, "learning_rate": 9.996391501376948e-07, "loss": 0.0142, "step": 377 }, { "epoch": 0.19, "logps_train/chosen": -58.16897201538086, "logps_train/ref_chosen": -62.53125, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -170.08631896972656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4356905519962311, "rewards_train/margins": 4.8659049570560455, "rewards_train/rejected": -4.4302144050598145, "step": 377 }, { "epoch": 0.19, "learning_rate": 9.996287677839017e-07, "loss": 0.0188, "step": 378 }, { "epoch": 0.19, "logps_train/chosen": -57.95729446411133, "logps_train/ref_chosen": -61.65625, "logps_train/ref_rejected": -119.25, "logps_train/rejected": -162.52752685546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.37077462673187256, "rewards_train/margins": 4.698038935661316, "rewards_train/rejected": -4.327264308929443, "step": 378 }, { "epoch": 0.19, "learning_rate": 9.996182382361024e-07, "loss": 0.0225, "step": 379 }, { "epoch": 0.19, "logps_train/chosen": -58.68206787109375, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -170.20115661621094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.49331632256507874, "rewards_train/margins": 5.001615673303604, "rewards_train/rejected": -4.508299350738525, "step": 379 }, { "epoch": 0.19, "learning_rate": 9.996075614973989e-07, "loss": 0.0166, "step": 380 }, { "epoch": 0.19, "logps_train/chosen": -59.44078826904297, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -169.880615234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.49366527795791626, "rewards_train/margins": 5.1311898827552795, "rewards_train/rejected": -4.637524604797363, "step": 380 }, { "epoch": 0.19, "learning_rate": 9.995967375709363e-07, "loss": 0.016, "step": 381 }, { "epoch": 0.19, "logps_train/chosen": -60.499183654785156, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -174.60667419433594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.42283567786216736, "rewards_train/margins": 5.096198409795761, "rewards_train/rejected": -4.673362731933594, "step": 381 }, { "epoch": 0.19, "learning_rate": 9.995857664599037e-07, "loss": 0.0163, "step": 382 }, { "epoch": 0.19, "logps_train/chosen": -57.37626647949219, "logps_train/ref_chosen": -62.875, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -173.39385986328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5495806932449341, "rewards_train/margins": 5.375246167182922, "rewards_train/rejected": -4.825665473937988, "step": 382 }, { "epoch": 0.19, "learning_rate": 9.995746481675332e-07, "loss": 0.0118, "step": 383 }, { "epoch": 0.19, "logps_train/chosen": -60.61353302001953, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -172.4790496826172, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.43537527322769165, "rewards_train/margins": 5.10046774148941, "rewards_train/rejected": -4.665092468261719, "step": 383 }, { "epoch": 0.19, "learning_rate": 9.995633826971006e-07, "loss": 0.011, "step": 384 }, { "epoch": 0.19, "logps_train/chosen": -63.018985748291016, "logps_train/ref_chosen": -68.5625, "logps_train/ref_rejected": -131.125, "logps_train/rejected": -179.08758544921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5537651181221008, "rewards_train/margins": 5.346020042896271, "rewards_train/rejected": -4.79225492477417, "step": 384 }, { "epoch": 0.19, "learning_rate": 9.995519700519244e-07, "loss": 0.0174, "step": 385 }, { "epoch": 0.19, "logps_train/chosen": -61.20558166503906, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -176.27964782714844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5400375127792358, "rewards_train/margins": 5.236506819725037, "rewards_train/rejected": -4.696469306945801, "step": 385 }, { "epoch": 0.19, "learning_rate": 9.99540410235367e-07, "loss": 0.0135, "step": 386 }, { "epoch": 0.19, "logps_train/chosen": -59.13234329223633, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -171.5950469970703, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5959944725036621, "rewards_train/margins": 5.328985691070557, "rewards_train/rejected": -4.7329912185668945, "step": 386 }, { "epoch": 0.19, "learning_rate": 9.995287032508337e-07, "loss": 0.0121, "step": 387 }, { "epoch": 0.19, "logps_train/chosen": -58.87471008300781, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -174.40542602539062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.49670857191085815, "rewards_train/margins": 5.226020038127899, "rewards_train/rejected": -4.729311466217041, "step": 387 }, { "epoch": 0.19, "learning_rate": 9.995168491017741e-07, "loss": 0.015, "step": 388 }, { "epoch": 0.19, "logps_train/chosen": -57.63078308105469, "logps_train/ref_chosen": -61.875, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -168.47584533691406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.42510560154914856, "rewards_train/margins": 5.100278407335281, "rewards_train/rejected": -4.675172805786133, "step": 388 }, { "epoch": 0.19, "learning_rate": 9.995048477916798e-07, "loss": 0.0176, "step": 389 }, { "epoch": 0.19, "logps_train/chosen": -60.06989288330078, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -178.45132446289062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6473076939582825, "rewards_train/margins": 5.558602154254913, "rewards_train/rejected": -4.911294460296631, "step": 389 }, { "epoch": 0.19, "learning_rate": 9.994926993240869e-07, "loss": 0.0132, "step": 390 }, { "epoch": 0.19, "logps_train/chosen": -57.836944580078125, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -172.18609619140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5478488206863403, "rewards_train/margins": 5.383060574531555, "rewards_train/rejected": -4.835211753845215, "step": 390 }, { "epoch": 0.19, "learning_rate": 9.994804037025743e-07, "loss": 0.0098, "step": 391 }, { "epoch": 0.19, "logps_train/chosen": -60.20853042602539, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -179.18276977539062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5162563323974609, "rewards_train/margins": 5.565783977508545, "rewards_train/rejected": -5.049527645111084, "step": 391 }, { "epoch": 0.19, "learning_rate": 9.994679609307646e-07, "loss": 0.0099, "step": 392 }, { "epoch": 0.19, "logps_train/chosen": -58.23832321166992, "logps_train/ref_chosen": -61.96875, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -169.20742797851562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.37421470880508423, "rewards_train/margins": 5.0372434258461, "rewards_train/rejected": -4.663028717041016, "step": 392 }, { "epoch": 0.19, "learning_rate": 9.994553710123232e-07, "loss": 0.0174, "step": 393 }, { "epoch": 0.19, "logps_train/chosen": -60.01699447631836, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -171.9663543701172, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4950779378414154, "rewards_train/margins": 5.151478618383408, "rewards_train/rejected": -4.656400680541992, "step": 393 }, { "epoch": 0.19, "learning_rate": 9.99442633950959e-07, "loss": 0.0186, "step": 394 }, { "epoch": 0.19, "logps_train/chosen": -58.087989807128906, "logps_train/ref_chosen": -62.0625, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -173.16387939453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.397987961769104, "rewards_train/margins": 5.339130997657776, "rewards_train/rejected": -4.941143035888672, "step": 394 }, { "epoch": 0.19, "learning_rate": 9.994297497504252e-07, "loss": 0.0163, "step": 395 }, { "epoch": 0.19, "logps_train/chosen": -61.76984405517578, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -180.89935302734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4749443531036377, "rewards_train/margins": 5.670642137527466, "rewards_train/rejected": -5.195697784423828, "step": 395 }, { "epoch": 0.19, "learning_rate": 9.994167184145167e-07, "loss": 0.0097, "step": 396 }, { "epoch": 0.19, "logps_train/chosen": -60.57452392578125, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -174.8724365234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.31666886806488037, "rewards_train/margins": 5.227104544639587, "rewards_train/rejected": -4.910435676574707, "step": 396 }, { "epoch": 0.2, "learning_rate": 9.994035399470732e-07, "loss": 0.016, "step": 397 }, { "epoch": 0.2, "logps_train/chosen": -58.8746223449707, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -177.7852783203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4201548397541046, "rewards_train/margins": 5.491113930940628, "rewards_train/rejected": -5.070959091186523, "step": 397 }, { "epoch": 0.2, "learning_rate": 9.993902143519771e-07, "loss": 0.0115, "step": 398 }, { "epoch": 0.2, "logps_train/chosen": -59.93946075439453, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -179.0106658935547, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4030753970146179, "rewards_train/margins": 5.523966014385223, "rewards_train/rejected": -5.1208906173706055, "step": 398 }, { "epoch": 0.2, "learning_rate": 9.99376741633154e-07, "loss": 0.0122, "step": 399 }, { "epoch": 0.2, "logps_train/chosen": -59.35581970214844, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -174.999755859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5391005277633667, "rewards_train/margins": 5.580238223075867, "rewards_train/rejected": -5.0411376953125, "step": 399 }, { "epoch": 0.2, "learning_rate": 9.99363121794573e-07, "loss": 0.0098, "step": 400 }, { "epoch": 0.2, "logps_train/chosen": -65.0812759399414, "logps_train/ref_chosen": -70.25, "logps_train/ref_rejected": -131.125, "logps_train/rejected": -182.46060180664062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5183860063552856, "rewards_train/margins": 5.65604841709137, "rewards_train/rejected": -5.137662410736084, "step": 400 }, { "epoch": 0.2, "learning_rate": 9.99349354840247e-07, "loss": 0.0104, "step": 401 }, { "epoch": 0.2, "logps_train/chosen": -59.44144821166992, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -174.85482788085938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.46435123682022095, "rewards_train/margins": 5.493877947330475, "rewards_train/rejected": -5.029526710510254, "step": 401 }, { "epoch": 0.2, "learning_rate": 9.993354407742312e-07, "loss": 0.0077, "step": 402 }, { "epoch": 0.2, "logps_train/chosen": -60.80363082885742, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -179.33865356445312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.40064284205436707, "rewards_train/margins": 5.54930254817009, "rewards_train/rejected": -5.148659706115723, "step": 402 }, { "epoch": 0.2, "learning_rate": 9.993213796006254e-07, "loss": 0.0125, "step": 403 }, { "epoch": 0.2, "logps_train/chosen": -56.189552307128906, "logps_train/ref_chosen": -60.125, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -171.07308959960938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.39256787300109863, "rewards_train/margins": 5.200169801712036, "rewards_train/rejected": -4.8076019287109375, "step": 403 }, { "epoch": 0.2, "learning_rate": 9.993071713235719e-07, "loss": 0.0167, "step": 404 }, { "epoch": 0.2, "logps_train/chosen": -57.449195861816406, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -172.26795959472656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.49048101902008057, "rewards_train/margins": 5.383732438087463, "rewards_train/rejected": -4.893251419067383, "step": 404 }, { "epoch": 0.2, "learning_rate": 9.992928159472564e-07, "loss": 0.0163, "step": 405 }, { "epoch": 0.2, "logps_train/chosen": -60.365150451660156, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -174.6410675048828, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6057702302932739, "rewards_train/margins": 5.494681477546692, "rewards_train/rejected": -4.888911247253418, "step": 405 }, { "epoch": 0.2, "learning_rate": 9.992783134759081e-07, "loss": 0.0128, "step": 406 }, { "epoch": 0.2, "logps_train/chosen": -57.141075134277344, "logps_train/ref_chosen": -62.28125, "logps_train/ref_rejected": -121.9375, "logps_train/rejected": -171.2978515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5147011280059814, "rewards_train/margins": 5.447025537490845, "rewards_train/rejected": -4.932324409484863, "step": 406 }, { "epoch": 0.2, "learning_rate": 9.992636639137998e-07, "loss": 0.014, "step": 407 }, { "epoch": 0.2, "logps_train/chosen": -59.909637451171875, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -122.1875, "logps_train/rejected": -169.532470703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5092313885688782, "rewards_train/margins": 5.2414830327034, "rewards_train/rejected": -4.7322516441345215, "step": 407 }, { "epoch": 0.2, "learning_rate": 9.992488672652473e-07, "loss": 0.0112, "step": 408 }, { "epoch": 0.2, "logps_train/chosen": -59.398033142089844, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -177.89291381835938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4799719452857971, "rewards_train/margins": 5.858912527561188, "rewards_train/rejected": -5.378940582275391, "step": 408 }, { "epoch": 0.2, "learning_rate": 9.992339235346095e-07, "loss": 0.0076, "step": 409 }, { "epoch": 0.2, "logps_train/chosen": -57.00935745239258, "logps_train/ref_chosen": -62.5625, "logps_train/ref_rejected": -121.875, "logps_train/rejected": -169.90887451171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5542399287223816, "rewards_train/margins": 5.360265910625458, "rewards_train/rejected": -4.806025981903076, "step": 409 }, { "epoch": 0.2, "learning_rate": 9.992188327262893e-07, "loss": 0.02, "step": 410 }, { "epoch": 0.2, "logps_train/chosen": -60.84886932373047, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -179.25535583496094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4944591224193573, "rewards_train/margins": 5.650708168745041, "rewards_train/rejected": -5.156249046325684, "step": 410 }, { "epoch": 0.2, "learning_rate": 9.992035948447322e-07, "loss": 0.0094, "step": 411 }, { "epoch": 0.2, "logps_train/chosen": -60.561859130859375, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -179.79623413085938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5647125244140625, "rewards_train/margins": 5.738964080810547, "rewards_train/rejected": -5.174251556396484, "step": 411 }, { "epoch": 0.2, "learning_rate": 9.991882098944276e-07, "loss": 0.0098, "step": 412 }, { "epoch": 0.2, "logps_train/chosen": -59.173866271972656, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -177.84803771972656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4822227954864502, "rewards_train/margins": 5.59808087348938, "rewards_train/rejected": -5.11585807800293, "step": 412 }, { "epoch": 0.2, "learning_rate": 9.99172677879908e-07, "loss": 0.0084, "step": 413 }, { "epoch": 0.2, "logps_train/chosen": -59.58989715576172, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -176.84771728515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4888128936290741, "rewards_train/margins": 5.700830906629562, "rewards_train/rejected": -5.212018013000488, "step": 413 }, { "epoch": 0.2, "learning_rate": 9.991569988057492e-07, "loss": 0.0079, "step": 414 }, { "epoch": 0.2, "logps_train/chosen": -63.505638122558594, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -176.53573608398438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.32018837332725525, "rewards_train/margins": 5.636066287755966, "rewards_train/rejected": -5.315877914428711, "step": 414 }, { "epoch": 0.2, "learning_rate": 9.991411726765705e-07, "loss": 0.0147, "step": 415 }, { "epoch": 0.2, "logps_train/chosen": -59.625091552734375, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -174.2752685546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.38939523696899414, "rewards_train/margins": 5.408230304718018, "rewards_train/rejected": -5.018835067749023, "step": 415 }, { "epoch": 0.2, "learning_rate": 9.99125199497034e-07, "loss": 0.0174, "step": 416 }, { "epoch": 0.2, "logps_train/chosen": -61.724613189697266, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -130.875, "logps_train/rejected": -183.74853515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5159419178962708, "rewards_train/margins": 5.799973785877228, "rewards_train/rejected": -5.284031867980957, "step": 416 }, { "epoch": 0.21, "learning_rate": 9.991090792718458e-07, "loss": 0.0107, "step": 417 }, { "epoch": 0.21, "logps_train/chosen": -58.952125549316406, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -175.31625366210938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5594016313552856, "rewards_train/margins": 5.673057675361633, "rewards_train/rejected": -5.113656044006348, "step": 417 }, { "epoch": 0.21, "learning_rate": 9.990928120057548e-07, "loss": 0.0084, "step": 418 }, { "epoch": 0.21, "logps_train/chosen": -57.32978820800781, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -171.77232360839844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5280561447143555, "rewards_train/margins": 5.508803844451904, "rewards_train/rejected": -4.980747699737549, "step": 418 }, { "epoch": 0.21, "learning_rate": 9.99076397703554e-07, "loss": 0.0139, "step": 419 }, { "epoch": 0.21, "logps_train/chosen": -59.929237365722656, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -176.93472290039062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3478965163230896, "rewards_train/margins": 5.597423017024994, "rewards_train/rejected": -5.249526500701904, "step": 419 }, { "epoch": 0.21, "learning_rate": 9.990598363700785e-07, "loss": 0.0185, "step": 420 }, { "epoch": 0.21, "logps_train/chosen": -59.1562385559082, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -183.97120666503906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5675791501998901, "rewards_train/margins": 5.947805285453796, "rewards_train/rejected": -5.380226135253906, "step": 420 }, { "epoch": 0.21, "learning_rate": 9.990431280102079e-07, "loss": 0.0115, "step": 421 }, { "epoch": 0.21, "logps_train/chosen": -57.54547119140625, "logps_train/ref_chosen": -61.25, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -172.92376708984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.371429443359375, "rewards_train/margins": 5.499791622161865, "rewards_train/rejected": -5.12836217880249, "step": 421 }, { "epoch": 0.21, "learning_rate": 9.990262726288643e-07, "loss": 0.0119, "step": 422 }, { "epoch": 0.21, "logps_train/chosen": -60.82299041748047, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -131.875, "logps_train/rejected": -181.9785919189453, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5771247744560242, "rewards_train/margins": 5.590950191020966, "rewards_train/rejected": -5.013825416564941, "step": 422 }, { "epoch": 0.21, "learning_rate": 9.990092702310133e-07, "loss": 0.0135, "step": 423 }, { "epoch": 0.21, "logps_train/chosen": -57.05014419555664, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -174.051025390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6157374382019043, "rewards_train/margins": 5.768250942230225, "rewards_train/rejected": -5.15251350402832, "step": 423 }, { "epoch": 0.21, "learning_rate": 9.989921208216642e-07, "loss": 0.0074, "step": 424 }, { "epoch": 0.21, "logps_train/chosen": -61.37645721435547, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -181.05215454101562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.33642640709877014, "rewards_train/margins": 5.517569690942764, "rewards_train/rejected": -5.181143283843994, "step": 424 }, { "epoch": 0.21, "learning_rate": 9.989748244058694e-07, "loss": 0.0115, "step": 425 }, { "epoch": 0.21, "logps_train/chosen": -59.51203155517578, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -131.5, "logps_train/rejected": -182.77175903320312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5988215208053589, "rewards_train/margins": 5.7317599058151245, "rewards_train/rejected": -5.132938385009766, "step": 425 }, { "epoch": 0.21, "learning_rate": 9.989573809887243e-07, "loss": 0.0082, "step": 426 }, { "epoch": 0.21, "logps_train/chosen": -59.03778839111328, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -175.36148071289062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6319880485534668, "rewards_train/margins": 5.8051958084106445, "rewards_train/rejected": -5.173207759857178, "step": 426 }, { "epoch": 0.21, "learning_rate": 9.989397905753677e-07, "loss": 0.015, "step": 427 }, { "epoch": 0.21, "logps_train/chosen": -59.03270721435547, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -177.02423095703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4873540997505188, "rewards_train/margins": 5.762482464313507, "rewards_train/rejected": -5.275128364562988, "step": 427 }, { "epoch": 0.21, "learning_rate": 9.98922053170982e-07, "loss": 0.0087, "step": 428 }, { "epoch": 0.21, "logps_train/chosen": -62.644996643066406, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -180.77674865722656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3926777243614197, "rewards_train/margins": 5.639932096004486, "rewards_train/rejected": -5.247254371643066, "step": 428 }, { "epoch": 0.21, "learning_rate": 9.989041687807932e-07, "loss": 0.017, "step": 429 }, { "epoch": 0.21, "logps_train/chosen": -59.62749481201172, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -178.402587890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5300729274749756, "rewards_train/margins": 5.668184995651245, "rewards_train/rejected": -5.1381120681762695, "step": 429 }, { "epoch": 0.21, "learning_rate": 9.988861374100697e-07, "loss": 0.0161, "step": 430 }, { "epoch": 0.21, "logps_train/chosen": -58.4517822265625, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -176.96221923828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.47694092988967896, "rewards_train/margins": 5.68205052614212, "rewards_train/rejected": -5.205109596252441, "step": 430 }, { "epoch": 0.21, "learning_rate": 9.988679590641236e-07, "loss": 0.0111, "step": 431 }, { "epoch": 0.21, "logps_train/chosen": -60.203392028808594, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -181.70693969726562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.48283499479293823, "rewards_train/margins": 5.8401495814323425, "rewards_train/rejected": -5.357314586639404, "step": 431 }, { "epoch": 0.21, "learning_rate": 9.988496337483106e-07, "loss": 0.0124, "step": 432 }, { "epoch": 0.21, "logps_train/chosen": -56.86886215209961, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -177.67906188964844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6532992720603943, "rewards_train/margins": 5.784193456172943, "rewards_train/rejected": -5.130894184112549, "step": 432 }, { "epoch": 0.21, "learning_rate": 9.988311614680295e-07, "loss": 0.0114, "step": 433 }, { "epoch": 0.21, "logps_train/chosen": -56.64514923095703, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -170.67752075195312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5971060395240784, "rewards_train/margins": 5.4942527413368225, "rewards_train/rejected": -4.897146701812744, "step": 433 }, { "epoch": 0.21, "learning_rate": 9.988125422287218e-07, "loss": 0.0124, "step": 434 }, { "epoch": 0.21, "logps_train/chosen": -57.300418853759766, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -180.44024658203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6326533555984497, "rewards_train/margins": 6.018573880195618, "rewards_train/rejected": -5.385920524597168, "step": 434 }, { "epoch": 0.21, "learning_rate": 9.987937760358737e-07, "loss": 0.0063, "step": 435 }, { "epoch": 0.21, "logps_train/chosen": -59.53476333618164, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -185.17726135253906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5686182379722595, "rewards_train/margins": 6.170718133449554, "rewards_train/rejected": -5.602099895477295, "step": 435 }, { "epoch": 0.21, "learning_rate": 9.987748628950133e-07, "loss": 0.0081, "step": 436 }, { "epoch": 0.21, "logps_train/chosen": -57.20252227783203, "logps_train/ref_chosen": -61.3125, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -180.1641082763672, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4118274450302124, "rewards_train/margins": 5.837467551231384, "rewards_train/rejected": -5.425640106201172, "step": 436 }, { "epoch": 0.22, "learning_rate": 9.987558028117128e-07, "loss": 0.009, "step": 437 }, { "epoch": 0.22, "logps_train/chosen": -57.79777908325195, "logps_train/ref_chosen": -63.03125, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -179.85610961914062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5226146578788757, "rewards_train/margins": 5.849339663982391, "rewards_train/rejected": -5.326725006103516, "step": 437 }, { "epoch": 0.22, "learning_rate": 9.987365957915872e-07, "loss": 0.0087, "step": 438 }, { "epoch": 0.22, "logps_train/chosen": -59.10101318359375, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -178.34625244140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.48071929812431335, "rewards_train/margins": 5.791420191526413, "rewards_train/rejected": -5.3107008934021, "step": 438 }, { "epoch": 0.22, "learning_rate": 9.987172418402953e-07, "loss": 0.0088, "step": 439 }, { "epoch": 0.22, "logps_train/chosen": -61.835304260253906, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -131.125, "logps_train/rejected": -186.26327514648438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.41969215869903564, "rewards_train/margins": 5.932591557502747, "rewards_train/rejected": -5.512899398803711, "step": 439 }, { "epoch": 0.22, "learning_rate": 9.986977409635384e-07, "loss": 0.0097, "step": 440 }, { "epoch": 0.22, "logps_train/chosen": -60.78521728515625, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -187.79737854003906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5520442724227905, "rewards_train/margins": 6.339691519737244, "rewards_train/rejected": -5.787647247314453, "step": 440 }, { "epoch": 0.22, "learning_rate": 9.986780931670621e-07, "loss": 0.005, "step": 441 }, { "epoch": 0.22, "logps_train/chosen": -59.19746398925781, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -178.11618041992188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4743698537349701, "rewards_train/margins": 5.696143478155136, "rewards_train/rejected": -5.221773624420166, "step": 441 }, { "epoch": 0.22, "learning_rate": 9.986582984566545e-07, "loss": 0.0098, "step": 442 }, { "epoch": 0.22, "logps_train/chosen": -60.269508361816406, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -177.06605529785156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3529807925224304, "rewards_train/margins": 5.75460547208786, "rewards_train/rejected": -5.40162467956543, "step": 442 }, { "epoch": 0.22, "learning_rate": 9.986383568381476e-07, "loss": 0.0121, "step": 443 }, { "epoch": 0.22, "logps_train/chosen": -60.063682556152344, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -176.97303771972656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6090613007545471, "rewards_train/margins": 5.8866382241249084, "rewards_train/rejected": -5.277576923370361, "step": 443 }, { "epoch": 0.22, "learning_rate": 9.98618268317416e-07, "loss": 0.0077, "step": 444 }, { "epoch": 0.22, "logps_train/chosen": -58.287803649902344, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -179.3868865966797, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5818154811859131, "rewards_train/margins": 6.049606084823608, "rewards_train/rejected": -5.467790603637695, "step": 444 }, { "epoch": 0.22, "learning_rate": 9.985980329003777e-07, "loss": 0.0076, "step": 445 }, { "epoch": 0.22, "logps_train/chosen": -58.4831428527832, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -180.56857299804688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4577893614768982, "rewards_train/margins": 5.973534047603607, "rewards_train/rejected": -5.515744686126709, "step": 445 }, { "epoch": 0.22, "learning_rate": 9.985776505929947e-07, "loss": 0.0092, "step": 446 }, { "epoch": 0.22, "logps_train/chosen": -58.59177780151367, "logps_train/ref_chosen": -62.59375, "logps_train/ref_rejected": -119.5625, "logps_train/rejected": -174.25123596191406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.40078315138816833, "rewards_train/margins": 5.869607537984848, "rewards_train/rejected": -5.46882438659668, "step": 446 }, { "epoch": 0.22, "learning_rate": 9.985571214012715e-07, "loss": 0.012, "step": 447 }, { "epoch": 0.22, "logps_train/chosen": -60.64326095581055, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -176.1422576904297, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5660449266433716, "rewards_train/margins": 5.873091816902161, "rewards_train/rejected": -5.307046890258789, "step": 447 }, { "epoch": 0.22, "learning_rate": 9.985364453312564e-07, "loss": 0.0124, "step": 448 }, { "epoch": 0.22, "logps_train/chosen": -59.050445556640625, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -178.09197998046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5244477987289429, "rewards_train/margins": 5.862650752067566, "rewards_train/rejected": -5.338202953338623, "step": 448 }, { "epoch": 0.22, "learning_rate": 9.985156223890403e-07, "loss": 0.0108, "step": 449 }, { "epoch": 0.22, "logps_train/chosen": -60.18879318237305, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -181.70880126953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5630055665969849, "rewards_train/margins": 6.032617688179016, "rewards_train/rejected": -5.469612121582031, "step": 449 }, { "epoch": 0.22, "learning_rate": 9.984946525807581e-07, "loss": 0.0053, "step": 450 }, { "epoch": 0.22, "logps_train/chosen": -62.35398864746094, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -181.42518615722656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4312761127948761, "rewards_train/margins": 5.830826550722122, "rewards_train/rejected": -5.399550437927246, "step": 450 }, { "epoch": 0.22, "learning_rate": 9.984735359125872e-07, "loss": 0.0096, "step": 451 }, { "epoch": 0.22, "logps_train/chosen": -57.67344665527344, "logps_train/ref_chosen": -61.21875, "logps_train/ref_rejected": -118.8125, "logps_train/rejected": -170.81671142578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.35550713539123535, "rewards_train/margins": 5.55573296546936, "rewards_train/rejected": -5.200225830078125, "step": 451 }, { "epoch": 0.22, "learning_rate": 9.984522723907494e-07, "loss": 0.0146, "step": 452 }, { "epoch": 0.22, "logps_train/chosen": -60.44126892089844, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -179.544677734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4654434621334076, "rewards_train/margins": 5.951845496892929, "rewards_train/rejected": -5.4864020347595215, "step": 452 }, { "epoch": 0.22, "learning_rate": 9.984308620215087e-07, "loss": 0.0125, "step": 453 }, { "epoch": 0.22, "logps_train/chosen": -57.43424987792969, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -119.8125, "logps_train/rejected": -174.7727508544922, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5322587490081787, "rewards_train/margins": 6.029553174972534, "rewards_train/rejected": -5.4972944259643555, "step": 453 }, { "epoch": 0.22, "learning_rate": 9.984093048111727e-07, "loss": 0.0093, "step": 454 }, { "epoch": 0.22, "logps_train/chosen": -59.370933532714844, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -179.57742309570312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.7030920386314392, "rewards_train/margins": 6.097503125667572, "rewards_train/rejected": -5.394411087036133, "step": 454 }, { "epoch": 0.22, "learning_rate": 9.983876007660921e-07, "loss": 0.0105, "step": 455 }, { "epoch": 0.22, "logps_train/chosen": -58.7022819519043, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -182.95068359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5433948636054993, "rewards_train/margins": 6.1352885365486145, "rewards_train/rejected": -5.591893672943115, "step": 455 }, { "epoch": 0.22, "learning_rate": 9.983657498926616e-07, "loss": 0.0074, "step": 456 }, { "epoch": 0.22, "logps_train/chosen": -60.659950256347656, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -180.52255249023438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4402063190937042, "rewards_train/margins": 5.977959424257278, "rewards_train/rejected": -5.537753105163574, "step": 456 }, { "epoch": 0.23, "learning_rate": 9.983437521973182e-07, "loss": 0.0068, "step": 457 }, { "epoch": 0.23, "logps_train/chosen": -58.61870574951172, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -179.40328979492188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5731387138366699, "rewards_train/margins": 5.908486843109131, "rewards_train/rejected": -5.335348129272461, "step": 457 }, { "epoch": 0.23, "learning_rate": 9.983216076865428e-07, "loss": 0.0096, "step": 458 }, { "epoch": 0.23, "logps_train/chosen": -57.28035354614258, "logps_train/ref_chosen": -62.21875, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -177.38970947265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4935954511165619, "rewards_train/margins": 5.865330070257187, "rewards_train/rejected": -5.371734619140625, "step": 458 }, { "epoch": 0.23, "learning_rate": 9.982993163668592e-07, "loss": 0.0112, "step": 459 }, { "epoch": 0.23, "logps_train/chosen": -61.722068786621094, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -184.10165405273438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3575778603553772, "rewards_train/margins": 6.1273141503334045, "rewards_train/rejected": -5.769736289978027, "step": 459 }, { "epoch": 0.23, "learning_rate": 9.982768782448345e-07, "loss": 0.0064, "step": 460 }, { "epoch": 0.23, "logps_train/chosen": -59.315853118896484, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -183.46923828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5100650191307068, "rewards_train/margins": 6.22471421957016, "rewards_train/rejected": -5.714649200439453, "step": 460 }, { "epoch": 0.23, "learning_rate": 9.982542933270794e-07, "loss": 0.0083, "step": 461 }, { "epoch": 0.23, "logps_train/chosen": -59.001007080078125, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -176.1083984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5646453499794006, "rewards_train/margins": 5.913669645786285, "rewards_train/rejected": -5.349024295806885, "step": 461 }, { "epoch": 0.23, "learning_rate": 9.982315616202471e-07, "loss": 0.0095, "step": 462 }, { "epoch": 0.23, "logps_train/chosen": -57.49510955810547, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -181.21580505371094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.47558677196502686, "rewards_train/margins": 6.283984541893005, "rewards_train/rejected": -5.8083977699279785, "step": 462 }, { "epoch": 0.23, "learning_rate": 9.98208683131035e-07, "loss": 0.0106, "step": 463 }, { "epoch": 0.23, "logps_train/chosen": -61.35098648071289, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -187.0602264404297, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.40328046679496765, "rewards_train/margins": 6.18303319811821, "rewards_train/rejected": -5.779752731323242, "step": 463 }, { "epoch": 0.23, "learning_rate": 9.98185657866183e-07, "loss": 0.0077, "step": 464 }, { "epoch": 0.23, "logps_train/chosen": -62.0744514465332, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -180.3380889892578, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.37639278173446655, "rewards_train/margins": 5.938814580440521, "rewards_train/rejected": -5.562421798706055, "step": 464 }, { "epoch": 0.23, "learning_rate": 9.981624858324746e-07, "loss": 0.0162, "step": 465 }, { "epoch": 0.23, "logps_train/chosen": -59.08588790893555, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -184.74761962890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5102591514587402, "rewards_train/margins": 6.336729049682617, "rewards_train/rejected": -5.826469898223877, "step": 465 }, { "epoch": 0.23, "learning_rate": 9.981391670367364e-07, "loss": 0.008, "step": 466 }, { "epoch": 0.23, "logps_train/chosen": -58.093135833740234, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -189.42391967773438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.7197390794754028, "rewards_train/margins": 6.612913250923157, "rewards_train/rejected": -5.893174171447754, "step": 466 }, { "epoch": 0.23, "learning_rate": 9.981157014858383e-07, "loss": 0.0032, "step": 467 }, { "epoch": 0.23, "logps_train/chosen": -59.05282211303711, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -181.1962432861328, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5707430243492126, "rewards_train/margins": 6.187828481197357, "rewards_train/rejected": -5.6170854568481445, "step": 467 }, { "epoch": 0.23, "learning_rate": 9.980920891866933e-07, "loss": 0.0088, "step": 468 }, { "epoch": 0.23, "logps_train/chosen": -61.596954345703125, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -177.49757385253906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.348751962184906, "rewards_train/margins": 5.924680650234222, "rewards_train/rejected": -5.575928688049316, "step": 468 }, { "epoch": 0.23, "learning_rate": 9.980683301462577e-07, "loss": 0.0112, "step": 469 }, { "epoch": 0.23, "logps_train/chosen": -58.05292510986328, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -188.30886840820312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.7065730094909668, "rewards_train/margins": 6.560213565826416, "rewards_train/rejected": -5.853640556335449, "step": 469 }, { "epoch": 0.23, "learning_rate": 9.980444243715313e-07, "loss": 0.0058, "step": 470 }, { "epoch": 0.23, "logps_train/chosen": -60.623016357421875, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -183.09799194335938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5766140222549438, "rewards_train/margins": 6.294225811958313, "rewards_train/rejected": -5.717611789703369, "step": 470 }, { "epoch": 0.23, "learning_rate": 9.98020371869557e-07, "loss": 0.0085, "step": 471 }, { "epoch": 0.23, "logps_train/chosen": -59.76312255859375, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -182.8634490966797, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.38731080293655396, "rewards_train/margins": 6.104856312274933, "rewards_train/rejected": -5.717545509338379, "step": 471 }, { "epoch": 0.23, "learning_rate": 9.979961726474204e-07, "loss": 0.0103, "step": 472 }, { "epoch": 0.23, "logps_train/chosen": -59.76484680175781, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -180.44229125976562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.45325183868408203, "rewards_train/margins": 6.014570713043213, "rewards_train/rejected": -5.561318874359131, "step": 472 }, { "epoch": 0.23, "learning_rate": 9.97971826712251e-07, "loss": 0.0068, "step": 473 }, { "epoch": 0.23, "logps_train/chosen": -57.27423858642578, "logps_train/ref_chosen": -61.78125, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -181.71063232421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.44982218742370605, "rewards_train/margins": 6.356140375137329, "rewards_train/rejected": -5.906318187713623, "step": 473 }, { "epoch": 0.23, "learning_rate": 9.979473340712213e-07, "loss": 0.0059, "step": 474 }, { "epoch": 0.23, "logps_train/chosen": -56.727195739746094, "logps_train/ref_chosen": -63.59375, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -178.39682006835938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6868510246276855, "rewards_train/margins": 6.109832763671875, "rewards_train/rejected": -5.4229817390441895, "step": 474 }, { "epoch": 0.23, "learning_rate": 9.97922694731547e-07, "loss": 0.0103, "step": 475 }, { "epoch": 0.23, "logps_train/chosen": -60.76487350463867, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -186.7904052734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5261982083320618, "rewards_train/margins": 6.470130741596222, "rewards_train/rejected": -5.94393253326416, "step": 475 }, { "epoch": 0.23, "learning_rate": 9.978979087004868e-07, "loss": 0.0072, "step": 476 }, { "epoch": 0.23, "logps_train/chosen": -59.02516555786133, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -130.375, "logps_train/rejected": -189.25381469726562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5959696769714355, "rewards_train/margins": 6.4893693923950195, "rewards_train/rejected": -5.893399715423584, "step": 476 }, { "epoch": 0.23, "learning_rate": 9.97872975985343e-07, "loss": 0.0062, "step": 477 }, { "epoch": 0.23, "logps_train/chosen": -59.369171142578125, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -178.6986846923828, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4935516119003296, "rewards_train/margins": 6.105558753013611, "rewards_train/rejected": -5.612007141113281, "step": 477 }, { "epoch": 0.24, "learning_rate": 9.97847896593461e-07, "loss": 0.0086, "step": 478 }, { "epoch": 0.24, "logps_train/chosen": -57.956565856933594, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -178.56411743164062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.49589622020721436, "rewards_train/margins": 6.02745521068573, "rewards_train/rejected": -5.531558990478516, "step": 478 }, { "epoch": 0.24, "learning_rate": 9.978226705322294e-07, "loss": 0.0099, "step": 479 }, { "epoch": 0.24, "logps_train/chosen": -59.14466857910156, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -187.494384765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.48062610626220703, "rewards_train/margins": 6.213953018188477, "rewards_train/rejected": -5.7333269119262695, "step": 479 }, { "epoch": 0.24, "learning_rate": 9.977972978090798e-07, "loss": 0.0112, "step": 480 }, { "epoch": 0.24, "logps_train/chosen": -56.425880432128906, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -187.91529846191406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6190329790115356, "rewards_train/margins": 6.728434681892395, "rewards_train/rejected": -6.109401702880859, "step": 480 }, { "epoch": 0.24, "learning_rate": 9.977717784314874e-07, "loss": 0.0032, "step": 481 }, { "epoch": 0.24, "logps_train/chosen": -55.511680603027344, "logps_train/ref_chosen": -61.0, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -179.6056365966797, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5496129989624023, "rewards_train/margins": 6.3647661209106445, "rewards_train/rejected": -5.815153121948242, "step": 481 }, { "epoch": 0.24, "learning_rate": 9.9774611240697e-07, "loss": 0.0105, "step": 482 }, { "epoch": 0.24, "logps_train/chosen": -62.64140701293945, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -130.875, "logps_train/rejected": -196.03305053710938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.42072251439094543, "rewards_train/margins": 6.941410571336746, "rewards_train/rejected": -6.520688056945801, "step": 482 }, { "epoch": 0.24, "learning_rate": 9.977202997430893e-07, "loss": 0.0041, "step": 483 }, { "epoch": 0.24, "logps_train/chosen": -57.733341217041016, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -182.89627075195312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4380428194999695, "rewards_train/margins": 6.286117732524872, "rewards_train/rejected": -5.848074913024902, "step": 483 }, { "epoch": 0.24, "learning_rate": 9.976943404474498e-07, "loss": 0.0076, "step": 484 }, { "epoch": 0.24, "logps_train/chosen": -61.03010559082031, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -188.22686767578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.36681342124938965, "rewards_train/margins": 6.480321645736694, "rewards_train/rejected": -6.113508224487305, "step": 484 }, { "epoch": 0.24, "learning_rate": 9.976682345276994e-07, "loss": 0.0058, "step": 485 }, { "epoch": 0.24, "logps_train/chosen": -56.23101043701172, "logps_train/ref_chosen": -60.25, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -185.35919189453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.40258270502090454, "rewards_train/margins": 6.354027330875397, "rewards_train/rejected": -5.951444625854492, "step": 485 }, { "epoch": 0.24, "learning_rate": 9.976419819915288e-07, "loss": 0.0065, "step": 486 }, { "epoch": 0.24, "logps_train/chosen": -62.230308532714844, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -187.8936767578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.44606107473373413, "rewards_train/margins": 6.501199185848236, "rewards_train/rejected": -6.055138111114502, "step": 486 }, { "epoch": 0.24, "learning_rate": 9.976155828466723e-07, "loss": 0.0083, "step": 487 }, { "epoch": 0.24, "logps_train/chosen": -56.481414794921875, "logps_train/ref_chosen": -61.96875, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -181.09469604492188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5492703914642334, "rewards_train/margins": 6.2077624797821045, "rewards_train/rejected": -5.658492088317871, "step": 487 }, { "epoch": 0.24, "learning_rate": 9.975890371009074e-07, "loss": 0.006, "step": 488 }, { "epoch": 0.24, "logps_train/chosen": -59.780181884765625, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -193.4112091064453, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4922456741333008, "rewards_train/margins": 6.972624778747559, "rewards_train/rejected": -6.480379104614258, "step": 488 }, { "epoch": 0.24, "learning_rate": 9.975623447620548e-07, "loss": 0.0032, "step": 489 }, { "epoch": 0.24, "logps_train/chosen": -61.157203674316406, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -189.43992614746094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5003440976142883, "rewards_train/margins": 6.508888065814972, "rewards_train/rejected": -6.008543968200684, "step": 489 }, { "epoch": 0.24, "learning_rate": 9.975355058379774e-07, "loss": 0.0071, "step": 490 }, { "epoch": 0.24, "logps_train/chosen": -58.08416748046875, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -186.45065307617188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.514093279838562, "rewards_train/margins": 6.518094897270203, "rewards_train/rejected": -6.004001617431641, "step": 490 }, { "epoch": 0.24, "learning_rate": 9.975085203365833e-07, "loss": 0.0051, "step": 491 }, { "epoch": 0.24, "logps_train/chosen": -60.774810791015625, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -188.38900756835938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4484221637248993, "rewards_train/margins": 6.586932867765427, "rewards_train/rejected": -6.138510704040527, "step": 491 }, { "epoch": 0.24, "learning_rate": 9.974813882658218e-07, "loss": 0.0066, "step": 492 }, { "epoch": 0.24, "logps_train/chosen": -58.0513801574707, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -184.96847534179688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.48983269929885864, "rewards_train/margins": 6.598497688770294, "rewards_train/rejected": -6.1086649894714355, "step": 492 }, { "epoch": 0.24, "learning_rate": 9.974541096336864e-07, "loss": 0.0122, "step": 493 }, { "epoch": 0.24, "logps_train/chosen": -57.61227035522461, "logps_train/ref_chosen": -62.78125, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -184.8523406982422, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5176060199737549, "rewards_train/margins": 6.403230905532837, "rewards_train/rejected": -5.885624885559082, "step": 493 }, { "epoch": 0.24, "learning_rate": 9.974266844482136e-07, "loss": 0.0067, "step": 494 }, { "epoch": 0.24, "logps_train/chosen": -59.14228057861328, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -188.49761962890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5707817077636719, "rewards_train/margins": 6.629429817199707, "rewards_train/rejected": -6.058648109436035, "step": 494 }, { "epoch": 0.24, "learning_rate": 9.973991127174832e-07, "loss": 0.0047, "step": 495 }, { "epoch": 0.24, "logps_train/chosen": -59.176082611083984, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -187.02516174316406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6052435040473938, "rewards_train/margins": 6.607466042041779, "rewards_train/rejected": -6.002222537994385, "step": 495 }, { "epoch": 0.24, "learning_rate": 9.973713944496178e-07, "loss": 0.0053, "step": 496 }, { "epoch": 0.24, "logps_train/chosen": -61.42981719970703, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -131.125, "logps_train/rejected": -194.76797485351562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5456410646438599, "rewards_train/margins": 6.915115475654602, "rewards_train/rejected": -6.369474411010742, "step": 496 }, { "epoch": 0.24, "learning_rate": 9.973435296527833e-07, "loss": 0.0026, "step": 497 }, { "epoch": 0.24, "logps_train/chosen": -58.36516571044922, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -186.0380859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5418034195899963, "rewards_train/margins": 6.468562185764313, "rewards_train/rejected": -5.926758766174316, "step": 497 }, { "epoch": 0.25, "learning_rate": 9.97315518335189e-07, "loss": 0.0061, "step": 498 }, { "epoch": 0.25, "logps_train/chosen": -59.60642623901367, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -189.81712341308594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5198019742965698, "rewards_train/margins": 6.82050883769989, "rewards_train/rejected": -6.30070686340332, "step": 498 }, { "epoch": 0.25, "learning_rate": 9.972873605050875e-07, "loss": 0.0068, "step": 499 }, { "epoch": 0.25, "logps_train/chosen": -58.38212585449219, "logps_train/ref_chosen": -62.21875, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -182.16427612304688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3831254243850708, "rewards_train/margins": 6.208488583564758, "rewards_train/rejected": -5.8253631591796875, "step": 499 }, { "epoch": 0.25, "learning_rate": 9.972590561707738e-07, "loss": 0.016, "step": 500 }, { "epoch": 0.25, "logps_train/chosen": -57.97902297973633, "logps_train/ref_chosen": -61.84375, "logps_train/ref_rejected": -119.75, "logps_train/rejected": -178.261962890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.38676556944847107, "rewards_train/margins": 6.238351911306381, "rewards_train/rejected": -5.85158634185791, "step": 500 }, { "epoch": 0.25, "learning_rate": 9.97230605340587e-07, "loss": 0.0067, "step": 501 }, { "epoch": 0.25, "logps_train/chosen": -59.83732604980469, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -187.2430419921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.38775205612182617, "rewards_train/margins": 6.704684734344482, "rewards_train/rejected": -6.316932678222656, "step": 501 }, { "epoch": 0.25, "learning_rate": 9.972020080229085e-07, "loss": 0.0084, "step": 502 }, { "epoch": 0.25, "logps_train/chosen": -59.53102111816406, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -181.42166137695312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4905502498149872, "rewards_train/margins": 6.409278124570847, "rewards_train/rejected": -5.918727874755859, "step": 502 }, { "epoch": 0.25, "learning_rate": 9.971732642261633e-07, "loss": 0.007, "step": 503 }, { "epoch": 0.25, "logps_train/chosen": -56.4586067199707, "logps_train/ref_chosen": -61.5625, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -180.9408416748047, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.508948802947998, "rewards_train/margins": 6.277252674102783, "rewards_train/rejected": -5.768303871154785, "step": 503 }, { "epoch": 0.25, "learning_rate": 9.9714437395882e-07, "loss": 0.0082, "step": 504 }, { "epoch": 0.25, "logps_train/chosen": -57.60889434814453, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -187.1592559814453, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4901847839355469, "rewards_train/margins": 6.701520919799805, "rewards_train/rejected": -6.211336135864258, "step": 504 }, { "epoch": 0.25, "learning_rate": 9.971153372293892e-07, "loss": 0.0044, "step": 505 }, { "epoch": 0.25, "logps_train/chosen": -58.334354400634766, "logps_train/ref_chosen": -62.53125, "logps_train/ref_rejected": -121.125, "logps_train/rejected": -178.87059020996094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.41946977376937866, "rewards_train/margins": 6.190806329250336, "rewards_train/rejected": -5.771336555480957, "step": 505 }, { "epoch": 0.25, "learning_rate": 9.970861540464258e-07, "loss": 0.0093, "step": 506 }, { "epoch": 0.25, "logps_train/chosen": -61.15740966796875, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -185.45223999023438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.409015029668808, "rewards_train/margins": 6.468790382146835, "rewards_train/rejected": -6.059775352478027, "step": 506 }, { "epoch": 0.25, "learning_rate": 9.97056824418527e-07, "loss": 0.0046, "step": 507 }, { "epoch": 0.25, "logps_train/chosen": -59.163570404052734, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -192.1129150390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5894535779953003, "rewards_train/margins": 6.9265254735946655, "rewards_train/rejected": -6.337071895599365, "step": 507 }, { "epoch": 0.25, "learning_rate": 9.970273483543338e-07, "loss": 0.0028, "step": 508 }, { "epoch": 0.25, "logps_train/chosen": -59.86774444580078, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -188.9924774169922, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.460296094417572, "rewards_train/margins": 6.429709851741791, "rewards_train/rejected": -5.969413757324219, "step": 508 }, { "epoch": 0.25, "learning_rate": 9.9699772586253e-07, "loss": 0.009, "step": 509 }, { "epoch": 0.25, "logps_train/chosen": -60.77268981933594, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -185.81390380859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4150652289390564, "rewards_train/margins": 6.42301744222641, "rewards_train/rejected": -6.0079522132873535, "step": 509 }, { "epoch": 0.25, "learning_rate": 9.969679569518426e-07, "loss": 0.0077, "step": 510 }, { "epoch": 0.25, "logps_train/chosen": -59.44605255126953, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -183.70065307617188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.48769456148147583, "rewards_train/margins": 6.390475571155548, "rewards_train/rejected": -5.902781009674072, "step": 510 }, { "epoch": 0.25, "learning_rate": 9.969380416310416e-07, "loss": 0.0099, "step": 511 }, { "epoch": 0.25, "logps_train/chosen": -59.725830078125, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -189.05081176757812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.45866715908050537, "rewards_train/margins": 6.610770583152771, "rewards_train/rejected": -6.152103424072266, "step": 511 }, { "epoch": 0.25, "learning_rate": 9.969079799089404e-07, "loss": 0.0065, "step": 512 }, { "epoch": 0.25, "logps_train/chosen": -60.139495849609375, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -133.25, "logps_train/rejected": -197.49502563476562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6361966729164124, "rewards_train/margins": 7.054448664188385, "rewards_train/rejected": -6.418251991271973, "step": 512 }, { "epoch": 0.25, "learning_rate": 9.968777717943952e-07, "loss": 0.003, "step": 513 }, { "epoch": 0.25, "logps_train/chosen": -59.51324462890625, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -184.52227783203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4802183508872986, "rewards_train/margins": 6.32941859960556, "rewards_train/rejected": -5.849200248718262, "step": 513 }, { "epoch": 0.25, "learning_rate": 9.968474172963056e-07, "loss": 0.0097, "step": 514 }, { "epoch": 0.25, "logps_train/chosen": -57.340797424316406, "logps_train/ref_chosen": -62.4375, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -190.23435974121094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5095725059509277, "rewards_train/margins": 6.725194931030273, "rewards_train/rejected": -6.215622425079346, "step": 514 }, { "epoch": 0.25, "learning_rate": 9.968169164236144e-07, "loss": 0.0056, "step": 515 }, { "epoch": 0.25, "logps_train/chosen": -58.80620193481445, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -185.95799255371094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3927198052406311, "rewards_train/margins": 6.652630388736725, "rewards_train/rejected": -6.259910583496094, "step": 515 }, { "epoch": 0.25, "learning_rate": 9.96786269185307e-07, "loss": 0.0076, "step": 516 }, { "epoch": 0.25, "logps_train/chosen": -61.56425476074219, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -194.09439086914062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.22350633144378662, "rewards_train/margins": 6.849058985710144, "rewards_train/rejected": -6.625552654266357, "step": 516 }, { "epoch": 0.25, "learning_rate": 9.967554755904125e-07, "loss": 0.0039, "step": 517 }, { "epoch": 0.25, "logps_train/chosen": -59.491668701171875, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -193.47254943847656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.45991551876068115, "rewards_train/margins": 6.869329810142517, "rewards_train/rejected": -6.409414291381836, "step": 517 }, { "epoch": 0.26, "learning_rate": 9.967245356480032e-07, "loss": 0.0059, "step": 518 }, { "epoch": 0.26, "logps_train/chosen": -59.00788116455078, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -192.39456176757812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5778249502182007, "rewards_train/margins": 6.974506735801697, "rewards_train/rejected": -6.396681785583496, "step": 518 }, { "epoch": 0.26, "learning_rate": 9.966934493671936e-07, "loss": 0.0042, "step": 519 }, { "epoch": 0.26, "logps_train/chosen": -58.051639556884766, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -189.9342041015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6457637548446655, "rewards_train/margins": 6.985228896141052, "rewards_train/rejected": -6.339465141296387, "step": 519 }, { "epoch": 0.26, "learning_rate": 9.966622167571425e-07, "loss": 0.0067, "step": 520 }, { "epoch": 0.26, "logps_train/chosen": -61.8983154296875, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -188.76246643066406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.42318129539489746, "rewards_train/margins": 6.495229482650757, "rewards_train/rejected": -6.072048187255859, "step": 520 }, { "epoch": 0.26, "learning_rate": 9.966308378270509e-07, "loss": 0.0078, "step": 521 }, { "epoch": 0.26, "logps_train/chosen": -58.6904296875, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -186.37457275390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5333494544029236, "rewards_train/margins": 6.681450307369232, "rewards_train/rejected": -6.148100852966309, "step": 521 }, { "epoch": 0.26, "learning_rate": 9.965993125861633e-07, "loss": 0.0101, "step": 522 }, { "epoch": 0.26, "logps_train/chosen": -56.95146560668945, "logps_train/ref_chosen": -61.4375, "logps_train/ref_rejected": -123.1875, "logps_train/rejected": -180.8916778564453, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.44835910201072693, "rewards_train/margins": 6.219361871480942, "rewards_train/rejected": -5.771002769470215, "step": 522 }, { "epoch": 0.26, "learning_rate": 9.965676410437674e-07, "loss": 0.0069, "step": 523 }, { "epoch": 0.26, "logps_train/chosen": -58.86500549316406, "logps_train/ref_chosen": -61.6875, "logps_train/ref_rejected": -119.8125, "logps_train/rejected": -176.08657836914062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2828110158443451, "rewards_train/margins": 5.909339755773544, "rewards_train/rejected": -5.626528739929199, "step": 523 }, { "epoch": 0.26, "learning_rate": 9.965358232091936e-07, "loss": 0.0121, "step": 524 }, { "epoch": 0.26, "logps_train/chosen": -58.583946228027344, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -192.06436157226562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5477092266082764, "rewards_train/margins": 6.946820020675659, "rewards_train/rejected": -6.399110794067383, "step": 524 }, { "epoch": 0.26, "learning_rate": 9.965038590918157e-07, "loss": 0.0042, "step": 525 }, { "epoch": 0.26, "logps_train/chosen": -55.84759521484375, "logps_train/ref_chosen": -61.125, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -191.58583068847656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5287657380104065, "rewards_train/margins": 7.11947637796402, "rewards_train/rejected": -6.590710639953613, "step": 525 }, { "epoch": 0.26, "learning_rate": 9.964717487010508e-07, "loss": 0.0023, "step": 526 }, { "epoch": 0.26, "logps_train/chosen": -57.44929504394531, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -185.89920043945312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4862712025642395, "rewards_train/margins": 6.690644562244415, "rewards_train/rejected": -6.204373359680176, "step": 526 }, { "epoch": 0.26, "learning_rate": 9.964394920463585e-07, "loss": 0.0084, "step": 527 }, { "epoch": 0.26, "logps_train/chosen": -59.90095520019531, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -191.58029174804688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.44676968455314636, "rewards_train/margins": 6.932095497846603, "rewards_train/rejected": -6.485325813293457, "step": 527 }, { "epoch": 0.26, "learning_rate": 9.96407089137242e-07, "loss": 0.0039, "step": 528 }, { "epoch": 0.26, "logps_train/chosen": -60.837242126464844, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -120.0625, "logps_train/rejected": -179.71542358398438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3168618679046631, "rewards_train/margins": 6.279175043106079, "rewards_train/rejected": -5.962313175201416, "step": 528 }, { "epoch": 0.26, "learning_rate": 9.963745399832475e-07, "loss": 0.0143, "step": 529 }, { "epoch": 0.26, "logps_train/chosen": -58.2175407409668, "logps_train/ref_chosen": -62.875, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -194.28121948242188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4669666588306427, "rewards_train/margins": 7.162667781114578, "rewards_train/rejected": -6.6957011222839355, "step": 529 }, { "epoch": 0.26, "learning_rate": 9.96341844593964e-07, "loss": 0.0027, "step": 530 }, { "epoch": 0.26, "logps_train/chosen": -59.41041946411133, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -195.08462524414062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5556867122650146, "rewards_train/margins": 7.113172769546509, "rewards_train/rejected": -6.557486057281494, "step": 530 }, { "epoch": 0.26, "learning_rate": 9.96309002979024e-07, "loss": 0.0065, "step": 531 }, { "epoch": 0.26, "logps_train/chosen": -58.674537658691406, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -119.5, "logps_train/rejected": -180.68826293945312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5046901702880859, "rewards_train/margins": 6.623662948608398, "rewards_train/rejected": -6.1189727783203125, "step": 531 }, { "epoch": 0.26, "learning_rate": 9.962760151481027e-07, "loss": 0.006, "step": 532 }, { "epoch": 0.26, "logps_train/chosen": -58.70928955078125, "logps_train/ref_chosen": -62.21875, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -186.37857055664062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3504090905189514, "rewards_train/margins": 6.715755999088287, "rewards_train/rejected": -6.365346908569336, "step": 532 }, { "epoch": 0.26, "learning_rate": 9.962428811109186e-07, "loss": 0.0079, "step": 533 }, { "epoch": 0.26, "logps_train/chosen": -57.629005432128906, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -189.4168701171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4875389039516449, "rewards_train/margins": 7.0379177033901215, "rewards_train/rejected": -6.550378799438477, "step": 533 }, { "epoch": 0.26, "learning_rate": 9.962096008772332e-07, "loss": 0.0046, "step": 534 }, { "epoch": 0.26, "logps_train/chosen": -58.43973159790039, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -187.55845642089844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4972868263721466, "rewards_train/margins": 6.890485793352127, "rewards_train/rejected": -6.3931989669799805, "step": 534 }, { "epoch": 0.26, "learning_rate": 9.96176174456851e-07, "loss": 0.0076, "step": 535 }, { "epoch": 0.26, "logps_train/chosen": -62.40812683105469, "logps_train/ref_chosen": -67.8125, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -191.3367919921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5425853729248047, "rewards_train/margins": 6.847845554351807, "rewards_train/rejected": -6.305260181427002, "step": 535 }, { "epoch": 0.26, "learning_rate": 9.961426018596198e-07, "loss": 0.005, "step": 536 }, { "epoch": 0.26, "logps_train/chosen": -56.95254898071289, "logps_train/ref_chosen": -61.5625, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -191.93994140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4608485698699951, "rewards_train/margins": 6.976914167404175, "rewards_train/rejected": -6.51606559753418, "step": 536 }, { "epoch": 0.26, "learning_rate": 9.961088830954303e-07, "loss": 0.0051, "step": 537 }, { "epoch": 0.26, "logps_train/chosen": -56.77735137939453, "logps_train/ref_chosen": -62.03125, "logps_train/ref_rejected": -119.6875, "logps_train/rejected": -183.10433959960938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5256098508834839, "rewards_train/margins": 6.8668540716171265, "rewards_train/rejected": -6.341244220733643, "step": 537 }, { "epoch": 0.26, "learning_rate": 9.960750181742161e-07, "loss": 0.0037, "step": 538 }, { "epoch": 0.26, "logps_train/chosen": -59.73454284667969, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -186.36129760742188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.44363561272621155, "rewards_train/margins": 6.753787666559219, "rewards_train/rejected": -6.310152053833008, "step": 538 }, { "epoch": 0.27, "learning_rate": 9.960410071059542e-07, "loss": 0.0062, "step": 539 }, { "epoch": 0.27, "logps_train/chosen": -60.801124572753906, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -192.15069580078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4009421169757843, "rewards_train/margins": 6.870211869478226, "rewards_train/rejected": -6.469269752502441, "step": 539 }, { "epoch": 0.27, "learning_rate": 9.960068499006644e-07, "loss": 0.0046, "step": 540 }, { "epoch": 0.27, "logps_train/chosen": -58.99559020996094, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -191.14678955078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5159195065498352, "rewards_train/margins": 6.954963505268097, "rewards_train/rejected": -6.439043998718262, "step": 540 }, { "epoch": 0.27, "learning_rate": 9.959725465684097e-07, "loss": 0.0049, "step": 541 }, { "epoch": 0.27, "logps_train/chosen": -62.10329818725586, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -197.5949249267578, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4550999701023102, "rewards_train/margins": 7.2030209600925446, "rewards_train/rejected": -6.747920989990234, "step": 541 }, { "epoch": 0.27, "learning_rate": 9.95938097119296e-07, "loss": 0.005, "step": 542 }, { "epoch": 0.27, "logps_train/chosen": -58.12018585205078, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -184.05799865722656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5379325151443481, "rewards_train/margins": 6.717218518257141, "rewards_train/rejected": -6.179286003112793, "step": 542 }, { "epoch": 0.27, "learning_rate": 9.959035015634725e-07, "loss": 0.008, "step": 543 }, { "epoch": 0.27, "logps_train/chosen": -61.47998809814453, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -191.08880615234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4384758472442627, "rewards_train/margins": 6.91234564781189, "rewards_train/rejected": -6.473869800567627, "step": 543 }, { "epoch": 0.27, "learning_rate": 9.958687599111312e-07, "loss": 0.0041, "step": 544 }, { "epoch": 0.27, "logps_train/chosen": -58.34717559814453, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -191.78182983398438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4731433689594269, "rewards_train/margins": 7.105330735445023, "rewards_train/rejected": -6.632187366485596, "step": 544 }, { "epoch": 0.27, "learning_rate": 9.958338721725073e-07, "loss": 0.0045, "step": 545 }, { "epoch": 0.27, "logps_train/chosen": -60.34191131591797, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -132.875, "logps_train/rejected": -204.01107788085938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5369510650634766, "rewards_train/margins": 7.651634216308594, "rewards_train/rejected": -7.114683151245117, "step": 545 }, { "epoch": 0.27, "learning_rate": 9.95798838357879e-07, "loss": 0.0018, "step": 546 }, { "epoch": 0.27, "logps_train/chosen": -61.72746276855469, "logps_train/ref_chosen": -67.0625, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -194.69967651367188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5355542898178101, "rewards_train/margins": 7.296830534934998, "rewards_train/rejected": -6.7612762451171875, "step": 546 }, { "epoch": 0.27, "learning_rate": 9.95763658477567e-07, "loss": 0.0049, "step": 547 }, { "epoch": 0.27, "logps_train/chosen": -56.52395248413086, "logps_train/ref_chosen": -61.03125, "logps_train/ref_rejected": -119.6875, "logps_train/rejected": -183.7753143310547, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.45068085193634033, "rewards_train/margins": 6.856923699378967, "rewards_train/rejected": -6.406242847442627, "step": 547 }, { "epoch": 0.27, "learning_rate": 9.957283325419363e-07, "loss": 0.0057, "step": 548 }, { "epoch": 0.27, "logps_train/chosen": -56.55970001220703, "logps_train/ref_chosen": -61.8125, "logps_train/ref_rejected": -121.25, "logps_train/rejected": -184.52503967285156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5249134302139282, "rewards_train/margins": 6.849585175514221, "rewards_train/rejected": -6.324671745300293, "step": 548 }, { "epoch": 0.27, "learning_rate": 9.956928605613935e-07, "loss": 0.0088, "step": 549 }, { "epoch": 0.27, "logps_train/chosen": -58.58210754394531, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -196.29193115234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6399828195571899, "rewards_train/margins": 7.34622585773468, "rewards_train/rejected": -6.70624303817749, "step": 549 }, { "epoch": 0.27, "learning_rate": 9.956572425463894e-07, "loss": 0.0033, "step": 550 }, { "epoch": 0.27, "logps_train/chosen": -59.594573974609375, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -196.49136352539062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4928130507469177, "rewards_train/margins": 7.287603318691254, "rewards_train/rejected": -6.794790267944336, "step": 550 }, { "epoch": 0.27, "learning_rate": 9.956214785074168e-07, "loss": 0.0029, "step": 551 }, { "epoch": 0.27, "logps_train/chosen": -54.52848815917969, "logps_train/ref_chosen": -61.875, "logps_train/ref_rejected": -121.5, "logps_train/rejected": -183.19448852539062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.7351638674736023, "rewards_train/margins": 6.906176745891571, "rewards_train/rejected": -6.171012878417969, "step": 551 }, { "epoch": 0.27, "learning_rate": 9.955855684550125e-07, "loss": 0.0059, "step": 552 }, { "epoch": 0.27, "logps_train/chosen": -59.658809661865234, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -186.82693481445312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4558963477611542, "rewards_train/margins": 6.865152329206467, "rewards_train/rejected": -6.4092559814453125, "step": 552 }, { "epoch": 0.27, "learning_rate": 9.955495123997556e-07, "loss": 0.0034, "step": 553 }, { "epoch": 0.27, "logps_train/chosen": -59.85333251953125, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -191.81175231933594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3962099850177765, "rewards_train/margins": 6.79335144162178, "rewards_train/rejected": -6.397141456604004, "step": 553 }, { "epoch": 0.27, "learning_rate": 9.955133103522683e-07, "loss": 0.0079, "step": 554 }, { "epoch": 0.27, "logps_train/chosen": -60.228729248046875, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -192.61325073242188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5828155279159546, "rewards_train/margins": 7.283641695976257, "rewards_train/rejected": -6.700826168060303, "step": 554 }, { "epoch": 0.27, "learning_rate": 9.954769623232165e-07, "loss": 0.0029, "step": 555 }, { "epoch": 0.27, "logps_train/chosen": -59.284523010253906, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -123.3125, "logps_train/rejected": -188.3362579345703, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.42022940516471863, "rewards_train/margins": 6.921335190534592, "rewards_train/rejected": -6.501105785369873, "step": 555 }, { "epoch": 0.27, "learning_rate": 9.95440468323308e-07, "loss": 0.003, "step": 556 }, { "epoch": 0.27, "logps_train/chosen": -59.79234313964844, "logps_train/ref_chosen": -62.65625, "logps_train/ref_rejected": -123.3125, "logps_train/rejected": -186.305908203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2856829762458801, "rewards_train/margins": 6.585706055164337, "rewards_train/rejected": -6.300023078918457, "step": 556 }, { "epoch": 0.27, "learning_rate": 9.954038283632945e-07, "loss": 0.0077, "step": 557 }, { "epoch": 0.27, "logps_train/chosen": -57.830650329589844, "logps_train/ref_chosen": -61.71875, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -192.6937255859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.38915175199508667, "rewards_train/margins": 7.20969694852829, "rewards_train/rejected": -6.820545196533203, "step": 557 }, { "epoch": 0.27, "learning_rate": 9.953670424539702e-07, "loss": 0.0059, "step": 558 }, { "epoch": 0.27, "logps_train/chosen": -59.281524658203125, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -192.25357055664062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5105193853378296, "rewards_train/margins": 7.016490817070007, "rewards_train/rejected": -6.505971431732178, "step": 558 }, { "epoch": 0.28, "learning_rate": 9.953301106061728e-07, "loss": 0.0064, "step": 559 }, { "epoch": 0.28, "logps_train/chosen": -58.980926513671875, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -189.50396728515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5288116931915283, "rewards_train/margins": 7.1256935596466064, "rewards_train/rejected": -6.596881866455078, "step": 559 }, { "epoch": 0.28, "learning_rate": 9.952930328307824e-07, "loss": 0.0063, "step": 560 }, { "epoch": 0.28, "logps_train/chosen": -62.328765869140625, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -199.20603942871094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4637542963027954, "rewards_train/margins": 7.435433030128479, "rewards_train/rejected": -6.971678733825684, "step": 560 }, { "epoch": 0.28, "learning_rate": 9.95255809138722e-07, "loss": 0.0028, "step": 561 }, { "epoch": 0.28, "logps_train/chosen": -58.80804443359375, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -194.1377410888672, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.549127459526062, "rewards_train/margins": 7.2792099714279175, "rewards_train/rejected": -6.7300825119018555, "step": 561 }, { "epoch": 0.28, "learning_rate": 9.95218439540959e-07, "loss": 0.0044, "step": 562 }, { "epoch": 0.28, "logps_train/chosen": -61.255306243896484, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -196.99142456054688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5354557037353516, "rewards_train/margins": 7.358426094055176, "rewards_train/rejected": -6.822970390319824, "step": 562 }, { "epoch": 0.28, "learning_rate": 9.951809240485016e-07, "loss": 0.0029, "step": 563 }, { "epoch": 0.28, "logps_train/chosen": -61.2450065612793, "logps_train/ref_chosen": -66.9375, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -196.5452880859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5705191493034363, "rewards_train/margins": 7.377881705760956, "rewards_train/rejected": -6.8073625564575195, "step": 563 }, { "epoch": 0.28, "learning_rate": 9.951432626724027e-07, "loss": 0.0037, "step": 564 }, { "epoch": 0.28, "logps_train/chosen": -58.316341400146484, "logps_train/ref_chosen": -61.84375, "logps_train/ref_rejected": -122.1875, "logps_train/rejected": -187.9486083984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.35293635725975037, "rewards_train/margins": 6.93109855055809, "rewards_train/rejected": -6.57816219329834, "step": 564 }, { "epoch": 0.28, "learning_rate": 9.951054554237577e-07, "loss": 0.0044, "step": 565 }, { "epoch": 0.28, "logps_train/chosen": -60.84728240966797, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -191.338623046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5740118622779846, "rewards_train/margins": 6.922034323215485, "rewards_train/rejected": -6.3480224609375, "step": 565 }, { "epoch": 0.28, "learning_rate": 9.950675023137044e-07, "loss": 0.006, "step": 566 }, { "epoch": 0.28, "logps_train/chosen": -60.407752990722656, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -122.0625, "logps_train/rejected": -189.7023468017578, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.39442965388298035, "rewards_train/margins": 7.159830063581467, "rewards_train/rejected": -6.765400409698486, "step": 566 }, { "epoch": 0.28, "learning_rate": 9.950294033534245e-07, "loss": 0.0091, "step": 567 }, { "epoch": 0.28, "logps_train/chosen": -61.08946990966797, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -194.20944213867188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.31893426179885864, "rewards_train/margins": 7.212924778461456, "rewards_train/rejected": -6.893990516662598, "step": 567 }, { "epoch": 0.28, "learning_rate": 9.949911585541422e-07, "loss": 0.0044, "step": 568 }, { "epoch": 0.28, "logps_train/chosen": -62.172462463378906, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -197.40936279296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.36844688653945923, "rewards_train/margins": 7.296297609806061, "rewards_train/rejected": -6.927850723266602, "step": 568 }, { "epoch": 0.28, "learning_rate": 9.949527679271242e-07, "loss": 0.0025, "step": 569 }, { "epoch": 0.28, "logps_train/chosen": -59.98933029174805, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -195.18453979492188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.47428470849990845, "rewards_train/margins": 7.353577196598053, "rewards_train/rejected": -6.8792924880981445, "step": 569 }, { "epoch": 0.28, "learning_rate": 9.94914231483681e-07, "loss": 0.0033, "step": 570 }, { "epoch": 0.28, "logps_train/chosen": -59.814857482910156, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -132.25, "logps_train/rejected": -200.45751953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5881435871124268, "rewards_train/margins": 7.4078209400177, "rewards_train/rejected": -6.819677352905273, "step": 570 }, { "epoch": 0.28, "learning_rate": 9.94875549235166e-07, "loss": 0.0027, "step": 571 }, { "epoch": 0.28, "logps_train/chosen": -58.15707015991211, "logps_train/ref_chosen": -62.4375, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -191.54251098632812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.42884892225265503, "rewards_train/margins": 7.026948630809784, "rewards_train/rejected": -6.598099708557129, "step": 571 }, { "epoch": 0.28, "learning_rate": 9.948367211929745e-07, "loss": 0.0088, "step": 572 }, { "epoch": 0.28, "logps_train/chosen": -56.462554931640625, "logps_train/ref_chosen": -61.3125, "logps_train/ref_rejected": -120.875, "logps_train/rejected": -188.5362548828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.48548251390457153, "rewards_train/margins": 7.248582184314728, "rewards_train/rejected": -6.763099670410156, "step": 572 }, { "epoch": 0.28, "learning_rate": 9.94797747368546e-07, "loss": 0.0035, "step": 573 }, { "epoch": 0.28, "logps_train/chosen": -64.29530334472656, "logps_train/ref_chosen": -68.125, "logps_train/ref_rejected": -131.75, "logps_train/rejected": -205.7431182861328, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3799915015697479, "rewards_train/margins": 7.773151367902756, "rewards_train/rejected": -7.393159866333008, "step": 573 }, { "epoch": 0.28, "learning_rate": 9.947586277733622e-07, "loss": 0.0022, "step": 574 }, { "epoch": 0.28, "logps_train/chosen": -60.47852325439453, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -121.125, "logps_train/rejected": -187.14093017578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3961908519268036, "rewards_train/margins": 6.996662348508835, "rewards_train/rejected": -6.600471496582031, "step": 574 }, { "epoch": 0.28, "learning_rate": 9.947193624189483e-07, "loss": 0.0041, "step": 575 }, { "epoch": 0.28, "logps_train/chosen": -59.101993560791016, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -191.07252502441406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5939998626708984, "rewards_train/margins": 7.254817008972168, "rewards_train/rejected": -6.6608171463012695, "step": 575 }, { "epoch": 0.28, "learning_rate": 9.94679951316872e-07, "loss": 0.0066, "step": 576 }, { "epoch": 0.28, "logps_train/chosen": -59.54004669189453, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -188.83657836914062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.39160045981407166, "rewards_train/margins": 6.972622245550156, "rewards_train/rejected": -6.581021785736084, "step": 576 }, { "epoch": 0.28, "learning_rate": 9.94640394478744e-07, "loss": 0.0054, "step": 577 }, { "epoch": 0.28, "logps_train/chosen": -58.45018005371094, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -192.09408569335938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.60185706615448, "rewards_train/margins": 7.504331469535828, "rewards_train/rejected": -6.902474403381348, "step": 577 }, { "epoch": 0.28, "learning_rate": 9.946006919162182e-07, "loss": 0.0027, "step": 578 }, { "epoch": 0.28, "logps_train/chosen": -60.47890090942383, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -199.28125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6060160398483276, "rewards_train/margins": 7.4558199644088745, "rewards_train/rejected": -6.849803924560547, "step": 578 }, { "epoch": 0.29, "learning_rate": 9.94560843640991e-07, "loss": 0.0053, "step": 579 }, { "epoch": 0.29, "logps_train/chosen": -63.42510223388672, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -196.3662109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.35651347041130066, "rewards_train/margins": 7.165985554456711, "rewards_train/rejected": -6.80947208404541, "step": 579 }, { "epoch": 0.29, "learning_rate": 9.945208496648022e-07, "loss": 0.0028, "step": 580 }, { "epoch": 0.29, "logps_train/chosen": -59.95926284790039, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -120.4375, "logps_train/rejected": -185.04278564453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.33376121520996094, "rewards_train/margins": 6.794876575469971, "rewards_train/rejected": -6.46111536026001, "step": 580 }, { "epoch": 0.29, "learning_rate": 9.94480709999434e-07, "loss": 0.0071, "step": 581 }, { "epoch": 0.29, "logps_train/chosen": -61.3454475402832, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -197.9508514404297, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.48557257652282715, "rewards_train/margins": 7.386029005050659, "rewards_train/rejected": -6.900456428527832, "step": 581 }, { "epoch": 0.29, "learning_rate": 9.944404246567121e-07, "loss": 0.0035, "step": 582 }, { "epoch": 0.29, "logps_train/chosen": -62.0455436706543, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -199.3655242919922, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3828968107700348, "rewards_train/margins": 7.520034700632095, "rewards_train/rejected": -7.1371378898620605, "step": 582 }, { "epoch": 0.29, "learning_rate": 9.943999936485048e-07, "loss": 0.0025, "step": 583 }, { "epoch": 0.29, "logps_train/chosen": -60.82963943481445, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -205.2822723388672, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4312448501586914, "rewards_train/margins": 7.880956172943115, "rewards_train/rejected": -7.449711322784424, "step": 583 }, { "epoch": 0.29, "learning_rate": 9.943594169867232e-07, "loss": 0.003, "step": 584 }, { "epoch": 0.29, "logps_train/chosen": -60.654380798339844, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -193.15322875976562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.338858962059021, "rewards_train/margins": 7.132991671562195, "rewards_train/rejected": -6.794132709503174, "step": 584 }, { "epoch": 0.29, "learning_rate": 9.943186946833216e-07, "loss": 0.004, "step": 585 }, { "epoch": 0.29, "logps_train/chosen": -59.32011795043945, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -196.17898559570312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6048535108566284, "rewards_train/margins": 7.639645934104919, "rewards_train/rejected": -7.034792423248291, "step": 585 }, { "epoch": 0.29, "learning_rate": 9.94277826750297e-07, "loss": 0.0019, "step": 586 }, { "epoch": 0.29, "logps_train/chosen": -56.85908126831055, "logps_train/ref_chosen": -62.0625, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -192.40121459960938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5198535323143005, "rewards_train/margins": 7.380677163600922, "rewards_train/rejected": -6.860823631286621, "step": 586 }, { "epoch": 0.29, "learning_rate": 9.94236813199689e-07, "loss": 0.0034, "step": 587 }, { "epoch": 0.29, "logps_train/chosen": -58.9096794128418, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -202.28762817382812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.47133690118789673, "rewards_train/margins": 7.716504633426666, "rewards_train/rejected": -7.2451677322387695, "step": 587 }, { "epoch": 0.29, "learning_rate": 9.941956540435812e-07, "loss": 0.0017, "step": 588 }, { "epoch": 0.29, "logps_train/chosen": -62.2088508605957, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -201.8490447998047, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.45807015895843506, "rewards_train/margins": 7.660649657249451, "rewards_train/rejected": -7.202579498291016, "step": 588 }, { "epoch": 0.29, "learning_rate": 9.94154349294099e-07, "loss": 0.0029, "step": 589 }, { "epoch": 0.29, "logps_train/chosen": -59.35250473022461, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -189.3775634765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.42998385429382324, "rewards_train/margins": 7.082634210586548, "rewards_train/rejected": -6.652650356292725, "step": 589 }, { "epoch": 0.29, "learning_rate": 9.94112898963411e-07, "loss": 0.004, "step": 590 }, { "epoch": 0.29, "logps_train/chosen": -60.2504997253418, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -197.28863525390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5166983008384705, "rewards_train/margins": 7.439800679683685, "rewards_train/rejected": -6.923102378845215, "step": 590 }, { "epoch": 0.29, "learning_rate": 9.94071303063729e-07, "loss": 0.0031, "step": 591 }, { "epoch": 0.29, "logps_train/chosen": -58.25567626953125, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -192.8437042236328, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6022399067878723, "rewards_train/margins": 7.637831032276154, "rewards_train/rejected": -7.035591125488281, "step": 591 }, { "epoch": 0.29, "learning_rate": 9.940295616073069e-07, "loss": 0.0027, "step": 592 }, { "epoch": 0.29, "logps_train/chosen": -58.881839752197266, "logps_train/ref_chosen": -60.875, "logps_train/ref_rejected": -120.9375, "logps_train/rejected": -188.51220703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.19941385090351105, "rewards_train/margins": 6.955370977520943, "rewards_train/rejected": -6.755957126617432, "step": 592 }, { "epoch": 0.29, "learning_rate": 9.939876746064425e-07, "loss": 0.0073, "step": 593 }, { "epoch": 0.29, "logps_train/chosen": -60.09261703491211, "logps_train/ref_chosen": -62.5625, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -203.52310180664062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2457185983657837, "rewards_train/margins": 7.5825992822647095, "rewards_train/rejected": -7.336880683898926, "step": 593 }, { "epoch": 0.29, "learning_rate": 9.939456420734758e-07, "loss": 0.0025, "step": 594 }, { "epoch": 0.29, "logps_train/chosen": -57.527008056640625, "logps_train/ref_chosen": -60.15625, "logps_train/ref_rejected": -119.0, "logps_train/rejected": -186.5145263671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2634609639644623, "rewards_train/margins": 7.013106197118759, "rewards_train/rejected": -6.749645233154297, "step": 594 }, { "epoch": 0.29, "learning_rate": 9.9390346402079e-07, "loss": 0.0053, "step": 595 }, { "epoch": 0.29, "logps_train/chosen": -58.953582763671875, "logps_train/ref_chosen": -62.5625, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -194.70077514648438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3595735430717468, "rewards_train/margins": 7.3903438448905945, "rewards_train/rejected": -7.030770301818848, "step": 595 }, { "epoch": 0.29, "learning_rate": 9.938611404608111e-07, "loss": 0.0045, "step": 596 }, { "epoch": 0.29, "logps_train/chosen": -60.35564041137695, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -127.5625, "logps_train/rejected": -199.07814025878906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5236644744873047, "rewards_train/margins": 7.675424098968506, "rewards_train/rejected": -7.151759624481201, "step": 596 }, { "epoch": 0.29, "learning_rate": 9.938186714060074e-07, "loss": 0.0034, "step": 597 }, { "epoch": 0.29, "logps_train/chosen": -59.7989501953125, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -202.32073974609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4839233458042145, "rewards_train/margins": 7.7664856016635895, "rewards_train/rejected": -7.282562255859375, "step": 597 }, { "epoch": 0.29, "learning_rate": 9.937760568688911e-07, "loss": 0.0041, "step": 598 }, { "epoch": 0.29, "logps_train/chosen": -59.5618896484375, "logps_train/ref_chosen": -63.15625, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -196.57647705078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.35855692625045776, "rewards_train/margins": 7.470501124858856, "rewards_train/rejected": -7.111944198608398, "step": 598 }, { "epoch": 0.29, "learning_rate": 9.937332968620167e-07, "loss": 0.0033, "step": 599 }, { "epoch": 0.29, "logps_train/chosen": -58.867652893066406, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -198.28118896484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6995629668235779, "rewards_train/margins": 7.710397660732269, "rewards_train/rejected": -7.010834693908691, "step": 599 }, { "epoch": 0.3, "learning_rate": 9.936903913979815e-07, "loss": 0.0019, "step": 600 }, { "epoch": 0.3, "logps_train/chosen": -58.54576873779297, "logps_train/ref_chosen": -62.65625, "logps_train/ref_rejected": -119.875, "logps_train/rejected": -187.0011749267578, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4103156328201294, "rewards_train/margins": 7.123324036598206, "rewards_train/rejected": -6.713008403778076, "step": 600 }, { "epoch": 0.3, "learning_rate": 9.936473404894255e-07, "loss": 0.0084, "step": 601 }, { "epoch": 0.3, "logps_train/chosen": -60.300437927246094, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -121.875, "logps_train/rejected": -190.46376037597656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3151710331439972, "rewards_train/margins": 7.170629292726517, "rewards_train/rejected": -6.8554582595825195, "step": 601 }, { "epoch": 0.3, "learning_rate": 9.93604144149032e-07, "loss": 0.0058, "step": 602 }, { "epoch": 0.3, "logps_train/chosen": -62.421791076660156, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -197.53421020507812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3041589558124542, "rewards_train/margins": 7.404309064149857, "rewards_train/rejected": -7.100150108337402, "step": 602 }, { "epoch": 0.3, "learning_rate": 9.935608023895267e-07, "loss": 0.017, "step": 603 }, { "epoch": 0.3, "logps_train/chosen": -64.47624206542969, "logps_train/ref_chosen": -68.75, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -203.34425354003906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.42488545179367065, "rewards_train/margins": 7.75804203748703, "rewards_train/rejected": -7.333156585693359, "step": 603 }, { "epoch": 0.3, "learning_rate": 9.935173152236789e-07, "loss": 0.0014, "step": 604 }, { "epoch": 0.3, "logps_train/chosen": -58.621788024902344, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -196.4969024658203, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4681437611579895, "rewards_train/margins": 7.736487686634064, "rewards_train/rejected": -7.268343925476074, "step": 604 }, { "epoch": 0.3, "learning_rate": 9.934736826643e-07, "loss": 0.0027, "step": 605 }, { "epoch": 0.3, "logps_train/chosen": -57.614341735839844, "logps_train/ref_chosen": -63.03125, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -191.77639770507812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5401775240898132, "rewards_train/margins": 7.369527041912079, "rewards_train/rejected": -6.829349517822266, "step": 605 }, { "epoch": 0.3, "learning_rate": 9.93429904724244e-07, "loss": 0.0036, "step": 606 }, { "epoch": 0.3, "logps_train/chosen": -58.99812316894531, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -191.185546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.43202367424964905, "rewards_train/margins": 7.053412109613419, "rewards_train/rejected": -6.6213884353637695, "step": 606 }, { "epoch": 0.3, "learning_rate": 9.933859814164088e-07, "loss": 0.0048, "step": 607 }, { "epoch": 0.3, "logps_train/chosen": -58.548683166503906, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -121.25, "logps_train/rejected": -188.21533203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5454734563827515, "rewards_train/margins": 7.240201354026794, "rewards_train/rejected": -6.694727897644043, "step": 607 }, { "epoch": 0.3, "learning_rate": 9.933419127537338e-07, "loss": 0.0078, "step": 608 }, { "epoch": 0.3, "logps_train/chosen": -60.80663299560547, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -197.23162841796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.46088936924934387, "rewards_train/margins": 7.609248667955399, "rewards_train/rejected": -7.148359298706055, "step": 608 }, { "epoch": 0.3, "learning_rate": 9.932976987492028e-07, "loss": 0.0021, "step": 609 }, { "epoch": 0.3, "logps_train/chosen": -63.10047149658203, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -198.75502014160156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.37066540122032166, "rewards_train/margins": 7.319605201482773, "rewards_train/rejected": -6.948939800262451, "step": 609 }, { "epoch": 0.3, "learning_rate": 9.93253339415841e-07, "loss": 0.0029, "step": 610 }, { "epoch": 0.3, "logps_train/chosen": -62.41194534301758, "logps_train/ref_chosen": -67.75, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -195.01065063476562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.530997633934021, "rewards_train/margins": 7.405207991600037, "rewards_train/rejected": -6.874210357666016, "step": 610 }, { "epoch": 0.3, "learning_rate": 9.93208834766717e-07, "loss": 0.0054, "step": 611 }, { "epoch": 0.3, "logps_train/chosen": -58.01100158691406, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -193.37220764160156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5816876888275146, "rewards_train/margins": 7.572131872177124, "rewards_train/rejected": -6.990444183349609, "step": 611 }, { "epoch": 0.3, "learning_rate": 9.931641848149422e-07, "loss": 0.002, "step": 612 }, { "epoch": 0.3, "logps_train/chosen": -59.68783187866211, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -197.14474487304688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.45895126461982727, "rewards_train/margins": 7.756041795015335, "rewards_train/rejected": -7.297090530395508, "step": 612 }, { "epoch": 0.3, "learning_rate": 9.931193895736708e-07, "loss": 0.0028, "step": 613 }, { "epoch": 0.3, "logps_train/chosen": -58.937232971191406, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -198.436279296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3430930972099304, "rewards_train/margins": 7.717043101787567, "rewards_train/rejected": -7.373950004577637, "step": 613 }, { "epoch": 0.3, "learning_rate": 9.930744490560999e-07, "loss": 0.0033, "step": 614 }, { "epoch": 0.3, "logps_train/chosen": -58.78969192504883, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -194.7872314453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5099465847015381, "rewards_train/margins": 7.698680639266968, "rewards_train/rejected": -7.18873405456543, "step": 614 }, { "epoch": 0.3, "learning_rate": 9.930293632754688e-07, "loss": 0.0038, "step": 615 }, { "epoch": 0.3, "logps_train/chosen": -58.30712127685547, "logps_train/ref_chosen": -62.5625, "logps_train/ref_rejected": -117.125, "logps_train/rejected": -181.76487731933594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4240240156650543, "rewards_train/margins": 6.890257269144058, "rewards_train/rejected": -6.466233253479004, "step": 615 }, { "epoch": 0.3, "learning_rate": 9.92984132245061e-07, "loss": 0.0078, "step": 616 }, { "epoch": 0.3, "logps_train/chosen": -56.87347412109375, "logps_train/ref_chosen": -63.03125, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -195.4182586669922, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6157286167144775, "rewards_train/margins": 7.594078779220581, "rewards_train/rejected": -6.9783501625061035, "step": 616 }, { "epoch": 0.3, "learning_rate": 9.929387559782008e-07, "loss": 0.0039, "step": 617 }, { "epoch": 0.3, "logps_train/chosen": -56.99860382080078, "logps_train/ref_chosen": -62.1875, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -196.15203857421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5201348066329956, "rewards_train/margins": 7.703990340232849, "rewards_train/rejected": -7.1838555335998535, "step": 617 }, { "epoch": 0.3, "learning_rate": 9.92893234488257e-07, "loss": 0.0029, "step": 618 }, { "epoch": 0.3, "logps_train/chosen": -59.02521514892578, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -197.28451538085938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4446460008621216, "rewards_train/margins": 7.739308953285217, "rewards_train/rejected": -7.294662952423096, "step": 618 }, { "epoch": 0.3, "learning_rate": 9.928475677886405e-07, "loss": 0.0018, "step": 619 }, { "epoch": 0.3, "logps_train/chosen": -61.71499252319336, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -198.1263427734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4950047433376312, "rewards_train/margins": 7.431613057851791, "rewards_train/rejected": -6.93660831451416, "step": 619 }, { "epoch": 0.31, "learning_rate": 9.928017558928049e-07, "loss": 0.0051, "step": 620 }, { "epoch": 0.31, "logps_train/chosen": -61.23023986816406, "logps_train/ref_chosen": -67.5, "logps_train/ref_rejected": -130.875, "logps_train/rejected": -204.4913787841797, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6230210661888123, "rewards_train/margins": 7.982413232326508, "rewards_train/rejected": -7.359392166137695, "step": 620 }, { "epoch": 0.31, "learning_rate": 9.927557988142466e-07, "loss": 0.0023, "step": 621 }, { "epoch": 0.31, "logps_train/chosen": -59.89525604248047, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -198.59164428710938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.49602124094963074, "rewards_train/margins": 7.695370644330978, "rewards_train/rejected": -7.199349403381348, "step": 621 }, { "epoch": 0.31, "learning_rate": 9.92709696566505e-07, "loss": 0.003, "step": 622 }, { "epoch": 0.31, "logps_train/chosen": -58.606201171875, "logps_train/ref_chosen": -62.15625, "logps_train/ref_rejected": -120.5, "logps_train/rejected": -189.9799346923828, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3541748523712158, "rewards_train/margins": 7.303340196609497, "rewards_train/rejected": -6.949165344238281, "step": 622 }, { "epoch": 0.31, "learning_rate": 9.926634491631622e-07, "loss": 0.0042, "step": 623 }, { "epoch": 0.31, "logps_train/chosen": -61.47829818725586, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -198.38351440429688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.43132030963897705, "rewards_train/margins": 7.531878113746643, "rewards_train/rejected": -7.100557804107666, "step": 623 }, { "epoch": 0.31, "learning_rate": 9.926170566178428e-07, "loss": 0.003, "step": 624 }, { "epoch": 0.31, "logps_train/chosen": -59.16145324707031, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -121.25, "logps_train/rejected": -190.87115478515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.43170613050460815, "rewards_train/margins": 7.39401787519455, "rewards_train/rejected": -6.962311744689941, "step": 624 }, { "epoch": 0.31, "learning_rate": 9.925705189442143e-07, "loss": 0.0019, "step": 625 }, { "epoch": 0.31, "logps_train/chosen": -61.3663444519043, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -200.12686157226562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.38099271059036255, "rewards_train/margins": 7.7795684933662415, "rewards_train/rejected": -7.398575782775879, "step": 625 }, { "epoch": 0.31, "learning_rate": 9.925238361559876e-07, "loss": 0.0035, "step": 626 }, { "epoch": 0.31, "logps_train/chosen": -60.893768310546875, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -205.49368286132812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4457058310508728, "rewards_train/margins": 7.8992738127708435, "rewards_train/rejected": -7.453567981719971, "step": 626 }, { "epoch": 0.31, "learning_rate": 9.92477008266915e-07, "loss": 0.0026, "step": 627 }, { "epoch": 0.31, "logps_train/chosen": -58.68482971191406, "logps_train/ref_chosen": -62.15625, "logps_train/ref_rejected": -121.1875, "logps_train/rejected": -190.5235595703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3465072512626648, "rewards_train/margins": 7.280893981456757, "rewards_train/rejected": -6.934386730194092, "step": 627 }, { "epoch": 0.31, "learning_rate": 9.924300352907928e-07, "loss": 0.0026, "step": 628 }, { "epoch": 0.31, "logps_train/chosen": -59.23417663574219, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -195.12718200683594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5727493762969971, "rewards_train/margins": 7.7482616901397705, "rewards_train/rejected": -7.175512313842773, "step": 628 }, { "epoch": 0.31, "learning_rate": 9.923829172414592e-07, "loss": 0.0038, "step": 629 }, { "epoch": 0.31, "logps_train/chosen": -60.921630859375, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -198.41781616210938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3799561560153961, "rewards_train/margins": 7.5867771208286285, "rewards_train/rejected": -7.206820964813232, "step": 629 }, { "epoch": 0.31, "learning_rate": 9.923356541327959e-07, "loss": 0.0037, "step": 630 }, { "epoch": 0.31, "logps_train/chosen": -60.76845932006836, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -132.25, "logps_train/rejected": -204.61279296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5967381000518799, "rewards_train/margins": 7.834189176559448, "rewards_train/rejected": -7.237451076507568, "step": 630 }, { "epoch": 0.31, "learning_rate": 9.922882459787267e-07, "loss": 0.0017, "step": 631 }, { "epoch": 0.31, "logps_train/chosen": -61.96162414550781, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -200.50613403320312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.36550748348236084, "rewards_train/margins": 7.770419001579285, "rewards_train/rejected": -7.404911518096924, "step": 631 }, { "epoch": 0.31, "learning_rate": 9.922406927932182e-07, "loss": 0.0021, "step": 632 }, { "epoch": 0.31, "logps_train/chosen": -65.40208435058594, "logps_train/ref_chosen": -68.625, "logps_train/ref_rejected": -131.125, "logps_train/rejected": -206.73565673828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.318727046251297, "rewards_train/margins": 7.883113890886307, "rewards_train/rejected": -7.56438684463501, "step": 632 }, { "epoch": 0.31, "learning_rate": 9.921929945902803e-07, "loss": 0.0018, "step": 633 }, { "epoch": 0.31, "logps_train/chosen": -58.320621490478516, "logps_train/ref_chosen": -61.1875, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -196.80836486816406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2856133282184601, "rewards_train/margins": 7.637544423341751, "rewards_train/rejected": -7.351931095123291, "step": 633 }, { "epoch": 0.31, "learning_rate": 9.92145151383965e-07, "loss": 0.004, "step": 634 }, { "epoch": 0.31, "logps_train/chosen": -60.00213623046875, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -200.59280395507812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.449249267578125, "rewards_train/margins": 7.736849784851074, "rewards_train/rejected": -7.287600517272949, "step": 634 }, { "epoch": 0.31, "learning_rate": 9.920971631883672e-07, "loss": 0.003, "step": 635 }, { "epoch": 0.31, "logps_train/chosen": -61.42536926269531, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -205.01524353027344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.30614447593688965, "rewards_train/margins": 8.018947839736938, "rewards_train/rejected": -7.712803363800049, "step": 635 }, { "epoch": 0.31, "learning_rate": 9.920490300176246e-07, "loss": 0.0027, "step": 636 }, { "epoch": 0.31, "logps_train/chosen": -60.58007049560547, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -136.0, "logps_train/rejected": -215.97532653808594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5044928789138794, "rewards_train/margins": 8.509739995002747, "rewards_train/rejected": -8.005247116088867, "step": 636 }, { "epoch": 0.31, "learning_rate": 9.920007518859173e-07, "loss": 0.0009, "step": 637 }, { "epoch": 0.31, "logps_train/chosen": -58.85047149658203, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -201.777099609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5835073590278625, "rewards_train/margins": 8.044322788715363, "rewards_train/rejected": -7.4608154296875, "step": 637 }, { "epoch": 0.31, "learning_rate": 9.919523288074687e-07, "loss": 0.0019, "step": 638 }, { "epoch": 0.31, "logps_train/chosen": -56.760711669921875, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -202.32418823242188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5914580821990967, "rewards_train/margins": 7.977296590805054, "rewards_train/rejected": -7.385838508605957, "step": 638 }, { "epoch": 0.31, "learning_rate": 9.919037607965446e-07, "loss": 0.0035, "step": 639 }, { "epoch": 0.31, "logps_train/chosen": -59.92212677001953, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -120.3125, "logps_train/rejected": -192.85682678222656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3900623321533203, "rewards_train/margins": 7.641955375671387, "rewards_train/rejected": -7.251893043518066, "step": 639 }, { "epoch": 0.32, "learning_rate": 9.918550478674533e-07, "loss": 0.0034, "step": 640 }, { "epoch": 0.32, "logps_train/chosen": -62.8553581237793, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -122.6875, "logps_train/rejected": -191.52005004882812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.37789201736450195, "rewards_train/margins": 7.262513637542725, "rewards_train/rejected": -6.884621620178223, "step": 640 }, { "epoch": 0.32, "learning_rate": 9.918061900345458e-07, "loss": 0.0048, "step": 641 }, { "epoch": 0.32, "logps_train/chosen": -59.427215576171875, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -196.78529357910156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.630813479423523, "rewards_train/margins": 7.902312159538269, "rewards_train/rejected": -7.271498680114746, "step": 641 }, { "epoch": 0.32, "learning_rate": 9.917571873122163e-07, "loss": 0.0041, "step": 642 }, { "epoch": 0.32, "logps_train/chosen": -61.19932556152344, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -195.5887451171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4559951722621918, "rewards_train/margins": 7.4939723908901215, "rewards_train/rejected": -7.03797721862793, "step": 642 }, { "epoch": 0.32, "learning_rate": 9.917080397149011e-07, "loss": 0.0045, "step": 643 }, { "epoch": 0.32, "logps_train/chosen": -60.783851623535156, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -204.01712036132812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.42188379168510437, "rewards_train/margins": 7.92974779009819, "rewards_train/rejected": -7.507863998413086, "step": 643 }, { "epoch": 0.32, "learning_rate": 9.916587472570795e-07, "loss": 0.0027, "step": 644 }, { "epoch": 0.32, "logps_train/chosen": -59.7789306640625, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -196.7850799560547, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3412473797798157, "rewards_train/margins": 7.570145428180695, "rewards_train/rejected": -7.228898048400879, "step": 644 }, { "epoch": 0.32, "learning_rate": 9.916093099532733e-07, "loss": 0.0031, "step": 645 }, { "epoch": 0.32, "logps_train/chosen": -60.44807434082031, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -203.967529296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5247483253479004, "rewards_train/margins": 8.275993824005127, "rewards_train/rejected": -7.751245498657227, "step": 645 }, { "epoch": 0.32, "learning_rate": 9.915597278180471e-07, "loss": 0.0021, "step": 646 }, { "epoch": 0.32, "logps_train/chosen": -60.204566955566406, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -199.819091796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.41465070843696594, "rewards_train/margins": 7.863552361726761, "rewards_train/rejected": -7.448901653289795, "step": 646 }, { "epoch": 0.32, "learning_rate": 9.915100008660082e-07, "loss": 0.002, "step": 647 }, { "epoch": 0.32, "logps_train/chosen": -57.035560607910156, "logps_train/ref_chosen": -61.46875, "logps_train/ref_rejected": -122.0625, "logps_train/rejected": -194.13693237304688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.44258660078048706, "rewards_train/margins": 7.650811851024628, "rewards_train/rejected": -7.208225250244141, "step": 647 }, { "epoch": 0.32, "learning_rate": 9.914601291118065e-07, "loss": 0.0055, "step": 648 }, { "epoch": 0.32, "logps_train/chosen": -59.052040100097656, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -197.86973571777344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4164758622646332, "rewards_train/margins": 7.773273557424545, "rewards_train/rejected": -7.356797695159912, "step": 648 }, { "epoch": 0.32, "learning_rate": 9.914101125701345e-07, "loss": 0.003, "step": 649 }, { "epoch": 0.32, "logps_train/chosen": -60.81354522705078, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -201.88375854492188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4445246458053589, "rewards_train/margins": 8.032998919487, "rewards_train/rejected": -7.588474273681641, "step": 649 }, { "epoch": 0.32, "learning_rate": 9.913599512557273e-07, "loss": 0.0014, "step": 650 }, { "epoch": 0.32, "logps_train/chosen": -61.49370574951172, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -197.47866821289062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.37934044003486633, "rewards_train/margins": 7.8620703518390656, "rewards_train/rejected": -7.482729911804199, "step": 650 }, { "epoch": 0.32, "learning_rate": 9.913096451833628e-07, "loss": 0.0039, "step": 651 }, { "epoch": 0.32, "logps_train/chosen": -59.56284713745117, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -205.41212463378906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.598793625831604, "rewards_train/margins": 8.231802582740784, "rewards_train/rejected": -7.63300895690918, "step": 651 }, { "epoch": 0.32, "learning_rate": 9.912591943678619e-07, "loss": 0.0018, "step": 652 }, { "epoch": 0.32, "logps_train/chosen": -59.85948181152344, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -203.20718383789062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.34266549348831177, "rewards_train/margins": 8.203961193561554, "rewards_train/rejected": -7.861295700073242, "step": 652 }, { "epoch": 0.32, "learning_rate": 9.912085988240872e-07, "loss": 0.0019, "step": 653 }, { "epoch": 0.32, "logps_train/chosen": -59.76287078857422, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -196.396484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4959297776222229, "rewards_train/margins": 7.81384950876236, "rewards_train/rejected": -7.317919731140137, "step": 653 }, { "epoch": 0.32, "learning_rate": 9.91157858566945e-07, "loss": 0.0024, "step": 654 }, { "epoch": 0.32, "logps_train/chosen": -60.356048583984375, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -204.25357055664062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.527725100517273, "rewards_train/margins": 8.049518465995789, "rewards_train/rejected": -7.521793365478516, "step": 654 }, { "epoch": 0.32, "learning_rate": 9.91106973611383e-07, "loss": 0.0022, "step": 655 }, { "epoch": 0.32, "logps_train/chosen": -58.07948303222656, "logps_train/ref_chosen": -61.96875, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -199.30648803710938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3885364532470703, "rewards_train/margins": 7.825141906738281, "rewards_train/rejected": -7.436605453491211, "step": 655 }, { "epoch": 0.32, "learning_rate": 9.91055943972393e-07, "loss": 0.0026, "step": 656 }, { "epoch": 0.32, "logps_train/chosen": -62.82630157470703, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -206.28173828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3420766592025757, "rewards_train/margins": 8.136656165122986, "rewards_train/rejected": -7.79457950592041, "step": 656 }, { "epoch": 0.32, "learning_rate": 9.910047696650085e-07, "loss": 0.0013, "step": 657 }, { "epoch": 0.32, "logps_train/chosen": -61.21526336669922, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -195.88339233398438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.35264354944229126, "rewards_train/margins": 7.62994772195816, "rewards_train/rejected": -7.277304172515869, "step": 657 }, { "epoch": 0.32, "learning_rate": 9.909534507043054e-07, "loss": 0.0022, "step": 658 }, { "epoch": 0.32, "logps_train/chosen": -63.46612548828125, "logps_train/ref_chosen": -67.75, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -205.28079223632812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4247252941131592, "rewards_train/margins": 8.04528546333313, "rewards_train/rejected": -7.620560169219971, "step": 658 }, { "epoch": 0.32, "learning_rate": 9.909019871054031e-07, "loss": 0.0035, "step": 659 }, { "epoch": 0.32, "logps_train/chosen": -62.09235763549805, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -201.25482177734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.22467544674873352, "rewards_train/margins": 7.906504422426224, "rewards_train/rejected": -7.68182897567749, "step": 659 }, { "epoch": 0.32, "learning_rate": 9.908503788834629e-07, "loss": 0.0017, "step": 660 }, { "epoch": 0.32, "logps_train/chosen": -59.94357681274414, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -197.45138549804688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.543874979019165, "rewards_train/margins": 7.704345941543579, "rewards_train/rejected": -7.160470962524414, "step": 660 }, { "epoch": 0.33, "learning_rate": 9.907986260536888e-07, "loss": 0.0052, "step": 661 }, { "epoch": 0.33, "logps_train/chosen": -59.96937561035156, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -208.16079711914062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3867049515247345, "rewards_train/margins": 8.338429003953934, "rewards_train/rejected": -7.951724052429199, "step": 661 }, { "epoch": 0.33, "learning_rate": 9.907467286313275e-07, "loss": 0.0016, "step": 662 }, { "epoch": 0.33, "logps_train/chosen": -58.46159362792969, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -196.58245849609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5274003148078918, "rewards_train/margins": 7.82763797044754, "rewards_train/rejected": -7.300237655639648, "step": 662 }, { "epoch": 0.33, "learning_rate": 9.906946866316688e-07, "loss": 0.0014, "step": 663 }, { "epoch": 0.33, "logps_train/chosen": -57.62977600097656, "logps_train/ref_chosen": -61.5, "logps_train/ref_rejected": -122.8125, "logps_train/rejected": -200.7900848388672, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.38482508063316345, "rewards_train/margins": 8.182827323675156, "rewards_train/rejected": -7.798002243041992, "step": 663 }, { "epoch": 0.33, "learning_rate": 9.906425000700442e-07, "loss": 0.002, "step": 664 }, { "epoch": 0.33, "logps_train/chosen": -61.94861602783203, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -132.0, "logps_train/rejected": -210.27920532226562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.44268739223480225, "rewards_train/margins": 8.272073149681091, "rewards_train/rejected": -7.829385757446289, "step": 664 }, { "epoch": 0.33, "learning_rate": 9.905901689618286e-07, "loss": 0.0016, "step": 665 }, { "epoch": 0.33, "logps_train/chosen": -64.02577209472656, "logps_train/ref_chosen": -69.875, "logps_train/ref_rejected": -134.375, "logps_train/rejected": -215.21588134765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5883404016494751, "rewards_train/margins": 8.67770254611969, "rewards_train/rejected": -8.089362144470215, "step": 665 }, { "epoch": 0.33, "learning_rate": 9.905376933224385e-07, "loss": 0.0011, "step": 666 }, { "epoch": 0.33, "logps_train/chosen": -57.99982452392578, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -197.23837280273438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5749198794364929, "rewards_train/margins": 7.947878062725067, "rewards_train/rejected": -7.372958183288574, "step": 666 }, { "epoch": 0.33, "learning_rate": 9.904850731673342e-07, "loss": 0.0015, "step": 667 }, { "epoch": 0.33, "logps_train/chosen": -59.14429473876953, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -198.07772827148438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.484300822019577, "rewards_train/margins": 7.9774753749370575, "rewards_train/rejected": -7.4931745529174805, "step": 667 }, { "epoch": 0.33, "learning_rate": 9.904323085120174e-07, "loss": 0.0033, "step": 668 }, { "epoch": 0.33, "logps_train/chosen": -57.68123245239258, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -197.93731689453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5037030577659607, "rewards_train/margins": 7.888742387294769, "rewards_train/rejected": -7.385039329528809, "step": 668 }, { "epoch": 0.33, "learning_rate": 9.903793993720334e-07, "loss": 0.0036, "step": 669 }, { "epoch": 0.33, "logps_train/chosen": -61.440574645996094, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -206.3563995361328, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.41502445936203003, "rewards_train/margins": 8.029376685619354, "rewards_train/rejected": -7.614352226257324, "step": 669 }, { "epoch": 0.33, "learning_rate": 9.903263457629692e-07, "loss": 0.0019, "step": 670 }, { "epoch": 0.33, "logps_train/chosen": -60.69160079956055, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -203.84335327148438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3950977623462677, "rewards_train/margins": 8.101015597581863, "rewards_train/rejected": -7.705917835235596, "step": 670 }, { "epoch": 0.33, "learning_rate": 9.90273147700455e-07, "loss": 0.002, "step": 671 }, { "epoch": 0.33, "logps_train/chosen": -57.91585922241211, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -203.33941650390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.42052358388900757, "rewards_train/margins": 8.207442224025726, "rewards_train/rejected": -7.786918640136719, "step": 671 }, { "epoch": 0.33, "learning_rate": 9.902198052001634e-07, "loss": 0.0021, "step": 672 }, { "epoch": 0.33, "logps_train/chosen": -60.67845153808594, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -201.6732177734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.49621739983558655, "rewards_train/margins": 7.9479619562625885, "rewards_train/rejected": -7.451744556427002, "step": 672 }, { "epoch": 0.33, "learning_rate": 9.90166318277809e-07, "loss": 0.0023, "step": 673 }, { "epoch": 0.33, "logps_train/chosen": -58.50000762939453, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -206.95855712890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6579583883285522, "rewards_train/margins": 8.702203154563904, "rewards_train/rejected": -8.044244766235352, "step": 673 }, { "epoch": 0.33, "learning_rate": 9.901126869491499e-07, "loss": 0.0027, "step": 674 }, { "epoch": 0.33, "logps_train/chosen": -56.759132385253906, "logps_train/ref_chosen": -61.375, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -198.47207641601562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4617573022842407, "rewards_train/margins": 7.958574891090393, "rewards_train/rejected": -7.496817588806152, "step": 674 }, { "epoch": 0.33, "learning_rate": 9.90058911229986e-07, "loss": 0.0068, "step": 675 }, { "epoch": 0.33, "logps_train/chosen": -61.087120056152344, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -205.69024658203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5371862053871155, "rewards_train/margins": 8.33667904138565, "rewards_train/rejected": -7.799492835998535, "step": 675 }, { "epoch": 0.33, "learning_rate": 9.900049911361602e-07, "loss": 0.0017, "step": 676 }, { "epoch": 0.33, "logps_train/chosen": -61.67120361328125, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -204.6476287841797, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.41151702404022217, "rewards_train/margins": 8.246348261833191, "rewards_train/rejected": -7.834831237792969, "step": 676 }, { "epoch": 0.33, "learning_rate": 9.899509266835574e-07, "loss": 0.0039, "step": 677 }, { "epoch": 0.33, "logps_train/chosen": -59.76266098022461, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -121.5625, "logps_train/rejected": -193.7924346923828, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3562047481536865, "rewards_train/margins": 7.577488660812378, "rewards_train/rejected": -7.221283912658691, "step": 677 }, { "epoch": 0.33, "learning_rate": 9.898967178881057e-07, "loss": 0.0022, "step": 678 }, { "epoch": 0.33, "logps_train/chosen": -61.26494598388672, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -210.93092346191406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.35412082076072693, "rewards_train/margins": 8.47348365187645, "rewards_train/rejected": -8.119362831115723, "step": 678 }, { "epoch": 0.33, "learning_rate": 9.89842364765775e-07, "loss": 0.0008, "step": 679 }, { "epoch": 0.33, "logps_train/chosen": -58.35857391357422, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -119.375, "logps_train/rejected": -193.50408935546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.41433805227279663, "rewards_train/margins": 7.82753986120224, "rewards_train/rejected": -7.413201808929443, "step": 679 }, { "epoch": 0.33, "learning_rate": 9.897878673325783e-07, "loss": 0.0028, "step": 680 }, { "epoch": 0.33, "logps_train/chosen": -58.7158203125, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -203.913818359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4541991353034973, "rewards_train/margins": 8.23859816789627, "rewards_train/rejected": -7.784399032592773, "step": 680 }, { "epoch": 0.34, "learning_rate": 9.89733225604571e-07, "loss": 0.0035, "step": 681 }, { "epoch": 0.34, "logps_train/chosen": -60.30031967163086, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -200.86363220214844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.347702294588089, "rewards_train/margins": 8.055354863405228, "rewards_train/rejected": -7.707652568817139, "step": 681 }, { "epoch": 0.34, "learning_rate": 9.89678439597851e-07, "loss": 0.0026, "step": 682 }, { "epoch": 0.34, "logps_train/chosen": -59.97907257080078, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -121.6875, "logps_train/rejected": -196.55255126953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4485529065132141, "rewards_train/margins": 7.935546696186066, "rewards_train/rejected": -7.486993789672852, "step": 682 }, { "epoch": 0.34, "learning_rate": 9.896235093285582e-07, "loss": 0.0031, "step": 683 }, { "epoch": 0.34, "logps_train/chosen": -59.60807418823242, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -207.1087646484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4651205241680145, "rewards_train/margins": 8.412519663572311, "rewards_train/rejected": -7.947399139404297, "step": 683 }, { "epoch": 0.34, "learning_rate": 9.895684348128758e-07, "loss": 0.0029, "step": 684 }, { "epoch": 0.34, "logps_train/chosen": -59.923805236816406, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -122.4375, "logps_train/rejected": -197.55899047851562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.39079809188842773, "rewards_train/margins": 7.90431547164917, "rewards_train/rejected": -7.513517379760742, "step": 684 }, { "epoch": 0.34, "learning_rate": 9.89513216067029e-07, "loss": 0.0035, "step": 685 }, { "epoch": 0.34, "logps_train/chosen": -60.744354248046875, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -208.01708984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.46289390325546265, "rewards_train/margins": 8.355081021785736, "rewards_train/rejected": -7.892187118530273, "step": 685 }, { "epoch": 0.34, "learning_rate": 9.894578531072857e-07, "loss": 0.0052, "step": 686 }, { "epoch": 0.34, "logps_train/chosen": -60.30813980102539, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -201.5885009765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.438424289226532, "rewards_train/margins": 8.098837077617645, "rewards_train/rejected": -7.660412788391113, "step": 686 }, { "epoch": 0.34, "learning_rate": 9.89402345949956e-07, "loss": 0.002, "step": 687 }, { "epoch": 0.34, "logps_train/chosen": -60.140037536621094, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -203.86904907226562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.33790045976638794, "rewards_train/margins": 8.047558605670929, "rewards_train/rejected": -7.709658145904541, "step": 687 }, { "epoch": 0.34, "learning_rate": 9.893466946113928e-07, "loss": 0.0038, "step": 688 }, { "epoch": 0.34, "logps_train/chosen": -61.341365814208984, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -205.6275634765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3296820819377899, "rewards_train/margins": 8.213141173124313, "rewards_train/rejected": -7.883459091186523, "step": 688 }, { "epoch": 0.34, "learning_rate": 9.892908991079915e-07, "loss": 0.0034, "step": 689 }, { "epoch": 0.34, "logps_train/chosen": -58.57880401611328, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -205.98834228515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4789116382598877, "rewards_train/margins": 8.441417455673218, "rewards_train/rejected": -7.96250581741333, "step": 689 }, { "epoch": 0.34, "learning_rate": 9.892349594561898e-07, "loss": 0.0029, "step": 690 }, { "epoch": 0.34, "logps_train/chosen": -58.84619903564453, "logps_train/ref_chosen": -62.28125, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -200.04058837890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.34479907155036926, "rewards_train/margins": 8.095733672380447, "rewards_train/rejected": -7.750934600830078, "step": 690 }, { "epoch": 0.34, "learning_rate": 9.891788756724674e-07, "loss": 0.0022, "step": 691 }, { "epoch": 0.34, "logps_train/chosen": -59.6818733215332, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -207.59744262695312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4515390694141388, "rewards_train/margins": 8.439701110124588, "rewards_train/rejected": -7.988162040710449, "step": 691 }, { "epoch": 0.34, "learning_rate": 9.891226477733477e-07, "loss": 0.0011, "step": 692 }, { "epoch": 0.34, "logps_train/chosen": -60.70337677001953, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -200.01943969726562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.37062937021255493, "rewards_train/margins": 7.916908323764801, "rewards_train/rejected": -7.546278953552246, "step": 692 }, { "epoch": 0.34, "learning_rate": 9.890662757753953e-07, "loss": 0.0041, "step": 693 }, { "epoch": 0.34, "logps_train/chosen": -60.76410675048828, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -201.88873291015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3126027286052704, "rewards_train/margins": 8.102208822965622, "rewards_train/rejected": -7.789606094360352, "step": 693 }, { "epoch": 0.34, "learning_rate": 9.89009759695218e-07, "loss": 0.0022, "step": 694 }, { "epoch": 0.34, "logps_train/chosen": -59.14018630981445, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -201.9950408935547, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5048288702964783, "rewards_train/margins": 8.39471572637558, "rewards_train/rejected": -7.889886856079102, "step": 694 }, { "epoch": 0.34, "learning_rate": 9.88953099549466e-07, "loss": 0.0013, "step": 695 }, { "epoch": 0.34, "logps_train/chosen": -55.349674224853516, "logps_train/ref_chosen": -59.71875, "logps_train/ref_rejected": -119.8125, "logps_train/rejected": -194.9095458984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4363706111907959, "rewards_train/margins": 7.9501283168792725, "rewards_train/rejected": -7.513757705688477, "step": 695 }, { "epoch": 0.34, "learning_rate": 9.888962953548314e-07, "loss": 0.0029, "step": 696 }, { "epoch": 0.34, "logps_train/chosen": -58.871002197265625, "logps_train/ref_chosen": -62.65625, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -206.73361206054688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.37810948491096497, "rewards_train/margins": 8.33096268773079, "rewards_train/rejected": -7.952853202819824, "step": 696 }, { "epoch": 0.34, "learning_rate": 9.888393471280492e-07, "loss": 0.0013, "step": 697 }, { "epoch": 0.34, "logps_train/chosen": -60.809776306152344, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -202.4693603515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.41907134652137756, "rewards_train/margins": 8.141398578882217, "rewards_train/rejected": -7.72232723236084, "step": 697 }, { "epoch": 0.34, "learning_rate": 9.887822548858967e-07, "loss": 0.002, "step": 698 }, { "epoch": 0.34, "logps_train/chosen": -57.31993865966797, "logps_train/ref_chosen": -62.03125, "logps_train/ref_rejected": -120.875, "logps_train/rejected": -196.20977783203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.47039860486984253, "rewards_train/margins": 8.007684409618378, "rewards_train/rejected": -7.537285804748535, "step": 698 }, { "epoch": 0.34, "learning_rate": 9.88725018645194e-07, "loss": 0.0022, "step": 699 }, { "epoch": 0.34, "logps_train/chosen": -60.66489028930664, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -121.9375, "logps_train/rejected": -198.39053344726562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3012356758117676, "rewards_train/margins": 7.944928169250488, "rewards_train/rejected": -7.643692493438721, "step": 699 }, { "epoch": 0.34, "learning_rate": 9.886676384228025e-07, "loss": 0.0045, "step": 700 }, { "epoch": 0.34, "logps_train/chosen": -60.407440185546875, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -131.25, "logps_train/rejected": -216.44503784179688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5038846731185913, "rewards_train/margins": 9.016553282737732, "rewards_train/rejected": -8.51266860961914, "step": 700 }, { "epoch": 0.35, "learning_rate": 9.886101142356276e-07, "loss": 0.0008, "step": 701 }, { "epoch": 0.35, "logps_train/chosen": -59.849388122558594, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -197.71310424804688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4551003575325012, "rewards_train/margins": 8.064594566822052, "rewards_train/rejected": -7.609494209289551, "step": 701 }, { "epoch": 0.35, "learning_rate": 9.88552446100616e-07, "loss": 0.0042, "step": 702 }, { "epoch": 0.35, "logps_train/chosen": -61.82737731933594, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -205.842041015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.43152010440826416, "rewards_train/margins": 8.125244736671448, "rewards_train/rejected": -7.693724632263184, "step": 702 }, { "epoch": 0.35, "learning_rate": 9.884946340347573e-07, "loss": 0.0026, "step": 703 }, { "epoch": 0.35, "logps_train/chosen": -61.168792724609375, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -213.53411865234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2715485692024231, "rewards_train/margins": 8.605379402637482, "rewards_train/rejected": -8.333830833435059, "step": 703 }, { "epoch": 0.35, "learning_rate": 9.88436678055083e-07, "loss": 0.0029, "step": 704 }, { "epoch": 0.35, "logps_train/chosen": -59.47576141357422, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -200.3251190185547, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3247382640838623, "rewards_train/margins": 8.046410322189331, "rewards_train/rejected": -7.721672058105469, "step": 704 }, { "epoch": 0.35, "learning_rate": 9.883785781786676e-07, "loss": 0.0034, "step": 705 }, { "epoch": 0.35, "logps_train/chosen": -60.17035675048828, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -207.85418701171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4200248718261719, "rewards_train/margins": 8.536205291748047, "rewards_train/rejected": -8.116180419921875, "step": 705 }, { "epoch": 0.35, "learning_rate": 9.883203344226275e-07, "loss": 0.0022, "step": 706 }, { "epoch": 0.35, "logps_train/chosen": -60.94786834716797, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -205.20440673828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2768440842628479, "rewards_train/margins": 8.324138700962067, "rewards_train/rejected": -8.047294616699219, "step": 706 }, { "epoch": 0.35, "learning_rate": 9.882619468041218e-07, "loss": 0.0023, "step": 707 }, { "epoch": 0.35, "logps_train/chosen": -58.87146759033203, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -198.22796630859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4884880781173706, "rewards_train/margins": 8.052448153495789, "rewards_train/rejected": -7.563960075378418, "step": 707 }, { "epoch": 0.35, "learning_rate": 9.88203415340352e-07, "loss": 0.0029, "step": 708 }, { "epoch": 0.35, "logps_train/chosen": -59.45738983154297, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -206.4313507080078, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5566539764404297, "rewards_train/margins": 8.585140228271484, "rewards_train/rejected": -8.028486251831055, "step": 708 }, { "epoch": 0.35, "learning_rate": 9.881447400485617e-07, "loss": 0.0016, "step": 709 }, { "epoch": 0.35, "logps_train/chosen": -55.36030960083008, "logps_train/ref_chosen": -61.375, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -201.5955810546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6005902290344238, "rewards_train/margins": 8.125481128692627, "rewards_train/rejected": -7.524890899658203, "step": 709 }, { "epoch": 0.35, "learning_rate": 9.88085920946037e-07, "loss": 0.0023, "step": 710 }, { "epoch": 0.35, "logps_train/chosen": -59.91864013671875, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -212.39898681640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3847962021827698, "rewards_train/margins": 8.551258146762848, "rewards_train/rejected": -8.166461944580078, "step": 710 }, { "epoch": 0.35, "learning_rate": 9.880269580501065e-07, "loss": 0.0019, "step": 711 }, { "epoch": 0.35, "logps_train/chosen": -63.17888641357422, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -201.7763671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.22742398083209991, "rewards_train/margins": 7.9990061074495316, "rewards_train/rejected": -7.771582126617432, "step": 711 }, { "epoch": 0.35, "learning_rate": 9.879678513781414e-07, "loss": 0.003, "step": 712 }, { "epoch": 0.35, "logps_train/chosen": -58.46746063232422, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -203.12359619140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4931466579437256, "rewards_train/margins": 8.362292528152466, "rewards_train/rejected": -7.86914587020874, "step": 712 }, { "epoch": 0.35, "learning_rate": 9.879086009475543e-07, "loss": 0.0056, "step": 713 }, { "epoch": 0.35, "logps_train/chosen": -58.14927673339844, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -199.9190673828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5193010568618774, "rewards_train/margins": 8.207496285438538, "rewards_train/rejected": -7.68819522857666, "step": 713 }, { "epoch": 0.35, "learning_rate": 9.878492067758012e-07, "loss": 0.0035, "step": 714 }, { "epoch": 0.35, "logps_train/chosen": -60.86881637573242, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -204.7222900390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.42583799362182617, "rewards_train/margins": 8.317646980285645, "rewards_train/rejected": -7.891808986663818, "step": 714 }, { "epoch": 0.35, "learning_rate": 9.877896688803797e-07, "loss": 0.0023, "step": 715 }, { "epoch": 0.35, "logps_train/chosen": -59.213775634765625, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -207.15277099609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4637296795845032, "rewards_train/margins": 8.721487820148468, "rewards_train/rejected": -8.257758140563965, "step": 715 }, { "epoch": 0.35, "learning_rate": 9.877299872788302e-07, "loss": 0.0037, "step": 716 }, { "epoch": 0.35, "logps_train/chosen": -57.24147033691406, "logps_train/ref_chosen": -61.375, "logps_train/ref_rejected": -121.6875, "logps_train/rejected": -200.71661376953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4134509265422821, "rewards_train/margins": 8.31856033205986, "rewards_train/rejected": -7.905109405517578, "step": 716 }, { "epoch": 0.35, "learning_rate": 9.876701619887356e-07, "loss": 0.0017, "step": 717 }, { "epoch": 0.35, "logps_train/chosen": -62.47156524658203, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -208.921142578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3139275908470154, "rewards_train/margins": 8.348961770534515, "rewards_train/rejected": -8.0350341796875, "step": 717 }, { "epoch": 0.35, "learning_rate": 9.876101930277207e-07, "loss": 0.0015, "step": 718 }, { "epoch": 0.35, "logps_train/chosen": -61.02459716796875, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -210.17764282226562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.37444472312927246, "rewards_train/margins": 8.577171087265015, "rewards_train/rejected": -8.202726364135742, "step": 718 }, { "epoch": 0.35, "learning_rate": 9.875500804134525e-07, "loss": 0.0014, "step": 719 }, { "epoch": 0.35, "logps_train/chosen": -61.97587203979492, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -122.1875, "logps_train/rejected": -196.44528198242188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2815144956111908, "rewards_train/margins": 7.706803649663925, "rewards_train/rejected": -7.425289154052734, "step": 719 }, { "epoch": 0.35, "learning_rate": 9.874898241636406e-07, "loss": 0.0025, "step": 720 }, { "epoch": 0.35, "logps_train/chosen": -57.54962158203125, "logps_train/ref_chosen": -61.96875, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -207.49627685546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.44118043780326843, "rewards_train/margins": 8.555845469236374, "rewards_train/rejected": -8.114665031433105, "step": 720 }, { "epoch": 0.35, "learning_rate": 9.874294242960372e-07, "loss": 0.0014, "step": 721 }, { "epoch": 0.35, "logps_train/chosen": -61.40202713012695, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -211.47776794433594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4977854788303375, "rewards_train/margins": 8.695856004953384, "rewards_train/rejected": -8.198070526123047, "step": 721 }, { "epoch": 0.36, "learning_rate": 9.873688808284364e-07, "loss": 0.0019, "step": 722 }, { "epoch": 0.36, "logps_train/chosen": -64.18836975097656, "logps_train/ref_chosen": -68.125, "logps_train/ref_rejected": -134.875, "logps_train/rejected": -223.3043212890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3938097655773163, "rewards_train/margins": 9.23449656367302, "rewards_train/rejected": -8.840686798095703, "step": 722 }, { "epoch": 0.36, "learning_rate": 9.873081937786745e-07, "loss": 0.0008, "step": 723 }, { "epoch": 0.36, "logps_train/chosen": -57.554630279541016, "logps_train/ref_chosen": -60.78125, "logps_train/ref_rejected": -117.8125, "logps_train/rejected": -193.6439666748047, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.32363834977149963, "rewards_train/margins": 7.905954748392105, "rewards_train/rejected": -7.5823163986206055, "step": 723 }, { "epoch": 0.36, "learning_rate": 9.872473631646306e-07, "loss": 0.0045, "step": 724 }, { "epoch": 0.36, "logps_train/chosen": -59.25739288330078, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -203.3929901123047, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4783622622489929, "rewards_train/margins": 8.32820862531662, "rewards_train/rejected": -7.849846363067627, "step": 724 }, { "epoch": 0.36, "learning_rate": 9.871863890042255e-07, "loss": 0.0017, "step": 725 }, { "epoch": 0.36, "logps_train/chosen": -64.86290740966797, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -214.76910400390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.14818227291107178, "rewards_train/margins": 8.522944808006287, "rewards_train/rejected": -8.374762535095215, "step": 725 }, { "epoch": 0.36, "learning_rate": 9.87125271315423e-07, "loss": 0.0014, "step": 726 }, { "epoch": 0.36, "logps_train/chosen": -63.65525817871094, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -212.31072998046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.32465946674346924, "rewards_train/margins": 8.558955311775208, "rewards_train/rejected": -8.234295845031738, "step": 726 }, { "epoch": 0.36, "learning_rate": 9.870640101162283e-07, "loss": 0.0016, "step": 727 }, { "epoch": 0.36, "logps_train/chosen": -62.58756637573242, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -203.716796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.24270808696746826, "rewards_train/margins": 8.140168070793152, "rewards_train/rejected": -7.897459983825684, "step": 727 }, { "epoch": 0.36, "learning_rate": 9.870026054246899e-07, "loss": 0.0022, "step": 728 }, { "epoch": 0.36, "logps_train/chosen": -61.426414489746094, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -208.9432373046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4296974539756775, "rewards_train/margins": 8.78173679113388, "rewards_train/rejected": -8.352039337158203, "step": 728 }, { "epoch": 0.36, "learning_rate": 9.869410572588977e-07, "loss": 0.003, "step": 729 }, { "epoch": 0.36, "logps_train/chosen": -63.44178009033203, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -205.13079833984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.11851692199707031, "rewards_train/margins": 8.163480758666992, "rewards_train/rejected": -8.044963836669922, "step": 729 }, { "epoch": 0.36, "learning_rate": 9.868793656369843e-07, "loss": 0.0017, "step": 730 }, { "epoch": 0.36, "logps_train/chosen": -62.74378967285156, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -206.8262939453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.24290591478347778, "rewards_train/margins": 8.262547791004181, "rewards_train/rejected": -8.019641876220703, "step": 730 }, { "epoch": 0.36, "learning_rate": 9.868175305771243e-07, "loss": 0.004, "step": 731 }, { "epoch": 0.36, "logps_train/chosen": -60.43424606323242, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -211.32485961914062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4484698176383972, "rewards_train/margins": 8.722995460033417, "rewards_train/rejected": -8.27452564239502, "step": 731 }, { "epoch": 0.36, "learning_rate": 9.86755552097535e-07, "loss": 0.0015, "step": 732 }, { "epoch": 0.36, "logps_train/chosen": -60.476478576660156, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -204.89727783203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.36841660737991333, "rewards_train/margins": 8.289442837238312, "rewards_train/rejected": -7.921026229858398, "step": 732 }, { "epoch": 0.36, "learning_rate": 9.866934302164755e-07, "loss": 0.0023, "step": 733 }, { "epoch": 0.36, "logps_train/chosen": -62.072330474853516, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -127.5625, "logps_train/rejected": -211.2813720703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.34327971935272217, "rewards_train/margins": 8.711946368217468, "rewards_train/rejected": -8.368666648864746, "step": 733 }, { "epoch": 0.36, "learning_rate": 9.866311649522473e-07, "loss": 0.0026, "step": 734 }, { "epoch": 0.36, "logps_train/chosen": -62.128231048583984, "logps_train/ref_chosen": -67.375, "logps_train/ref_rejected": -134.375, "logps_train/rejected": -218.27313232421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5226750373840332, "rewards_train/margins": 8.906336307525635, "rewards_train/rejected": -8.383661270141602, "step": 734 }, { "epoch": 0.36, "learning_rate": 9.865687563231943e-07, "loss": 0.0024, "step": 735 }, { "epoch": 0.36, "logps_train/chosen": -62.698150634765625, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -207.36529541015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.28775349259376526, "rewards_train/margins": 8.396939665079117, "rewards_train/rejected": -8.109186172485352, "step": 735 }, { "epoch": 0.36, "learning_rate": 9.865062043477024e-07, "loss": 0.002, "step": 736 }, { "epoch": 0.36, "logps_train/chosen": -59.717750549316406, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -212.00367736816406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5005882978439331, "rewards_train/margins": 8.952811121940613, "rewards_train/rejected": -8.45222282409668, "step": 736 }, { "epoch": 0.36, "learning_rate": 9.864435090441998e-07, "loss": 0.0015, "step": 737 }, { "epoch": 0.36, "logps_train/chosen": -58.867698669433594, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -204.65643310546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3770486116409302, "rewards_train/margins": 8.519450306892395, "rewards_train/rejected": -8.142401695251465, "step": 737 }, { "epoch": 0.36, "learning_rate": 9.863806704311573e-07, "loss": 0.0016, "step": 738 }, { "epoch": 0.36, "logps_train/chosen": -61.625282287597656, "logps_train/ref_chosen": -67.1875, "logps_train/ref_rejected": -131.875, "logps_train/rejected": -216.657958984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5567099452018738, "rewards_train/margins": 9.041646659374237, "rewards_train/rejected": -8.484936714172363, "step": 738 }, { "epoch": 0.36, "learning_rate": 9.86317688527087e-07, "loss": 0.0013, "step": 739 }, { "epoch": 0.36, "logps_train/chosen": -59.33039093017578, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -211.35000610351562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3732595443725586, "rewards_train/margins": 8.917145729064941, "rewards_train/rejected": -8.543886184692383, "step": 739 }, { "epoch": 0.36, "learning_rate": 9.862545633505442e-07, "loss": 0.0015, "step": 740 }, { "epoch": 0.36, "logps_train/chosen": -62.05091857910156, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -214.26593017578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.23704728484153748, "rewards_train/margins": 8.758561223745346, "rewards_train/rejected": -8.521513938903809, "step": 740 }, { "epoch": 0.36, "learning_rate": 9.86191294920126e-07, "loss": 0.0011, "step": 741 }, { "epoch": 0.36, "logps_train/chosen": -60.73317337036133, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -206.35635375976562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3375224471092224, "rewards_train/margins": 8.543079316616058, "rewards_train/rejected": -8.205556869506836, "step": 741 }, { "epoch": 0.37, "learning_rate": 9.861278832544716e-07, "loss": 0.002, "step": 742 }, { "epoch": 0.37, "logps_train/chosen": -57.295806884765625, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -123.3125, "logps_train/rejected": -202.99758911132812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5397067070007324, "rewards_train/margins": 8.508947372436523, "rewards_train/rejected": -7.969240665435791, "step": 742 }, { "epoch": 0.37, "learning_rate": 9.860643283722625e-07, "loss": 0.0036, "step": 743 }, { "epoch": 0.37, "logps_train/chosen": -57.488372802734375, "logps_train/ref_chosen": -62.0, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -205.86724853515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4499421715736389, "rewards_train/margins": 8.743991434574127, "rewards_train/rejected": -8.294049263000488, "step": 743 }, { "epoch": 0.37, "learning_rate": 9.860006302922223e-07, "loss": 0.0012, "step": 744 }, { "epoch": 0.37, "logps_train/chosen": -58.439918518066406, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -204.16677856445312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4866233468055725, "rewards_train/margins": 8.727813303470612, "rewards_train/rejected": -8.241189956665039, "step": 744 }, { "epoch": 0.37, "learning_rate": 9.859367890331172e-07, "loss": 0.0014, "step": 745 }, { "epoch": 0.37, "logps_train/chosen": -62.54285430908203, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -211.67498779296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.34688639640808105, "rewards_train/margins": 8.795536756515503, "rewards_train/rejected": -8.448650360107422, "step": 745 }, { "epoch": 0.37, "learning_rate": 9.858728046137548e-07, "loss": 0.0007, "step": 746 }, { "epoch": 0.37, "logps_train/chosen": -56.615135192871094, "logps_train/ref_chosen": -61.15625, "logps_train/ref_rejected": -121.25, "logps_train/rejected": -201.74005126953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4536232352256775, "rewards_train/margins": 8.505167424678802, "rewards_train/rejected": -8.051544189453125, "step": 746 }, { "epoch": 0.37, "learning_rate": 9.85808677052986e-07, "loss": 0.0042, "step": 747 }, { "epoch": 0.37, "logps_train/chosen": -58.72490692138672, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -201.61196899414062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.42116159200668335, "rewards_train/margins": 8.331479012966156, "rewards_train/rejected": -7.910317420959473, "step": 747 }, { "epoch": 0.37, "learning_rate": 9.857444063697024e-07, "loss": 0.0017, "step": 748 }, { "epoch": 0.37, "logps_train/chosen": -59.21885681152344, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -210.1364288330078, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4813368618488312, "rewards_train/margins": 8.962363511323929, "rewards_train/rejected": -8.481026649475098, "step": 748 }, { "epoch": 0.37, "learning_rate": 9.856799925828391e-07, "loss": 0.0009, "step": 749 }, { "epoch": 0.37, "logps_train/chosen": -60.667320251464844, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -208.85699462890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5086097121238708, "rewards_train/margins": 8.615305840969086, "rewards_train/rejected": -8.106696128845215, "step": 749 }, { "epoch": 0.37, "learning_rate": 9.85615435711373e-07, "loss": 0.0023, "step": 750 }, { "epoch": 0.37, "logps_train/chosen": -63.11549377441406, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -132.5, "logps_train/rejected": -219.67910766601562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.33522823452949524, "rewards_train/margins": 9.0547516644001, "rewards_train/rejected": -8.719523429870605, "step": 750 }, { "epoch": 0.37, "learning_rate": 9.855507357743224e-07, "loss": 0.0027, "step": 751 }, { "epoch": 0.37, "logps_train/chosen": -64.30821990966797, "logps_train/ref_chosen": -69.125, "logps_train/ref_rejected": -131.5, "logps_train/rejected": -216.11923217773438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4801640510559082, "rewards_train/margins": 8.946238994598389, "rewards_train/rejected": -8.46607494354248, "step": 751 }, { "epoch": 0.37, "learning_rate": 9.854858927907489e-07, "loss": 0.0015, "step": 752 }, { "epoch": 0.37, "logps_train/chosen": -60.51630401611328, "logps_train/ref_chosen": -63.15625, "logps_train/ref_rejected": -123.3125, "logps_train/rejected": -206.1034698486328, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.26360398530960083, "rewards_train/margins": 8.542751133441925, "rewards_train/rejected": -8.279147148132324, "step": 752 }, { "epoch": 0.37, "learning_rate": 9.854209067797552e-07, "loss": 0.0031, "step": 753 }, { "epoch": 0.37, "logps_train/chosen": -61.303184509277344, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -120.5, "logps_train/rejected": -205.94544982910156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1619182527065277, "rewards_train/margins": 8.707294076681137, "rewards_train/rejected": -8.54537582397461, "step": 753 }, { "epoch": 0.37, "learning_rate": 9.85355777760487e-07, "loss": 0.0027, "step": 754 }, { "epoch": 0.37, "logps_train/chosen": -60.1971435546875, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -202.72549438476562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.40211188793182373, "rewards_train/margins": 8.40327537059784, "rewards_train/rejected": -8.001163482666016, "step": 754 }, { "epoch": 0.37, "learning_rate": 9.852905057521317e-07, "loss": 0.0012, "step": 755 }, { "epoch": 0.37, "logps_train/chosen": -61.501487731933594, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -215.7353515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.388181209564209, "rewards_train/margins": 8.915624141693115, "rewards_train/rejected": -8.527442932128906, "step": 755 }, { "epoch": 0.37, "learning_rate": 9.852250907739186e-07, "loss": 0.0009, "step": 756 }, { "epoch": 0.37, "logps_train/chosen": -62.47796630859375, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -212.390380859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3447815179824829, "rewards_train/margins": 8.890557885169983, "rewards_train/rejected": -8.5457763671875, "step": 756 }, { "epoch": 0.37, "learning_rate": 9.851595328451196e-07, "loss": 0.002, "step": 757 }, { "epoch": 0.37, "logps_train/chosen": -60.553733825683594, "logps_train/ref_chosen": -62.65625, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -211.35391235351562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.21020278334617615, "rewards_train/margins": 8.653552621603012, "rewards_train/rejected": -8.443349838256836, "step": 757 }, { "epoch": 0.37, "learning_rate": 9.850938319850486e-07, "loss": 0.0041, "step": 758 }, { "epoch": 0.37, "logps_train/chosen": -59.796844482421875, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -212.81541442871094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3734407424926758, "rewards_train/margins": 9.08330249786377, "rewards_train/rejected": -8.709861755371094, "step": 758 }, { "epoch": 0.37, "learning_rate": 9.850279882130612e-07, "loss": 0.0021, "step": 759 }, { "epoch": 0.37, "logps_train/chosen": -63.13201141357422, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -211.11912536621094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.11970912665128708, "rewards_train/margins": 8.843487851321697, "rewards_train/rejected": -8.72377872467041, "step": 759 }, { "epoch": 0.37, "learning_rate": 9.849620015485557e-07, "loss": 0.0023, "step": 760 }, { "epoch": 0.37, "logps_train/chosen": -60.81391906738281, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -209.11672973632812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3148483335971832, "rewards_train/margins": 8.785553365945816, "rewards_train/rejected": -8.470705032348633, "step": 760 }, { "epoch": 0.37, "learning_rate": 9.848958720109724e-07, "loss": 0.002, "step": 761 }, { "epoch": 0.37, "logps_train/chosen": -61.14708709716797, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -209.07135009765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.38324055075645447, "rewards_train/margins": 8.890131801366806, "rewards_train/rejected": -8.506891250610352, "step": 761 }, { "epoch": 0.38, "learning_rate": 9.84829599619793e-07, "loss": 0.006, "step": 762 }, { "epoch": 0.38, "logps_train/chosen": -61.730770111083984, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -123.1875, "logps_train/rejected": -208.67062377929688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3022158443927765, "rewards_train/margins": 8.84886959195137, "rewards_train/rejected": -8.546653747558594, "step": 762 }, { "epoch": 0.38, "learning_rate": 9.84763184394542e-07, "loss": 0.0007, "step": 763 }, { "epoch": 0.38, "logps_train/chosen": -60.869964599609375, "logps_train/ref_chosen": -62.0, "logps_train/ref_rejected": -121.8125, "logps_train/rejected": -208.2559051513672, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1116364598274231, "rewards_train/margins": 8.758028328418732, "rewards_train/rejected": -8.646391868591309, "step": 763 }, { "epoch": 0.38, "learning_rate": 9.846966263547858e-07, "loss": 0.0022, "step": 764 }, { "epoch": 0.38, "logps_train/chosen": -65.80349731445312, "logps_train/ref_chosen": -67.6875, "logps_train/ref_rejected": -131.75, "logps_train/rejected": -221.2965087890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.18776541948318481, "rewards_train/margins": 9.143442451953888, "rewards_train/rejected": -8.955677032470703, "step": 764 }, { "epoch": 0.38, "learning_rate": 9.84629925520133e-07, "loss": 0.001, "step": 765 }, { "epoch": 0.38, "logps_train/chosen": -60.860862731933594, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -209.85940551757812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.33537378907203674, "rewards_train/margins": 8.867163568735123, "rewards_train/rejected": -8.531789779663086, "step": 765 }, { "epoch": 0.38, "learning_rate": 9.84563081910234e-07, "loss": 0.0016, "step": 766 }, { "epoch": 0.38, "logps_train/chosen": -62.024810791015625, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -213.50689697265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.27339768409729004, "rewards_train/margins": 9.079849481582642, "rewards_train/rejected": -8.806451797485352, "step": 766 }, { "epoch": 0.38, "learning_rate": 9.844960955447813e-07, "loss": 0.0008, "step": 767 }, { "epoch": 0.38, "logps_train/chosen": -62.86653137207031, "logps_train/ref_chosen": -67.1875, "logps_train/ref_rejected": -130.375, "logps_train/rejected": -218.85533142089844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.43414735794067383, "rewards_train/margins": 9.288821697235107, "rewards_train/rejected": -8.854674339294434, "step": 767 }, { "epoch": 0.38, "learning_rate": 9.844289664435095e-07, "loss": 0.0009, "step": 768 }, { "epoch": 0.38, "logps_train/chosen": -61.816619873046875, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -211.4818878173828, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.19846491515636444, "rewards_train/margins": 8.83766894042492, "rewards_train/rejected": -8.639204025268555, "step": 768 }, { "epoch": 0.38, "learning_rate": 9.843616946261954e-07, "loss": 0.0007, "step": 769 }, { "epoch": 0.38, "logps_train/chosen": -63.59496307373047, "logps_train/ref_chosen": -67.1875, "logps_train/ref_rejected": -134.5, "logps_train/rejected": -225.56964111328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.36003512144088745, "rewards_train/margins": 9.467878520488739, "rewards_train/rejected": -9.107843399047852, "step": 769 }, { "epoch": 0.38, "learning_rate": 9.842942801126578e-07, "loss": 0.0004, "step": 770 }, { "epoch": 0.38, "logps_train/chosen": -61.86162185668945, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -216.09671020507812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4107617139816284, "rewards_train/margins": 9.145237803459167, "rewards_train/rejected": -8.734476089477539, "step": 770 }, { "epoch": 0.38, "learning_rate": 9.842267229227573e-07, "loss": 0.0018, "step": 771 }, { "epoch": 0.38, "logps_train/chosen": -61.22445297241211, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -209.13392639160156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3806794285774231, "rewards_train/margins": 8.69612342119217, "rewards_train/rejected": -8.315443992614746, "step": 771 }, { "epoch": 0.38, "learning_rate": 9.841590230763968e-07, "loss": 0.0009, "step": 772 }, { "epoch": 0.38, "logps_train/chosen": -60.19348907470703, "logps_train/ref_chosen": -62.28125, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -205.034912109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2091178447008133, "rewards_train/margins": 8.566075280308723, "rewards_train/rejected": -8.35695743560791, "step": 772 }, { "epoch": 0.38, "learning_rate": 9.84091180593521e-07, "loss": 0.0034, "step": 773 }, { "epoch": 0.38, "logps_train/chosen": -62.96300506591797, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -131.625, "logps_train/rejected": -218.2657470703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3633672595024109, "rewards_train/margins": 9.029590785503387, "rewards_train/rejected": -8.666223526000977, "step": 773 }, { "epoch": 0.38, "learning_rate": 9.840231954941169e-07, "loss": 0.0018, "step": 774 }, { "epoch": 0.38, "logps_train/chosen": -61.84612274169922, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -202.28024291992188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.22212591767311096, "rewards_train/margins": 8.196879774332047, "rewards_train/rejected": -7.9747538566589355, "step": 774 }, { "epoch": 0.38, "learning_rate": 9.839550677982135e-07, "loss": 0.004, "step": 775 }, { "epoch": 0.38, "logps_train/chosen": -58.14816665649414, "logps_train/ref_chosen": -62.4375, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -209.6063232421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4282498359680176, "rewards_train/margins": 8.937904834747314, "rewards_train/rejected": -8.509654998779297, "step": 775 }, { "epoch": 0.38, "learning_rate": 9.838867975258813e-07, "loss": 0.0022, "step": 776 }, { "epoch": 0.38, "logps_train/chosen": -60.518531799316406, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -218.78465270996094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4260767996311188, "rewards_train/margins": 9.50039091706276, "rewards_train/rejected": -9.07431411743164, "step": 776 }, { "epoch": 0.38, "learning_rate": 9.838183846972337e-07, "loss": 0.0013, "step": 777 }, { "epoch": 0.38, "logps_train/chosen": -63.58489990234375, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -215.48007202148438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3535218834877014, "rewards_train/margins": 9.056803286075592, "rewards_train/rejected": -8.70328140258789, "step": 777 }, { "epoch": 0.38, "learning_rate": 9.83749829332425e-07, "loss": 0.0011, "step": 778 }, { "epoch": 0.38, "logps_train/chosen": -61.30426025390625, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -210.9999237060547, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3205506205558777, "rewards_train/margins": 9.040514647960663, "rewards_train/rejected": -8.719964027404785, "step": 778 }, { "epoch": 0.38, "learning_rate": 9.836811314516524e-07, "loss": 0.0026, "step": 779 }, { "epoch": 0.38, "logps_train/chosen": -60.73907470703125, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -209.64007568359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.39884668588638306, "rewards_train/margins": 8.892542898654938, "rewards_train/rejected": -8.493696212768555, "step": 779 }, { "epoch": 0.38, "learning_rate": 9.836122910751547e-07, "loss": 0.0017, "step": 780 }, { "epoch": 0.38, "logps_train/chosen": -62.15573501586914, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -211.32508850097656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.21396753191947937, "rewards_train/margins": 8.917960375547409, "rewards_train/rejected": -8.70399284362793, "step": 780 }, { "epoch": 0.38, "learning_rate": 9.835433082232128e-07, "loss": 0.0014, "step": 781 }, { "epoch": 0.38, "logps_train/chosen": -62.442298889160156, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -209.68289184570312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3270101845264435, "rewards_train/margins": 8.843298941850662, "rewards_train/rejected": -8.516288757324219, "step": 781 }, { "epoch": 0.39, "learning_rate": 9.834741829161495e-07, "loss": 0.0016, "step": 782 }, { "epoch": 0.39, "logps_train/chosen": -64.47712707519531, "logps_train/ref_chosen": -67.25, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -214.35260009765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2750900983810425, "rewards_train/margins": 8.891356348991394, "rewards_train/rejected": -8.616266250610352, "step": 782 }, { "epoch": 0.39, "learning_rate": 9.834049151743295e-07, "loss": 0.0016, "step": 783 }, { "epoch": 0.39, "logps_train/chosen": -63.98161315917969, "logps_train/ref_chosen": -67.375, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -213.64889526367188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3386058211326599, "rewards_train/margins": 8.967069566249847, "rewards_train/rejected": -8.628463745117188, "step": 783 }, { "epoch": 0.39, "learning_rate": 9.833355050181597e-07, "loss": 0.0012, "step": 784 }, { "epoch": 0.39, "logps_train/chosen": -62.28568649291992, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -205.78497314453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.12609468400478363, "rewards_train/margins": 8.357717379927635, "rewards_train/rejected": -8.231622695922852, "step": 784 }, { "epoch": 0.39, "learning_rate": 9.832659524680885e-07, "loss": 0.0028, "step": 785 }, { "epoch": 0.39, "logps_train/chosen": -63.6889533996582, "logps_train/ref_chosen": -68.75, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -215.3228302001953, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.50380939245224, "rewards_train/margins": 8.989413678646088, "rewards_train/rejected": -8.485604286193848, "step": 785 }, { "epoch": 0.39, "learning_rate": 9.831962575446069e-07, "loss": 0.0017, "step": 786 }, { "epoch": 0.39, "logps_train/chosen": -60.307247161865234, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -212.50457763671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.30550581216812134, "rewards_train/margins": 9.00240045785904, "rewards_train/rejected": -8.696894645690918, "step": 786 }, { "epoch": 0.39, "learning_rate": 9.831264202682472e-07, "loss": 0.0028, "step": 787 }, { "epoch": 0.39, "logps_train/chosen": -60.10148620605469, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -212.247314453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3815992474555969, "rewards_train/margins": 8.895590603351593, "rewards_train/rejected": -8.513991355895996, "step": 787 }, { "epoch": 0.39, "learning_rate": 9.83056440659584e-07, "loss": 0.0011, "step": 788 }, { "epoch": 0.39, "logps_train/chosen": -61.129634857177734, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -212.1220703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4110599756240845, "rewards_train/margins": 9.103931069374084, "rewards_train/rejected": -8.69287109375, "step": 788 }, { "epoch": 0.39, "learning_rate": 9.82986318739234e-07, "loss": 0.0041, "step": 789 }, { "epoch": 0.39, "logps_train/chosen": -65.55523681640625, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -219.39691162109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.006719857454299927, "rewards_train/margins": 9.240977972745895, "rewards_train/rejected": -9.247697830200195, "step": 789 }, { "epoch": 0.39, "learning_rate": 9.829160545278554e-07, "loss": 0.0012, "step": 790 }, { "epoch": 0.39, "logps_train/chosen": -59.464447021484375, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -120.1875, "logps_train/rejected": -202.80984497070312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.35834038257598877, "rewards_train/margins": 8.619013905525208, "rewards_train/rejected": -8.260673522949219, "step": 790 }, { "epoch": 0.39, "learning_rate": 9.828456480461486e-07, "loss": 0.0018, "step": 791 }, { "epoch": 0.39, "logps_train/chosen": -59.682960510253906, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -214.27548217773438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.24420374631881714, "rewards_train/margins": 8.994944751262665, "rewards_train/rejected": -8.750741004943848, "step": 791 }, { "epoch": 0.39, "learning_rate": 9.827750993148556e-07, "loss": 0.0016, "step": 792 }, { "epoch": 0.39, "logps_train/chosen": -62.173179626464844, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -208.0142822265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.27120712399482727, "rewards_train/margins": 8.821073800325394, "rewards_train/rejected": -8.549866676330566, "step": 792 }, { "epoch": 0.39, "learning_rate": 9.827044083547607e-07, "loss": 0.001, "step": 793 }, { "epoch": 0.39, "logps_train/chosen": -63.515724182128906, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -211.73403930664062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.01991199143230915, "rewards_train/margins": 8.739800678566098, "rewards_train/rejected": -8.719888687133789, "step": 793 }, { "epoch": 0.39, "learning_rate": 9.826335751866901e-07, "loss": 0.0013, "step": 794 }, { "epoch": 0.39, "logps_train/chosen": -62.67654037475586, "logps_train/ref_chosen": -67.9375, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -218.76002502441406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5278292894363403, "rewards_train/margins": 9.499632954597473, "rewards_train/rejected": -8.971803665161133, "step": 794 }, { "epoch": 0.39, "learning_rate": 9.825625998315114e-07, "loss": 0.0005, "step": 795 }, { "epoch": 0.39, "logps_train/chosen": -61.50309371948242, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -216.90594482421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3193683624267578, "rewards_train/margins": 9.243457794189453, "rewards_train/rejected": -8.924089431762695, "step": 795 }, { "epoch": 0.39, "learning_rate": 9.824914823101347e-07, "loss": 0.0011, "step": 796 }, { "epoch": 0.39, "logps_train/chosen": -61.16676330566406, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -213.00469970703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.38967156410217285, "rewards_train/margins": 9.008305788040161, "rewards_train/rejected": -8.618634223937988, "step": 796 }, { "epoch": 0.39, "learning_rate": 9.824202226435115e-07, "loss": 0.0014, "step": 797 }, { "epoch": 0.39, "logps_train/chosen": -59.36849594116211, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -120.5, "logps_train/rejected": -206.5579071044922, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2868565320968628, "rewards_train/margins": 8.894454836845398, "rewards_train/rejected": -8.607598304748535, "step": 797 }, { "epoch": 0.39, "learning_rate": 9.823488208526354e-07, "loss": 0.002, "step": 798 }, { "epoch": 0.39, "logps_train/chosen": -64.01417541503906, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -214.8851318359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.11967667937278748, "rewards_train/margins": 9.083482831716537, "rewards_train/rejected": -8.96380615234375, "step": 798 }, { "epoch": 0.39, "learning_rate": 9.82277276958542e-07, "loss": 0.0017, "step": 799 }, { "epoch": 0.39, "logps_train/chosen": -61.040279388427734, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -217.40078735351562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.36628445982933044, "rewards_train/margins": 9.244156926870346, "rewards_train/rejected": -8.877872467041016, "step": 799 }, { "epoch": 0.39, "learning_rate": 9.822055909823084e-07, "loss": 0.0007, "step": 800 }, { "epoch": 0.39, "logps_train/chosen": -58.51567459106445, "logps_train/ref_chosen": -62.875, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -210.9755096435547, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.436909019947052, "rewards_train/margins": 9.170643150806427, "rewards_train/rejected": -8.733734130859375, "step": 800 }, { "epoch": 0.39, "learning_rate": 9.821337629450539e-07, "loss": 0.0011, "step": 801 }, { "epoch": 0.39, "logps_train/chosen": -60.53398895263672, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -219.57666015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.40133732557296753, "rewards_train/margins": 9.406855285167694, "rewards_train/rejected": -9.005517959594727, "step": 801 }, { "epoch": 0.39, "learning_rate": 9.820617928679396e-07, "loss": 0.001, "step": 802 }, { "epoch": 0.39, "logps_train/chosen": -59.26292419433594, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -209.0196533203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3232191205024719, "rewards_train/margins": 8.903699696063995, "rewards_train/rejected": -8.580480575561523, "step": 802 }, { "epoch": 0.4, "learning_rate": 9.81989680772168e-07, "loss": 0.0069, "step": 803 }, { "epoch": 0.4, "logps_train/chosen": -62.886863708496094, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -221.81988525390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3787939250469208, "rewards_train/margins": 9.463613718748093, "rewards_train/rejected": -9.084819793701172, "step": 803 }, { "epoch": 0.4, "learning_rate": 9.819174266789845e-07, "loss": 0.001, "step": 804 }, { "epoch": 0.4, "logps_train/chosen": -61.911376953125, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -215.98208618164062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3359372317790985, "rewards_train/margins": 9.294986456632614, "rewards_train/rejected": -8.959049224853516, "step": 804 }, { "epoch": 0.4, "learning_rate": 9.81845030609675e-07, "loss": 0.0017, "step": 805 }, { "epoch": 0.4, "logps_train/chosen": -59.905677795410156, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -219.31631469726562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5174400806427002, "rewards_train/margins": 9.443408250808716, "rewards_train/rejected": -8.925968170166016, "step": 805 }, { "epoch": 0.4, "learning_rate": 9.81772492585568e-07, "loss": 0.0014, "step": 806 }, { "epoch": 0.4, "logps_train/chosen": -62.99930191040039, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -214.3626708984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2982141375541687, "rewards_train/margins": 9.187508761882782, "rewards_train/rejected": -8.889294624328613, "step": 806 }, { "epoch": 0.4, "learning_rate": 9.816998126280343e-07, "loss": 0.0018, "step": 807 }, { "epoch": 0.4, "logps_train/chosen": -61.952362060546875, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -219.48468017578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.21609216928482056, "rewards_train/margins": 9.329306662082672, "rewards_train/rejected": -9.113214492797852, "step": 807 }, { "epoch": 0.4, "learning_rate": 9.816269907584849e-07, "loss": 0.0011, "step": 808 }, { "epoch": 0.4, "logps_train/chosen": -60.53691101074219, "logps_train/ref_chosen": -62.65625, "logps_train/ref_rejected": -120.1875, "logps_train/rejected": -209.7062530517578, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.21410660445690155, "rewards_train/margins": 9.168229147791862, "rewards_train/rejected": -8.954122543334961, "step": 808 }, { "epoch": 0.4, "learning_rate": 9.815540269983744e-07, "loss": 0.001, "step": 809 }, { "epoch": 0.4, "logps_train/chosen": -61.72981643676758, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -212.71531677246094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.21813169121742249, "rewards_train/margins": 8.963784843683243, "rewards_train/rejected": -8.74565315246582, "step": 809 }, { "epoch": 0.4, "learning_rate": 9.81480921369198e-07, "loss": 0.0021, "step": 810 }, { "epoch": 0.4, "logps_train/chosen": -61.75092315673828, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -212.99134826660156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.19883327186107635, "rewards_train/margins": 9.088153645396233, "rewards_train/rejected": -8.889320373535156, "step": 810 }, { "epoch": 0.4, "learning_rate": 9.814076738924932e-07, "loss": 0.001, "step": 811 }, { "epoch": 0.4, "logps_train/chosen": -62.75382995605469, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -131.75, "logps_train/rejected": -224.13465881347656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3491290807723999, "rewards_train/margins": 9.582223296165466, "rewards_train/rejected": -9.233094215393066, "step": 811 }, { "epoch": 0.4, "learning_rate": 9.813342845898393e-07, "loss": 0.0006, "step": 812 }, { "epoch": 0.4, "logps_train/chosen": -61.35431671142578, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -219.5992431640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.26686298847198486, "rewards_train/margins": 9.534843564033508, "rewards_train/rejected": -9.267980575561523, "step": 812 }, { "epoch": 0.4, "learning_rate": 9.812607534828573e-07, "loss": 0.0011, "step": 813 }, { "epoch": 0.4, "logps_train/chosen": -63.43159484863281, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -215.67311096191406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2838425040245056, "rewards_train/margins": 9.245148122310638, "rewards_train/rejected": -8.961305618286133, "step": 813 }, { "epoch": 0.4, "learning_rate": 9.8118708059321e-07, "loss": 0.0035, "step": 814 }, { "epoch": 0.4, "logps_train/chosen": -61.010406494140625, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -213.86549377441406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4188326597213745, "rewards_train/margins": 9.45186698436737, "rewards_train/rejected": -9.033034324645996, "step": 814 }, { "epoch": 0.4, "learning_rate": 9.811132659426015e-07, "loss": 0.0014, "step": 815 }, { "epoch": 0.4, "logps_train/chosen": -63.95943832397461, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -121.5625, "logps_train/rejected": -210.10440063476562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.08979809284210205, "rewards_train/margins": 8.946772694587708, "rewards_train/rejected": -8.856974601745605, "step": 815 }, { "epoch": 0.4, "learning_rate": 9.810393095527788e-07, "loss": 0.0023, "step": 816 }, { "epoch": 0.4, "logps_train/chosen": -62.258724212646484, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -213.1415252685547, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2809144854545593, "rewards_train/margins": 9.065427958965302, "rewards_train/rejected": -8.784513473510742, "step": 816 }, { "epoch": 0.4, "learning_rate": 9.80965211445529e-07, "loss": 0.001, "step": 817 }, { "epoch": 0.4, "logps_train/chosen": -61.755775451660156, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -215.50640869140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.32237139344215393, "rewards_train/margins": 9.329213052988052, "rewards_train/rejected": -9.006841659545898, "step": 817 }, { "epoch": 0.4, "learning_rate": 9.80890971642683e-07, "loss": 0.0025, "step": 818 }, { "epoch": 0.4, "logps_train/chosen": -65.9439697265625, "logps_train/ref_chosen": -67.3125, "logps_train/ref_rejected": -133.375, "logps_train/rejected": -227.5027618408203, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.13890418410301208, "rewards_train/margins": 9.55031356215477, "rewards_train/rejected": -9.411409378051758, "step": 818 }, { "epoch": 0.4, "learning_rate": 9.808165901661117e-07, "loss": 0.0006, "step": 819 }, { "epoch": 0.4, "logps_train/chosen": -60.82221603393555, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -215.59674072265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3004443645477295, "rewards_train/margins": 9.320078611373901, "rewards_train/rejected": -9.019634246826172, "step": 819 }, { "epoch": 0.4, "learning_rate": 9.807420670377283e-07, "loss": 0.0006, "step": 820 }, { "epoch": 0.4, "logps_train/chosen": -61.15678024291992, "logps_train/ref_chosen": -62.46875, "logps_train/ref_rejected": -117.875, "logps_train/rejected": -203.93487548828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.13136765360832214, "rewards_train/margins": 8.735842674970627, "rewards_train/rejected": -8.604475021362305, "step": 820 }, { "epoch": 0.4, "learning_rate": 9.806674022794884e-07, "loss": 0.0014, "step": 821 }, { "epoch": 0.4, "logps_train/chosen": -58.15728759765625, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -217.4231719970703, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.46093180775642395, "rewards_train/margins": 9.564968138933182, "rewards_train/rejected": -9.104036331176758, "step": 821 }, { "epoch": 0.4, "learning_rate": 9.80592595913388e-07, "loss": 0.0014, "step": 822 }, { "epoch": 0.4, "logps_train/chosen": -64.46955871582031, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -220.6480712890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.14586631953716278, "rewards_train/margins": 9.341384813189507, "rewards_train/rejected": -9.195518493652344, "step": 822 }, { "epoch": 0.41, "learning_rate": 9.80517647961466e-07, "loss": 0.0014, "step": 823 }, { "epoch": 0.41, "logps_train/chosen": -61.8009033203125, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -210.32833862304688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3474001884460449, "rewards_train/margins": 8.89820146560669, "rewards_train/rejected": -8.550801277160645, "step": 823 }, { "epoch": 0.41, "learning_rate": 9.804425584458025e-07, "loss": 0.0006, "step": 824 }, { "epoch": 0.41, "logps_train/chosen": -60.91278839111328, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -120.9375, "logps_train/rejected": -206.40753173828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3179498612880707, "rewards_train/margins": 8.866077989339828, "rewards_train/rejected": -8.548128128051758, "step": 824 }, { "epoch": 0.41, "learning_rate": 9.803673273885193e-07, "loss": 0.0014, "step": 825 }, { "epoch": 0.41, "logps_train/chosen": -61.99397277832031, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -211.1306915283203, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.37028026580810547, "rewards_train/margins": 9.02915096282959, "rewards_train/rejected": -8.658870697021484, "step": 825 }, { "epoch": 0.41, "learning_rate": 9.802919548117803e-07, "loss": 0.0027, "step": 826 }, { "epoch": 0.41, "logps_train/chosen": -60.5269660949707, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -217.39495849609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.41805538535118103, "rewards_train/margins": 9.46511921286583, "rewards_train/rejected": -9.047063827514648, "step": 826 }, { "epoch": 0.41, "learning_rate": 9.802164407377903e-07, "loss": 0.0016, "step": 827 }, { "epoch": 0.41, "logps_train/chosen": -59.76904296875, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -217.2385711669922, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3568359613418579, "rewards_train/margins": 9.54299795627594, "rewards_train/rejected": -9.186161994934082, "step": 827 }, { "epoch": 0.41, "learning_rate": 9.801407851887964e-07, "loss": 0.0012, "step": 828 }, { "epoch": 0.41, "logps_train/chosen": -63.258216857910156, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -220.1197509765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.10664908587932587, "rewards_train/margins": 9.206710502505302, "rewards_train/rejected": -9.100061416625977, "step": 828 }, { "epoch": 0.41, "learning_rate": 9.800649881870875e-07, "loss": 0.0012, "step": 829 }, { "epoch": 0.41, "logps_train/chosen": -58.46400451660156, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -212.9442138671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5483261346817017, "rewards_train/margins": 9.359838128089905, "rewards_train/rejected": -8.811511993408203, "step": 829 }, { "epoch": 0.41, "learning_rate": 9.799890497549938e-07, "loss": 0.0009, "step": 830 }, { "epoch": 0.41, "logps_train/chosen": -59.34223937988281, "logps_train/ref_chosen": -61.875, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -213.3636474609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.25200653076171875, "rewards_train/margins": 9.301994323730469, "rewards_train/rejected": -9.04998779296875, "step": 830 }, { "epoch": 0.41, "learning_rate": 9.79912969914887e-07, "loss": 0.0018, "step": 831 }, { "epoch": 0.41, "logps_train/chosen": -58.824058532714844, "logps_train/ref_chosen": -61.65625, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -209.95375061035156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.28116849064826965, "rewards_train/margins": 9.174444705247879, "rewards_train/rejected": -8.89327621459961, "step": 831 }, { "epoch": 0.41, "learning_rate": 9.79836748689181e-07, "loss": 0.0039, "step": 832 }, { "epoch": 0.41, "logps_train/chosen": -65.09578704833984, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -130.375, "logps_train/rejected": -224.00424194335938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.162003293633461, "rewards_train/margins": 9.526880994439125, "rewards_train/rejected": -9.364877700805664, "step": 832 }, { "epoch": 0.41, "learning_rate": 9.79760386100331e-07, "loss": 0.0005, "step": 833 }, { "epoch": 0.41, "logps_train/chosen": -60.95245361328125, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -218.51953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3232608437538147, "rewards_train/margins": 9.366326868534088, "rewards_train/rejected": -9.043066024780273, "step": 833 }, { "epoch": 0.41, "learning_rate": 9.79683882170834e-07, "loss": 0.001, "step": 834 }, { "epoch": 0.41, "logps_train/chosen": -62.93451690673828, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -218.36041259765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3040582537651062, "rewards_train/margins": 9.393617808818817, "rewards_train/rejected": -9.089559555053711, "step": 834 }, { "epoch": 0.41, "learning_rate": 9.796072369232281e-07, "loss": 0.0007, "step": 835 }, { "epoch": 0.41, "logps_train/chosen": -60.34468460083008, "logps_train/ref_chosen": -63.59375, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -215.08226013183594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.32554149627685547, "rewards_train/margins": 9.333133697509766, "rewards_train/rejected": -9.00759220123291, "step": 835 }, { "epoch": 0.41, "learning_rate": 9.795304503800944e-07, "loss": 0.0018, "step": 836 }, { "epoch": 0.41, "logps_train/chosen": -65.0211410522461, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -216.42800903320312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.08685062825679779, "rewards_train/margins": 9.1153454631567, "rewards_train/rejected": -9.028494834899902, "step": 836 }, { "epoch": 0.41, "learning_rate": 9.794535225640541e-07, "loss": 0.0013, "step": 837 }, { "epoch": 0.41, "logps_train/chosen": -58.23817443847656, "logps_train/ref_chosen": -60.5625, "logps_train/ref_rejected": -119.375, "logps_train/rejected": -206.1077880859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.23321397602558136, "rewards_train/margins": 8.905907228589058, "rewards_train/rejected": -8.672693252563477, "step": 837 }, { "epoch": 0.41, "learning_rate": 9.79376453497771e-07, "loss": 0.0019, "step": 838 }, { "epoch": 0.41, "logps_train/chosen": -61.605587005615234, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -218.167236328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3026249408721924, "rewards_train/margins": 9.507827043533325, "rewards_train/rejected": -9.205202102661133, "step": 838 }, { "epoch": 0.41, "learning_rate": 9.792992432039496e-07, "loss": 0.001, "step": 839 }, { "epoch": 0.41, "logps_train/chosen": -58.687034606933594, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -121.3125, "logps_train/rejected": -210.42872619628906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.43696069717407227, "rewards_train/margins": 9.346189975738525, "rewards_train/rejected": -8.909229278564453, "step": 839 }, { "epoch": 0.41, "learning_rate": 9.792218917053374e-07, "loss": 0.0035, "step": 840 }, { "epoch": 0.41, "logps_train/chosen": -62.67466354370117, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -220.7412109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.35426217317581177, "rewards_train/margins": 9.436586201190948, "rewards_train/rejected": -9.082324028015137, "step": 840 }, { "epoch": 0.41, "learning_rate": 9.79144399024722e-07, "loss": 0.0008, "step": 841 }, { "epoch": 0.41, "logps_train/chosen": -59.004600524902344, "logps_train/ref_chosen": -61.625, "logps_train/ref_rejected": -120.875, "logps_train/rejected": -206.81716918945312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2629188299179077, "rewards_train/margins": 8.861286520957947, "rewards_train/rejected": -8.598367691040039, "step": 841 }, { "epoch": 0.41, "learning_rate": 9.790667651849337e-07, "loss": 0.002, "step": 842 }, { "epoch": 0.41, "logps_train/chosen": -61.83757019042969, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -214.4355010986328, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.20896783471107483, "rewards_train/margins": 9.273758560419083, "rewards_train/rejected": -9.064790725708008, "step": 842 }, { "epoch": 0.42, "learning_rate": 9.789889902088433e-07, "loss": 0.0013, "step": 843 }, { "epoch": 0.42, "logps_train/chosen": -60.24078369140625, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -221.07955932617188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.18856826424598694, "rewards_train/margins": 9.587294727563858, "rewards_train/rejected": -9.398726463317871, "step": 843 }, { "epoch": 0.42, "learning_rate": 9.789110741193647e-07, "loss": 0.0023, "step": 844 }, { "epoch": 0.42, "logps_train/chosen": -63.73194885253906, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -223.51698303222656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.22602330148220062, "rewards_train/margins": 9.64041768014431, "rewards_train/rejected": -9.41439437866211, "step": 844 }, { "epoch": 0.42, "learning_rate": 9.788330169394518e-07, "loss": 0.0023, "step": 845 }, { "epoch": 0.42, "logps_train/chosen": -60.538795471191406, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -212.32080078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.40888893604278564, "rewards_train/margins": 9.141360402107239, "rewards_train/rejected": -8.732471466064453, "step": 845 }, { "epoch": 0.42, "learning_rate": 9.787548186921014e-07, "loss": 0.0027, "step": 846 }, { "epoch": 0.42, "logps_train/chosen": -61.88732147216797, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -216.09622192382812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2631723880767822, "rewards_train/margins": 9.417130708694458, "rewards_train/rejected": -9.153958320617676, "step": 846 }, { "epoch": 0.42, "learning_rate": 9.786764794003505e-07, "loss": 0.001, "step": 847 }, { "epoch": 0.42, "logps_train/chosen": -61.205467224121094, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -222.34979248046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3133890628814697, "rewards_train/margins": 9.555595636367798, "rewards_train/rejected": -9.242206573486328, "step": 847 }, { "epoch": 0.42, "learning_rate": 9.78597999087279e-07, "loss": 0.0005, "step": 848 }, { "epoch": 0.42, "logps_train/chosen": -60.147216796875, "logps_train/ref_chosen": -61.5625, "logps_train/ref_rejected": -121.375, "logps_train/rejected": -208.1363525390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1414552927017212, "rewards_train/margins": 8.821253418922424, "rewards_train/rejected": -8.679798126220703, "step": 848 }, { "epoch": 0.42, "learning_rate": 9.785193777760075e-07, "loss": 0.0024, "step": 849 }, { "epoch": 0.42, "logps_train/chosen": -61.02197265625, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -209.84664916992188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.16215819120407104, "rewards_train/margins": 9.044674098491669, "rewards_train/rejected": -8.882515907287598, "step": 849 }, { "epoch": 0.42, "learning_rate": 9.784406154896982e-07, "loss": 0.0021, "step": 850 }, { "epoch": 0.42, "logps_train/chosen": -63.679534912109375, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -122.125, "logps_train/rejected": -209.7552490234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.13854074478149414, "rewards_train/margins": 8.899367809295654, "rewards_train/rejected": -8.76082706451416, "step": 850 }, { "epoch": 0.42, "learning_rate": 9.783617122515553e-07, "loss": 0.0031, "step": 851 }, { "epoch": 0.42, "logps_train/chosen": -61.556663513183594, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -215.27490234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3056126534938812, "rewards_train/margins": 9.299118131399155, "rewards_train/rejected": -8.993505477905273, "step": 851 }, { "epoch": 0.42, "learning_rate": 9.782826680848242e-07, "loss": 0.0016, "step": 852 }, { "epoch": 0.42, "logps_train/chosen": -58.84837341308594, "logps_train/ref_chosen": -61.9375, "logps_train/ref_rejected": -117.875, "logps_train/rejected": -207.83038330078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.31062158942222595, "rewards_train/margins": 9.30440267920494, "rewards_train/rejected": -8.993781089782715, "step": 852 }, { "epoch": 0.42, "learning_rate": 9.782034830127915e-07, "loss": 0.0017, "step": 853 }, { "epoch": 0.42, "logps_train/chosen": -63.53216552734375, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -210.96658325195312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.10308229178190231, "rewards_train/margins": 8.924449555575848, "rewards_train/rejected": -8.821367263793945, "step": 853 }, { "epoch": 0.42, "learning_rate": 9.78124157058786e-07, "loss": 0.0032, "step": 854 }, { "epoch": 0.42, "logps_train/chosen": -62.93506622314453, "logps_train/ref_chosen": -67.375, "logps_train/ref_rejected": -131.25, "logps_train/rejected": -227.62020874023438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.44103899598121643, "rewards_train/margins": 10.0741528570652, "rewards_train/rejected": -9.633113861083984, "step": 854 }, { "epoch": 0.42, "learning_rate": 9.780446902461775e-07, "loss": 0.0009, "step": 855 }, { "epoch": 0.42, "logps_train/chosen": -60.514625549316406, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -213.38229370117188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.38857656717300415, "rewards_train/margins": 9.115770399570465, "rewards_train/rejected": -8.727193832397461, "step": 855 }, { "epoch": 0.42, "learning_rate": 9.77965082598378e-07, "loss": 0.0046, "step": 856 }, { "epoch": 0.42, "logps_train/chosen": -62.14764404296875, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -223.80752563476562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.19568440318107605, "rewards_train/margins": 9.888204544782639, "rewards_train/rejected": -9.692520141601562, "step": 856 }, { "epoch": 0.42, "learning_rate": 9.778853341388394e-07, "loss": 0.0007, "step": 857 }, { "epoch": 0.42, "logps_train/chosen": -63.454925537109375, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -222.26528930664062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2717922031879425, "rewards_train/margins": 9.556915074586868, "rewards_train/rejected": -9.285122871398926, "step": 857 }, { "epoch": 0.42, "learning_rate": 9.778054448910569e-07, "loss": 0.0022, "step": 858 }, { "epoch": 0.42, "logps_train/chosen": -63.86195755004883, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -221.9735107421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.11561109125614166, "rewards_train/margins": 9.485911384224892, "rewards_train/rejected": -9.37030029296875, "step": 858 }, { "epoch": 0.42, "learning_rate": 9.777254148785663e-07, "loss": 0.0015, "step": 859 }, { "epoch": 0.42, "logps_train/chosen": -59.70668411254883, "logps_train/ref_chosen": -62.28125, "logps_train/ref_rejected": -120.6875, "logps_train/rejected": -206.5618896484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2580423951148987, "rewards_train/margins": 8.847043097019196, "rewards_train/rejected": -8.589000701904297, "step": 859 }, { "epoch": 0.42, "learning_rate": 9.776452441249447e-07, "loss": 0.0017, "step": 860 }, { "epoch": 0.42, "logps_train/chosen": -63.625946044921875, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -222.3825225830078, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.15871915221214294, "rewards_train/margins": 9.53198155760765, "rewards_train/rejected": -9.373262405395508, "step": 860 }, { "epoch": 0.42, "learning_rate": 9.775649326538114e-07, "loss": 0.0025, "step": 861 }, { "epoch": 0.42, "logps_train/chosen": -62.98698043823242, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -225.53424072265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.19471010565757751, "rewards_train/margins": 9.836611121892929, "rewards_train/rejected": -9.641901016235352, "step": 861 }, { "epoch": 0.42, "learning_rate": 9.774844804888263e-07, "loss": 0.0012, "step": 862 }, { "epoch": 0.42, "logps_train/chosen": -62.91159439086914, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -218.04330444335938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.14587660133838654, "rewards_train/margins": 9.36490412056446, "rewards_train/rejected": -9.219027519226074, "step": 862 }, { "epoch": 0.42, "learning_rate": 9.774038876536913e-07, "loss": 0.0018, "step": 863 }, { "epoch": 0.42, "logps_train/chosen": -61.52550506591797, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -215.66177368164062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.31053611636161804, "rewards_train/margins": 9.48433181643486, "rewards_train/rejected": -9.173795700073242, "step": 863 }, { "epoch": 0.43, "learning_rate": 9.773231541721496e-07, "loss": 0.0021, "step": 864 }, { "epoch": 0.43, "logps_train/chosen": -64.42877197265625, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -118.875, "logps_train/rejected": -210.233642578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.08633479475975037, "rewards_train/margins": 9.05021420121193, "rewards_train/rejected": -9.13654899597168, "step": 864 }, { "epoch": 0.43, "learning_rate": 9.772422800679858e-07, "loss": 0.0028, "step": 865 }, { "epoch": 0.43, "logps_train/chosen": -62.74366760253906, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -223.09768676757812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.43945154547691345, "rewards_train/margins": 9.655470222234726, "rewards_train/rejected": -9.216018676757812, "step": 865 }, { "epoch": 0.43, "learning_rate": 9.77161265365026e-07, "loss": 0.0005, "step": 866 }, { "epoch": 0.43, "logps_train/chosen": -61.830806732177734, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -211.9984130859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.22043490409851074, "rewards_train/margins": 9.182385206222534, "rewards_train/rejected": -8.961950302124023, "step": 866 }, { "epoch": 0.43, "learning_rate": 9.770801100871377e-07, "loss": 0.0018, "step": 867 }, { "epoch": 0.43, "logps_train/chosen": -63.04289245605469, "logps_train/ref_chosen": -67.75, "logps_train/ref_rejected": -133.375, "logps_train/rejected": -227.9791259765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.46865981817245483, "rewards_train/margins": 9.926825940608978, "rewards_train/rejected": -9.458166122436523, "step": 867 }, { "epoch": 0.43, "learning_rate": 9.769988142582297e-07, "loss": 0.0004, "step": 868 }, { "epoch": 0.43, "logps_train/chosen": -61.87571716308594, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -220.0655517578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.38952773809432983, "rewards_train/margins": 9.428700864315033, "rewards_train/rejected": -9.039173126220703, "step": 868 }, { "epoch": 0.43, "learning_rate": 9.769173779022525e-07, "loss": 0.0031, "step": 869 }, { "epoch": 0.43, "logps_train/chosen": -59.16820526123047, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -223.96884155273438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4719248414039612, "rewards_train/margins": 10.267343580722809, "rewards_train/rejected": -9.795418739318848, "step": 869 }, { "epoch": 0.43, "learning_rate": 9.768358010431973e-07, "loss": 0.0002, "step": 870 }, { "epoch": 0.43, "logps_train/chosen": -64.09774780273438, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -223.53785705566406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.19628043472766876, "rewards_train/margins": 9.610613778233528, "rewards_train/rejected": -9.41433334350586, "step": 870 }, { "epoch": 0.43, "learning_rate": 9.767540837050976e-07, "loss": 0.0002, "step": 871 }, { "epoch": 0.43, "logps_train/chosen": -60.66926574707031, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -218.5087890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.23102283477783203, "rewards_train/margins": 9.653824806213379, "rewards_train/rejected": -9.422801971435547, "step": 871 }, { "epoch": 0.43, "learning_rate": 9.76672225912028e-07, "loss": 0.0033, "step": 872 }, { "epoch": 0.43, "logps_train/chosen": -62.06631851196289, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -216.4452667236328, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.26260650157928467, "rewards_train/margins": 9.401812434196472, "rewards_train/rejected": -9.139205932617188, "step": 872 }, { "epoch": 0.43, "learning_rate": 9.765902276881043e-07, "loss": 0.0017, "step": 873 }, { "epoch": 0.43, "logps_train/chosen": -63.41829299926758, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -122.6875, "logps_train/rejected": -212.16412353515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1892741620540619, "rewards_train/margins": 9.139964431524277, "rewards_train/rejected": -8.950690269470215, "step": 873 }, { "epoch": 0.43, "learning_rate": 9.765080890574834e-07, "loss": 0.0016, "step": 874 }, { "epoch": 0.43, "logps_train/chosen": -60.33921813964844, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -224.51071166992188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.41905632615089417, "rewards_train/margins": 9.988096624612808, "rewards_train/rejected": -9.569040298461914, "step": 874 }, { "epoch": 0.43, "learning_rate": 9.764258100443639e-07, "loss": 0.0006, "step": 875 }, { "epoch": 0.43, "logps_train/chosen": -64.85148620605469, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -134.125, "logps_train/rejected": -231.89141845703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1919509470462799, "rewards_train/margins": 9.974209934473038, "rewards_train/rejected": -9.782258987426758, "step": 875 }, { "epoch": 0.43, "learning_rate": 9.763433906729862e-07, "loss": 0.0009, "step": 876 }, { "epoch": 0.43, "logps_train/chosen": -64.21578979492188, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -130.875, "logps_train/rejected": -226.33343505859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2639189064502716, "rewards_train/margins": 9.809078246355057, "rewards_train/rejected": -9.545159339904785, "step": 876 }, { "epoch": 0.43, "learning_rate": 9.762608309676315e-07, "loss": 0.0007, "step": 877 }, { "epoch": 0.43, "logps_train/chosen": -60.27043914794922, "logps_train/ref_chosen": -61.75, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -216.9915313720703, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.14861537516117096, "rewards_train/margins": 9.342592731118202, "rewards_train/rejected": -9.193977355957031, "step": 877 }, { "epoch": 0.43, "learning_rate": 9.76178130952622e-07, "loss": 0.0019, "step": 878 }, { "epoch": 0.43, "logps_train/chosen": -60.166656494140625, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -120.9375, "logps_train/rejected": -208.8782958984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.28304123878479004, "rewards_train/margins": 9.078635454177856, "rewards_train/rejected": -8.795594215393066, "step": 878 }, { "epoch": 0.43, "learning_rate": 9.76095290652322e-07, "loss": 0.002, "step": 879 }, { "epoch": 0.43, "logps_train/chosen": -56.69618606567383, "logps_train/ref_chosen": -61.03125, "logps_train/ref_rejected": -120.0625, "logps_train/rejected": -208.58392333984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.43292051553726196, "rewards_train/margins": 9.283353865146637, "rewards_train/rejected": -8.850433349609375, "step": 879 }, { "epoch": 0.43, "learning_rate": 9.76012310091137e-07, "loss": 0.0009, "step": 880 }, { "epoch": 0.43, "logps_train/chosen": -64.53993225097656, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -220.83523559570312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.06719829887151718, "rewards_train/margins": 9.344959758222103, "rewards_train/rejected": -9.277761459350586, "step": 880 }, { "epoch": 0.43, "learning_rate": 9.759291892935134e-07, "loss": 0.001, "step": 881 }, { "epoch": 0.43, "logps_train/chosen": -61.561279296875, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -223.29000854492188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.34328576922416687, "rewards_train/margins": 9.885225504636765, "rewards_train/rejected": -9.541939735412598, "step": 881 }, { "epoch": 0.43, "learning_rate": 9.75845928283939e-07, "loss": 0.0022, "step": 882 }, { "epoch": 0.43, "logps_train/chosen": -61.56937789916992, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -222.1331024169922, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2731894850730896, "rewards_train/margins": 9.681812226772308, "rewards_train/rejected": -9.408622741699219, "step": 882 }, { "epoch": 0.43, "learning_rate": 9.757625270869434e-07, "loss": 0.0008, "step": 883 }, { "epoch": 0.43, "logps_train/chosen": -58.97132873535156, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -217.0521697998047, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4081406891345978, "rewards_train/margins": 9.476639300584793, "rewards_train/rejected": -9.068498611450195, "step": 883 }, { "epoch": 0.44, "learning_rate": 9.75678985727097e-07, "loss": 0.002, "step": 884 }, { "epoch": 0.44, "logps_train/chosen": -60.694881439208984, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -217.51644897460938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.37743571400642395, "rewards_train/margins": 9.715312033891678, "rewards_train/rejected": -9.337876319885254, "step": 884 }, { "epoch": 0.44, "learning_rate": 9.755953042290116e-07, "loss": 0.0004, "step": 885 }, { "epoch": 0.44, "logps_train/chosen": -67.89764404296875, "logps_train/ref_chosen": -68.5625, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -224.2513427734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0643371045589447, "rewards_train/margins": 9.796259254217148, "rewards_train/rejected": -9.731922149658203, "step": 885 }, { "epoch": 0.44, "learning_rate": 9.755114826173402e-07, "loss": 0.0007, "step": 886 }, { "epoch": 0.44, "logps_train/chosen": -59.6507682800293, "logps_train/ref_chosen": -61.6875, "logps_train/ref_rejected": -120.3125, "logps_train/rejected": -206.8201904296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.20430797338485718, "rewards_train/margins": 8.854247510433197, "rewards_train/rejected": -8.64993953704834, "step": 886 }, { "epoch": 0.44, "learning_rate": 9.754275209167777e-07, "loss": 0.0022, "step": 887 }, { "epoch": 0.44, "logps_train/chosen": -62.19049835205078, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -121.5, "logps_train/rejected": -212.25294494628906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.14960290491580963, "rewards_train/margins": 9.22475053369999, "rewards_train/rejected": -9.07514762878418, "step": 887 }, { "epoch": 0.44, "learning_rate": 9.753434191520592e-07, "loss": 0.001, "step": 888 }, { "epoch": 0.44, "logps_train/chosen": -58.66339111328125, "logps_train/ref_chosen": -61.4375, "logps_train/ref_rejected": -118.1875, "logps_train/rejected": -204.9986572265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.27775251865386963, "rewards_train/margins": 8.95632803440094, "rewards_train/rejected": -8.67857551574707, "step": 888 }, { "epoch": 0.44, "learning_rate": 9.752591773479622e-07, "loss": 0.0008, "step": 889 }, { "epoch": 0.44, "logps_train/chosen": -61.50318908691406, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -217.88539123535156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.38889050483703613, "rewards_train/margins": 9.812780618667603, "rewards_train/rejected": -9.423890113830566, "step": 889 }, { "epoch": 0.44, "learning_rate": 9.751747955293045e-07, "loss": 0.0013, "step": 890 }, { "epoch": 0.44, "logps_train/chosen": -62.60679626464844, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -216.63299560546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.24320252239704132, "rewards_train/margins": 9.453377082943916, "rewards_train/rejected": -9.210174560546875, "step": 890 }, { "epoch": 0.44, "learning_rate": 9.750902737209455e-07, "loss": 0.0016, "step": 891 }, { "epoch": 0.44, "logps_train/chosen": -63.62767791748047, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -219.31361389160156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3114508092403412, "rewards_train/margins": 9.61078056693077, "rewards_train/rejected": -9.29932975769043, "step": 891 }, { "epoch": 0.44, "learning_rate": 9.750056119477861e-07, "loss": 0.0003, "step": 892 }, { "epoch": 0.44, "logps_train/chosen": -62.028690338134766, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -222.99993896484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3717895746231079, "rewards_train/margins": 9.77979052066803, "rewards_train/rejected": -9.408000946044922, "step": 892 }, { "epoch": 0.44, "learning_rate": 9.749208102347682e-07, "loss": 0.001, "step": 893 }, { "epoch": 0.44, "logps_train/chosen": -60.72241973876953, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -121.125, "logps_train/rejected": -209.6902313232422, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3509025275707245, "rewards_train/margins": 9.209964722394943, "rewards_train/rejected": -8.859062194824219, "step": 893 }, { "epoch": 0.44, "learning_rate": 9.74835868606875e-07, "loss": 0.0013, "step": 894 }, { "epoch": 0.44, "logps_train/chosen": -63.62615966796875, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -227.11825561523438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.04998190701007843, "rewards_train/margins": 9.984708622097969, "rewards_train/rejected": -9.93472671508789, "step": 894 }, { "epoch": 0.44, "learning_rate": 9.74750787089131e-07, "loss": 0.0008, "step": 895 }, { "epoch": 0.44, "logps_train/chosen": -61.74647903442383, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -120.25, "logps_train/rejected": -211.20327758789062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.13714399933815002, "rewards_train/margins": 9.23085966706276, "rewards_train/rejected": -9.09371566772461, "step": 895 }, { "epoch": 0.44, "learning_rate": 9.746655657066013e-07, "loss": 0.0018, "step": 896 }, { "epoch": 0.44, "logps_train/chosen": -62.389503479003906, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -223.09352111816406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.41749516129493713, "rewards_train/margins": 9.85507145524025, "rewards_train/rejected": -9.437576293945312, "step": 896 }, { "epoch": 0.44, "learning_rate": 9.745802044843933e-07, "loss": 0.0005, "step": 897 }, { "epoch": 0.44, "logps_train/chosen": -64.72581481933594, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -218.50204467773438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.06018252670764923, "rewards_train/margins": 9.476989701390266, "rewards_train/rejected": -9.416807174682617, "step": 897 }, { "epoch": 0.44, "learning_rate": 9.744947034476545e-07, "loss": 0.0005, "step": 898 }, { "epoch": 0.44, "logps_train/chosen": -62.5589714050293, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -227.3072052001953, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3563345670700073, "rewards_train/margins": 10.178460955619812, "rewards_train/rejected": -9.822126388549805, "step": 898 }, { "epoch": 0.44, "learning_rate": 9.744090626215744e-07, "loss": 0.0004, "step": 899 }, { "epoch": 0.44, "logps_train/chosen": -62.56920623779297, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -121.5625, "logps_train/rejected": -214.52964782714844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.06993480771780014, "rewards_train/margins": 9.36821361631155, "rewards_train/rejected": -9.29827880859375, "step": 899 }, { "epoch": 0.44, "learning_rate": 9.74323282031383e-07, "loss": 0.0014, "step": 900 }, { "epoch": 0.44, "logps_train/chosen": -57.900482177734375, "logps_train/ref_chosen": -61.90625, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -215.23614501953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3998929262161255, "rewards_train/margins": 9.472580075263977, "rewards_train/rejected": -9.072687149047852, "step": 900 }, { "epoch": 0.44, "learning_rate": 9.742373617023525e-07, "loss": 0.0005, "step": 901 }, { "epoch": 0.44, "logps_train/chosen": -62.9185676574707, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -223.4396514892578, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.20807002484798431, "rewards_train/margins": 9.765512689948082, "rewards_train/rejected": -9.557442665100098, "step": 901 }, { "epoch": 0.44, "learning_rate": 9.74151301659795e-07, "loss": 0.0002, "step": 902 }, { "epoch": 0.44, "logps_train/chosen": -63.701351165771484, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -214.8190155029297, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2290836125612259, "rewards_train/margins": 9.201806619763374, "rewards_train/rejected": -8.972723007202148, "step": 902 }, { "epoch": 0.44, "learning_rate": 9.740651019290648e-07, "loss": 0.0008, "step": 903 }, { "epoch": 0.44, "logps_train/chosen": -62.28923034667969, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -219.7306365966797, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3759113550186157, "rewards_train/margins": 9.835301995277405, "rewards_train/rejected": -9.459390640258789, "step": 903 }, { "epoch": 0.45, "learning_rate": 9.739787625355562e-07, "loss": 0.0008, "step": 904 }, { "epoch": 0.45, "logps_train/chosen": -62.63615417480469, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -219.86932373046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1404862105846405, "rewards_train/margins": 9.397827595472336, "rewards_train/rejected": -9.257341384887695, "step": 904 }, { "epoch": 0.45, "learning_rate": 9.738922835047062e-07, "loss": 0.0012, "step": 905 }, { "epoch": 0.45, "logps_train/chosen": -59.61259078979492, "logps_train/ref_chosen": -62.1875, "logps_train/ref_rejected": -121.3125, "logps_train/rejected": -212.8419189453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2580280601978302, "rewards_train/margins": 9.411849051713943, "rewards_train/rejected": -9.153820991516113, "step": 905 }, { "epoch": 0.45, "learning_rate": 9.738056648619917e-07, "loss": 0.0025, "step": 906 }, { "epoch": 0.45, "logps_train/chosen": -64.72688293457031, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -132.625, "logps_train/rejected": -231.07534790039062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.15367910265922546, "rewards_train/margins": 9.993246287107468, "rewards_train/rejected": -9.839567184448242, "step": 906 }, { "epoch": 0.45, "learning_rate": 9.737189066329313e-07, "loss": 0.0003, "step": 907 }, { "epoch": 0.45, "logps_train/chosen": -58.79172897338867, "logps_train/ref_chosen": -60.9375, "logps_train/ref_rejected": -119.0625, "logps_train/rejected": -209.87289428710938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2155778706073761, "rewards_train/margins": 9.297936230897903, "rewards_train/rejected": -9.082358360290527, "step": 907 }, { "epoch": 0.45, "learning_rate": 9.736320088430842e-07, "loss": 0.0057, "step": 908 }, { "epoch": 0.45, "logps_train/chosen": -59.416038513183594, "logps_train/ref_chosen": -62.09375, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -216.45474243164062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.26728272438049316, "rewards_train/margins": 9.511097192764282, "rewards_train/rejected": -9.243814468383789, "step": 908 }, { "epoch": 0.45, "learning_rate": 9.735449715180518e-07, "loss": 0.0016, "step": 909 }, { "epoch": 0.45, "logps_train/chosen": -63.3751220703125, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -217.08236694335938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0029418617486953735, "rewards_train/margins": 9.298264726996422, "rewards_train/rejected": -9.301206588745117, "step": 909 }, { "epoch": 0.45, "learning_rate": 9.73457794683475e-07, "loss": 0.0008, "step": 910 }, { "epoch": 0.45, "logps_train/chosen": -65.02398681640625, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -224.02511596679688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.11576493084430695, "rewards_train/margins": 9.67403919994831, "rewards_train/rejected": -9.558274269104004, "step": 910 }, { "epoch": 0.45, "learning_rate": 9.733704783650372e-07, "loss": 0.0016, "step": 911 }, { "epoch": 0.45, "logps_train/chosen": -61.18366241455078, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -217.3531951904297, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.17401650547981262, "rewards_train/margins": 9.533359080553055, "rewards_train/rejected": -9.359342575073242, "step": 911 }, { "epoch": 0.45, "learning_rate": 9.732830225884623e-07, "loss": 0.0026, "step": 912 }, { "epoch": 0.45, "logps_train/chosen": -60.365882873535156, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -221.23959350585938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.48870447278022766, "rewards_train/margins": 9.839176923036575, "rewards_train/rejected": -9.350472450256348, "step": 912 }, { "epoch": 0.45, "learning_rate": 9.731954273795155e-07, "loss": 0.0008, "step": 913 }, { "epoch": 0.45, "logps_train/chosen": -64.61328887939453, "logps_train/ref_chosen": -67.4375, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -227.3712615966797, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2840815782546997, "rewards_train/margins": 10.168180584907532, "rewards_train/rejected": -9.884099006652832, "step": 913 }, { "epoch": 0.45, "learning_rate": 9.731076927640025e-07, "loss": 0.0008, "step": 914 }, { "epoch": 0.45, "logps_train/chosen": -62.694210052490234, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -223.407958984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.12366969883441925, "rewards_train/margins": 9.658118322491646, "rewards_train/rejected": -9.534448623657227, "step": 914 }, { "epoch": 0.45, "learning_rate": 9.73019818767771e-07, "loss": 0.0012, "step": 915 }, { "epoch": 0.45, "logps_train/chosen": -62.885894775390625, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -227.52719116210938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2274259775876999, "rewards_train/margins": 10.25514642894268, "rewards_train/rejected": -10.02772045135498, "step": 915 }, { "epoch": 0.45, "learning_rate": 9.72931805416709e-07, "loss": 0.0005, "step": 916 }, { "epoch": 0.45, "logps_train/chosen": -65.45243835449219, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -132.5, "logps_train/rejected": -231.61441040039062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.13673855364322662, "rewards_train/margins": 10.052377477288246, "rewards_train/rejected": -9.91563892364502, "step": 916 }, { "epoch": 0.45, "learning_rate": 9.728436527367459e-07, "loss": 0.0015, "step": 917 }, { "epoch": 0.45, "logps_train/chosen": -61.57970428466797, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -227.11138916015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.14376290142536163, "rewards_train/margins": 9.91300804913044, "rewards_train/rejected": -9.769245147705078, "step": 917 }, { "epoch": 0.45, "learning_rate": 9.727553607538522e-07, "loss": 0.0009, "step": 918 }, { "epoch": 0.45, "logps_train/chosen": -59.411338806152344, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -218.46812438964844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5502721071243286, "rewards_train/margins": 9.743863224983215, "rewards_train/rejected": -9.193591117858887, "step": 918 }, { "epoch": 0.45, "learning_rate": 9.72666929494039e-07, "loss": 0.0008, "step": 919 }, { "epoch": 0.45, "logps_train/chosen": -65.3531494140625, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -219.5357208251953, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.11685799062252045, "rewards_train/margins": 9.531758770346642, "rewards_train/rejected": -9.414900779724121, "step": 919 }, { "epoch": 0.45, "learning_rate": 9.72578358983359e-07, "loss": 0.0009, "step": 920 }, { "epoch": 0.45, "logps_train/chosen": -60.80828094482422, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -216.39035034179688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3014470934867859, "rewards_train/margins": 9.602104365825653, "rewards_train/rejected": -9.300657272338867, "step": 920 }, { "epoch": 0.45, "learning_rate": 9.724896492479056e-07, "loss": 0.0022, "step": 921 }, { "epoch": 0.45, "logps_train/chosen": -61.224613189697266, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -217.8835906982422, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1488766223192215, "rewards_train/margins": 9.490702107548714, "rewards_train/rejected": -9.341825485229492, "step": 921 }, { "epoch": 0.45, "learning_rate": 9.724008003138131e-07, "loss": 0.0009, "step": 922 }, { "epoch": 0.45, "logps_train/chosen": -60.70450973510742, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -222.07518005371094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.22769367694854736, "rewards_train/margins": 10.06470501422882, "rewards_train/rejected": -9.837011337280273, "step": 922 }, { "epoch": 0.45, "learning_rate": 9.723118122072574e-07, "loss": 0.0008, "step": 923 }, { "epoch": 0.45, "logps_train/chosen": -61.629981994628906, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -225.51641845703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.46444350481033325, "rewards_train/margins": 10.273604691028595, "rewards_train/rejected": -9.809161186218262, "step": 923 }, { "epoch": 0.45, "learning_rate": 9.722226849544548e-07, "loss": 0.0004, "step": 924 }, { "epoch": 0.45, "logps_train/chosen": -63.876976013183594, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -227.5535888671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.16581842303276062, "rewards_train/margins": 10.013072222471237, "rewards_train/rejected": -9.847253799438477, "step": 924 }, { "epoch": 0.46, "learning_rate": 9.721334185816627e-07, "loss": 0.0005, "step": 925 }, { "epoch": 0.46, "logps_train/chosen": -61.92849349975586, "logps_train/ref_chosen": -62.53125, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -218.56805419921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0604221373796463, "rewards_train/margins": 9.498280718922615, "rewards_train/rejected": -9.437858581542969, "step": 925 }, { "epoch": 0.46, "learning_rate": 9.720440131151794e-07, "loss": 0.0005, "step": 926 }, { "epoch": 0.46, "logps_train/chosen": -61.81538391113281, "logps_train/ref_chosen": -62.34375, "logps_train/ref_rejected": -120.875, "logps_train/rejected": -213.94091796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.05264104902744293, "rewards_train/margins": 9.359135761857033, "rewards_train/rejected": -9.30649471282959, "step": 926 }, { "epoch": 0.46, "learning_rate": 9.71954468581345e-07, "loss": 0.0015, "step": 927 }, { "epoch": 0.46, "logps_train/chosen": -66.03678131103516, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -227.65635681152344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.06619487702846527, "rewards_train/margins": 10.04213272035122, "rewards_train/rejected": -9.975937843322754, "step": 927 }, { "epoch": 0.46, "learning_rate": 9.718647850065393e-07, "loss": 0.0009, "step": 928 }, { "epoch": 0.46, "logps_train/chosen": -65.46467590332031, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -224.86996459960938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.199885755777359, "rewards_train/margins": 9.762501329183578, "rewards_train/rejected": -9.962387084960938, "step": 928 }, { "epoch": 0.46, "learning_rate": 9.71774962417184e-07, "loss": 0.0017, "step": 929 }, { "epoch": 0.46, "logps_train/chosen": -63.13969421386719, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -221.62863159179688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1685013473033905, "rewards_train/margins": 9.830973118543625, "rewards_train/rejected": -9.662471771240234, "step": 929 }, { "epoch": 0.46, "learning_rate": 9.716850008397412e-07, "loss": 0.0005, "step": 930 }, { "epoch": 0.46, "logps_train/chosen": -65.29747009277344, "logps_train/ref_chosen": -67.5, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -227.46646118164062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.216981440782547, "rewards_train/margins": 9.98384240269661, "rewards_train/rejected": -9.766860961914062, "step": 930 }, { "epoch": 0.46, "learning_rate": 9.715949003007143e-07, "loss": 0.0006, "step": 931 }, { "epoch": 0.46, "logps_train/chosen": -63.26981735229492, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -222.7797393798828, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.28029370307922363, "rewards_train/margins": 9.981315851211548, "rewards_train/rejected": -9.701022148132324, "step": 931 }, { "epoch": 0.46, "learning_rate": 9.71504660826648e-07, "loss": 0.004, "step": 932 }, { "epoch": 0.46, "logps_train/chosen": -61.234718322753906, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -216.18467712402344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.11246541887521744, "rewards_train/margins": 9.528248347342014, "rewards_train/rejected": -9.415782928466797, "step": 932 }, { "epoch": 0.46, "learning_rate": 9.714142824441265e-07, "loss": 0.0013, "step": 933 }, { "epoch": 0.46, "logps_train/chosen": -62.298057556152344, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -121.375, "logps_train/rejected": -217.0645751953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.09560917317867279, "rewards_train/margins": 9.669254764914513, "rewards_train/rejected": -9.57364559173584, "step": 933 }, { "epoch": 0.46, "learning_rate": 9.713237651797769e-07, "loss": 0.0015, "step": 934 }, { "epoch": 0.46, "logps_train/chosen": -62.71316909790039, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -223.35232543945312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.22231093049049377, "rewards_train/margins": 10.057690531015396, "rewards_train/rejected": -9.835379600524902, "step": 934 }, { "epoch": 0.46, "learning_rate": 9.712331090602652e-07, "loss": 0.0008, "step": 935 }, { "epoch": 0.46, "logps_train/chosen": -60.36776351928711, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -218.80377197265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.23724716901779175, "rewards_train/margins": 9.641550719738007, "rewards_train/rejected": -9.404303550720215, "step": 935 }, { "epoch": 0.46, "learning_rate": 9.711423141123001e-07, "loss": 0.0008, "step": 936 }, { "epoch": 0.46, "logps_train/chosen": -62.52879333496094, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -212.19583129882812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2441418617963791, "rewards_train/margins": 9.365042969584465, "rewards_train/rejected": -9.120901107788086, "step": 936 }, { "epoch": 0.46, "learning_rate": 9.7105138036263e-07, "loss": 0.0039, "step": 937 }, { "epoch": 0.46, "logps_train/chosen": -63.58271026611328, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -224.6624298095703, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.07942412793636322, "rewards_train/margins": 9.756408914923668, "rewards_train/rejected": -9.676984786987305, "step": 937 }, { "epoch": 0.46, "learning_rate": 9.709603078380447e-07, "loss": 0.0007, "step": 938 }, { "epoch": 0.46, "logps_train/chosen": -65.07077026367188, "logps_train/ref_chosen": -67.3125, "logps_train/ref_rejected": -133.5, "logps_train/rejected": -230.67654418945312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.22539328038692474, "rewards_train/margins": 9.946369156241417, "rewards_train/rejected": -9.720975875854492, "step": 938 }, { "epoch": 0.46, "learning_rate": 9.708690965653748e-07, "loss": 0.0018, "step": 939 }, { "epoch": 0.46, "logps_train/chosen": -63.04298400878906, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -227.2189483642578, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2846178412437439, "rewards_train/margins": 10.050067365169525, "rewards_train/rejected": -9.765449523925781, "step": 939 }, { "epoch": 0.46, "learning_rate": 9.707777465714917e-07, "loss": 0.0002, "step": 940 }, { "epoch": 0.46, "logps_train/chosen": -58.213348388671875, "logps_train/ref_chosen": -61.5625, "logps_train/ref_rejected": -121.8125, "logps_train/rejected": -217.43096923828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.33471983671188354, "rewards_train/margins": 9.897641360759735, "rewards_train/rejected": -9.562921524047852, "step": 940 }, { "epoch": 0.46, "learning_rate": 9.706862578833079e-07, "loss": 0.0018, "step": 941 }, { "epoch": 0.46, "logps_train/chosen": -58.698394775390625, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -119.125, "logps_train/rejected": -209.19105529785156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.36033663153648376, "rewards_train/margins": 9.36723455786705, "rewards_train/rejected": -9.006897926330566, "step": 941 }, { "epoch": 0.46, "learning_rate": 9.705946305277764e-07, "loss": 0.0009, "step": 942 }, { "epoch": 0.46, "logps_train/chosen": -64.57035827636719, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -228.32009887695312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1753373146057129, "rewards_train/margins": 10.046899318695068, "rewards_train/rejected": -9.871562004089355, "step": 942 }, { "epoch": 0.46, "learning_rate": 9.70502864531891e-07, "loss": 0.0009, "step": 943 }, { "epoch": 0.46, "logps_train/chosen": -63.16143035888672, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -226.24755859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.250458300113678, "rewards_train/margins": 10.156561434268951, "rewards_train/rejected": -9.906103134155273, "step": 943 }, { "epoch": 0.46, "learning_rate": 9.704109599226873e-07, "loss": 0.0007, "step": 944 }, { "epoch": 0.46, "logps_train/chosen": -61.9725341796875, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -218.1905059814453, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2725951075553894, "rewards_train/margins": 9.653120696544647, "rewards_train/rejected": -9.380525588989258, "step": 944 }, { "epoch": 0.47, "learning_rate": 9.703189167272403e-07, "loss": 0.0029, "step": 945 }, { "epoch": 0.47, "logps_train/chosen": -61.237220764160156, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -222.34564208984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3036213517189026, "rewards_train/margins": 10.034572660923004, "rewards_train/rejected": -9.730951309204102, "step": 945 }, { "epoch": 0.47, "learning_rate": 9.70226734972667e-07, "loss": 0.0005, "step": 946 }, { "epoch": 0.47, "logps_train/chosen": -62.919010162353516, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -219.72418212890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0744563490152359, "rewards_train/margins": 9.655175343155861, "rewards_train/rejected": -9.580718994140625, "step": 946 }, { "epoch": 0.47, "learning_rate": 9.701344146861244e-07, "loss": 0.0005, "step": 947 }, { "epoch": 0.47, "logps_train/chosen": -59.42103576660156, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -221.53994750976562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3679060637950897, "rewards_train/margins": 10.18176218867302, "rewards_train/rejected": -9.81385612487793, "step": 947 }, { "epoch": 0.47, "learning_rate": 9.700419558948112e-07, "loss": 0.0005, "step": 948 }, { "epoch": 0.47, "logps_train/chosen": -63.032535552978516, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -230.99313354492188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.17492011189460754, "rewards_train/margins": 10.374037772417068, "rewards_train/rejected": -10.199117660522461, "step": 948 }, { "epoch": 0.47, "learning_rate": 9.699493586259657e-07, "loss": 0.0004, "step": 949 }, { "epoch": 0.47, "logps_train/chosen": -61.004188537597656, "logps_train/ref_chosen": -60.9375, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -223.0598602294922, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.006619924679398537, "rewards_train/margins": 9.92519617639482, "rewards_train/rejected": -9.931816101074219, "step": 949 }, { "epoch": 0.47, "learning_rate": 9.698566229068684e-07, "loss": 0.0034, "step": 950 }, { "epoch": 0.47, "logps_train/chosen": -64.89236450195312, "logps_train/ref_chosen": -67.25, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -228.81190490722656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.23752106726169586, "rewards_train/margins": 10.179259195923805, "rewards_train/rejected": -9.94173812866211, "step": 950 }, { "epoch": 0.47, "learning_rate": 9.697637487648392e-07, "loss": 0.0023, "step": 951 }, { "epoch": 0.47, "logps_train/chosen": -64.607421875, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -224.57583618164062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.15625005960464478, "rewards_train/margins": 9.915054261684418, "rewards_train/rejected": -10.071304321289062, "step": 951 }, { "epoch": 0.47, "learning_rate": 9.696707362272398e-07, "loss": 0.0013, "step": 952 }, { "epoch": 0.47, "logps_train/chosen": -62.5639762878418, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -134.375, "logps_train/rejected": -239.19703674316406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3309558629989624, "rewards_train/margins": 10.817652106285095, "rewards_train/rejected": -10.486696243286133, "step": 952 }, { "epoch": 0.47, "learning_rate": 9.695775853214724e-07, "loss": 0.0004, "step": 953 }, { "epoch": 0.47, "logps_train/chosen": -64.47419738769531, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -222.87850952148438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2181074321269989, "rewards_train/margins": 9.978859156370163, "rewards_train/rejected": -9.760751724243164, "step": 953 }, { "epoch": 0.47, "learning_rate": 9.694842960749798e-07, "loss": 0.0008, "step": 954 }, { "epoch": 0.47, "logps_train/chosen": -65.2178955078125, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -132.375, "logps_train/rejected": -234.50042724609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.11849343031644821, "rewards_train/margins": 10.333917014300823, "rewards_train/rejected": -10.215423583984375, "step": 954 }, { "epoch": 0.47, "learning_rate": 9.693908685152454e-07, "loss": 0.0007, "step": 955 }, { "epoch": 0.47, "logps_train/chosen": -63.538352966308594, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -228.65704345703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1765843629837036, "rewards_train/margins": 10.107033848762512, "rewards_train/rejected": -9.930449485778809, "step": 955 }, { "epoch": 0.47, "learning_rate": 9.69297302669794e-07, "loss": 0.0003, "step": 956 }, { "epoch": 0.47, "logps_train/chosen": -64.15596771240234, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -229.16485595703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.16702021658420563, "rewards_train/margins": 10.405185118317604, "rewards_train/rejected": -10.238164901733398, "step": 956 }, { "epoch": 0.47, "learning_rate": 9.692035985661906e-07, "loss": 0.0012, "step": 957 }, { "epoch": 0.47, "logps_train/chosen": -63.89501190185547, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -229.30609130859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.21889714896678925, "rewards_train/margins": 10.436809822916985, "rewards_train/rejected": -10.217912673950195, "step": 957 }, { "epoch": 0.47, "learning_rate": 9.691097562320406e-07, "loss": 0.0003, "step": 958 }, { "epoch": 0.47, "logps_train/chosen": -60.63079071044922, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -120.5, "logps_train/rejected": -216.553955078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2296455204486847, "rewards_train/margins": 9.834942609071732, "rewards_train/rejected": -9.605297088623047, "step": 958 }, { "epoch": 0.47, "learning_rate": 9.690157756949913e-07, "loss": 0.0008, "step": 959 }, { "epoch": 0.47, "logps_train/chosen": -61.22347640991211, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -215.8250732421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.17345309257507324, "rewards_train/margins": 9.55781626701355, "rewards_train/rejected": -9.384363174438477, "step": 959 }, { "epoch": 0.47, "learning_rate": 9.689216569827297e-07, "loss": 0.0014, "step": 960 }, { "epoch": 0.47, "logps_train/chosen": -66.69874572753906, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -227.87554931640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0143052376806736, "rewards_train/margins": 9.968608979135752, "rewards_train/rejected": -9.954303741455078, "step": 960 }, { "epoch": 0.47, "learning_rate": 9.688274001229837e-07, "loss": 0.0007, "step": 961 }, { "epoch": 0.47, "logps_train/chosen": -63.69194030761719, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -225.73133850097656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.20531779527664185, "rewards_train/margins": 10.209018051624298, "rewards_train/rejected": -10.003700256347656, "step": 961 }, { "epoch": 0.47, "learning_rate": 9.68733005143522e-07, "loss": 0.0005, "step": 962 }, { "epoch": 0.47, "logps_train/chosen": -60.558528900146484, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -222.53765869140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.37354177236557007, "rewards_train/margins": 10.182482659816742, "rewards_train/rejected": -9.808940887451172, "step": 962 }, { "epoch": 0.47, "learning_rate": 9.68638472072154e-07, "loss": 0.0006, "step": 963 }, { "epoch": 0.47, "logps_train/chosen": -62.02183532714844, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -224.62661743164062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.27804139256477356, "rewards_train/margins": 10.123174220323563, "rewards_train/rejected": -9.845132827758789, "step": 963 }, { "epoch": 0.47, "learning_rate": 9.685438009367301e-07, "loss": 0.0012, "step": 964 }, { "epoch": 0.47, "logps_train/chosen": -65.10279846191406, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -226.8076171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.03444729000329971, "rewards_train/margins": 10.168920136988163, "rewards_train/rejected": -10.134472846984863, "step": 964 }, { "epoch": 0.48, "learning_rate": 9.684489917651408e-07, "loss": 0.0011, "step": 965 }, { "epoch": 0.48, "logps_train/chosen": -64.14552307128906, "logps_train/ref_chosen": -67.0625, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -226.61029052734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2931142747402191, "rewards_train/margins": 10.243156999349594, "rewards_train/rejected": -9.950042724609375, "step": 965 }, { "epoch": 0.48, "learning_rate": 9.683540445853176e-07, "loss": 0.0006, "step": 966 }, { "epoch": 0.48, "logps_train/chosen": -64.68702697753906, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -226.48666381835938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.014894414693117142, "rewards_train/margins": 9.98191649839282, "rewards_train/rejected": -9.996810913085938, "step": 966 }, { "epoch": 0.48, "learning_rate": 9.682589594252323e-07, "loss": 0.0006, "step": 967 }, { "epoch": 0.48, "logps_train/chosen": -61.15333938598633, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -217.20997619628906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.31664836406707764, "rewards_train/margins": 9.630517840385437, "rewards_train/rejected": -9.31386947631836, "step": 967 }, { "epoch": 0.48, "learning_rate": 9.68163736312898e-07, "loss": 0.0009, "step": 968 }, { "epoch": 0.48, "logps_train/chosen": -61.34892272949219, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -217.61300659179688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.21667034924030304, "rewards_train/margins": 9.777384117245674, "rewards_train/rejected": -9.560713768005371, "step": 968 }, { "epoch": 0.48, "learning_rate": 9.680683752763678e-07, "loss": 0.0008, "step": 969 }, { "epoch": 0.48, "logps_train/chosen": -65.51229858398438, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -122.4375, "logps_train/rejected": -218.4979248046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.15008825063705444, "rewards_train/margins": 9.754128396511078, "rewards_train/rejected": -9.604040145874023, "step": 969 }, { "epoch": 0.48, "learning_rate": 9.67972876343736e-07, "loss": 0.0006, "step": 970 }, { "epoch": 0.48, "logps_train/chosen": -60.38467788696289, "logps_train/ref_chosen": -61.96875, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -219.91270446777344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1574794203042984, "rewards_train/margins": 9.687568798661232, "rewards_train/rejected": -9.530089378356934, "step": 970 }, { "epoch": 0.48, "learning_rate": 9.67877239543137e-07, "loss": 0.0019, "step": 971 }, { "epoch": 0.48, "logps_train/chosen": -64.51134490966797, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -220.72286987304688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.06881201267242432, "rewards_train/margins": 9.674596428871155, "rewards_train/rejected": -9.60578441619873, "step": 971 }, { "epoch": 0.48, "learning_rate": 9.67781464902746e-07, "loss": 0.0016, "step": 972 }, { "epoch": 0.48, "logps_train/chosen": -60.12333679199219, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -226.37503051757812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.26042020320892334, "rewards_train/margins": 10.634446501731873, "rewards_train/rejected": -10.37402629852295, "step": 972 }, { "epoch": 0.48, "learning_rate": 9.67685552450779e-07, "loss": 0.0012, "step": 973 }, { "epoch": 0.48, "logps_train/chosen": -64.043212890625, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -231.26693725585938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.011352375149726868, "rewards_train/margins": 10.398494556546211, "rewards_train/rejected": -10.387142181396484, "step": 973 }, { "epoch": 0.48, "learning_rate": 9.675895022154925e-07, "loss": 0.0004, "step": 974 }, { "epoch": 0.48, "logps_train/chosen": -63.731666564941406, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -223.88604736328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.032790545374155045, "rewards_train/margins": 10.171103838831186, "rewards_train/rejected": -10.138313293457031, "step": 974 }, { "epoch": 0.48, "learning_rate": 9.674933142251834e-07, "loss": 0.0009, "step": 975 }, { "epoch": 0.48, "logps_train/chosen": -66.4134292602539, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -223.68853759765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.15394054353237152, "rewards_train/margins": 9.75290359556675, "rewards_train/rejected": -9.906844139099121, "step": 975 }, { "epoch": 0.48, "learning_rate": 9.673969885081896e-07, "loss": 0.0004, "step": 976 }, { "epoch": 0.48, "logps_train/chosen": -62.65525817871094, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -231.22967529296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.17851710319519043, "rewards_train/margins": 10.475116491317749, "rewards_train/rejected": -10.296599388122559, "step": 976 }, { "epoch": 0.48, "learning_rate": 9.673005250928887e-07, "loss": 0.0002, "step": 977 }, { "epoch": 0.48, "logps_train/chosen": -60.2034912109375, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -222.36065673828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.19046640396118164, "rewards_train/margins": 10.111103534698486, "rewards_train/rejected": -9.920637130737305, "step": 977 }, { "epoch": 0.48, "learning_rate": 9.672039240077002e-07, "loss": 0.002, "step": 978 }, { "epoch": 0.48, "logps_train/chosen": -62.963462829589844, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -228.38986206054688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.10941511392593384, "rewards_train/margins": 10.210266172885895, "rewards_train/rejected": -10.100851058959961, "step": 978 }, { "epoch": 0.48, "learning_rate": 9.67107185281083e-07, "loss": 0.0015, "step": 979 }, { "epoch": 0.48, "logps_train/chosen": -62.4469108581543, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -229.23092651367188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2894393801689148, "rewards_train/margins": 10.391585528850555, "rewards_train/rejected": -10.10214614868164, "step": 979 }, { "epoch": 0.48, "learning_rate": 9.670103089415372e-07, "loss": 0.0004, "step": 980 }, { "epoch": 0.48, "logps_train/chosen": -66.62832641601562, "logps_train/ref_chosen": -67.9375, "logps_train/ref_rejected": -130.875, "logps_train/rejected": -234.11810302734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.13077044486999512, "rewards_train/margins": 10.452004194259644, "rewards_train/rejected": -10.321233749389648, "step": 980 }, { "epoch": 0.48, "learning_rate": 9.669132950176029e-07, "loss": 0.0018, "step": 981 }, { "epoch": 0.48, "logps_train/chosen": -59.013877868652344, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -222.3815155029297, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4473915696144104, "rewards_train/margins": 10.318941175937653, "rewards_train/rejected": -9.871549606323242, "step": 981 }, { "epoch": 0.48, "learning_rate": 9.668161435378615e-07, "loss": 0.0011, "step": 982 }, { "epoch": 0.48, "logps_train/chosen": -62.160579681396484, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -222.18463134765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2535221576690674, "rewards_train/margins": 10.057043313980103, "rewards_train/rejected": -9.803521156311035, "step": 982 }, { "epoch": 0.48, "learning_rate": 9.66718854530934e-07, "loss": 0.0004, "step": 983 }, { "epoch": 0.48, "logps_train/chosen": -61.264610290527344, "logps_train/ref_chosen": -62.78125, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -229.64212036132812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1507846862077713, "rewards_train/margins": 10.600347712635994, "rewards_train/rejected": -10.449563026428223, "step": 983 }, { "epoch": 0.48, "learning_rate": 9.666214280254828e-07, "loss": 0.0004, "step": 984 }, { "epoch": 0.48, "logps_train/chosen": -63.693851470947266, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -227.93394470214844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.013988804072141647, "rewards_train/margins": 10.364804577082396, "rewards_train/rejected": -10.350815773010254, "step": 984 }, { "epoch": 0.48, "learning_rate": 9.665238640502101e-07, "loss": 0.0015, "step": 985 }, { "epoch": 0.48, "logps_train/chosen": -63.008888244628906, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -229.8961639404297, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.02440412901341915, "rewards_train/margins": 10.522368988022208, "rewards_train/rejected": -10.497964859008789, "step": 985 }, { "epoch": 0.49, "learning_rate": 9.664261626338593e-07, "loss": 0.0009, "step": 986 }, { "epoch": 0.49, "logps_train/chosen": -59.395843505859375, "logps_train/ref_chosen": -61.96875, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -220.81591796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2556793987751007, "rewards_train/margins": 9.9829743206501, "rewards_train/rejected": -9.727294921875, "step": 986 }, { "epoch": 0.49, "learning_rate": 9.663283238052136e-07, "loss": 0.0014, "step": 987 }, { "epoch": 0.49, "logps_train/chosen": -61.48584747314453, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -228.69549560546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.19321200251579285, "rewards_train/margins": 10.651237934827805, "rewards_train/rejected": -10.458025932312012, "step": 987 }, { "epoch": 0.49, "learning_rate": 9.66230347593097e-07, "loss": 0.0002, "step": 988 }, { "epoch": 0.49, "logps_train/chosen": -62.932472229003906, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -121.8125, "logps_train/rejected": -220.34063720703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0017237626016139984, "rewards_train/margins": 9.852878089994192, "rewards_train/rejected": -9.851154327392578, "step": 988 }, { "epoch": 0.49, "learning_rate": 9.661322340263738e-07, "loss": 0.0008, "step": 989 }, { "epoch": 0.49, "logps_train/chosen": -61.53657150268555, "logps_train/ref_chosen": -62.0, "logps_train/ref_rejected": -120.75, "logps_train/rejected": -216.35679626464844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.045073382556438446, "rewards_train/margins": 9.609806887805462, "rewards_train/rejected": -9.564733505249023, "step": 989 }, { "epoch": 0.49, "learning_rate": 9.660339831339494e-07, "loss": 0.001, "step": 990 }, { "epoch": 0.49, "logps_train/chosen": -59.334068298339844, "logps_train/ref_chosen": -61.1875, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -227.59365844726562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1868566870689392, "rewards_train/margins": 10.474396169185638, "rewards_train/rejected": -10.2875394821167, "step": 990 }, { "epoch": 0.49, "learning_rate": 9.659355949447688e-07, "loss": 0.0004, "step": 991 }, { "epoch": 0.49, "logps_train/chosen": -60.113136291503906, "logps_train/ref_chosen": -62.4375, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -223.93284606933594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.23263144493103027, "rewards_train/margins": 10.049743413925171, "rewards_train/rejected": -9.81711196899414, "step": 991 }, { "epoch": 0.49, "learning_rate": 9.658370694878177e-07, "loss": 0.0013, "step": 992 }, { "epoch": 0.49, "logps_train/chosen": -62.43144607543945, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -234.14993286132812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3095897436141968, "rewards_train/margins": 10.785813689231873, "rewards_train/rejected": -10.476223945617676, "step": 992 }, { "epoch": 0.49, "learning_rate": 9.657384067921229e-07, "loss": 0.0005, "step": 993 }, { "epoch": 0.49, "logps_train/chosen": -64.31713104248047, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -227.20123291015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.022681400179862976, "rewards_train/margins": 10.135676547884941, "rewards_train/rejected": -10.112995147705078, "step": 993 }, { "epoch": 0.49, "learning_rate": 9.656396068867505e-07, "loss": 0.0007, "step": 994 }, { "epoch": 0.49, "logps_train/chosen": -61.01570510864258, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -220.64450073242188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.35790225863456726, "rewards_train/margins": 10.062732428312302, "rewards_train/rejected": -9.704830169677734, "step": 994 }, { "epoch": 0.49, "learning_rate": 9.65540669800808e-07, "loss": 0.0016, "step": 995 }, { "epoch": 0.49, "logps_train/chosen": -59.69934844970703, "logps_train/ref_chosen": -62.8125, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -222.70437622070312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3120962977409363, "rewards_train/margins": 10.079945266246796, "rewards_train/rejected": -9.76784896850586, "step": 995 }, { "epoch": 0.49, "learning_rate": 9.654415955634428e-07, "loss": 0.0003, "step": 996 }, { "epoch": 0.49, "logps_train/chosen": -61.72303009033203, "logps_train/ref_chosen": -60.59375, "logps_train/ref_rejected": -118.625, "logps_train/rejected": -219.4945068359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.11273263394832611, "rewards_train/margins": 9.973241105675697, "rewards_train/rejected": -10.085973739624023, "step": 996 }, { "epoch": 0.49, "learning_rate": 9.653423842038428e-07, "loss": 0.0007, "step": 997 }, { "epoch": 0.49, "logps_train/chosen": -60.77830505371094, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -232.48895263671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3222668170928955, "rewards_train/margins": 10.565205812454224, "rewards_train/rejected": -10.242938995361328, "step": 997 }, { "epoch": 0.49, "learning_rate": 9.652430357512365e-07, "loss": 0.0006, "step": 998 }, { "epoch": 0.49, "logps_train/chosen": -65.01168823242188, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -122.125, "logps_train/rejected": -221.44696044921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0024868622422218323, "rewards_train/margins": 9.932785354554653, "rewards_train/rejected": -9.935272216796875, "step": 998 }, { "epoch": 0.49, "learning_rate": 9.651435502348925e-07, "loss": 0.002, "step": 999 }, { "epoch": 0.49, "logps_train/chosen": -65.98023986816406, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -130.375, "logps_train/rejected": -235.33990478515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.06970115005970001, "rewards_train/margins": 10.565606072545052, "rewards_train/rejected": -10.495904922485352, "step": 999 }, { "epoch": 0.49, "learning_rate": 9.650439276841198e-07, "loss": 0.0011, "step": 1000 }, { "epoch": 0.49, "logps_train/chosen": -61.102577209472656, "logps_train/ref_chosen": -62.84375, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -231.25466918945312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.17558225989341736, "rewards_train/margins": 10.382590621709824, "rewards_train/rejected": -10.207008361816406, "step": 1000 }, { "epoch": 0.49, "learning_rate": 9.64944168128268e-07, "loss": 0.0006, "step": 1001 }, { "epoch": 0.49, "logps_train/chosen": -64.50010681152344, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -232.797607421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.048182688653469086, "rewards_train/margins": 10.549768649041653, "rewards_train/rejected": -10.501585960388184, "step": 1001 }, { "epoch": 0.49, "learning_rate": 9.648442715967271e-07, "loss": 0.0006, "step": 1002 }, { "epoch": 0.49, "logps_train/chosen": -61.788002014160156, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -224.4298095703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.262020081281662, "rewards_train/margins": 10.200313538312912, "rewards_train/rejected": -9.93829345703125, "step": 1002 }, { "epoch": 0.49, "learning_rate": 9.647442381189273e-07, "loss": 0.0004, "step": 1003 }, { "epoch": 0.49, "logps_train/chosen": -58.6911735534668, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -228.00547790527344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4277576804161072, "rewards_train/margins": 10.404575765132904, "rewards_train/rejected": -9.976818084716797, "step": 1003 }, { "epoch": 0.49, "learning_rate": 9.646440677243387e-07, "loss": 0.0004, "step": 1004 }, { "epoch": 0.49, "logps_train/chosen": -60.07696533203125, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -219.0223388671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.343572735786438, "rewards_train/margins": 10.086187481880188, "rewards_train/rejected": -9.74261474609375, "step": 1004 }, { "epoch": 0.49, "learning_rate": 9.645437604424724e-07, "loss": 0.001, "step": 1005 }, { "epoch": 0.49, "logps_train/chosen": -63.314292907714844, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -226.21624755859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.05535462498664856, "rewards_train/margins": 10.10308501124382, "rewards_train/rejected": -10.158439636230469, "step": 1005 }, { "epoch": 0.5, "learning_rate": 9.644433163028797e-07, "loss": 0.0009, "step": 1006 }, { "epoch": 0.5, "logps_train/chosen": -61.66695785522461, "logps_train/ref_chosen": -61.9375, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -226.13238525390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.02768883854150772, "rewards_train/margins": 10.24190316349268, "rewards_train/rejected": -10.214214324951172, "step": 1006 }, { "epoch": 0.5, "learning_rate": 9.64342735335152e-07, "loss": 0.0005, "step": 1007 }, { "epoch": 0.5, "logps_train/chosen": -63.727657318115234, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -232.22189331054688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.06951974332332611, "rewards_train/margins": 10.40577195584774, "rewards_train/rejected": -10.336252212524414, "step": 1007 }, { "epoch": 0.5, "learning_rate": 9.642420175689214e-07, "loss": 0.0004, "step": 1008 }, { "epoch": 0.5, "logps_train/chosen": -64.99847412109375, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -233.5648956298828, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.05380242317914963, "rewards_train/margins": 10.552882261574268, "rewards_train/rejected": -10.606684684753418, "step": 1008 }, { "epoch": 0.5, "learning_rate": 9.641411630338597e-07, "loss": 0.0008, "step": 1009 }, { "epoch": 0.5, "logps_train/chosen": -62.186153411865234, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -231.98162841796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.17923635244369507, "rewards_train/margins": 10.516463220119476, "rewards_train/rejected": -10.337226867675781, "step": 1009 }, { "epoch": 0.5, "learning_rate": 9.640401717596794e-07, "loss": 0.0029, "step": 1010 }, { "epoch": 0.5, "logps_train/chosen": -65.16708374023438, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -232.52980041503906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.07655297219753265, "rewards_train/margins": 10.476114854216576, "rewards_train/rejected": -10.399561882019043, "step": 1010 }, { "epoch": 0.5, "learning_rate": 9.639390437761334e-07, "loss": 0.0004, "step": 1011 }, { "epoch": 0.5, "logps_train/chosen": -66.95376586914062, "logps_train/ref_chosen": -67.375, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -232.06362915039062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.041342586278915405, "rewards_train/margins": 10.669483989477158, "rewards_train/rejected": -10.628141403198242, "step": 1011 }, { "epoch": 0.5, "learning_rate": 9.638377791130144e-07, "loss": 0.0004, "step": 1012 }, { "epoch": 0.5, "logps_train/chosen": -64.40961456298828, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -232.08311462402344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.08353948593139648, "rewards_train/margins": 10.391275882720947, "rewards_train/rejected": -10.474815368652344, "step": 1012 }, { "epoch": 0.5, "learning_rate": 9.63736377800156e-07, "loss": 0.0003, "step": 1013 }, { "epoch": 0.5, "logps_train/chosen": -62.67546081542969, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -123.3125, "logps_train/rejected": -225.28781127929688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0449780598282814, "rewards_train/margins": 10.244121469557285, "rewards_train/rejected": -10.199143409729004, "step": 1013 }, { "epoch": 0.5, "learning_rate": 9.636348398674312e-07, "loss": 0.0012, "step": 1014 }, { "epoch": 0.5, "logps_train/chosen": -67.65216064453125, "logps_train/ref_chosen": -68.0625, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -236.56863403320312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.039129335433244705, "rewards_train/margins": 10.906050760298967, "rewards_train/rejected": -10.866921424865723, "step": 1014 }, { "epoch": 0.5, "learning_rate": 9.635331653447544e-07, "loss": 0.0002, "step": 1015 }, { "epoch": 0.5, "logps_train/chosen": -64.88282775878906, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -132.625, "logps_train/rejected": -244.45147705078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.226268470287323, "rewards_train/margins": 11.408818900585175, "rewards_train/rejected": -11.182550430297852, "step": 1015 }, { "epoch": 0.5, "learning_rate": 9.634313542620792e-07, "loss": 0.0004, "step": 1016 }, { "epoch": 0.5, "logps_train/chosen": -63.76087188720703, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -130.875, "logps_train/rejected": -234.44204711914062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.06063142418861389, "rewards_train/margins": 10.417727142572403, "rewards_train/rejected": -10.357095718383789, "step": 1016 }, { "epoch": 0.5, "learning_rate": 9.633294066493997e-07, "loss": 0.0007, "step": 1017 }, { "epoch": 0.5, "logps_train/chosen": -62.712257385253906, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -232.07826232910156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.26783689856529236, "rewards_train/margins": 10.575076431035995, "rewards_train/rejected": -10.307239532470703, "step": 1017 }, { "epoch": 0.5, "learning_rate": 9.63227322536751e-07, "loss": 0.0005, "step": 1018 }, { "epoch": 0.5, "logps_train/chosen": -63.833648681640625, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -225.9083251953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.11736766993999481, "rewards_train/margins": 10.318796083331108, "rewards_train/rejected": -10.201428413391113, "step": 1018 }, { "epoch": 0.5, "learning_rate": 9.631251019542068e-07, "loss": 0.0008, "step": 1019 }, { "epoch": 0.5, "logps_train/chosen": -60.35034942626953, "logps_train/ref_chosen": -61.96875, "logps_train/ref_rejected": -116.8125, "logps_train/rejected": -216.8912811279297, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.16086335480213165, "rewards_train/margins": 10.170449689030647, "rewards_train/rejected": -10.009586334228516, "step": 1019 }, { "epoch": 0.5, "learning_rate": 9.630227449318828e-07, "loss": 0.0022, "step": 1020 }, { "epoch": 0.5, "logps_train/chosen": -62.87005615234375, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -231.47000122070312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.12295566499233246, "rewards_train/margins": 10.456383094191551, "rewards_train/rejected": -10.333427429199219, "step": 1020 }, { "epoch": 0.5, "learning_rate": 9.629202514999339e-07, "loss": 0.0003, "step": 1021 }, { "epoch": 0.5, "logps_train/chosen": -60.933319091796875, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -224.0347900390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4153593182563782, "rewards_train/margins": 10.256145298480988, "rewards_train/rejected": -9.84078598022461, "step": 1021 }, { "epoch": 0.5, "learning_rate": 9.62817621688555e-07, "loss": 0.0004, "step": 1022 }, { "epoch": 0.5, "logps_train/chosen": -63.83403015136719, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -228.9820556640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.04457564651966095, "rewards_train/margins": 10.597224190831184, "rewards_train/rejected": -10.552648544311523, "step": 1022 }, { "epoch": 0.5, "learning_rate": 9.627148555279818e-07, "loss": 0.0008, "step": 1023 }, { "epoch": 0.5, "logps_train/chosen": -61.16929626464844, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -224.28311157226562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.15885160863399506, "rewards_train/margins": 10.191167816519737, "rewards_train/rejected": -10.032316207885742, "step": 1023 }, { "epoch": 0.5, "learning_rate": 9.6261195304849e-07, "loss": 0.0005, "step": 1024 }, { "epoch": 0.5, "logps_train/chosen": -60.57225799560547, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -121.5625, "logps_train/rejected": -220.9827880859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3411141037940979, "rewards_train/margins": 10.282995283603668, "rewards_train/rejected": -9.94188117980957, "step": 1024 }, { "epoch": 0.5, "learning_rate": 9.625089142803953e-07, "loss": 0.0003, "step": 1025 }, { "epoch": 0.5, "logps_train/chosen": -63.78540802001953, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -227.9666290283203, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.09377386420965195, "rewards_train/margins": 10.32090475410223, "rewards_train/rejected": -10.227130889892578, "step": 1025 }, { "epoch": 0.51, "learning_rate": 9.624057392540532e-07, "loss": 0.0006, "step": 1026 }, { "epoch": 0.51, "logps_train/chosen": -64.66973114013672, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -230.74188232421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.011309508234262466, "rewards_train/margins": 10.439830895513296, "rewards_train/rejected": -10.451140403747559, "step": 1026 }, { "epoch": 0.51, "learning_rate": 9.623024279998605e-07, "loss": 0.0004, "step": 1027 }, { "epoch": 0.51, "logps_train/chosen": -55.837825775146484, "logps_train/ref_chosen": -58.1875, "logps_train/ref_rejected": -115.3125, "logps_train/rejected": -210.58355712890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.23379555344581604, "rewards_train/margins": 9.763489156961441, "rewards_train/rejected": -9.529693603515625, "step": 1027 }, { "epoch": 0.51, "learning_rate": 9.621989805482526e-07, "loss": 0.0017, "step": 1028 }, { "epoch": 0.51, "logps_train/chosen": -63.26935577392578, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -232.19696044921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1322440803050995, "rewards_train/margins": 10.592319458723068, "rewards_train/rejected": -10.460075378417969, "step": 1028 }, { "epoch": 0.51, "learning_rate": 9.620953969297066e-07, "loss": 0.0004, "step": 1029 }, { "epoch": 0.51, "logps_train/chosen": -63.83796691894531, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -122.125, "logps_train/rejected": -226.27011108398438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.10957778245210648, "rewards_train/margins": 10.305568091571331, "rewards_train/rejected": -10.415145874023438, "step": 1029 }, { "epoch": 0.51, "learning_rate": 9.619916771747383e-07, "loss": 0.0006, "step": 1030 }, { "epoch": 0.51, "logps_train/chosen": -62.911903381347656, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -224.5323486328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.10070429742336273, "rewards_train/margins": 10.180404767394066, "rewards_train/rejected": -10.079700469970703, "step": 1030 }, { "epoch": 0.51, "learning_rate": 9.618878213139047e-07, "loss": 0.0018, "step": 1031 }, { "epoch": 0.51, "logps_train/chosen": -63.329219818115234, "logps_train/ref_chosen": -62.78125, "logps_train/ref_rejected": -120.875, "logps_train/rejected": -222.49386596679688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.052941590547561646, "rewards_train/margins": 10.108797758817673, "rewards_train/rejected": -10.161739349365234, "step": 1031 }, { "epoch": 0.51, "learning_rate": 9.61783829377802e-07, "loss": 0.001, "step": 1032 }, { "epoch": 0.51, "logps_train/chosen": -64.69248962402344, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -232.9074249267578, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.008165061473846436, "rewards_train/margins": 10.374569237232208, "rewards_train/rejected": -10.382734298706055, "step": 1032 }, { "epoch": 0.51, "learning_rate": 9.616797013970674e-07, "loss": 0.0034, "step": 1033 }, { "epoch": 0.51, "logps_train/chosen": -65.28230285644531, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -236.088623046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.037116650491952896, "rewards_train/margins": 10.783514376729727, "rewards_train/rejected": -10.82063102722168, "step": 1033 }, { "epoch": 0.51, "learning_rate": 9.615754374023773e-07, "loss": 0.0003, "step": 1034 }, { "epoch": 0.51, "logps_train/chosen": -64.41887664794922, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -235.84185791015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1385316550731659, "rewards_train/margins": 11.011095970869064, "rewards_train/rejected": -10.872564315795898, "step": 1034 }, { "epoch": 0.51, "learning_rate": 9.614710374244487e-07, "loss": 0.0013, "step": 1035 }, { "epoch": 0.51, "logps_train/chosen": -60.97425842285156, "logps_train/ref_chosen": -60.40625, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -223.98114013671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.056727584451436996, "rewards_train/margins": 10.241778198629618, "rewards_train/rejected": -10.298505783081055, "step": 1035 }, { "epoch": 0.51, "learning_rate": 9.61366501494039e-07, "loss": 0.0006, "step": 1036 }, { "epoch": 0.51, "logps_train/chosen": -61.90428161621094, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -227.12771606445312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.38686710596084595, "rewards_train/margins": 10.559992372989655, "rewards_train/rejected": -10.173125267028809, "step": 1036 }, { "epoch": 0.51, "learning_rate": 9.612618296419442e-07, "loss": 0.0019, "step": 1037 }, { "epoch": 0.51, "logps_train/chosen": -62.49864196777344, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -226.93795776367188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2402239590883255, "rewards_train/margins": 10.351695135235786, "rewards_train/rejected": -10.111471176147461, "step": 1037 }, { "epoch": 0.51, "learning_rate": 9.61157021899002e-07, "loss": 0.0005, "step": 1038 }, { "epoch": 0.51, "logps_train/chosen": -61.73992156982422, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -122.0625, "logps_train/rejected": -226.76023864746094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.16582724452018738, "rewards_train/margins": 10.636919468641281, "rewards_train/rejected": -10.471092224121094, "step": 1038 }, { "epoch": 0.51, "learning_rate": 9.610520782960897e-07, "loss": 0.0006, "step": 1039 }, { "epoch": 0.51, "logps_train/chosen": -58.67992401123047, "logps_train/ref_chosen": -61.25, "logps_train/ref_rejected": -120.75, "logps_train/rejected": -220.60369873046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2580327093601227, "rewards_train/margins": 10.239496141672134, "rewards_train/rejected": -9.981463432312012, "step": 1039 }, { "epoch": 0.51, "learning_rate": 9.60946998864124e-07, "loss": 0.0015, "step": 1040 }, { "epoch": 0.51, "logps_train/chosen": -64.27899169921875, "logps_train/ref_chosen": -67.25, "logps_train/ref_rejected": -130.375, "logps_train/rejected": -241.22679138183594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.29710066318511963, "rewards_train/margins": 11.380962014198303, "rewards_train/rejected": -11.083861351013184, "step": 1040 }, { "epoch": 0.51, "learning_rate": 9.608417836340618e-07, "loss": 0.0002, "step": 1041 }, { "epoch": 0.51, "logps_train/chosen": -60.664207458496094, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -225.10450744628906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.23074743151664734, "rewards_train/margins": 10.516148775815964, "rewards_train/rejected": -10.285401344299316, "step": 1041 }, { "epoch": 0.51, "learning_rate": 9.607364326369008e-07, "loss": 0.0007, "step": 1042 }, { "epoch": 0.51, "logps_train/chosen": -63.341697692871094, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -233.874755859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.24742186069488525, "rewards_train/margins": 10.724838852882385, "rewards_train/rejected": -10.4774169921875, "step": 1042 }, { "epoch": 0.51, "learning_rate": 9.606309459036775e-07, "loss": 0.0006, "step": 1043 }, { "epoch": 0.51, "logps_train/chosen": -63.227962493896484, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -230.74282836914062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.03364892303943634, "rewards_train/margins": 10.599386647343636, "rewards_train/rejected": -10.5657377243042, "step": 1043 }, { "epoch": 0.51, "learning_rate": 9.605253234654696e-07, "loss": 0.0007, "step": 1044 }, { "epoch": 0.51, "logps_train/chosen": -63.41537094116211, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -122.125, "logps_train/rejected": -223.5428009033203, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.04056030884385109, "rewards_train/margins": 10.102929528802633, "rewards_train/rejected": -10.143489837646484, "step": 1044 }, { "epoch": 0.51, "learning_rate": 9.604195653533937e-07, "loss": 0.0031, "step": 1045 }, { "epoch": 0.51, "logps_train/chosen": -65.98336029052734, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -230.51663208007812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.17441019415855408, "rewards_train/margins": 10.567584663629532, "rewards_train/rejected": -10.741994857788086, "step": 1045 }, { "epoch": 0.52, "learning_rate": 9.603136715986072e-07, "loss": 0.002, "step": 1046 }, { "epoch": 0.52, "logps_train/chosen": -63.061492919921875, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -233.2791748046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.29043301939964294, "rewards_train/margins": 10.80819520354271, "rewards_train/rejected": -10.517762184143066, "step": 1046 }, { "epoch": 0.52, "learning_rate": 9.602076422323067e-07, "loss": 0.0001, "step": 1047 }, { "epoch": 0.52, "logps_train/chosen": -59.917701721191406, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -230.65768432617188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.43777039647102356, "rewards_train/margins": 10.80178216099739, "rewards_train/rejected": -10.364011764526367, "step": 1047 }, { "epoch": 0.52, "learning_rate": 9.601014772857295e-07, "loss": 0.0001, "step": 1048 }, { "epoch": 0.52, "logps_train/chosen": -59.78086471557617, "logps_train/ref_chosen": -61.78125, "logps_train/ref_rejected": -117.5625, "logps_train/rejected": -215.4367218017578, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.200331449508667, "rewards_train/margins": 9.990194082260132, "rewards_train/rejected": -9.789862632751465, "step": 1048 }, { "epoch": 0.52, "learning_rate": 9.599951767901526e-07, "loss": 0.0025, "step": 1049 }, { "epoch": 0.52, "logps_train/chosen": -63.88404846191406, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -237.46522521972656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.11486668884754181, "rewards_train/margins": 11.200452283024788, "rewards_train/rejected": -11.085585594177246, "step": 1049 }, { "epoch": 0.52, "learning_rate": 9.598887407768926e-07, "loss": 0.0005, "step": 1050 }, { "epoch": 0.52, "logps_train/chosen": -63.85972595214844, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -235.359619140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.20728877186775208, "rewards_train/margins": 10.835780173540115, "rewards_train/rejected": -10.628491401672363, "step": 1050 }, { "epoch": 0.52, "learning_rate": 9.597821692773062e-07, "loss": 0.0014, "step": 1051 }, { "epoch": 0.52, "logps_train/chosen": -65.02678680419922, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -232.91871643066406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.05503606051206589, "rewards_train/margins": 10.579427234828472, "rewards_train/rejected": -10.524391174316406, "step": 1051 }, { "epoch": 0.52, "learning_rate": 9.596754623227905e-07, "loss": 0.0012, "step": 1052 }, { "epoch": 0.52, "logps_train/chosen": -63.040122985839844, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -231.29403686523438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.11449383199214935, "rewards_train/margins": 10.550050243735313, "rewards_train/rejected": -10.435556411743164, "step": 1052 }, { "epoch": 0.52, "learning_rate": 9.595686199447817e-07, "loss": 0.001, "step": 1053 }, { "epoch": 0.52, "logps_train/chosen": -62.13719177246094, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -120.3125, "logps_train/rejected": -219.70175170898438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.19560709595680237, "rewards_train/margins": 10.134433656930923, "rewards_train/rejected": -9.938826560974121, "step": 1053 }, { "epoch": 0.52, "learning_rate": 9.594616421747563e-07, "loss": 0.0011, "step": 1054 }, { "epoch": 0.52, "logps_train/chosen": -63.92317199707031, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -226.756591796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.28668686747550964, "rewards_train/margins": 10.518204659223557, "rewards_train/rejected": -10.231517791748047, "step": 1054 }, { "epoch": 0.52, "learning_rate": 9.59354529044231e-07, "loss": 0.0002, "step": 1055 }, { "epoch": 0.52, "logps_train/chosen": -61.147560119628906, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -234.31625366210938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.42528292536735535, "rewards_train/margins": 11.084056347608566, "rewards_train/rejected": -10.658773422241211, "step": 1055 }, { "epoch": 0.52, "learning_rate": 9.592472805847617e-07, "loss": 0.0006, "step": 1056 }, { "epoch": 0.52, "logps_train/chosen": -64.34980773925781, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -231.14137268066406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.01897420361638069, "rewards_train/margins": 10.712603468447924, "rewards_train/rejected": -10.693629264831543, "step": 1056 }, { "epoch": 0.52, "learning_rate": 9.591398968279448e-07, "loss": 0.0014, "step": 1057 }, { "epoch": 0.52, "logps_train/chosen": -66.0998764038086, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -231.7212677001953, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1410418450832367, "rewards_train/margins": 10.5949048101902, "rewards_train/rejected": -10.735946655273438, "step": 1057 }, { "epoch": 0.52, "learning_rate": 9.59032377805416e-07, "loss": 0.0003, "step": 1058 }, { "epoch": 0.52, "logps_train/chosen": -64.98731994628906, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -228.77598571777344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.12168122828006744, "rewards_train/margins": 10.352012619376183, "rewards_train/rejected": -10.47369384765625, "step": 1058 }, { "epoch": 0.52, "learning_rate": 9.589247235488511e-07, "loss": 0.0007, "step": 1059 }, { "epoch": 0.52, "logps_train/chosen": -64.70653533935547, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -234.03628540039062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.05161197483539581, "rewards_train/margins": 10.663247182965279, "rewards_train/rejected": -10.611635208129883, "step": 1059 }, { "epoch": 0.52, "learning_rate": 9.588169340899662e-07, "loss": 0.0004, "step": 1060 }, { "epoch": 0.52, "logps_train/chosen": -61.24702453613281, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -231.34144592285156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.195927232503891, "rewards_train/margins": 10.787786096334457, "rewards_train/rejected": -10.591858863830566, "step": 1060 }, { "epoch": 0.52, "learning_rate": 9.587090094605163e-07, "loss": 0.0004, "step": 1061 }, { "epoch": 0.52, "logps_train/chosen": -64.70350646972656, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -229.96975708007812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.11402466893196106, "rewards_train/margins": 10.688247233629227, "rewards_train/rejected": -10.574222564697266, "step": 1061 }, { "epoch": 0.52, "learning_rate": 9.58600949692297e-07, "loss": 0.0004, "step": 1062 }, { "epoch": 0.52, "logps_train/chosen": -59.15875244140625, "logps_train/ref_chosen": -61.25, "logps_train/ref_rejected": -120.125, "logps_train/rejected": -219.425048828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2111021727323532, "rewards_train/margins": 10.139202758669853, "rewards_train/rejected": -9.9281005859375, "step": 1062 }, { "epoch": 0.52, "learning_rate": 9.584927548171435e-07, "loss": 0.0009, "step": 1063 }, { "epoch": 0.52, "logps_train/chosen": -60.61376190185547, "logps_train/ref_chosen": -62.53125, "logps_train/ref_rejected": -116.0, "logps_train/rejected": -210.12240600585938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.19169963896274567, "rewards_train/margins": 9.605794563889503, "rewards_train/rejected": -9.414094924926758, "step": 1063 }, { "epoch": 0.52, "learning_rate": 9.583844248669304e-07, "loss": 0.0016, "step": 1064 }, { "epoch": 0.52, "logps_train/chosen": -60.85150146484375, "logps_train/ref_chosen": -61.75, "logps_train/ref_rejected": -119.9375, "logps_train/rejected": -223.0569610595703, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.09019173681735992, "rewards_train/margins": 10.403406992554665, "rewards_train/rejected": -10.313215255737305, "step": 1064 }, { "epoch": 0.52, "learning_rate": 9.58275959873573e-07, "loss": 0.001, "step": 1065 }, { "epoch": 0.52, "logps_train/chosen": -62.580169677734375, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -224.017822265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.07440514862537384, "rewards_train/margins": 10.198552563786507, "rewards_train/rejected": -10.124147415161133, "step": 1065 }, { "epoch": 0.52, "learning_rate": 9.581673598690252e-07, "loss": 0.0015, "step": 1066 }, { "epoch": 0.52, "logps_train/chosen": -60.28066635131836, "logps_train/ref_chosen": -62.40625, "logps_train/ref_rejected": -121.9375, "logps_train/rejected": -221.80055236816406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.21187463402748108, "rewards_train/margins": 10.19495740532875, "rewards_train/rejected": -9.98308277130127, "step": 1066 }, { "epoch": 0.53, "learning_rate": 9.580586248852818e-07, "loss": 0.0011, "step": 1067 }, { "epoch": 0.53, "logps_train/chosen": -66.2328109741211, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -240.13397216796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.040956731885671616, "rewards_train/margins": 11.013503793627024, "rewards_train/rejected": -11.054460525512695, "step": 1067 }, { "epoch": 0.53, "learning_rate": 9.579497549543764e-07, "loss": 0.0007, "step": 1068 }, { "epoch": 0.53, "logps_train/chosen": -65.3631591796875, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -232.8603515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.061754822731018066, "rewards_train/margins": 10.514367461204529, "rewards_train/rejected": -10.576122283935547, "step": 1068 }, { "epoch": 0.53, "learning_rate": 9.578407501083833e-07, "loss": 0.0004, "step": 1069 }, { "epoch": 0.53, "logps_train/chosen": -66.76187896728516, "logps_train/ref_chosen": -68.5625, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -238.77279663085938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1824059784412384, "rewards_train/margins": 11.143378764390945, "rewards_train/rejected": -10.960972785949707, "step": 1069 }, { "epoch": 0.53, "learning_rate": 9.577316103794158e-07, "loss": 0.0002, "step": 1070 }, { "epoch": 0.53, "logps_train/chosen": -63.446468353271484, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -122.1875, "logps_train/rejected": -227.72215270996094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.06643696129322052, "rewards_train/margins": 10.62214870750904, "rewards_train/rejected": -10.55571174621582, "step": 1070 }, { "epoch": 0.53, "learning_rate": 9.57622335799627e-07, "loss": 0.0011, "step": 1071 }, { "epoch": 0.53, "logps_train/chosen": -59.49119567871094, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -227.9575958251953, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.395778089761734, "rewards_train/margins": 10.799203306436539, "rewards_train/rejected": -10.403425216674805, "step": 1071 }, { "epoch": 0.53, "learning_rate": 9.575129264012103e-07, "loss": 0.0005, "step": 1072 }, { "epoch": 0.53, "logps_train/chosen": -61.95475769042969, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -227.09841918945312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2643631100654602, "rewards_train/margins": 10.371324360370636, "rewards_train/rejected": -10.106961250305176, "step": 1072 }, { "epoch": 0.53, "learning_rate": 9.574033822163982e-07, "loss": 0.0006, "step": 1073 }, { "epoch": 0.53, "logps_train/chosen": -61.548973083496094, "logps_train/ref_chosen": -61.625, "logps_train/ref_rejected": -122.1875, "logps_train/rejected": -224.17669677734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.009555463679134846, "rewards_train/margins": 10.206813459284604, "rewards_train/rejected": -10.197257995605469, "step": 1073 }, { "epoch": 0.53, "learning_rate": 9.572937032774635e-07, "loss": 0.0003, "step": 1074 }, { "epoch": 0.53, "logps_train/chosen": -65.92372131347656, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -235.04348754882812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.22816264629364014, "rewards_train/margins": 10.53140938282013, "rewards_train/rejected": -10.75957202911377, "step": 1074 }, { "epoch": 0.53, "learning_rate": 9.57183889616718e-07, "loss": 0.0024, "step": 1075 }, { "epoch": 0.53, "logps_train/chosen": -64.50826263427734, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -131.625, "logps_train/rejected": -243.67294311523438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.018501900136470795, "rewards_train/margins": 11.181898452341557, "rewards_train/rejected": -11.200400352478027, "step": 1075 }, { "epoch": 0.53, "learning_rate": 9.570739412665133e-07, "loss": 0.0002, "step": 1076 }, { "epoch": 0.53, "logps_train/chosen": -64.74122619628906, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -235.05264282226562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.023240579292178154, "rewards_train/margins": 11.038173211738467, "rewards_train/rejected": -11.014932632446289, "step": 1076 }, { "epoch": 0.53, "learning_rate": 9.569638582592417e-07, "loss": 0.0002, "step": 1077 }, { "epoch": 0.53, "logps_train/chosen": -65.64334869384766, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -133.125, "logps_train/rejected": -241.0719757080078, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.11237427592277527, "rewards_train/margins": 10.908829659223557, "rewards_train/rejected": -10.796455383300781, "step": 1077 }, { "epoch": 0.53, "learning_rate": 9.568536406273338e-07, "loss": 0.0002, "step": 1078 }, { "epoch": 0.53, "logps_train/chosen": -60.69788360595703, "logps_train/ref_chosen": -62.0625, "logps_train/ref_rejected": -120.125, "logps_train/rejected": -219.6110076904297, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.13660794496536255, "rewards_train/margins": 10.083743870258331, "rewards_train/rejected": -9.947135925292969, "step": 1078 }, { "epoch": 0.53, "learning_rate": 9.567432884032607e-07, "loss": 0.0011, "step": 1079 }, { "epoch": 0.53, "logps_train/chosen": -63.33281707763672, "logps_train/ref_chosen": -63.59375, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -236.6922607421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.025165535509586334, "rewards_train/margins": 10.942779518663883, "rewards_train/rejected": -10.917613983154297, "step": 1079 }, { "epoch": 0.53, "learning_rate": 9.566328016195328e-07, "loss": 0.0034, "step": 1080 }, { "epoch": 0.53, "logps_train/chosen": -64.96775817871094, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -229.38235473632812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.13591191172599792, "rewards_train/margins": 10.288455992937088, "rewards_train/rejected": -10.424367904663086, "step": 1080 }, { "epoch": 0.53, "learning_rate": 9.565221803087002e-07, "loss": 0.0011, "step": 1081 }, { "epoch": 0.53, "logps_train/chosen": -62.41670227050781, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -227.92295837402344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1661423295736313, "rewards_train/margins": 10.611027583479881, "rewards_train/rejected": -10.44488525390625, "step": 1081 }, { "epoch": 0.53, "learning_rate": 9.56411424503353e-07, "loss": 0.0006, "step": 1082 }, { "epoch": 0.53, "logps_train/chosen": -59.30944061279297, "logps_train/ref_chosen": -62.15625, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -229.78521728515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.28419244289398193, "rewards_train/margins": 10.742401480674744, "rewards_train/rejected": -10.458209037780762, "step": 1082 }, { "epoch": 0.53, "learning_rate": 9.563005342361203e-07, "loss": 0.0006, "step": 1083 }, { "epoch": 0.53, "logps_train/chosen": -63.131103515625, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -122.8125, "logps_train/rejected": -230.11541748046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.043017297983169556, "rewards_train/margins": 10.685515493154526, "rewards_train/rejected": -10.728532791137695, "step": 1083 }, { "epoch": 0.53, "learning_rate": 9.56189509539671e-07, "loss": 0.0012, "step": 1084 }, { "epoch": 0.53, "logps_train/chosen": -63.843048095703125, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -131.25, "logps_train/rejected": -238.8095703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.14098793268203735, "rewards_train/margins": 10.896946489810944, "rewards_train/rejected": -10.755958557128906, "step": 1084 }, { "epoch": 0.53, "learning_rate": 9.560783504467142e-07, "loss": 0.0003, "step": 1085 }, { "epoch": 0.53, "logps_train/chosen": -66.06220245361328, "logps_train/ref_chosen": -67.5625, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -234.3602294921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.14998085796833038, "rewards_train/margins": 10.95070107281208, "rewards_train/rejected": -10.80072021484375, "step": 1085 }, { "epoch": 0.53, "learning_rate": 9.559670569899979e-07, "loss": 0.0003, "step": 1086 }, { "epoch": 0.53, "logps_train/chosen": -62.90443801879883, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -233.79217529296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.15882395207881927, "rewards_train/margins": 11.037259086966515, "rewards_train/rejected": -10.878435134887695, "step": 1086 }, { "epoch": 0.54, "learning_rate": 9.558556292023095e-07, "loss": 0.0003, "step": 1087 }, { "epoch": 0.54, "logps_train/chosen": -61.98513412475586, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -232.27322387695312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.09841015189886093, "rewards_train/margins": 10.713329814374447, "rewards_train/rejected": -10.614919662475586, "step": 1087 }, { "epoch": 0.54, "learning_rate": 9.557440671164769e-07, "loss": 0.0017, "step": 1088 }, { "epoch": 0.54, "logps_train/chosen": -62.42052459716797, "logps_train/ref_chosen": -63.59375, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -230.67242431640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1181039959192276, "rewards_train/margins": 10.679976478219032, "rewards_train/rejected": -10.561872482299805, "step": 1088 }, { "epoch": 0.54, "learning_rate": 9.55632370765367e-07, "loss": 0.001, "step": 1089 }, { "epoch": 0.54, "logps_train/chosen": -64.34732055664062, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -242.68324279785156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.012515205889940262, "rewards_train/margins": 11.230515342205763, "rewards_train/rejected": -11.243030548095703, "step": 1089 }, { "epoch": 0.54, "learning_rate": 9.55520540181886e-07, "loss": 0.0004, "step": 1090 }, { "epoch": 0.54, "logps_train/chosen": -65.74179077148438, "logps_train/ref_chosen": -67.25, "logps_train/ref_rejected": -131.75, "logps_train/rejected": -241.60476684570312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.14789196848869324, "rewards_train/margins": 11.12999913096428, "rewards_train/rejected": -10.982107162475586, "step": 1090 }, { "epoch": 0.54, "learning_rate": 9.554085753989803e-07, "loss": 0.0001, "step": 1091 }, { "epoch": 0.54, "logps_train/chosen": -64.34773254394531, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -235.91567993164062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0028398334980010986, "rewards_train/margins": 10.934334963560104, "rewards_train/rejected": -10.937174797058105, "step": 1091 }, { "epoch": 0.54, "learning_rate": 9.552964764496353e-07, "loss": 0.0001, "step": 1092 }, { "epoch": 0.54, "logps_train/chosen": -61.546932220458984, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -233.1671905517578, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.27079495787620544, "rewards_train/margins": 10.91339311003685, "rewards_train/rejected": -10.642598152160645, "step": 1092 }, { "epoch": 0.54, "learning_rate": 9.55184243366876e-07, "loss": 0.0002, "step": 1093 }, { "epoch": 0.54, "logps_train/chosen": -63.89363098144531, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -232.333984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.014201164245605469, "rewards_train/margins": 10.731878280639648, "rewards_train/rejected": -10.717677116394043, "step": 1093 }, { "epoch": 0.54, "learning_rate": 9.55071876183767e-07, "loss": 0.0006, "step": 1094 }, { "epoch": 0.54, "logps_train/chosen": -64.29857635498047, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -238.58053588867188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.05998604744672775, "rewards_train/margins": 11.036839418113232, "rewards_train/rejected": -10.976853370666504, "step": 1094 }, { "epoch": 0.54, "learning_rate": 9.549593749334127e-07, "loss": 0.0004, "step": 1095 }, { "epoch": 0.54, "logps_train/chosen": -61.74565505981445, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -227.8038787841797, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.20976057648658752, "rewards_train/margins": 10.556457430124283, "rewards_train/rejected": -10.346696853637695, "step": 1095 }, { "epoch": 0.54, "learning_rate": 9.548467396489566e-07, "loss": 0.0002, "step": 1096 }, { "epoch": 0.54, "logps_train/chosen": -63.056312561035156, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -230.46597290039062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2189294397830963, "rewards_train/margins": 10.659470707178116, "rewards_train/rejected": -10.44054126739502, "step": 1096 }, { "epoch": 0.54, "learning_rate": 9.547339703635816e-07, "loss": 0.0004, "step": 1097 }, { "epoch": 0.54, "logps_train/chosen": -65.16686248779297, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -234.8914337158203, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.03409499675035477, "rewards_train/margins": 10.801558680832386, "rewards_train/rejected": -10.767463684082031, "step": 1097 }, { "epoch": 0.54, "learning_rate": 9.54621067110511e-07, "loss": 0.0005, "step": 1098 }, { "epoch": 0.54, "logps_train/chosen": -62.76832580566406, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -229.96563720703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.15168309211730957, "rewards_train/margins": 10.615630388259888, "rewards_train/rejected": -10.463947296142578, "step": 1098 }, { "epoch": 0.54, "learning_rate": 9.54508029923006e-07, "loss": 0.0008, "step": 1099 }, { "epoch": 0.54, "logps_train/chosen": -61.1656494140625, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -232.51315307617188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.34476345777511597, "rewards_train/margins": 11.007114112377167, "rewards_train/rejected": -10.66235065460205, "step": 1099 }, { "epoch": 0.54, "learning_rate": 9.543948588343684e-07, "loss": 0.0006, "step": 1100 }, { "epoch": 0.54, "logps_train/chosen": -59.80779266357422, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -229.9407196044922, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.404620885848999, "rewards_train/margins": 10.967052221298218, "rewards_train/rejected": -10.562431335449219, "step": 1100 }, { "epoch": 0.54, "learning_rate": 9.542815538779395e-07, "loss": 0.0017, "step": 1101 }, { "epoch": 0.54, "logps_train/chosen": -63.933555603027344, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -233.87149047851562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.14370501041412354, "rewards_train/margins": 11.041497826576233, "rewards_train/rejected": -10.89779281616211, "step": 1101 }, { "epoch": 0.54, "learning_rate": 9.54168115087099e-07, "loss": 0.0002, "step": 1102 }, { "epoch": 0.54, "logps_train/chosen": -65.5740966796875, "logps_train/ref_chosen": -67.875, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -233.9806671142578, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.23018741607666016, "rewards_train/margins": 10.998322486877441, "rewards_train/rejected": -10.768135070800781, "step": 1102 }, { "epoch": 0.54, "learning_rate": 9.540545424952675e-07, "loss": 0.0002, "step": 1103 }, { "epoch": 0.54, "logps_train/chosen": -64.78968811035156, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -239.64601135253906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.09588514268398285, "rewards_train/margins": 11.383631572127342, "rewards_train/rejected": -11.28774642944336, "step": 1103 }, { "epoch": 0.54, "learning_rate": 9.53940836135904e-07, "loss": 0.0035, "step": 1104 }, { "epoch": 0.54, "logps_train/chosen": -63.60489273071289, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -238.48690795898438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1302822232246399, "rewards_train/margins": 11.085953533649445, "rewards_train/rejected": -10.955671310424805, "step": 1104 }, { "epoch": 0.54, "learning_rate": 9.538269960425068e-07, "loss": 0.0002, "step": 1105 }, { "epoch": 0.54, "logps_train/chosen": -64.25664520263672, "logps_train/ref_chosen": -68.0625, "logps_train/ref_rejected": -132.5, "logps_train/rejected": -241.39405822753906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.38314902782440186, "rewards_train/margins": 11.268063426017761, "rewards_train/rejected": -10.88491439819336, "step": 1105 }, { "epoch": 0.54, "learning_rate": 9.537130222486146e-07, "loss": 0.0005, "step": 1106 }, { "epoch": 0.54, "logps_train/chosen": -65.63490295410156, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -232.45542907714844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.07786742597818375, "rewards_train/margins": 10.94103804975748, "rewards_train/rejected": -10.863170623779297, "step": 1106 }, { "epoch": 0.55, "learning_rate": 9.535989147878043e-07, "loss": 0.0005, "step": 1107 }, { "epoch": 0.55, "logps_train/chosen": -61.970603942871094, "logps_train/ref_chosen": -61.4375, "logps_train/ref_rejected": -121.25, "logps_train/rejected": -224.95887756347656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.052749015390872955, "rewards_train/margins": 10.317454002797604, "rewards_train/rejected": -10.370203018188477, "step": 1107 }, { "epoch": 0.55, "learning_rate": 9.53484673693693e-07, "loss": 0.0005, "step": 1108 }, { "epoch": 0.55, "logps_train/chosen": -63.398765563964844, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -226.599609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.19088515639305115, "rewards_train/margins": 10.52848395705223, "rewards_train/rejected": -10.33759880065918, "step": 1108 }, { "epoch": 0.55, "learning_rate": 9.533702989999368e-07, "loss": 0.0004, "step": 1109 }, { "epoch": 0.55, "logps_train/chosen": -61.92168045043945, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -122.4375, "logps_train/rejected": -224.55238342285156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.305683434009552, "rewards_train/margins": 10.518490135669708, "rewards_train/rejected": -10.212806701660156, "step": 1109 }, { "epoch": 0.55, "learning_rate": 9.532557907402312e-07, "loss": 0.0006, "step": 1110 }, { "epoch": 0.55, "logps_train/chosen": -64.94735717773438, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -228.9637908935547, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.04476594924926758, "rewards_train/margins": 10.514289379119873, "rewards_train/rejected": -10.469523429870605, "step": 1110 }, { "epoch": 0.55, "learning_rate": 9.531411489483113e-07, "loss": 0.0009, "step": 1111 }, { "epoch": 0.55, "logps_train/chosen": -58.14286804199219, "logps_train/ref_chosen": -62.28125, "logps_train/ref_rejected": -119.5625, "logps_train/rejected": -220.82659912109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.41496121883392334, "rewards_train/margins": 10.539614081382751, "rewards_train/rejected": -10.124652862548828, "step": 1111 }, { "epoch": 0.55, "learning_rate": 9.530263736579511e-07, "loss": 0.0005, "step": 1112 }, { "epoch": 0.55, "logps_train/chosen": -62.05200958251953, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -229.5445556640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.14340728521347046, "rewards_train/margins": 10.6553835272789, "rewards_train/rejected": -10.51197624206543, "step": 1112 }, { "epoch": 0.55, "learning_rate": 9.529114649029644e-07, "loss": 0.0006, "step": 1113 }, { "epoch": 0.55, "logps_train/chosen": -58.443397521972656, "logps_train/ref_chosen": -60.75, "logps_train/ref_rejected": -118.75, "logps_train/rejected": -216.6830596923828, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.229635089635849, "rewards_train/margins": 10.020988315343857, "rewards_train/rejected": -9.791353225708008, "step": 1113 }, { "epoch": 0.55, "learning_rate": 9.52796422717204e-07, "loss": 0.0021, "step": 1114 }, { "epoch": 0.55, "logps_train/chosen": -64.11470794677734, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -231.66073608398438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.02842174470424652, "rewards_train/margins": 10.668030127882957, "rewards_train/rejected": -10.639608383178711, "step": 1114 }, { "epoch": 0.55, "learning_rate": 9.526812471345623e-07, "loss": 0.0009, "step": 1115 }, { "epoch": 0.55, "logps_train/chosen": -63.253379821777344, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -235.63433837890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.11655654013156891, "rewards_train/margins": 10.724910154938698, "rewards_train/rejected": -10.608353614807129, "step": 1115 }, { "epoch": 0.55, "learning_rate": 9.525659381889704e-07, "loss": 0.0004, "step": 1116 }, { "epoch": 0.55, "logps_train/chosen": -61.644290924072266, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -228.77450561523438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2892822027206421, "rewards_train/margins": 10.814631819725037, "rewards_train/rejected": -10.525349617004395, "step": 1116 }, { "epoch": 0.55, "learning_rate": 9.524504959143991e-07, "loss": 0.001, "step": 1117 }, { "epoch": 0.55, "logps_train/chosen": -63.18159866333008, "logps_train/ref_chosen": -62.84375, "logps_train/ref_rejected": -119.875, "logps_train/rejected": -225.703857421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.03212480992078781, "rewards_train/margins": 10.551200576126575, "rewards_train/rejected": -10.583325386047363, "step": 1117 }, { "epoch": 0.55, "learning_rate": 9.52334920344859e-07, "loss": 0.0005, "step": 1118 }, { "epoch": 0.55, "logps_train/chosen": -62.651519775390625, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -234.64364624023438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0703214555978775, "rewards_train/margins": 10.796698942780495, "rewards_train/rejected": -10.726377487182617, "step": 1118 }, { "epoch": 0.55, "learning_rate": 9.52219211514399e-07, "loss": 0.0004, "step": 1119 }, { "epoch": 0.55, "logps_train/chosen": -63.010128021240234, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -229.9600830078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.381823867559433, "rewards_train/margins": 10.795167297124863, "rewards_train/rejected": -10.41334342956543, "step": 1119 }, { "epoch": 0.55, "learning_rate": 9.521033694571078e-07, "loss": 0.0012, "step": 1120 }, { "epoch": 0.55, "logps_train/chosen": -63.61444091796875, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -238.076171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.10745242238044739, "rewards_train/margins": 11.07776454091072, "rewards_train/rejected": -10.970312118530273, "step": 1120 }, { "epoch": 0.55, "learning_rate": 9.519873942071133e-07, "loss": 0.0004, "step": 1121 }, { "epoch": 0.55, "logps_train/chosen": -62.31708908081055, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -239.75222778320312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.21985335648059845, "rewards_train/margins": 11.423298791050911, "rewards_train/rejected": -11.203445434570312, "step": 1121 }, { "epoch": 0.55, "learning_rate": 9.518712857985824e-07, "loss": 0.0037, "step": 1122 }, { "epoch": 0.55, "logps_train/chosen": -61.5065803527832, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -234.8422393798828, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3016371726989746, "rewards_train/margins": 11.095089435577393, "rewards_train/rejected": -10.793452262878418, "step": 1122 }, { "epoch": 0.55, "learning_rate": 9.517550442657219e-07, "loss": 0.0004, "step": 1123 }, { "epoch": 0.55, "logps_train/chosen": -63.3712043762207, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -131.5, "logps_train/rejected": -239.4890899658203, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.325428307056427, "rewards_train/margins": 11.130684196949005, "rewards_train/rejected": -10.805255889892578, "step": 1123 }, { "epoch": 0.55, "learning_rate": 9.516386696427769e-07, "loss": 0.0006, "step": 1124 }, { "epoch": 0.55, "logps_train/chosen": -63.16741180419922, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -243.9705047607422, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3039374351501465, "rewards_train/margins": 11.601035594940186, "rewards_train/rejected": -11.297098159790039, "step": 1124 }, { "epoch": 0.55, "learning_rate": 9.515221619640322e-07, "loss": 0.0002, "step": 1125 }, { "epoch": 0.55, "logps_train/chosen": -64.00039672851562, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -234.4420166015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.01797812059521675, "rewards_train/margins": 10.695481706410646, "rewards_train/rejected": -10.67750358581543, "step": 1125 }, { "epoch": 0.55, "learning_rate": 9.514055212638119e-07, "loss": 0.0005, "step": 1126 }, { "epoch": 0.55, "logps_train/chosen": -66.93673706054688, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -239.01791381835938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.24059756100177765, "rewards_train/margins": 10.84996335208416, "rewards_train/rejected": -11.090560913085938, "step": 1126 }, { "epoch": 0.55, "learning_rate": 9.512887475764789e-07, "loss": 0.0005, "step": 1127 }, { "epoch": 0.55, "logps_train/chosen": -62.11578369140625, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -233.53309631347656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0956481397151947, "rewards_train/margins": 10.657991737127304, "rewards_train/rejected": -10.56234359741211, "step": 1127 }, { "epoch": 0.56, "learning_rate": 9.511718409364358e-07, "loss": 0.0004, "step": 1128 }, { "epoch": 0.56, "logps_train/chosen": -64.89695739746094, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -243.0778045654297, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.01977437734603882, "rewards_train/margins": 11.36451917886734, "rewards_train/rejected": -11.384293556213379, "step": 1128 }, { "epoch": 0.56, "learning_rate": 9.51054801378124e-07, "loss": 0.0006, "step": 1129 }, { "epoch": 0.56, "logps_train/chosen": -67.42234802246094, "logps_train/ref_chosen": -67.25, "logps_train/ref_rejected": -131.25, "logps_train/rejected": -239.9952850341797, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.014451146125793457, "rewards_train/margins": 10.859685778617859, "rewards_train/rejected": -10.874136924743652, "step": 1129 }, { "epoch": 0.56, "learning_rate": 9.50937628936024e-07, "loss": 0.0003, "step": 1130 }, { "epoch": 0.56, "logps_train/chosen": -65.94861602783203, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -119.75, "logps_train/rejected": -223.13754272460938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.28231281042099, "rewards_train/margins": 10.05917602777481, "rewards_train/rejected": -10.3414888381958, "step": 1130 }, { "epoch": 0.56, "learning_rate": 9.508203236446558e-07, "loss": 0.0015, "step": 1131 }, { "epoch": 0.56, "logps_train/chosen": -64.27481079101562, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -229.986572265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.13153395056724548, "rewards_train/margins": 10.722982078790665, "rewards_train/rejected": -10.85451602935791, "step": 1131 }, { "epoch": 0.56, "learning_rate": 9.507028855385781e-07, "loss": 0.0004, "step": 1132 }, { "epoch": 0.56, "logps_train/chosen": -66.07756042480469, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -233.1622314453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.028019718825817108, "rewards_train/margins": 10.71144599467516, "rewards_train/rejected": -10.739465713500977, "step": 1132 }, { "epoch": 0.56, "learning_rate": 9.505853146523893e-07, "loss": 0.0005, "step": 1133 }, { "epoch": 0.56, "logps_train/chosen": -62.60697937011719, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -230.45352172851562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.26493650674819946, "rewards_train/margins": 10.880991995334625, "rewards_train/rejected": -10.616055488586426, "step": 1133 }, { "epoch": 0.56, "learning_rate": 9.504676110207261e-07, "loss": 0.0008, "step": 1134 }, { "epoch": 0.56, "logps_train/chosen": -61.43004608154297, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -236.43658447265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.18653610348701477, "rewards_train/margins": 11.400700837373734, "rewards_train/rejected": -11.214164733886719, "step": 1134 }, { "epoch": 0.56, "learning_rate": 9.503497746782652e-07, "loss": 0.0006, "step": 1135 }, { "epoch": 0.56, "logps_train/chosen": -64.90605163574219, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -241.33192443847656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.03840825706720352, "rewards_train/margins": 11.279452346265316, "rewards_train/rejected": -11.31786060333252, "step": 1135 }, { "epoch": 0.56, "learning_rate": 9.502318056597219e-07, "loss": 0.0003, "step": 1136 }, { "epoch": 0.56, "logps_train/chosen": -66.2090072631836, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -238.47860717773438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.18232624232769012, "rewards_train/margins": 11.0519610196352, "rewards_train/rejected": -11.23428726196289, "step": 1136 }, { "epoch": 0.56, "learning_rate": 9.501137039998504e-07, "loss": 0.0004, "step": 1137 }, { "epoch": 0.56, "logps_train/chosen": -63.25052261352539, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -233.10922241210938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.07636363059282303, "rewards_train/margins": 11.010723181068897, "rewards_train/rejected": -10.934359550476074, "step": 1137 }, { "epoch": 0.56, "learning_rate": 9.499954697334444e-07, "loss": 0.0007, "step": 1138 }, { "epoch": 0.56, "logps_train/chosen": -66.69009399414062, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -240.1603546142578, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.04190996289253235, "rewards_train/margins": 11.183987826108932, "rewards_train/rejected": -11.225897789001465, "step": 1138 }, { "epoch": 0.56, "learning_rate": 9.498771028953369e-07, "loss": 0.0007, "step": 1139 }, { "epoch": 0.56, "logps_train/chosen": -64.89002990722656, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -244.2027587890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.01876063644886017, "rewards_train/margins": 11.677366212010384, "rewards_train/rejected": -11.658605575561523, "step": 1139 }, { "epoch": 0.56, "learning_rate": 9.497586035203989e-07, "loss": 0.0015, "step": 1140 }, { "epoch": 0.56, "logps_train/chosen": -64.1437759399414, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -235.05474853515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.029270220547914505, "rewards_train/margins": 10.70115466043353, "rewards_train/rejected": -10.730424880981445, "step": 1140 }, { "epoch": 0.56, "learning_rate": 9.496399716435416e-07, "loss": 0.0007, "step": 1141 }, { "epoch": 0.56, "logps_train/chosen": -65.58897399902344, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -132.0, "logps_train/rejected": -246.26162719726562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.020811662077903748, "rewards_train/margins": 11.400955572724342, "rewards_train/rejected": -11.421767234802246, "step": 1141 }, { "epoch": 0.56, "learning_rate": 9.495212072997145e-07, "loss": 0.0005, "step": 1142 }, { "epoch": 0.56, "logps_train/chosen": -64.46686553955078, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -239.58168029785156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.11005149781703949, "rewards_train/margins": 11.157281264662743, "rewards_train/rejected": -11.047229766845703, "step": 1142 }, { "epoch": 0.56, "learning_rate": 9.494023105239065e-07, "loss": 0.0004, "step": 1143 }, { "epoch": 0.56, "logps_train/chosen": -65.8167495727539, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -243.0345458984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.18887679278850555, "rewards_train/margins": 11.064919784665108, "rewards_train/rejected": -11.253796577453613, "step": 1143 }, { "epoch": 0.56, "learning_rate": 9.492832813511453e-07, "loss": 0.0011, "step": 1144 }, { "epoch": 0.56, "logps_train/chosen": -65.28194427490234, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -235.1020050048828, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.06466855108737946, "rewards_train/margins": 10.877220258116722, "rewards_train/rejected": -10.941888809204102, "step": 1144 }, { "epoch": 0.56, "learning_rate": 9.49164119816498e-07, "loss": 0.0009, "step": 1145 }, { "epoch": 0.56, "logps_train/chosen": -64.88374328613281, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -230.35891723632812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.14160582423210144, "rewards_train/margins": 10.715290516614914, "rewards_train/rejected": -10.573684692382812, "step": 1145 }, { "epoch": 0.56, "learning_rate": 9.490448259550699e-07, "loss": 0.0004, "step": 1146 }, { "epoch": 0.56, "logps_train/chosen": -64.83963775634766, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -234.22402954101562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.03332863003015518, "rewards_train/margins": 10.81720008701086, "rewards_train/rejected": -10.850528717041016, "step": 1146 }, { "epoch": 0.56, "learning_rate": 9.489253998020061e-07, "loss": 0.0004, "step": 1147 }, { "epoch": 0.56, "logps_train/chosen": -68.01261901855469, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -239.0877227783203, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.30599838495254517, "rewards_train/margins": 10.814395368099213, "rewards_train/rejected": -11.120393753051758, "step": 1147 }, { "epoch": 0.57, "learning_rate": 9.488058413924902e-07, "loss": 0.0003, "step": 1148 }, { "epoch": 0.57, "logps_train/chosen": -65.49202728271484, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -235.6201934814453, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1049639880657196, "rewards_train/margins": 10.894849091768265, "rewards_train/rejected": -10.999813079833984, "step": 1148 }, { "epoch": 0.57, "learning_rate": 9.486861507617452e-07, "loss": 0.0004, "step": 1149 }, { "epoch": 0.57, "logps_train/chosen": -61.448829650878906, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -225.4963836669922, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.15287120640277863, "rewards_train/margins": 10.500458791851997, "rewards_train/rejected": -10.347587585449219, "step": 1149 }, { "epoch": 0.57, "learning_rate": 9.485663279450325e-07, "loss": 0.0005, "step": 1150 }, { "epoch": 0.57, "logps_train/chosen": -67.94270324707031, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -238.07571411132812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1973952054977417, "rewards_train/margins": 11.01833164691925, "rewards_train/rejected": -11.215726852416992, "step": 1150 }, { "epoch": 0.57, "learning_rate": 9.484463729776526e-07, "loss": 0.0008, "step": 1151 }, { "epoch": 0.57, "logps_train/chosen": -62.804954528808594, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -233.15493774414062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.12248289585113525, "rewards_train/margins": 10.869421601295471, "rewards_train/rejected": -10.746938705444336, "step": 1151 }, { "epoch": 0.57, "learning_rate": 9.483262858949452e-07, "loss": 0.0004, "step": 1152 }, { "epoch": 0.57, "logps_train/chosen": -67.094970703125, "logps_train/ref_chosen": -67.75, "logps_train/ref_rejected": -135.0, "logps_train/rejected": -251.9759979248047, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0632573813199997, "rewards_train/margins": 11.760663196444511, "rewards_train/rejected": -11.697405815124512, "step": 1152 }, { "epoch": 0.57, "learning_rate": 9.482060667322889e-07, "loss": 0.0002, "step": 1153 }, { "epoch": 0.57, "logps_train/chosen": -63.22105407714844, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -236.40347290039062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2807753384113312, "rewards_train/margins": 11.198954850435257, "rewards_train/rejected": -10.918179512023926, "step": 1153 }, { "epoch": 0.57, "learning_rate": 9.48085715525101e-07, "loss": 0.0002, "step": 1154 }, { "epoch": 0.57, "logps_train/chosen": -61.594112396240234, "logps_train/ref_chosen": -61.46875, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -228.4780731201172, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.011047057807445526, "rewards_train/margins": 10.73319561034441, "rewards_train/rejected": -10.744242668151855, "step": 1154 }, { "epoch": 0.57, "learning_rate": 9.479652323088376e-07, "loss": 0.0007, "step": 1155 }, { "epoch": 0.57, "logps_train/chosen": -62.013702392578125, "logps_train/ref_chosen": -62.4375, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -232.75445556640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.040816985070705414, "rewards_train/margins": 10.969485960900784, "rewards_train/rejected": -10.928668975830078, "step": 1155 }, { "epoch": 0.57, "learning_rate": 9.478446171189941e-07, "loss": 0.0007, "step": 1156 }, { "epoch": 0.57, "logps_train/chosen": -67.87628173828125, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -134.375, "logps_train/rejected": -253.66702270507812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.16116292774677277, "rewards_train/margins": 11.76886884868145, "rewards_train/rejected": -11.930031776428223, "step": 1156 }, { "epoch": 0.57, "learning_rate": 9.477238699911044e-07, "loss": 0.0004, "step": 1157 }, { "epoch": 0.57, "logps_train/chosen": -61.56583786010742, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -121.25, "logps_train/rejected": -225.97068786621094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.21319155395030975, "rewards_train/margins": 10.68076853454113, "rewards_train/rejected": -10.46757698059082, "step": 1157 }, { "epoch": 0.57, "learning_rate": 9.476029909607415e-07, "loss": 0.0007, "step": 1158 }, { "epoch": 0.57, "logps_train/chosen": -62.860572814941406, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -120.6875, "logps_train/rejected": -233.3427276611328, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.006130363792181015, "rewards_train/margins": 11.272922661155462, "rewards_train/rejected": -11.266792297363281, "step": 1158 }, { "epoch": 0.57, "learning_rate": 9.474819800635172e-07, "loss": 0.0004, "step": 1159 }, { "epoch": 0.57, "logps_train/chosen": -65.81631469726562, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -236.71144104003906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.04103102535009384, "rewards_train/margins": 10.999059535562992, "rewards_train/rejected": -11.040090560913086, "step": 1159 }, { "epoch": 0.57, "learning_rate": 9.473608373350818e-07, "loss": 0.0004, "step": 1160 }, { "epoch": 0.57, "logps_train/chosen": -68.14588928222656, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -242.66836547851562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.12484294176101685, "rewards_train/margins": 11.208986937999725, "rewards_train/rejected": -11.333829879760742, "step": 1160 }, { "epoch": 0.57, "learning_rate": 9.472395628111254e-07, "loss": 0.0004, "step": 1161 }, { "epoch": 0.57, "logps_train/chosen": -62.54010009765625, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -233.25518798828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.12133187055587769, "rewards_train/margins": 10.805980384349823, "rewards_train/rejected": -10.684648513793945, "step": 1161 }, { "epoch": 0.57, "learning_rate": 9.471181565273757e-07, "loss": 0.0002, "step": 1162 }, { "epoch": 0.57, "logps_train/chosen": -63.55633544921875, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -127.5625, "logps_train/rejected": -239.8771209716797, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1045714020729065, "rewards_train/margins": 11.334471762180328, "rewards_train/rejected": -11.229900360107422, "step": 1162 }, { "epoch": 0.57, "learning_rate": 9.469966185196001e-07, "loss": 0.0006, "step": 1163 }, { "epoch": 0.57, "logps_train/chosen": -64.71221923828125, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -131.875, "logps_train/rejected": -246.4224853515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.18893426656723022, "rewards_train/margins": 11.64065545797348, "rewards_train/rejected": -11.45172119140625, "step": 1163 }, { "epoch": 0.57, "learning_rate": 9.468749488236045e-07, "loss": 0.0001, "step": 1164 }, { "epoch": 0.57, "logps_train/chosen": -63.511749267578125, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -122.3125, "logps_train/rejected": -230.67404174804688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.005325447767972946, "rewards_train/margins": 10.831561911851168, "rewards_train/rejected": -10.83688735961914, "step": 1164 }, { "epoch": 0.57, "learning_rate": 9.467531474752335e-07, "loss": 0.0004, "step": 1165 }, { "epoch": 0.57, "logps_train/chosen": -66.62297058105469, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -236.34213256835938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.052775539457798004, "rewards_train/margins": 10.987786136567593, "rewards_train/rejected": -11.04056167602539, "step": 1165 }, { "epoch": 0.57, "learning_rate": 9.466312145103707e-07, "loss": 0.0008, "step": 1166 }, { "epoch": 0.57, "logps_train/chosen": -58.873504638671875, "logps_train/ref_chosen": -61.5625, "logps_train/ref_rejected": -119.25, "logps_train/rejected": -225.30807495117188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.26997387409210205, "rewards_train/margins": 10.875096440315247, "rewards_train/rejected": -10.605122566223145, "step": 1166 }, { "epoch": 0.57, "learning_rate": 9.465091499649384e-07, "loss": 0.0002, "step": 1167 }, { "epoch": 0.57, "logps_train/chosen": -59.875091552734375, "logps_train/ref_chosen": -60.625, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -233.43399047851562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.07381894439458847, "rewards_train/margins": 11.158232472836971, "rewards_train/rejected": -11.084413528442383, "step": 1167 }, { "epoch": 0.58, "learning_rate": 9.463869538748974e-07, "loss": 0.0002, "step": 1168 }, { "epoch": 0.58, "logps_train/chosen": -68.12120056152344, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -244.044189453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.19127047061920166, "rewards_train/margins": 11.427796721458435, "rewards_train/rejected": -11.619067192077637, "step": 1168 }, { "epoch": 0.58, "learning_rate": 9.462646262762479e-07, "loss": 0.0006, "step": 1169 }, { "epoch": 0.58, "logps_train/chosen": -63.37330627441406, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -238.82083129882812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.10671547055244446, "rewards_train/margins": 11.345874518156052, "rewards_train/rejected": -11.452589988708496, "step": 1169 }, { "epoch": 0.58, "learning_rate": 9.46142167205028e-07, "loss": 0.0001, "step": 1170 }, { "epoch": 0.58, "logps_train/chosen": -63.38459777832031, "logps_train/ref_chosen": -62.34375, "logps_train/ref_rejected": -123.1875, "logps_train/rejected": -234.1964874267578, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.10335269570350647, "rewards_train/margins": 11.000280231237411, "rewards_train/rejected": -11.103632926940918, "step": 1170 }, { "epoch": 0.58, "learning_rate": 9.460195766973153e-07, "loss": 0.0002, "step": 1171 }, { "epoch": 0.58, "logps_train/chosen": -63.62976837158203, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -231.87466430664062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.037244271486997604, "rewards_train/margins": 10.895829726010561, "rewards_train/rejected": -10.933073997497559, "step": 1171 }, { "epoch": 0.58, "learning_rate": 9.458968547892257e-07, "loss": 0.0004, "step": 1172 }, { "epoch": 0.58, "logps_train/chosen": -61.94892120361328, "logps_train/ref_chosen": -63.59375, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -230.64505004882812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.16362866759300232, "rewards_train/margins": 10.773445218801498, "rewards_train/rejected": -10.609816551208496, "step": 1172 }, { "epoch": 0.58, "learning_rate": 9.45774001516914e-07, "loss": 0.0003, "step": 1173 }, { "epoch": 0.58, "logps_train/chosen": -62.69709777832031, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -231.8984832763672, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.10514356195926666, "rewards_train/margins": 10.927852645516396, "rewards_train/rejected": -10.822709083557129, "step": 1173 }, { "epoch": 0.58, "learning_rate": 9.456510169165733e-07, "loss": 0.0004, "step": 1174 }, { "epoch": 0.58, "logps_train/chosen": -67.07559204101562, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -241.38011169433594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3589995503425598, "rewards_train/margins": 11.269246757030487, "rewards_train/rejected": -11.628246307373047, "step": 1174 }, { "epoch": 0.58, "learning_rate": 9.455279010244359e-07, "loss": 0.0009, "step": 1175 }, { "epoch": 0.58, "logps_train/chosen": -68.62821960449219, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -235.45118713378906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3724411129951477, "rewards_train/margins": 10.921164691448212, "rewards_train/rejected": -11.29360580444336, "step": 1175 }, { "epoch": 0.58, "learning_rate": 9.454046538767725e-07, "loss": 0.0004, "step": 1176 }, { "epoch": 0.58, "logps_train/chosen": -65.6810073852539, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -237.1503448486328, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0877784863114357, "rewards_train/margins": 11.100987993180752, "rewards_train/rejected": -11.188766479492188, "step": 1176 }, { "epoch": 0.58, "learning_rate": 9.452812755098926e-07, "loss": 0.0009, "step": 1177 }, { "epoch": 0.58, "logps_train/chosen": -58.820892333984375, "logps_train/ref_chosen": -61.15625, "logps_train/ref_rejected": -119.6875, "logps_train/rejected": -225.66558837890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.23473221063613892, "rewards_train/margins": 10.832150042057037, "rewards_train/rejected": -10.597417831420898, "step": 1177 }, { "epoch": 0.58, "learning_rate": 9.451577659601443e-07, "loss": 0.0008, "step": 1178 }, { "epoch": 0.58, "logps_train/chosen": -64.03740692138672, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -234.3478240966797, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.13142631947994232, "rewards_train/margins": 11.069566264748573, "rewards_train/rejected": -11.200992584228516, "step": 1178 }, { "epoch": 0.58, "learning_rate": 9.450341252639143e-07, "loss": 0.0015, "step": 1179 }, { "epoch": 0.58, "logps_train/chosen": -65.57785034179688, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -240.49029541015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.047971174120903015, "rewards_train/margins": 11.147935464978218, "rewards_train/rejected": -11.195906639099121, "step": 1179 }, { "epoch": 0.58, "learning_rate": 9.44910353457628e-07, "loss": 0.0002, "step": 1180 }, { "epoch": 0.58, "logps_train/chosen": -60.73167419433594, "logps_train/ref_chosen": -61.625, "logps_train/ref_rejected": -117.25, "logps_train/rejected": -224.9261474609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.08996710926294327, "rewards_train/margins": 10.856948234140873, "rewards_train/rejected": -10.76698112487793, "step": 1180 }, { "epoch": 0.58, "learning_rate": 9.447864505777494e-07, "loss": 0.0005, "step": 1181 }, { "epoch": 0.58, "logps_train/chosen": -62.86444854736328, "logps_train/ref_chosen": -62.21875, "logps_train/ref_rejected": -120.625, "logps_train/rejected": -227.5688018798828, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.06505805253982544, "rewards_train/margins": 10.627906501293182, "rewards_train/rejected": -10.692964553833008, "step": 1181 }, { "epoch": 0.58, "learning_rate": 9.446624166607813e-07, "loss": 0.0009, "step": 1182 }, { "epoch": 0.58, "logps_train/chosen": -64.22395324707031, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -234.77203369140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0625162273645401, "rewards_train/margins": 11.060130134224892, "rewards_train/rejected": -10.997613906860352, "step": 1182 }, { "epoch": 0.58, "learning_rate": 9.445382517432647e-07, "loss": 0.0006, "step": 1183 }, { "epoch": 0.58, "logps_train/chosen": -64.84681701660156, "logps_train/ref_chosen": -63.15625, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -233.5303955078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1701548546552658, "rewards_train/margins": 10.87692804634571, "rewards_train/rejected": -11.047082901000977, "step": 1183 }, { "epoch": 0.58, "learning_rate": 9.444139558617794e-07, "loss": 0.0003, "step": 1184 }, { "epoch": 0.58, "logps_train/chosen": -60.472084045410156, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -232.44134521484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.26531627774238586, "rewards_train/margins": 11.248025208711624, "rewards_train/rejected": -10.982708930969238, "step": 1184 }, { "epoch": 0.58, "learning_rate": 9.44289529052944e-07, "loss": 0.0005, "step": 1185 }, { "epoch": 0.58, "logps_train/chosen": -65.72013854980469, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -250.20840454101562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.00707206130027771, "rewards_train/margins": 11.997751623392105, "rewards_train/rejected": -12.004823684692383, "step": 1185 }, { "epoch": 0.58, "learning_rate": 9.441649713534155e-07, "loss": 0.0002, "step": 1186 }, { "epoch": 0.58, "logps_train/chosen": -65.80450439453125, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -246.7115478515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.08038970082998276, "rewards_train/margins": 11.77068492025137, "rewards_train/rejected": -11.690295219421387, "step": 1186 }, { "epoch": 0.58, "learning_rate": 9.440402827998891e-07, "loss": 0.0002, "step": 1187 }, { "epoch": 0.58, "logps_train/chosen": -61.71145248413086, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -231.31703186035156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.243063822388649, "rewards_train/margins": 11.052597895264626, "rewards_train/rejected": -10.809534072875977, "step": 1187 }, { "epoch": 0.58, "learning_rate": 9.439154634290992e-07, "loss": 0.0001, "step": 1188 }, { "epoch": 0.58, "logps_train/chosen": -65.5792236328125, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -244.56466674804688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1278201937675476, "rewards_train/margins": 11.742489993572235, "rewards_train/rejected": -11.614669799804688, "step": 1188 }, { "epoch": 0.59, "learning_rate": 9.437905132778183e-07, "loss": 0.0002, "step": 1189 }, { "epoch": 0.59, "logps_train/chosen": -65.07750701904297, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -240.00933837890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.05804365873336792, "rewards_train/margins": 11.374044239521027, "rewards_train/rejected": -11.432087898254395, "step": 1189 }, { "epoch": 0.59, "learning_rate": 9.436654323828577e-07, "loss": 0.0003, "step": 1190 }, { "epoch": 0.59, "logps_train/chosen": -64.28108978271484, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -234.60833740234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.07991566509008408, "rewards_train/margins": 11.119882918894291, "rewards_train/rejected": -11.199798583984375, "step": 1190 }, { "epoch": 0.59, "learning_rate": 9.435402207810669e-07, "loss": 0.0002, "step": 1191 }, { "epoch": 0.59, "logps_train/chosen": -61.31407928466797, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -244.544677734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.11944638192653656, "rewards_train/margins": 11.605846986174583, "rewards_train/rejected": -11.486400604248047, "step": 1191 }, { "epoch": 0.59, "learning_rate": 9.434148785093341e-07, "loss": 0.0001, "step": 1192 }, { "epoch": 0.59, "logps_train/chosen": -66.0867691040039, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -242.61663818359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1638527512550354, "rewards_train/margins": 11.438436448574066, "rewards_train/rejected": -11.602289199829102, "step": 1192 }, { "epoch": 0.59, "learning_rate": 9.432894056045861e-07, "loss": 0.0004, "step": 1193 }, { "epoch": 0.59, "logps_train/chosen": -63.59541702270508, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -241.1126708984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.06091723218560219, "rewards_train/margins": 11.597086284309626, "rewards_train/rejected": -11.536169052124023, "step": 1193 }, { "epoch": 0.59, "learning_rate": 9.43163802103788e-07, "loss": 0.0008, "step": 1194 }, { "epoch": 0.59, "logps_train/chosen": -63.235252380371094, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -120.875, "logps_train/rejected": -229.8876190185547, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.07237809151411057, "rewards_train/margins": 10.827663488686085, "rewards_train/rejected": -10.900041580200195, "step": 1194 }, { "epoch": 0.59, "learning_rate": 9.430380680439434e-07, "loss": 0.0003, "step": 1195 }, { "epoch": 0.59, "logps_train/chosen": -65.49363708496094, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -243.74346923828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.03447112813591957, "rewards_train/margins": 11.484407808631659, "rewards_train/rejected": -11.518878936767578, "step": 1195 }, { "epoch": 0.59, "learning_rate": 9.429122034620944e-07, "loss": 0.0002, "step": 1196 }, { "epoch": 0.59, "logps_train/chosen": -63.39272689819336, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -242.55813598632812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.05369602143764496, "rewards_train/margins": 11.734362944960594, "rewards_train/rejected": -11.68066692352295, "step": 1196 }, { "epoch": 0.59, "learning_rate": 9.427862083953218e-07, "loss": 0.0003, "step": 1197 }, { "epoch": 0.59, "logps_train/chosen": -64.7717056274414, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -236.5234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.10827375203371048, "rewards_train/margins": 11.00461744517088, "rewards_train/rejected": -11.11289119720459, "step": 1197 }, { "epoch": 0.59, "learning_rate": 9.426600828807441e-07, "loss": 0.0003, "step": 1198 }, { "epoch": 0.59, "logps_train/chosen": -64.09197998046875, "logps_train/ref_chosen": -62.34375, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -235.75259399414062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1737002730369568, "rewards_train/margins": 11.052047789096832, "rewards_train/rejected": -11.225748062133789, "step": 1198 }, { "epoch": 0.59, "learning_rate": 9.425338269555192e-07, "loss": 0.0003, "step": 1199 }, { "epoch": 0.59, "logps_train/chosen": -69.30653381347656, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -243.41920471191406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3096085786819458, "rewards_train/margins": 11.208874583244324, "rewards_train/rejected": -11.51848316192627, "step": 1199 }, { "epoch": 0.59, "learning_rate": 9.424074406568428e-07, "loss": 0.0002, "step": 1200 }, { "epoch": 0.59, "logps_train/chosen": -60.78847122192383, "logps_train/ref_chosen": -61.5, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -234.4656524658203, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0710553228855133, "rewards_train/margins": 10.971234232187271, "rewards_train/rejected": -10.900178909301758, "step": 1200 }, { "epoch": 0.59, "learning_rate": 9.422809240219491e-07, "loss": 0.0003, "step": 1201 }, { "epoch": 0.59, "logps_train/chosen": -64.26058959960938, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -240.70413208007812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.02257322520017624, "rewards_train/margins": 11.599432699382305, "rewards_train/rejected": -11.576859474182129, "step": 1201 }, { "epoch": 0.59, "learning_rate": 9.421542770881107e-07, "loss": 0.0001, "step": 1202 }, { "epoch": 0.59, "logps_train/chosen": -62.6238899230957, "logps_train/ref_chosen": -62.59375, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -238.39852905273438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0013290345668792725, "rewards_train/margins": 11.404539495706558, "rewards_train/rejected": -11.405868530273438, "step": 1202 }, { "epoch": 0.59, "learning_rate": 9.420274998926387e-07, "loss": 0.0004, "step": 1203 }, { "epoch": 0.59, "logps_train/chosen": -65.89344787597656, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -243.47328186035156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.07012812793254852, "rewards_train/margins": 11.753101035952568, "rewards_train/rejected": -11.68297290802002, "step": 1203 }, { "epoch": 0.59, "learning_rate": 9.419005924728827e-07, "loss": 0.0005, "step": 1204 }, { "epoch": 0.59, "logps_train/chosen": -64.99614715576172, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -242.308837890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.058550525456666946, "rewards_train/margins": 11.50939304754138, "rewards_train/rejected": -11.567943572998047, "step": 1204 }, { "epoch": 0.59, "learning_rate": 9.417735548662301e-07, "loss": 0.0002, "step": 1205 }, { "epoch": 0.59, "logps_train/chosen": -65.07421875, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -236.43637084960938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1703614890575409, "rewards_train/margins": 11.005502730607986, "rewards_train/rejected": -11.175864219665527, "step": 1205 }, { "epoch": 0.59, "learning_rate": 9.416463871101073e-07, "loss": 0.0003, "step": 1206 }, { "epoch": 0.59, "logps_train/chosen": -65.15362548828125, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -241.45684814453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.017229992896318436, "rewards_train/margins": 11.666089970618486, "rewards_train/rejected": -11.648859977722168, "step": 1206 }, { "epoch": 0.59, "learning_rate": 9.415190892419789e-07, "loss": 0.0004, "step": 1207 }, { "epoch": 0.59, "logps_train/chosen": -65.35975646972656, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -245.69912719726562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.07699161767959595, "rewards_train/margins": 11.604834973812103, "rewards_train/rejected": -11.6818265914917, "step": 1207 }, { "epoch": 0.59, "learning_rate": 9.413916612993473e-07, "loss": 0.0002, "step": 1208 }, { "epoch": 0.59, "logps_train/chosen": -62.42444610595703, "logps_train/ref_chosen": -61.78125, "logps_train/ref_rejected": -120.25, "logps_train/rejected": -234.49244689941406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.06549126654863358, "rewards_train/margins": 11.360852651298046, "rewards_train/rejected": -11.42634391784668, "step": 1208 }, { "epoch": 0.6, "learning_rate": 9.412641033197541e-07, "loss": 0.0003, "step": 1209 }, { "epoch": 0.6, "logps_train/chosen": -62.55946350097656, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -241.91595458984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2523546516895294, "rewards_train/margins": 11.754154235124588, "rewards_train/rejected": -11.501799583435059, "step": 1209 }, { "epoch": 0.6, "learning_rate": 9.411364153407783e-07, "loss": 0.0001, "step": 1210 }, { "epoch": 0.6, "logps_train/chosen": -63.54185485839844, "logps_train/ref_chosen": -62.65625, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -233.67962646484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.08736368268728256, "rewards_train/margins": 10.970003642141819, "rewards_train/rejected": -11.057367324829102, "step": 1210 }, { "epoch": 0.6, "learning_rate": 9.410085974000381e-07, "loss": 0.0009, "step": 1211 }, { "epoch": 0.6, "logps_train/chosen": -62.44192123413086, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -237.87538146972656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.08190661668777466, "rewards_train/margins": 11.25904494524002, "rewards_train/rejected": -11.177138328552246, "step": 1211 }, { "epoch": 0.6, "learning_rate": 9.408806495351892e-07, "loss": 0.0003, "step": 1212 }, { "epoch": 0.6, "logps_train/chosen": -67.71760559082031, "logps_train/ref_chosen": -67.0625, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -244.5999755859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.06614544987678528, "rewards_train/margins": 11.455522030591965, "rewards_train/rejected": -11.52166748046875, "step": 1212 }, { "epoch": 0.6, "learning_rate": 9.40752571783926e-07, "loss": 0.0011, "step": 1213 }, { "epoch": 0.6, "logps_train/chosen": -61.54656982421875, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -130.625, "logps_train/rejected": -243.9331817626953, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.306793212890625, "rewards_train/margins": 11.64293384552002, "rewards_train/rejected": -11.336140632629395, "step": 1213 }, { "epoch": 0.6, "learning_rate": 9.40624364183981e-07, "loss": 0.0012, "step": 1214 }, { "epoch": 0.6, "logps_train/chosen": -64.25283813476562, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -247.09713745117188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.15455079078674316, "rewards_train/margins": 12.01660943031311, "rewards_train/rejected": -11.862058639526367, "step": 1214 }, { "epoch": 0.6, "learning_rate": 9.40496026773125e-07, "loss": 0.0001, "step": 1215 }, { "epoch": 0.6, "logps_train/chosen": -61.90118408203125, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -232.57318115234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.21168792247772217, "rewards_train/margins": 11.073107600212097, "rewards_train/rejected": -10.861419677734375, "step": 1215 }, { "epoch": 0.6, "learning_rate": 9.403675595891674e-07, "loss": 0.0006, "step": 1216 }, { "epoch": 0.6, "logps_train/chosen": -65.97721099853516, "logps_train/ref_chosen": -67.5, "logps_train/ref_rejected": -127.5625, "logps_train/rejected": -244.35357666015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.15491551160812378, "rewards_train/margins": 11.834412276744843, "rewards_train/rejected": -11.679496765136719, "step": 1216 }, { "epoch": 0.6, "learning_rate": 9.40238962669955e-07, "loss": 0.0002, "step": 1217 }, { "epoch": 0.6, "logps_train/chosen": -63.83898162841797, "logps_train/ref_chosen": -61.96875, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -233.97433471679688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1867791712284088, "rewards_train/margins": 11.048105090856552, "rewards_train/rejected": -11.234884262084961, "step": 1217 }, { "epoch": 0.6, "learning_rate": 9.401102360533737e-07, "loss": 0.0007, "step": 1218 }, { "epoch": 0.6, "logps_train/chosen": -61.852481842041016, "logps_train/ref_chosen": -60.1875, "logps_train/ref_rejected": -121.125, "logps_train/rejected": -230.25411987304688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.165570467710495, "rewards_train/margins": 10.74997690320015, "rewards_train/rejected": -10.915547370910645, "step": 1218 }, { "epoch": 0.6, "learning_rate": 9.39981379777347e-07, "loss": 0.0006, "step": 1219 }, { "epoch": 0.6, "logps_train/chosen": -62.42755889892578, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -230.785888671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.05343551188707352, "rewards_train/margins": 10.85843963176012, "rewards_train/rejected": -10.805004119873047, "step": 1219 }, { "epoch": 0.6, "learning_rate": 9.39852393879837e-07, "loss": 0.0005, "step": 1220 }, { "epoch": 0.6, "logps_train/chosen": -65.16590881347656, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -120.875, "logps_train/rejected": -229.0740966796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.14774295687675476, "rewards_train/margins": 10.674219399690628, "rewards_train/rejected": -10.821962356567383, "step": 1220 }, { "epoch": 0.6, "learning_rate": 9.397232783988438e-07, "loss": 0.0008, "step": 1221 }, { "epoch": 0.6, "logps_train/chosen": -65.11620330810547, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -246.3607177734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.046117622405290604, "rewards_train/margins": 11.642981689423323, "rewards_train/rejected": -11.689099311828613, "step": 1221 }, { "epoch": 0.6, "learning_rate": 9.395940333724055e-07, "loss": 0.0006, "step": 1222 }, { "epoch": 0.6, "logps_train/chosen": -63.48719787597656, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -241.11892700195312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2279893457889557, "rewards_train/margins": 11.620838314294815, "rewards_train/rejected": -11.39284896850586, "step": 1222 }, { "epoch": 0.6, "learning_rate": 9.394646588385988e-07, "loss": 0.0003, "step": 1223 }, { "epoch": 0.6, "logps_train/chosen": -63.587890625, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -238.74581909179688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.12958991527557373, "rewards_train/margins": 11.443918108940125, "rewards_train/rejected": -11.31432819366455, "step": 1223 }, { "epoch": 0.6, "learning_rate": 9.393351548355382e-07, "loss": 0.0005, "step": 1224 }, { "epoch": 0.6, "logps_train/chosen": -68.48545837402344, "logps_train/ref_chosen": -67.25, "logps_train/ref_rejected": -131.375, "logps_train/rejected": -250.13987731933594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.12515684962272644, "rewards_train/margins": 11.753480464220047, "rewards_train/rejected": -11.878637313842773, "step": 1224 }, { "epoch": 0.6, "learning_rate": 9.392055214013765e-07, "loss": 0.0007, "step": 1225 }, { "epoch": 0.6, "logps_train/chosen": -66.7669448852539, "logps_train/ref_chosen": -67.625, "logps_train/ref_rejected": -133.625, "logps_train/rejected": -252.0027618408203, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.08551305532455444, "rewards_train/margins": 11.918845117092133, "rewards_train/rejected": -11.833332061767578, "step": 1225 }, { "epoch": 0.6, "learning_rate": 9.390757585743043e-07, "loss": 0.0004, "step": 1226 }, { "epoch": 0.6, "logps_train/chosen": -64.47602844238281, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -248.03982543945312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.11440899968147278, "rewards_train/margins": 12.051547557115555, "rewards_train/rejected": -11.937138557434082, "step": 1226 }, { "epoch": 0.6, "learning_rate": 9.389458663925511e-07, "loss": 0.0002, "step": 1227 }, { "epoch": 0.6, "logps_train/chosen": -66.48463439941406, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -246.75718688964844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.02614634484052658, "rewards_train/margins": 11.77647440880537, "rewards_train/rejected": -11.750328063964844, "step": 1227 }, { "epoch": 0.6, "learning_rate": 9.388158448943837e-07, "loss": 0.0002, "step": 1228 }, { "epoch": 0.6, "logps_train/chosen": -61.386383056640625, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -238.81578063964844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.18406683206558228, "rewards_train/margins": 11.677461683750153, "rewards_train/rejected": -11.49339485168457, "step": 1228 }, { "epoch": 0.61, "learning_rate": 9.386856941181074e-07, "loss": 0.0003, "step": 1229 }, { "epoch": 0.61, "logps_train/chosen": -63.218448638916016, "logps_train/ref_chosen": -62.03125, "logps_train/ref_rejected": -122.1875, "logps_train/rejected": -233.85211181640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.12033125758171082, "rewards_train/margins": 11.04241994023323, "rewards_train/rejected": -11.162751197814941, "step": 1229 }, { "epoch": 0.61, "learning_rate": 9.385554141020653e-07, "loss": 0.0017, "step": 1230 }, { "epoch": 0.61, "logps_train/chosen": -64.92808532714844, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -244.13720703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.09549474716186523, "rewards_train/margins": 11.880091190338135, "rewards_train/rejected": -11.9755859375, "step": 1230 }, { "epoch": 0.61, "learning_rate": 9.384250048846391e-07, "loss": 0.0006, "step": 1231 }, { "epoch": 0.61, "logps_train/chosen": -65.72036743164062, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -238.32676696777344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.16607993841171265, "rewards_train/margins": 11.324702799320221, "rewards_train/rejected": -11.490782737731934, "step": 1231 }, { "epoch": 0.61, "learning_rate": 9.38294466504248e-07, "loss": 0.0005, "step": 1232 }, { "epoch": 0.61, "logps_train/chosen": -65.02297973632812, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -122.0625, "logps_train/rejected": -237.5404052734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.13789349794387817, "rewards_train/margins": 11.407943904399872, "rewards_train/rejected": -11.54583740234375, "step": 1232 }, { "epoch": 0.61, "learning_rate": 9.381637989993496e-07, "loss": 0.0002, "step": 1233 }, { "epoch": 0.61, "logps_train/chosen": -63.4007682800293, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -239.40090942382812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.003113657236099243, "rewards_train/margins": 11.382338613271713, "rewards_train/rejected": -11.385452270507812, "step": 1233 }, { "epoch": 0.61, "learning_rate": 9.380330024084393e-07, "loss": 0.001, "step": 1234 }, { "epoch": 0.61, "logps_train/chosen": -64.56686401367188, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -244.01364135742188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.11444957554340363, "rewards_train/margins": 11.699024125933647, "rewards_train/rejected": -11.81347370147705, "step": 1234 }, { "epoch": 0.61, "learning_rate": 9.379020767700508e-07, "loss": 0.0002, "step": 1235 }, { "epoch": 0.61, "logps_train/chosen": -63.61906433105469, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -131.75, "logps_train/rejected": -253.088623046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.21167778968811035, "rewards_train/margins": 12.347299814224243, "rewards_train/rejected": -12.135622024536133, "step": 1235 }, { "epoch": 0.61, "learning_rate": 9.377710221227557e-07, "loss": 0.0006, "step": 1236 }, { "epoch": 0.61, "logps_train/chosen": -67.34719848632812, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -249.72320556640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.08728398382663727, "rewards_train/margins": 11.953105077147484, "rewards_train/rejected": -12.040389060974121, "step": 1236 }, { "epoch": 0.61, "learning_rate": 9.376398385051635e-07, "loss": 0.0002, "step": 1237 }, { "epoch": 0.61, "logps_train/chosen": -67.09397888183594, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -245.18222045898438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.05275698006153107, "rewards_train/margins": 11.473472878336906, "rewards_train/rejected": -11.526229858398438, "step": 1237 }, { "epoch": 0.61, "learning_rate": 9.375085259559217e-07, "loss": 0.0005, "step": 1238 }, { "epoch": 0.61, "logps_train/chosen": -66.4852294921875, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -241.14352416992188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.10238044708967209, "rewards_train/margins": 11.556454010307789, "rewards_train/rejected": -11.658834457397461, "step": 1238 }, { "epoch": 0.61, "learning_rate": 9.373770845137161e-07, "loss": 0.0001, "step": 1239 }, { "epoch": 0.61, "logps_train/chosen": -63.446571350097656, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -244.45196533203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.07028444111347198, "rewards_train/margins": 11.837795808911324, "rewards_train/rejected": -11.767511367797852, "step": 1239 }, { "epoch": 0.61, "learning_rate": 9.372455142172699e-07, "loss": 0.0004, "step": 1240 }, { "epoch": 0.61, "logps_train/chosen": -67.47283935546875, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -249.46343994140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.16171252727508545, "rewards_train/margins": 11.911486744880676, "rewards_train/rejected": -12.073199272155762, "step": 1240 }, { "epoch": 0.61, "learning_rate": 9.371138151053448e-07, "loss": 0.0002, "step": 1241 }, { "epoch": 0.61, "logps_train/chosen": -63.68867492675781, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -240.2176513671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.13684512674808502, "rewards_train/margins": 11.459293857216835, "rewards_train/rejected": -11.32244873046875, "step": 1241 }, { "epoch": 0.61, "learning_rate": 9.369819872167404e-07, "loss": 0.0003, "step": 1242 }, { "epoch": 0.61, "logps_train/chosen": -62.163787841796875, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -237.78729248046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1230258196592331, "rewards_train/margins": 11.446088716387749, "rewards_train/rejected": -11.323062896728516, "step": 1242 }, { "epoch": 0.61, "learning_rate": 9.368500305902937e-07, "loss": 0.0002, "step": 1243 }, { "epoch": 0.61, "logps_train/chosen": -63.206790924072266, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -242.34542846679688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.08815861493349075, "rewards_train/margins": 11.836469657719135, "rewards_train/rejected": -11.748311042785645, "step": 1243 }, { "epoch": 0.61, "learning_rate": 9.367179452648804e-07, "loss": 0.0004, "step": 1244 }, { "epoch": 0.61, "logps_train/chosen": -61.74025344848633, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -235.587646484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.24369916319847107, "rewards_train/margins": 11.34274873137474, "rewards_train/rejected": -11.09904956817627, "step": 1244 }, { "epoch": 0.61, "learning_rate": 9.365857312794134e-07, "loss": 0.0005, "step": 1245 }, { "epoch": 0.61, "logps_train/chosen": -67.1909408569336, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -242.91860961914062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.267434298992157, "rewards_train/margins": 11.573401272296906, "rewards_train/rejected": -11.840835571289062, "step": 1245 }, { "epoch": 0.61, "learning_rate": 9.364533886728442e-07, "loss": 0.0002, "step": 1246 }, { "epoch": 0.61, "logps_train/chosen": -62.79624557495117, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -231.86509704589844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.16151303052902222, "rewards_train/margins": 11.059791266918182, "rewards_train/rejected": -10.89827823638916, "step": 1246 }, { "epoch": 0.61, "learning_rate": 9.363209174841615e-07, "loss": 0.0005, "step": 1247 }, { "epoch": 0.61, "logps_train/chosen": -57.47859191894531, "logps_train/ref_chosen": -60.21875, "logps_train/ref_rejected": -114.875, "logps_train/rejected": -220.5344696044922, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.27323484420776367, "rewards_train/margins": 10.83830213546753, "rewards_train/rejected": -10.565067291259766, "step": 1247 }, { "epoch": 0.61, "learning_rate": 9.361883177523923e-07, "loss": 0.0006, "step": 1248 }, { "epoch": 0.61, "logps_train/chosen": -61.23674392700195, "logps_train/ref_chosen": -60.65625, "logps_train/ref_rejected": -117.8125, "logps_train/rejected": -225.6571807861328, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.05900151655077934, "rewards_train/margins": 10.728787828236818, "rewards_train/rejected": -10.787789344787598, "step": 1248 }, { "epoch": 0.61, "learning_rate": 9.360555895166013e-07, "loss": 0.0016, "step": 1249 }, { "epoch": 0.61, "logps_train/chosen": -67.11155700683594, "logps_train/ref_chosen": -67.1875, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -245.89691162109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.006666660308837891, "rewards_train/margins": 11.764716625213623, "rewards_train/rejected": -11.758049964904785, "step": 1249 }, { "epoch": 0.62, "learning_rate": 9.359227328158915e-07, "loss": 0.0001, "step": 1250 }, { "epoch": 0.62, "logps_train/chosen": -65.97854614257812, "logps_train/ref_chosen": -67.5, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -244.3297119140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.153658926486969, "rewards_train/margins": 11.908113539218903, "rewards_train/rejected": -11.754454612731934, "step": 1250 }, { "epoch": 0.62, "learning_rate": 9.357897476894027e-07, "loss": 0.0004, "step": 1251 }, { "epoch": 0.62, "logps_train/chosen": -64.84465026855469, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -245.16000366210938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.050333984196186066, "rewards_train/margins": 11.677484504878521, "rewards_train/rejected": -11.727818489074707, "step": 1251 }, { "epoch": 0.62, "learning_rate": 9.356566341763141e-07, "loss": 0.0001, "step": 1252 }, { "epoch": 0.62, "logps_train/chosen": -64.57455444335938, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -242.7904510498047, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.003256823867559433, "rewards_train/margins": 11.6132888533175, "rewards_train/rejected": -11.616545677185059, "step": 1252 }, { "epoch": 0.62, "learning_rate": 9.35523392315841e-07, "loss": 0.0005, "step": 1253 }, { "epoch": 0.62, "logps_train/chosen": -64.84193420410156, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -241.4191131591797, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.011606909334659576, "rewards_train/margins": 11.605081297457218, "rewards_train/rejected": -11.593474388122559, "step": 1253 }, { "epoch": 0.62, "learning_rate": 9.353900221472379e-07, "loss": 0.0001, "step": 1254 }, { "epoch": 0.62, "logps_train/chosen": -62.27939987182617, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -240.96029663085938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.12611256539821625, "rewards_train/margins": 11.5476785749197, "rewards_train/rejected": -11.421566009521484, "step": 1254 }, { "epoch": 0.62, "learning_rate": 9.352565237097964e-07, "loss": 0.0003, "step": 1255 }, { "epoch": 0.62, "logps_train/chosen": -65.81370544433594, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -241.30078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.18102821707725525, "rewards_train/margins": 11.366140514612198, "rewards_train/rejected": -11.547168731689453, "step": 1255 }, { "epoch": 0.62, "learning_rate": 9.351228970428459e-07, "loss": 0.0008, "step": 1256 }, { "epoch": 0.62, "logps_train/chosen": -66.95368194580078, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -240.580078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.06826863437891006, "rewards_train/margins": 11.4379807934165, "rewards_train/rejected": -11.50624942779541, "step": 1256 }, { "epoch": 0.62, "learning_rate": 9.349891421857538e-07, "loss": 0.0005, "step": 1257 }, { "epoch": 0.62, "logps_train/chosen": -65.85307312011719, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -242.4070281982422, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.021479249000549316, "rewards_train/margins": 11.638793587684631, "rewards_train/rejected": -11.617314338684082, "step": 1257 }, { "epoch": 0.62, "learning_rate": 9.348552591779253e-07, "loss": 0.0006, "step": 1258 }, { "epoch": 0.62, "logps_train/chosen": -62.60426330566406, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -243.4640655517578, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.19506672024726868, "rewards_train/margins": 12.06525257229805, "rewards_train/rejected": -11.870185852050781, "step": 1258 }, { "epoch": 0.62, "learning_rate": 9.347212480588032e-07, "loss": 0.0005, "step": 1259 }, { "epoch": 0.62, "logps_train/chosen": -65.13607788085938, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -243.64219665527344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.07913495600223541, "rewards_train/margins": 11.457741722464561, "rewards_train/rejected": -11.536876678466797, "step": 1259 }, { "epoch": 0.62, "learning_rate": 9.345871088678679e-07, "loss": 0.0001, "step": 1260 }, { "epoch": 0.62, "logps_train/chosen": -70.12760925292969, "logps_train/ref_chosen": -67.25, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -250.53057861328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.28966522216796875, "rewards_train/margins": 11.736927032470703, "rewards_train/rejected": -12.026592254638672, "step": 1260 }, { "epoch": 0.62, "learning_rate": 9.344528416446377e-07, "loss": 0.0003, "step": 1261 }, { "epoch": 0.62, "logps_train/chosen": -63.218788146972656, "logps_train/ref_chosen": -66.9375, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -246.40267944335938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3738003075122833, "rewards_train/margins": 12.106696158647537, "rewards_train/rejected": -11.732895851135254, "step": 1261 }, { "epoch": 0.62, "learning_rate": 9.343184464286691e-07, "loss": 0.0001, "step": 1262 }, { "epoch": 0.62, "logps_train/chosen": -63.765892028808594, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -238.7200927734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.056569959968328476, "rewards_train/margins": 11.475595567375422, "rewards_train/rejected": -11.53216552734375, "step": 1262 }, { "epoch": 0.62, "learning_rate": 9.341839232595553e-07, "loss": 0.0002, "step": 1263 }, { "epoch": 0.62, "logps_train/chosen": -64.42474365234375, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -237.77786254882812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.07772810012102127, "rewards_train/margins": 11.207967929542065, "rewards_train/rejected": -11.285696029663086, "step": 1263 }, { "epoch": 0.62, "learning_rate": 9.34049272176928e-07, "loss": 0.0002, "step": 1264 }, { "epoch": 0.62, "logps_train/chosen": -65.99137878417969, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -240.065185546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.06673180311918259, "rewards_train/margins": 11.667683951556683, "rewards_train/rejected": -11.6009521484375, "step": 1264 }, { "epoch": 0.62, "learning_rate": 9.339144932204562e-07, "loss": 0.0002, "step": 1265 }, { "epoch": 0.62, "logps_train/chosen": -65.6173095703125, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -239.9805908203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.12830740213394165, "rewards_train/margins": 11.078516066074371, "rewards_train/rejected": -10.95020866394043, "step": 1265 }, { "epoch": 0.62, "learning_rate": 9.337795864298468e-07, "loss": 0.0006, "step": 1266 }, { "epoch": 0.62, "logps_train/chosen": -59.56352996826172, "logps_train/ref_chosen": -60.53125, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -237.56764221191406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.09755341708660126, "rewards_train/margins": 11.393869563937187, "rewards_train/rejected": -11.296316146850586, "step": 1266 }, { "epoch": 0.62, "learning_rate": 9.336445518448442e-07, "loss": 0.0011, "step": 1267 }, { "epoch": 0.62, "logps_train/chosen": -62.71009063720703, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -243.04965209960938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.07464523613452911, "rewards_train/margins": 11.912717059254646, "rewards_train/rejected": -11.838071823120117, "step": 1267 }, { "epoch": 0.62, "learning_rate": 9.335093895052304e-07, "loss": 0.0007, "step": 1268 }, { "epoch": 0.62, "logps_train/chosen": -57.79289245605469, "logps_train/ref_chosen": -61.125, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -241.48403930664062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3323809802532196, "rewards_train/margins": 12.116038054227829, "rewards_train/rejected": -11.78365707397461, "step": 1268 }, { "epoch": 0.62, "learning_rate": 9.333740994508253e-07, "loss": 0.0002, "step": 1269 }, { "epoch": 0.62, "logps_train/chosen": -65.22954559326172, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -244.4477996826172, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.06456995010375977, "rewards_train/margins": 11.832252025604248, "rewards_train/rejected": -11.767682075500488, "step": 1269 }, { "epoch": 0.63, "learning_rate": 9.332386817214862e-07, "loss": 0.0003, "step": 1270 }, { "epoch": 0.63, "logps_train/chosen": -60.27527618408203, "logps_train/ref_chosen": -61.53125, "logps_train/ref_rejected": -120.5, "logps_train/rejected": -232.43368530273438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.12632936239242554, "rewards_train/margins": 11.322286546230316, "rewards_train/rejected": -11.19595718383789, "step": 1270 }, { "epoch": 0.63, "learning_rate": 9.331031363571081e-07, "loss": 0.0005, "step": 1271 }, { "epoch": 0.63, "logps_train/chosen": -66.26225280761719, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -243.54229736328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.053169555962085724, "rewards_train/margins": 11.7693627551198, "rewards_train/rejected": -11.716193199157715, "step": 1271 }, { "epoch": 0.63, "learning_rate": 9.329674633976236e-07, "loss": 0.0002, "step": 1272 }, { "epoch": 0.63, "logps_train/chosen": -63.90375518798828, "logps_train/ref_chosen": -62.5625, "logps_train/ref_rejected": -123.1875, "logps_train/rejected": -235.98193359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1331002116203308, "rewards_train/margins": 11.148296654224396, "rewards_train/rejected": -11.281396865844727, "step": 1272 }, { "epoch": 0.63, "learning_rate": 9.328316628830027e-07, "loss": 0.0003, "step": 1273 }, { "epoch": 0.63, "logps_train/chosen": -60.061134338378906, "logps_train/ref_chosen": -62.53125, "logps_train/ref_rejected": -123.3125, "logps_train/rejected": -236.03823852539062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.24779270589351654, "rewards_train/margins": 11.523200497031212, "rewards_train/rejected": -11.275407791137695, "step": 1273 }, { "epoch": 0.63, "learning_rate": 9.326957348532534e-07, "loss": 0.0007, "step": 1274 }, { "epoch": 0.63, "logps_train/chosen": -64.10216522216797, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -235.43258666992188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.05775243043899536, "rewards_train/margins": 11.19051343202591, "rewards_train/rejected": -11.132761001586914, "step": 1274 }, { "epoch": 0.63, "learning_rate": 9.325596793484207e-07, "loss": 0.0006, "step": 1275 }, { "epoch": 0.63, "logps_train/chosen": -63.02365493774414, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -237.23886108398438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.09543702751398087, "rewards_train/margins": 11.178404785692692, "rewards_train/rejected": -11.082967758178711, "step": 1275 }, { "epoch": 0.63, "learning_rate": 9.324234964085878e-07, "loss": 0.0004, "step": 1276 }, { "epoch": 0.63, "logps_train/chosen": -60.32886505126953, "logps_train/ref_chosen": -61.03125, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -238.84243774414062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.06940846145153046, "rewards_train/margins": 11.448135420680046, "rewards_train/rejected": -11.378726959228516, "step": 1276 }, { "epoch": 0.63, "learning_rate": 9.322871860738749e-07, "loss": 0.0006, "step": 1277 }, { "epoch": 0.63, "logps_train/chosen": -65.22554016113281, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -243.20291137695312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.03671377897262573, "rewards_train/margins": 11.54114705324173, "rewards_train/rejected": -11.577860832214355, "step": 1277 }, { "epoch": 0.63, "learning_rate": 9.321507483844402e-07, "loss": 0.0002, "step": 1278 }, { "epoch": 0.63, "logps_train/chosen": -62.249122619628906, "logps_train/ref_chosen": -62.21875, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -232.82826232910156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0032324697822332382, "rewards_train/margins": 11.113920697942376, "rewards_train/rejected": -11.11715316772461, "step": 1278 }, { "epoch": 0.63, "learning_rate": 9.320141833804786e-07, "loss": 0.0012, "step": 1279 }, { "epoch": 0.63, "logps_train/chosen": -66.62200164794922, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -251.0448455810547, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.07728838920593262, "rewards_train/margins": 12.195359468460083, "rewards_train/rejected": -12.272647857666016, "step": 1279 }, { "epoch": 0.63, "learning_rate": 9.318774911022238e-07, "loss": 0.0001, "step": 1280 }, { "epoch": 0.63, "logps_train/chosen": -64.71795654296875, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -239.94720458984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0898132473230362, "rewards_train/margins": 11.556421265006065, "rewards_train/rejected": -11.646234512329102, "step": 1280 }, { "epoch": 0.63, "learning_rate": 9.317406715899457e-07, "loss": 0.0004, "step": 1281 }, { "epoch": 0.63, "logps_train/chosen": -66.91033172607422, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -248.09751892089844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.11149223148822784, "rewards_train/margins": 11.812809869647026, "rewards_train/rejected": -11.924302101135254, "step": 1281 }, { "epoch": 0.63, "learning_rate": 9.316037248839525e-07, "loss": 0.0004, "step": 1282 }, { "epoch": 0.63, "logps_train/chosen": -60.65367126464844, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -242.4453582763672, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.27169305086135864, "rewards_train/margins": 12.11286050081253, "rewards_train/rejected": -11.841167449951172, "step": 1282 }, { "epoch": 0.63, "learning_rate": 9.314666510245897e-07, "loss": 0.0002, "step": 1283 }, { "epoch": 0.63, "logps_train/chosen": -65.77782440185547, "logps_train/ref_chosen": -62.84375, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -246.62185668945312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2921869158744812, "rewards_train/margins": 11.856571018695831, "rewards_train/rejected": -12.148757934570312, "step": 1283 }, { "epoch": 0.63, "learning_rate": 9.313294500522401e-07, "loss": 0.0004, "step": 1284 }, { "epoch": 0.63, "logps_train/chosen": -69.84297943115234, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -248.46450805664062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3097864091396332, "rewards_train/margins": 11.833246618509293, "rewards_train/rejected": -12.143033027648926, "step": 1284 }, { "epoch": 0.63, "learning_rate": 9.311921220073239e-07, "loss": 0.0001, "step": 1285 }, { "epoch": 0.63, "logps_train/chosen": -62.12184524536133, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -250.5630340576172, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1598859429359436, "rewards_train/margins": 12.29514366388321, "rewards_train/rejected": -12.135257720947266, "step": 1285 }, { "epoch": 0.63, "learning_rate": 9.31054666930299e-07, "loss": 0.0004, "step": 1286 }, { "epoch": 0.63, "logps_train/chosen": -66.4961929321289, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -237.11338806152344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.13663044571876526, "rewards_train/margins": 11.174025148153305, "rewards_train/rejected": -11.31065559387207, "step": 1286 }, { "epoch": 0.63, "learning_rate": 9.309170848616606e-07, "loss": 0.0003, "step": 1287 }, { "epoch": 0.63, "logps_train/chosen": -65.21730041503906, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -248.104736328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.13916164636611938, "rewards_train/margins": 11.96447604894638, "rewards_train/rejected": -12.1036376953125, "step": 1287 }, { "epoch": 0.63, "learning_rate": 9.307793758419411e-07, "loss": 0.0002, "step": 1288 }, { "epoch": 0.63, "logps_train/chosen": -67.45582580566406, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -131.875, "logps_train/rejected": -251.70315551757812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.15144214034080505, "rewards_train/margins": 11.835083395242691, "rewards_train/rejected": -11.986525535583496, "step": 1288 }, { "epoch": 0.63, "learning_rate": 9.30641539911711e-07, "loss": 0.0003, "step": 1289 }, { "epoch": 0.63, "logps_train/chosen": -64.97228240966797, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -242.88201904296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.11397640407085419, "rewards_train/margins": 11.545123174786568, "rewards_train/rejected": -11.659099578857422, "step": 1289 }, { "epoch": 0.64, "learning_rate": 9.30503577111577e-07, "loss": 0.0009, "step": 1290 }, { "epoch": 0.64, "logps_train/chosen": -63.91199493408203, "logps_train/ref_chosen": -61.96875, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -242.08045959472656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.19535012543201447, "rewards_train/margins": 11.557179018855095, "rewards_train/rejected": -11.75252914428711, "step": 1290 }, { "epoch": 0.64, "learning_rate": 9.303654874821845e-07, "loss": 0.0002, "step": 1291 }, { "epoch": 0.64, "logps_train/chosen": -65.922607421875, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -131.125, "logps_train/rejected": -256.8615417480469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.11940950900316238, "rewards_train/margins": 12.697408623993397, "rewards_train/rejected": -12.577999114990234, "step": 1291 }, { "epoch": 0.64, "learning_rate": 9.302272710642154e-07, "loss": 0.0005, "step": 1292 }, { "epoch": 0.64, "logps_train/chosen": -65.83045196533203, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -248.63153076171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.002918347716331482, "rewards_train/margins": 11.88455094397068, "rewards_train/rejected": -11.887469291687012, "step": 1292 }, { "epoch": 0.64, "learning_rate": 9.30088927898389e-07, "loss": 0.0012, "step": 1293 }, { "epoch": 0.64, "logps_train/chosen": -64.79346466064453, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -240.96669006347656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.15146560966968536, "rewards_train/margins": 11.448621556162834, "rewards_train/rejected": -11.60008716583252, "step": 1293 }, { "epoch": 0.64, "learning_rate": 9.299504580254625e-07, "loss": 0.0003, "step": 1294 }, { "epoch": 0.64, "logps_train/chosen": -68.1372299194336, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -242.8457794189453, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.15293234586715698, "rewards_train/margins": 11.390337765216827, "rewards_train/rejected": -11.543270111083984, "step": 1294 }, { "epoch": 0.64, "learning_rate": 9.298118614862297e-07, "loss": 0.0003, "step": 1295 }, { "epoch": 0.64, "logps_train/chosen": -64.57472229003906, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -241.68801879882812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.027561619877815247, "rewards_train/margins": 11.818386510014534, "rewards_train/rejected": -11.790824890136719, "step": 1295 }, { "epoch": 0.64, "learning_rate": 9.296731383215223e-07, "loss": 0.0003, "step": 1296 }, { "epoch": 0.64, "logps_train/chosen": -67.08195495605469, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -245.98809814453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.01669103279709816, "rewards_train/margins": 11.67284220084548, "rewards_train/rejected": -11.689533233642578, "step": 1296 }, { "epoch": 0.64, "learning_rate": 9.29534288572209e-07, "loss": 0.0003, "step": 1297 }, { "epoch": 0.64, "logps_train/chosen": -62.69660949707031, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -241.35781860351562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3166184723377228, "rewards_train/margins": 11.894197970628738, "rewards_train/rejected": -11.577579498291016, "step": 1297 }, { "epoch": 0.64, "learning_rate": 9.29395312279196e-07, "loss": 0.0002, "step": 1298 }, { "epoch": 0.64, "logps_train/chosen": -63.20261001586914, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -243.14788818359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.08857140690088272, "rewards_train/margins": 11.644870899617672, "rewards_train/rejected": -11.733442306518555, "step": 1298 }, { "epoch": 0.64, "learning_rate": 9.292562094834264e-07, "loss": 0.001, "step": 1299 }, { "epoch": 0.64, "logps_train/chosen": -60.16345977783203, "logps_train/ref_chosen": -60.09375, "logps_train/ref_rejected": -121.6875, "logps_train/rejected": -235.20718383789062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.006141006946563721, "rewards_train/margins": 11.343044936656952, "rewards_train/rejected": -11.349185943603516, "step": 1299 }, { "epoch": 0.64, "learning_rate": 9.291169802258809e-07, "loss": 0.0002, "step": 1300 }, { "epoch": 0.64, "logps_train/chosen": -63.844696044921875, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -243.24404907226562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.03984704241156578, "rewards_train/margins": 11.925581600517035, "rewards_train/rejected": -11.885734558105469, "step": 1300 }, { "epoch": 0.64, "learning_rate": 9.289776245475776e-07, "loss": 0.0001, "step": 1301 }, { "epoch": 0.64, "logps_train/chosen": -62.326087951660156, "logps_train/ref_chosen": -60.625, "logps_train/ref_rejected": -120.25, "logps_train/rejected": -234.21517944335938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1718178689479828, "rewards_train/margins": 11.224698930978775, "rewards_train/rejected": -11.396516799926758, "step": 1301 }, { "epoch": 0.64, "learning_rate": 9.288381424895715e-07, "loss": 0.0008, "step": 1302 }, { "epoch": 0.64, "logps_train/chosen": -66.41551971435547, "logps_train/ref_chosen": -62.65625, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -242.61355590820312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3748284578323364, "rewards_train/margins": 11.55019748210907, "rewards_train/rejected": -11.925025939941406, "step": 1302 }, { "epoch": 0.64, "learning_rate": 9.286985340929549e-07, "loss": 0.0005, "step": 1303 }, { "epoch": 0.64, "logps_train/chosen": -64.72614288330078, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -119.75, "logps_train/rejected": -235.7979278564453, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.11655974388122559, "rewards_train/margins": 11.488868951797485, "rewards_train/rejected": -11.605428695678711, "step": 1303 }, { "epoch": 0.64, "learning_rate": 9.285587993988573e-07, "loss": 0.0009, "step": 1304 }, { "epoch": 0.64, "logps_train/chosen": -65.95889282226562, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -120.75, "logps_train/rejected": -236.2178497314453, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1158108040690422, "rewards_train/margins": 11.430339403450489, "rewards_train/rejected": -11.546150207519531, "step": 1304 }, { "epoch": 0.64, "learning_rate": 9.284189384484458e-07, "loss": 0.0002, "step": 1305 }, { "epoch": 0.64, "logps_train/chosen": -61.78151321411133, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -239.6717529296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.053196415305137634, "rewards_train/margins": 11.587265476584435, "rewards_train/rejected": -11.534069061279297, "step": 1305 }, { "epoch": 0.64, "learning_rate": 9.282789512829239e-07, "loss": 0.0003, "step": 1306 }, { "epoch": 0.64, "logps_train/chosen": -63.79427719116211, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -243.5892333984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.07293329387903214, "rewards_train/margins": 11.852983377873898, "rewards_train/rejected": -11.92591667175293, "step": 1306 }, { "epoch": 0.64, "learning_rate": 9.281388379435331e-07, "loss": 0.0003, "step": 1307 }, { "epoch": 0.64, "logps_train/chosen": -66.03258514404297, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -249.79476928710938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.11658872663974762, "rewards_train/margins": 12.036130771040916, "rewards_train/rejected": -12.152719497680664, "step": 1307 }, { "epoch": 0.64, "learning_rate": 9.279985984715518e-07, "loss": 0.0003, "step": 1308 }, { "epoch": 0.64, "logps_train/chosen": -66.38406372070312, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -241.19705200195312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3386508822441101, "rewards_train/margins": 11.437643826007843, "rewards_train/rejected": -11.776294708251953, "step": 1308 }, { "epoch": 0.64, "learning_rate": 9.278582329082951e-07, "loss": 0.0031, "step": 1309 }, { "epoch": 0.64, "logps_train/chosen": -64.76956176757812, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -250.191650390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.14980736374855042, "rewards_train/margins": 11.970870584249496, "rewards_train/rejected": -12.120677947998047, "step": 1309 }, { "epoch": 0.65, "learning_rate": 9.277177412951161e-07, "loss": 0.0005, "step": 1310 }, { "epoch": 0.65, "logps_train/chosen": -59.83836364746094, "logps_train/ref_chosen": -60.46875, "logps_train/ref_rejected": -119.6875, "logps_train/rejected": -230.39218139648438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.06328266859054565, "rewards_train/margins": 11.136289298534393, "rewards_train/rejected": -11.073006629943848, "step": 1310 }, { "epoch": 0.65, "learning_rate": 9.275771236734045e-07, "loss": 0.0003, "step": 1311 }, { "epoch": 0.65, "logps_train/chosen": -66.56886291503906, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -249.34912109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.44399571418762207, "rewards_train/margins": 11.997067213058472, "rewards_train/rejected": -12.441062927246094, "step": 1311 }, { "epoch": 0.65, "learning_rate": 9.274363800845868e-07, "loss": 0.0007, "step": 1312 }, { "epoch": 0.65, "logps_train/chosen": -69.46512603759766, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -244.60720825195312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3648228645324707, "rewards_train/margins": 11.508153438568115, "rewards_train/rejected": -11.872976303100586, "step": 1312 }, { "epoch": 0.65, "learning_rate": 9.272955105701274e-07, "loss": 0.0002, "step": 1313 }, { "epoch": 0.65, "logps_train/chosen": -69.88484191894531, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -256.90838623046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3341878056526184, "rewards_train/margins": 12.429504096508026, "rewards_train/rejected": -12.763691902160645, "step": 1313 }, { "epoch": 0.65, "learning_rate": 9.271545151715273e-07, "loss": 0.0008, "step": 1314 }, { "epoch": 0.65, "logps_train/chosen": -65.32736206054688, "logps_train/ref_chosen": -62.4375, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -248.0914306640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.28932851552963257, "rewards_train/margins": 11.814930975437164, "rewards_train/rejected": -12.104259490966797, "step": 1314 }, { "epoch": 0.65, "learning_rate": 9.270133939303247e-07, "loss": 0.0001, "step": 1315 }, { "epoch": 0.65, "logps_train/chosen": -66.85316467285156, "logps_train/ref_chosen": -67.3125, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -250.13900756835938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.04280877858400345, "rewards_train/margins": 12.25622297078371, "rewards_train/rejected": -12.213414192199707, "step": 1315 }, { "epoch": 0.65, "learning_rate": 9.268721468880949e-07, "loss": 0.0001, "step": 1316 }, { "epoch": 0.65, "logps_train/chosen": -62.179603576660156, "logps_train/ref_chosen": -62.40625, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -236.9364013671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.022322848439216614, "rewards_train/margins": 11.416840746998787, "rewards_train/rejected": -11.39451789855957, "step": 1316 }, { "epoch": 0.65, "learning_rate": 9.267307740864501e-07, "loss": 0.0018, "step": 1317 }, { "epoch": 0.65, "logps_train/chosen": -66.9380111694336, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -127.5625, "logps_train/rejected": -250.69598388671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.12163271009922028, "rewards_train/margins": 12.189469203352928, "rewards_train/rejected": -12.311101913452148, "step": 1317 }, { "epoch": 0.65, "learning_rate": 9.2658927556704e-07, "loss": 0.0005, "step": 1318 }, { "epoch": 0.65, "logps_train/chosen": -64.05413818359375, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -121.5, "logps_train/rejected": -237.38650512695312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.10809950530529022, "rewards_train/margins": 11.479673817753792, "rewards_train/rejected": -11.587773323059082, "step": 1318 }, { "epoch": 0.65, "learning_rate": 9.264476513715505e-07, "loss": 0.0005, "step": 1319 }, { "epoch": 0.65, "logps_train/chosen": -66.59101867675781, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -246.89227294921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.26203176379203796, "rewards_train/margins": 11.89399316906929, "rewards_train/rejected": -12.156024932861328, "step": 1319 }, { "epoch": 0.65, "learning_rate": 9.263059015417054e-07, "loss": 0.0001, "step": 1320 }, { "epoch": 0.65, "logps_train/chosen": -62.1363410949707, "logps_train/ref_chosen": -62.46875, "logps_train/ref_rejected": -119.9375, "logps_train/rejected": -235.95120239257812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.033924393355846405, "rewards_train/margins": 11.632854752242565, "rewards_train/rejected": -11.598930358886719, "step": 1320 }, { "epoch": 0.65, "learning_rate": 9.261640261192653e-07, "loss": 0.0003, "step": 1321 }, { "epoch": 0.65, "logps_train/chosen": -62.35115432739258, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -246.21664428710938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.23978698253631592, "rewards_train/margins": 12.1486576795578, "rewards_train/rejected": -11.908870697021484, "step": 1321 }, { "epoch": 0.65, "learning_rate": 9.260220251460273e-07, "loss": 0.0001, "step": 1322 }, { "epoch": 0.65, "logps_train/chosen": -62.93179702758789, "logps_train/ref_chosen": -62.1875, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -244.07098388671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.07413668185472488, "rewards_train/margins": 11.955717138946056, "rewards_train/rejected": -12.029853820800781, "step": 1322 }, { "epoch": 0.65, "learning_rate": 9.258798986638259e-07, "loss": 0.0003, "step": 1323 }, { "epoch": 0.65, "logps_train/chosen": -66.90126037597656, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -251.40859985351562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.07664929330348969, "rewards_train/margins": 12.187356367707253, "rewards_train/rejected": -12.264005661010742, "step": 1323 }, { "epoch": 0.65, "learning_rate": 9.257376467145328e-07, "loss": 0.0001, "step": 1324 }, { "epoch": 0.65, "logps_train/chosen": -70.8133773803711, "logps_train/ref_chosen": -67.9375, "logps_train/ref_rejected": -132.0, "logps_train/rejected": -258.4163818359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.28607380390167236, "rewards_train/margins": 12.356836199760437, "rewards_train/rejected": -12.64291000366211, "step": 1324 }, { "epoch": 0.65, "learning_rate": 9.255952693400562e-07, "loss": 0.0, "step": 1325 }, { "epoch": 0.65, "logps_train/chosen": -64.19647979736328, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -249.98056030273438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.09119173139333725, "rewards_train/margins": 12.166591130197048, "rewards_train/rejected": -12.075399398803711, "step": 1325 }, { "epoch": 0.65, "learning_rate": 9.254527665823412e-07, "loss": 0.0003, "step": 1326 }, { "epoch": 0.65, "logps_train/chosen": -68.74608612060547, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -254.63955688476562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1836419403553009, "rewards_train/margins": 12.253945797681808, "rewards_train/rejected": -12.43758773803711, "step": 1326 }, { "epoch": 0.65, "learning_rate": 9.253101384833706e-07, "loss": 0.0005, "step": 1327 }, { "epoch": 0.65, "logps_train/chosen": -66.99046325683594, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -242.54275512695312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2541244328022003, "rewards_train/margins": 11.433890551328659, "rewards_train/rejected": -11.68801498413086, "step": 1327 }, { "epoch": 0.65, "learning_rate": 9.251673850851631e-07, "loss": 0.0006, "step": 1328 }, { "epoch": 0.65, "logps_train/chosen": -65.98612213134766, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -238.79248046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.11531103402376175, "rewards_train/margins": 11.270823113620281, "rewards_train/rejected": -11.386134147644043, "step": 1328 }, { "epoch": 0.65, "learning_rate": 9.250245064297752e-07, "loss": 0.0004, "step": 1329 }, { "epoch": 0.65, "logps_train/chosen": -66.5528335571289, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -250.90206909179688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0345802903175354, "rewards_train/margins": 12.179262101650238, "rewards_train/rejected": -12.213842391967773, "step": 1329 }, { "epoch": 0.65, "learning_rate": 9.248815025592995e-07, "loss": 0.0005, "step": 1330 }, { "epoch": 0.65, "logps_train/chosen": -65.8134765625, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -248.36058044433594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.18269024789333344, "rewards_train/margins": 11.995946303009987, "rewards_train/rejected": -12.17863655090332, "step": 1330 }, { "epoch": 0.66, "learning_rate": 9.247383735158664e-07, "loss": 0.0004, "step": 1331 }, { "epoch": 0.66, "logps_train/chosen": -61.468101501464844, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -119.0625, "logps_train/rejected": -234.03773498535156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.08976197987794876, "rewards_train/margins": 11.588260896503925, "rewards_train/rejected": -11.498498916625977, "step": 1331 }, { "epoch": 0.66, "learning_rate": 9.245951193416424e-07, "loss": 0.0012, "step": 1332 }, { "epoch": 0.66, "logps_train/chosen": -66.46529388427734, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -120.375, "logps_train/rejected": -238.21435546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.13427342474460602, "rewards_train/margins": 11.652934178709984, "rewards_train/rejected": -11.78720760345459, "step": 1332 }, { "epoch": 0.66, "learning_rate": 9.24451740078831e-07, "loss": 0.0003, "step": 1333 }, { "epoch": 0.66, "logps_train/chosen": -62.36244583129883, "logps_train/ref_chosen": -61.5625, "logps_train/ref_rejected": -118.375, "logps_train/rejected": -234.2506103515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0794086754322052, "rewards_train/margins": 11.509763687849045, "rewards_train/rejected": -11.58917236328125, "step": 1333 }, { "epoch": 0.66, "learning_rate": 9.24308235769673e-07, "loss": 0.0012, "step": 1334 }, { "epoch": 0.66, "logps_train/chosen": -60.930274963378906, "logps_train/ref_chosen": -61.4375, "logps_train/ref_rejected": -122.4375, "logps_train/rejected": -233.86378479003906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.052211690694093704, "rewards_train/margins": 11.19557373598218, "rewards_train/rejected": -11.143362045288086, "step": 1334 }, { "epoch": 0.66, "learning_rate": 9.241646064564455e-07, "loss": 0.0006, "step": 1335 }, { "epoch": 0.66, "logps_train/chosen": -63.846763610839844, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -238.555419921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.007844969630241394, "rewards_train/margins": 11.487100556492805, "rewards_train/rejected": -11.494945526123047, "step": 1335 }, { "epoch": 0.66, "learning_rate": 9.240208521814629e-07, "loss": 0.0003, "step": 1336 }, { "epoch": 0.66, "logps_train/chosen": -67.41334533691406, "logps_train/ref_chosen": -66.9375, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -250.07199096679688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0462176650762558, "rewards_train/margins": 12.183930650353432, "rewards_train/rejected": -12.230148315429688, "step": 1336 }, { "epoch": 0.66, "learning_rate": 9.238769729870761e-07, "loss": 0.0001, "step": 1337 }, { "epoch": 0.66, "logps_train/chosen": -64.53935241699219, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -121.125, "logps_train/rejected": -240.204345703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.06837937235832214, "rewards_train/margins": 11.974215477705002, "rewards_train/rejected": -11.90583610534668, "step": 1337 }, { "epoch": 0.66, "learning_rate": 9.237329689156728e-07, "loss": 0.0001, "step": 1338 }, { "epoch": 0.66, "logps_train/chosen": -66.47901916503906, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -248.15487670898438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.13984449207782745, "rewards_train/margins": 11.848005697131157, "rewards_train/rejected": -11.987850189208984, "step": 1338 }, { "epoch": 0.66, "learning_rate": 9.235888400096776e-07, "loss": 0.0002, "step": 1339 }, { "epoch": 0.66, "logps_train/chosen": -62.02552795410156, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -243.30322265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.07440042495727539, "rewards_train/margins": 11.912291049957275, "rewards_train/rejected": -11.837890625, "step": 1339 }, { "epoch": 0.66, "learning_rate": 9.234445863115517e-07, "loss": 0.0002, "step": 1340 }, { "epoch": 0.66, "logps_train/chosen": -65.54374694824219, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -242.91531372070312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.16516563296318054, "rewards_train/margins": 11.665623933076859, "rewards_train/rejected": -11.830789566040039, "step": 1340 }, { "epoch": 0.66, "learning_rate": 9.233002078637935e-07, "loss": 0.0001, "step": 1341 }, { "epoch": 0.66, "logps_train/chosen": -64.84716796875, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -249.88211059570312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.04902277886867523, "rewards_train/margins": 12.297681912779808, "rewards_train/rejected": -12.248659133911133, "step": 1341 }, { "epoch": 0.66, "learning_rate": 9.231557047089378e-07, "loss": 0.0, "step": 1342 }, { "epoch": 0.66, "logps_train/chosen": -64.98072814941406, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -245.16018676757812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.09640900790691376, "rewards_train/margins": 12.04465501010418, "rewards_train/rejected": -11.948246002197266, "step": 1342 }, { "epoch": 0.66, "learning_rate": 9.230110768895561e-07, "loss": 0.0001, "step": 1343 }, { "epoch": 0.66, "logps_train/chosen": -61.268699645996094, "logps_train/ref_chosen": -61.4375, "logps_train/ref_rejected": -122.3125, "logps_train/rejected": -242.3975067138672, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.014878466725349426, "rewards_train/margins": 12.021767809987068, "rewards_train/rejected": -12.006889343261719, "step": 1343 }, { "epoch": 0.66, "learning_rate": 9.228663244482566e-07, "loss": 0.0002, "step": 1344 }, { "epoch": 0.66, "logps_train/chosen": -69.26335906982422, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -254.83799743652344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.30665814876556396, "rewards_train/margins": 12.344524025917053, "rewards_train/rejected": -12.651182174682617, "step": 1344 }, { "epoch": 0.66, "learning_rate": 9.227214474276848e-07, "loss": 0.0, "step": 1345 }, { "epoch": 0.66, "logps_train/chosen": -60.61992645263672, "logps_train/ref_chosen": -61.46875, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -251.17144775390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0844917893409729, "rewards_train/margins": 12.568920195102692, "rewards_train/rejected": -12.484428405761719, "step": 1345 }, { "epoch": 0.66, "learning_rate": 9.22576445870522e-07, "loss": 0.0001, "step": 1346 }, { "epoch": 0.66, "logps_train/chosen": -70.7187271118164, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -255.46340942382812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.42372798919677734, "rewards_train/margins": 12.025055885314941, "rewards_train/rejected": -12.448783874511719, "step": 1346 }, { "epoch": 0.66, "learning_rate": 9.224313198194869e-07, "loss": 0.0012, "step": 1347 }, { "epoch": 0.66, "logps_train/chosen": -67.60855102539062, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -127.5625, "logps_train/rejected": -250.5262451171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.21717806160449982, "rewards_train/margins": 12.081783577799797, "rewards_train/rejected": -12.298961639404297, "step": 1347 }, { "epoch": 0.66, "learning_rate": 9.222860693173342e-07, "loss": 0.0001, "step": 1348 }, { "epoch": 0.66, "logps_train/chosen": -69.33958435058594, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -247.595703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.35016927123069763, "rewards_train/margins": 11.946658045053482, "rewards_train/rejected": -12.29682731628418, "step": 1348 }, { "epoch": 0.66, "learning_rate": 9.221406944068563e-07, "loss": 0.0004, "step": 1349 }, { "epoch": 0.66, "logps_train/chosen": -63.9904899597168, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -248.859130859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.05837283656001091, "rewards_train/margins": 12.174705844372511, "rewards_train/rejected": -12.1163330078125, "step": 1349 }, { "epoch": 0.66, "learning_rate": 9.219951951308813e-07, "loss": 0.0002, "step": 1350 }, { "epoch": 0.66, "logps_train/chosen": -68.33697509765625, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -239.78338623046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4126529097557068, "rewards_train/margins": 11.363439619541168, "rewards_train/rejected": -11.776092529296875, "step": 1350 }, { "epoch": 0.67, "learning_rate": 9.218495715322743e-07, "loss": 0.0005, "step": 1351 }, { "epoch": 0.67, "logps_train/chosen": -66.91349029541016, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -244.15036010742188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2586827576160431, "rewards_train/margins": 11.704106777906418, "rewards_train/rejected": -11.962789535522461, "step": 1351 }, { "epoch": 0.67, "learning_rate": 9.217038236539369e-07, "loss": 0.0004, "step": 1352 }, { "epoch": 0.67, "logps_train/chosen": -62.958736419677734, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -247.33349609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.024780631065368652, "rewards_train/margins": 12.312916159629822, "rewards_train/rejected": -12.288135528564453, "step": 1352 }, { "epoch": 0.67, "learning_rate": 9.215579515388074e-07, "loss": 0.0006, "step": 1353 }, { "epoch": 0.67, "logps_train/chosen": -66.60076141357422, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -246.4603271484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.036603860557079315, "rewards_train/margins": 12.001581124961376, "rewards_train/rejected": -11.964977264404297, "step": 1353 }, { "epoch": 0.67, "learning_rate": 9.214119552298611e-07, "loss": 0.0001, "step": 1354 }, { "epoch": 0.67, "logps_train/chosen": -67.96931457519531, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -241.60975646972656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3727133274078369, "rewards_train/margins": 11.492561101913452, "rewards_train/rejected": -11.865274429321289, "step": 1354 }, { "epoch": 0.67, "learning_rate": 9.21265834770109e-07, "loss": 0.0003, "step": 1355 }, { "epoch": 0.67, "logps_train/chosen": -63.95079803466797, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -242.32046508789062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.025304336100816727, "rewards_train/margins": 11.882132034748793, "rewards_train/rejected": -11.90743637084961, "step": 1355 }, { "epoch": 0.67, "learning_rate": 9.211195902025993e-07, "loss": 0.0004, "step": 1356 }, { "epoch": 0.67, "logps_train/chosen": -65.22807312011719, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -244.61355590820312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.18618609011173248, "rewards_train/margins": 11.76013158261776, "rewards_train/rejected": -11.946317672729492, "step": 1356 }, { "epoch": 0.67, "learning_rate": 9.209732215704169e-07, "loss": 0.0002, "step": 1357 }, { "epoch": 0.67, "logps_train/chosen": -66.87921905517578, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -133.5, "logps_train/rejected": -261.6263427734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.12678897380828857, "rewards_train/margins": 12.684282422065735, "rewards_train/rejected": -12.811071395874023, "step": 1357 }, { "epoch": 0.67, "learning_rate": 9.208267289166827e-07, "loss": 0.0003, "step": 1358 }, { "epoch": 0.67, "logps_train/chosen": -64.81861877441406, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -253.09022521972656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.02555980160832405, "rewards_train/margins": 12.4921983666718, "rewards_train/rejected": -12.466638565063477, "step": 1358 }, { "epoch": 0.67, "learning_rate": 9.206801122845546e-07, "loss": 0.0001, "step": 1359 }, { "epoch": 0.67, "logps_train/chosen": -61.98493957519531, "logps_train/ref_chosen": -62.875, "logps_train/ref_rejected": -119.5, "logps_train/rejected": -235.79299926757812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.08939670026302338, "rewards_train/margins": 11.716548189520836, "rewards_train/rejected": -11.627151489257812, "step": 1359 }, { "epoch": 0.67, "learning_rate": 9.205333717172267e-07, "loss": 0.0001, "step": 1360 }, { "epoch": 0.67, "logps_train/chosen": -66.31477355957031, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -249.46266174316406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.023860838264226913, "rewards_train/margins": 12.074065301567316, "rewards_train/rejected": -12.097926139831543, "step": 1360 }, { "epoch": 0.67, "learning_rate": 9.203865072579298e-07, "loss": 0.0001, "step": 1361 }, { "epoch": 0.67, "logps_train/chosen": -64.34613037109375, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -121.875, "logps_train/rejected": -236.49185180664062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.16198065876960754, "rewards_train/margins": 11.301655739545822, "rewards_train/rejected": -11.46363639831543, "step": 1361 }, { "epoch": 0.67, "learning_rate": 9.202395189499312e-07, "loss": 0.0005, "step": 1362 }, { "epoch": 0.67, "logps_train/chosen": -70.14437866210938, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -250.53292846679688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.37571775913238525, "rewards_train/margins": 11.872107863426208, "rewards_train/rejected": -12.247825622558594, "step": 1362 }, { "epoch": 0.67, "learning_rate": 9.200924068365348e-07, "loss": 0.0001, "step": 1363 }, { "epoch": 0.67, "logps_train/chosen": -65.80233001708984, "logps_train/ref_chosen": -67.9375, "logps_train/ref_rejected": -131.25, "logps_train/rejected": -250.78330993652344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.21063615381717682, "rewards_train/margins": 12.164015784859657, "rewards_train/rejected": -11.95337963104248, "step": 1363 }, { "epoch": 0.67, "learning_rate": 9.199451709610804e-07, "loss": 0.0001, "step": 1364 }, { "epoch": 0.67, "logps_train/chosen": -65.26258850097656, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -246.7620849609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.09994038939476013, "rewards_train/margins": 11.902245432138443, "rewards_train/rejected": -12.002185821533203, "step": 1364 }, { "epoch": 0.67, "learning_rate": 9.197978113669451e-07, "loss": 0.0003, "step": 1365 }, { "epoch": 0.67, "logps_train/chosen": -65.72148895263672, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -255.36192321777344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.01890213042497635, "rewards_train/margins": 12.63477199524641, "rewards_train/rejected": -12.653674125671387, "step": 1365 }, { "epoch": 0.67, "learning_rate": 9.196503280975419e-07, "loss": 0.0007, "step": 1366 }, { "epoch": 0.67, "logps_train/chosen": -67.35467529296875, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -251.27700805664062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3197694718837738, "rewards_train/margins": 12.209786802530289, "rewards_train/rejected": -12.529556274414062, "step": 1366 }, { "epoch": 0.67, "learning_rate": 9.195027211963202e-07, "loss": 0.0002, "step": 1367 }, { "epoch": 0.67, "logps_train/chosen": -64.6307373046875, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -246.28726196289062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.06707800924777985, "rewards_train/margins": 12.02878724038601, "rewards_train/rejected": -12.095865249633789, "step": 1367 }, { "epoch": 0.67, "learning_rate": 9.193549907067663e-07, "loss": 0.0065, "step": 1368 }, { "epoch": 0.67, "logps_train/chosen": -67.096923828125, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -127.5625, "logps_train/rejected": -255.5118408203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.006786961108446121, "rewards_train/margins": 12.78570495173335, "rewards_train/rejected": -12.792491912841797, "step": 1368 }, { "epoch": 0.67, "learning_rate": 9.192071366724024e-07, "loss": 0.0, "step": 1369 }, { "epoch": 0.67, "logps_train/chosen": -65.05653381347656, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -246.98904418945312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.10765539109706879, "rewards_train/margins": 11.939196720719337, "rewards_train/rejected": -12.046852111816406, "step": 1369 }, { "epoch": 0.67, "learning_rate": 9.190591591367873e-07, "loss": 0.0006, "step": 1370 }, { "epoch": 0.67, "logps_train/chosen": -68.20880126953125, "logps_train/ref_chosen": -67.75, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -253.06777954101562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.048956602811813354, "rewards_train/margins": 12.406064301729202, "rewards_train/rejected": -12.455020904541016, "step": 1370 }, { "epoch": 0.68, "learning_rate": 9.189110581435162e-07, "loss": 0.0001, "step": 1371 }, { "epoch": 0.68, "logps_train/chosen": -64.75895690917969, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -239.54837036132812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.10450956970453262, "rewards_train/margins": 11.3832853063941, "rewards_train/rejected": -11.487794876098633, "step": 1371 }, { "epoch": 0.68, "learning_rate": 9.187628337362207e-07, "loss": 0.0003, "step": 1372 }, { "epoch": 0.68, "logps_train/chosen": -68.99784088134766, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -252.83587646484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.30989131331443787, "rewards_train/margins": 12.09430256485939, "rewards_train/rejected": -12.404193878173828, "step": 1372 }, { "epoch": 0.68, "learning_rate": 9.186144859585685e-07, "loss": 0.0002, "step": 1373 }, { "epoch": 0.68, "logps_train/chosen": -66.44142150878906, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -248.2686004638672, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.11167094856500626, "rewards_train/margins": 11.939701579511166, "rewards_train/rejected": -12.051372528076172, "step": 1373 }, { "epoch": 0.68, "learning_rate": 9.184660148542642e-07, "loss": 0.0001, "step": 1374 }, { "epoch": 0.68, "logps_train/chosen": -67.55322265625, "logps_train/ref_chosen": -67.6875, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -247.8562774658203, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.013622120954096317, "rewards_train/margins": 12.054571942426264, "rewards_train/rejected": -12.040949821472168, "step": 1374 }, { "epoch": 0.68, "learning_rate": 9.183174204670482e-07, "loss": 0.0001, "step": 1375 }, { "epoch": 0.68, "logps_train/chosen": -66.06706237792969, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -253.83074951171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2153979241847992, "rewards_train/margins": 12.32353487610817, "rewards_train/rejected": -12.538932800292969, "step": 1375 }, { "epoch": 0.68, "learning_rate": 9.181687028406975e-07, "loss": 0.0002, "step": 1376 }, { "epoch": 0.68, "logps_train/chosen": -65.09819030761719, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -250.13084411621094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0026418641209602356, "rewards_train/margins": 12.21415501087904, "rewards_train/rejected": -12.216796875, "step": 1376 }, { "epoch": 0.68, "learning_rate": 9.180198620190249e-07, "loss": 0.0004, "step": 1377 }, { "epoch": 0.68, "logps_train/chosen": -68.31932067871094, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -255.47012329101562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2802232503890991, "rewards_train/margins": 12.18685781955719, "rewards_train/rejected": -12.467081069946289, "step": 1377 }, { "epoch": 0.68, "learning_rate": 9.178708980458806e-07, "loss": 0.0014, "step": 1378 }, { "epoch": 0.68, "logps_train/chosen": -63.73705291748047, "logps_train/ref_chosen": -61.1875, "logps_train/ref_rejected": -118.9375, "logps_train/rejected": -238.81631469726562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2540275752544403, "rewards_train/margins": 11.737954884767532, "rewards_train/rejected": -11.991982460021973, "step": 1378 }, { "epoch": 0.68, "learning_rate": 9.177218109651499e-07, "loss": 0.0006, "step": 1379 }, { "epoch": 0.68, "logps_train/chosen": -61.592185974121094, "logps_train/ref_chosen": -60.59375, "logps_train/ref_rejected": -119.75, "logps_train/rejected": -234.41444396972656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.09852512180805206, "rewards_train/margins": 11.365626260638237, "rewards_train/rejected": -11.464151382446289, "step": 1379 }, { "epoch": 0.68, "learning_rate": 9.175726008207551e-07, "loss": 0.0011, "step": 1380 }, { "epoch": 0.68, "logps_train/chosen": -64.5488052368164, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -246.86038208007812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2130882441997528, "rewards_train/margins": 12.168707102537155, "rewards_train/rejected": -11.955618858337402, "step": 1380 }, { "epoch": 0.68, "learning_rate": 9.174232676566543e-07, "loss": 0.0001, "step": 1381 }, { "epoch": 0.68, "logps_train/chosen": -66.8752212524414, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -242.33023071289062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.31633085012435913, "rewards_train/margins": 11.568595945835114, "rewards_train/rejected": -11.884926795959473, "step": 1381 }, { "epoch": 0.68, "learning_rate": 9.172738115168422e-07, "loss": 0.0003, "step": 1382 }, { "epoch": 0.68, "logps_train/chosen": -67.26736450195312, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -246.25257873535156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.15783971548080444, "rewards_train/margins": 11.848961889743805, "rewards_train/rejected": -12.00680160522461, "step": 1382 }, { "epoch": 0.68, "learning_rate": 9.171242324453497e-07, "loss": 0.0007, "step": 1383 }, { "epoch": 0.68, "logps_train/chosen": -62.26532745361328, "logps_train/ref_chosen": -61.34375, "logps_train/ref_rejected": -117.875, "logps_train/rejected": -235.5118408203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.09088832139968872, "rewards_train/margins": 11.675531089305878, "rewards_train/rejected": -11.766419410705566, "step": 1383 }, { "epoch": 0.68, "learning_rate": 9.169745304862435e-07, "loss": 0.0004, "step": 1384 }, { "epoch": 0.68, "logps_train/chosen": -64.99732971191406, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -233.78994750976562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.000807054340839386, "rewards_train/margins": 11.27462362498045, "rewards_train/rejected": -11.275430679321289, "step": 1384 }, { "epoch": 0.68, "learning_rate": 9.168247056836269e-07, "loss": 0.0006, "step": 1385 }, { "epoch": 0.68, "logps_train/chosen": -64.8073501586914, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -246.45309448242188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.10989001393318176, "rewards_train/margins": 12.13410571217537, "rewards_train/rejected": -12.024215698242188, "step": 1385 }, { "epoch": 0.68, "learning_rate": 9.166747580816396e-07, "loss": 0.0002, "step": 1386 }, { "epoch": 0.68, "logps_train/chosen": -66.46498107910156, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -249.71084594726562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.09610775113105774, "rewards_train/margins": 12.035718649625778, "rewards_train/rejected": -12.131826400756836, "step": 1386 }, { "epoch": 0.68, "learning_rate": 9.165246877244568e-07, "loss": 0.0003, "step": 1387 }, { "epoch": 0.68, "logps_train/chosen": -66.20695495605469, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -130.375, "logps_train/rejected": -256.4605712890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.185636505484581, "rewards_train/margins": 12.421747222542763, "rewards_train/rejected": -12.607383728027344, "step": 1387 }, { "epoch": 0.68, "learning_rate": 9.163744946562905e-07, "loss": 0.0001, "step": 1388 }, { "epoch": 0.68, "logps_train/chosen": -66.4551010131836, "logps_train/ref_chosen": -62.46875, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -252.1058349609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3996610641479492, "rewards_train/margins": 12.12293529510498, "rewards_train/rejected": -12.52259635925293, "step": 1388 }, { "epoch": 0.68, "learning_rate": 9.162241789213884e-07, "loss": 0.0009, "step": 1389 }, { "epoch": 0.68, "logps_train/chosen": -67.13087463378906, "logps_train/ref_chosen": -67.5, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -256.144775390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.035154566168785095, "rewards_train/margins": 12.571313127875328, "rewards_train/rejected": -12.536158561706543, "step": 1389 }, { "epoch": 0.68, "learning_rate": 9.160737405640346e-07, "loss": 0.0002, "step": 1390 }, { "epoch": 0.68, "logps_train/chosen": -66.37954711914062, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -121.1875, "logps_train/rejected": -244.96630859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.058316007256507874, "rewards_train/margins": 12.318196520209312, "rewards_train/rejected": -12.37651252746582, "step": 1390 }, { "epoch": 0.68, "learning_rate": 9.159231796285493e-07, "loss": 0.0002, "step": 1391 }, { "epoch": 0.68, "logps_train/chosen": -64.69090270996094, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -254.6916961669922, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.10771642625331879, "rewards_train/margins": 12.882892474532127, "rewards_train/rejected": -12.775176048278809, "step": 1391 }, { "epoch": 0.69, "learning_rate": 9.157724961592887e-07, "loss": 0.0001, "step": 1392 }, { "epoch": 0.69, "logps_train/chosen": -58.864322662353516, "logps_train/ref_chosen": -61.75, "logps_train/ref_rejected": -118.1875, "logps_train/rejected": -230.67312622070312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.28788426518440247, "rewards_train/margins": 11.536739856004715, "rewards_train/rejected": -11.248855590820312, "step": 1392 }, { "epoch": 0.69, "learning_rate": 9.156216902006451e-07, "loss": 0.0002, "step": 1393 }, { "epoch": 0.69, "logps_train/chosen": -64.26758575439453, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -241.80145263671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.06784552335739136, "rewards_train/margins": 11.75687712430954, "rewards_train/rejected": -11.689031600952148, "step": 1393 }, { "epoch": 0.69, "learning_rate": 9.154707617970472e-07, "loss": 0.0005, "step": 1394 }, { "epoch": 0.69, "logps_train/chosen": -64.39518737792969, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -247.62606811523438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.03805375099182129, "rewards_train/margins": 12.27543044090271, "rewards_train/rejected": -12.313484191894531, "step": 1394 }, { "epoch": 0.69, "learning_rate": 9.153197109929593e-07, "loss": 0.0003, "step": 1395 }, { "epoch": 0.69, "logps_train/chosen": -63.63857650756836, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -245.87010192871094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.09146462380886078, "rewards_train/margins": 12.084821328520775, "rewards_train/rejected": -11.993356704711914, "step": 1395 }, { "epoch": 0.69, "learning_rate": 9.151685378328821e-07, "loss": 0.0001, "step": 1396 }, { "epoch": 0.69, "logps_train/chosen": -64.97920227050781, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -250.94837951660156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.08502981811761856, "rewards_train/margins": 12.422990582883358, "rewards_train/rejected": -12.508020401000977, "step": 1396 }, { "epoch": 0.69, "learning_rate": 9.150172423613523e-07, "loss": 0.0001, "step": 1397 }, { "epoch": 0.69, "logps_train/chosen": -63.32399368286133, "logps_train/ref_chosen": -61.125, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -245.9784698486328, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.21904507279396057, "rewards_train/margins": 12.12650641798973, "rewards_train/rejected": -12.345551490783691, "step": 1397 }, { "epoch": 0.69, "learning_rate": 9.148658246229424e-07, "loss": 0.0001, "step": 1398 }, { "epoch": 0.69, "logps_train/chosen": -65.39665222167969, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -252.10533142089844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.12263942509889603, "rewards_train/margins": 12.509297139942646, "rewards_train/rejected": -12.38665771484375, "step": 1398 }, { "epoch": 0.69, "learning_rate": 9.147142846622611e-07, "loss": 0.0001, "step": 1399 }, { "epoch": 0.69, "logps_train/chosen": -68.12879943847656, "logps_train/ref_chosen": -67.625, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -253.130859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.05208902433514595, "rewards_train/margins": 12.207483004778624, "rewards_train/rejected": -12.25957202911377, "step": 1399 }, { "epoch": 0.69, "learning_rate": 9.145626225239531e-07, "loss": 0.0001, "step": 1400 }, { "epoch": 0.69, "logps_train/chosen": -61.756404876708984, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -245.9628143310547, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1188417375087738, "rewards_train/margins": 12.185730546712875, "rewards_train/rejected": -12.066888809204102, "step": 1400 }, { "epoch": 0.69, "learning_rate": 9.144108382526992e-07, "loss": 0.0003, "step": 1401 }, { "epoch": 0.69, "logps_train/chosen": -64.37979125976562, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -248.2490234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.08604466915130615, "rewards_train/margins": 12.369394659996033, "rewards_train/rejected": -12.283349990844727, "step": 1401 }, { "epoch": 0.69, "learning_rate": 9.14258931893216e-07, "loss": 0.0005, "step": 1402 }, { "epoch": 0.69, "logps_train/chosen": -62.07099914550781, "logps_train/ref_chosen": -61.96875, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -242.57406616210938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.010933086276054382, "rewards_train/margins": 11.83812601864338, "rewards_train/rejected": -11.849059104919434, "step": 1402 }, { "epoch": 0.69, "learning_rate": 9.141069034902562e-07, "loss": 0.0005, "step": 1403 }, { "epoch": 0.69, "logps_train/chosen": -67.39100646972656, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -242.22824096679688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.28514569997787476, "rewards_train/margins": 11.642902433872223, "rewards_train/rejected": -11.928048133850098, "step": 1403 }, { "epoch": 0.69, "learning_rate": 9.139547530886083e-07, "loss": 0.0008, "step": 1404 }, { "epoch": 0.69, "logps_train/chosen": -64.65565490722656, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -246.66954040527344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.019151732325553894, "rewards_train/margins": 12.109640166163445, "rewards_train/rejected": -12.09048843383789, "step": 1404 }, { "epoch": 0.69, "learning_rate": 9.138024807330969e-07, "loss": 0.0002, "step": 1405 }, { "epoch": 0.69, "logps_train/chosen": -69.982421875, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -250.2117919921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.44384831190109253, "rewards_train/margins": 12.002037346363068, "rewards_train/rejected": -12.44588565826416, "step": 1405 }, { "epoch": 0.69, "learning_rate": 9.136500864685824e-07, "loss": 0.0001, "step": 1406 }, { "epoch": 0.69, "logps_train/chosen": -66.36640167236328, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -247.90968322753906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1424262821674347, "rewards_train/margins": 12.121687144041061, "rewards_train/rejected": -12.264113426208496, "step": 1406 }, { "epoch": 0.69, "learning_rate": 9.13497570339961e-07, "loss": 0.0001, "step": 1407 }, { "epoch": 0.69, "logps_train/chosen": -60.89537811279297, "logps_train/ref_chosen": -60.53125, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -242.33370971679688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.037633687257766724, "rewards_train/margins": 11.97093316912651, "rewards_train/rejected": -12.008566856384277, "step": 1407 }, { "epoch": 0.69, "learning_rate": 9.133449323921654e-07, "loss": 0.0001, "step": 1408 }, { "epoch": 0.69, "logps_train/chosen": -67.73184967041016, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -249.5111846923828, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3699132800102234, "rewards_train/margins": 11.797855198383331, "rewards_train/rejected": -12.167768478393555, "step": 1408 }, { "epoch": 0.69, "learning_rate": 9.131921726701636e-07, "loss": 0.0004, "step": 1409 }, { "epoch": 0.69, "logps_train/chosen": -69.47980499267578, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -248.60006713867188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3277653753757477, "rewards_train/margins": 11.806363195180893, "rewards_train/rejected": -12.13412857055664, "step": 1409 }, { "epoch": 0.69, "learning_rate": 9.130392912189595e-07, "loss": 0.0001, "step": 1410 }, { "epoch": 0.69, "logps_train/chosen": -63.53357696533203, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -243.87051391601562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.02894354611635208, "rewards_train/margins": 11.855763904750347, "rewards_train/rejected": -11.8847074508667, "step": 1410 }, { "epoch": 0.69, "learning_rate": 9.128862880835933e-07, "loss": 0.0016, "step": 1411 }, { "epoch": 0.69, "logps_train/chosen": -66.27584838867188, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -253.37380981445312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.17035779356956482, "rewards_train/margins": 12.575226694345474, "rewards_train/rejected": -12.745584487915039, "step": 1411 }, { "epoch": 0.7, "learning_rate": 9.127331633091404e-07, "loss": 0.0004, "step": 1412 }, { "epoch": 0.7, "logps_train/chosen": -65.97626495361328, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -246.58929443359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.21359336376190186, "rewards_train/margins": 12.110716938972473, "rewards_train/rejected": -12.324310302734375, "step": 1412 }, { "epoch": 0.7, "learning_rate": 9.125799169407127e-07, "loss": 0.0002, "step": 1413 }, { "epoch": 0.7, "logps_train/chosen": -65.23095703125, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -245.982666015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.05624959617853165, "rewards_train/margins": 11.799634955823421, "rewards_train/rejected": -11.855884552001953, "step": 1413 }, { "epoch": 0.7, "learning_rate": 9.124265490234573e-07, "loss": 0.0005, "step": 1414 }, { "epoch": 0.7, "logps_train/chosen": -58.77250671386719, "logps_train/ref_chosen": -61.0625, "logps_train/ref_rejected": -119.375, "logps_train/rejected": -239.4902801513672, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.23090356588363647, "rewards_train/margins": 12.242773950099945, "rewards_train/rejected": -12.011870384216309, "step": 1414 }, { "epoch": 0.7, "learning_rate": 9.122730596025578e-07, "loss": 0.0004, "step": 1415 }, { "epoch": 0.7, "logps_train/chosen": -66.51930236816406, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -258.02557373046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.05749652534723282, "rewards_train/margins": 12.832754634320736, "rewards_train/rejected": -12.890251159667969, "step": 1415 }, { "epoch": 0.7, "learning_rate": 9.121194487232329e-07, "loss": 0.0, "step": 1416 }, { "epoch": 0.7, "logps_train/chosen": -64.5352783203125, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -248.1069793701172, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.16280502080917358, "rewards_train/margins": 12.162150919437408, "rewards_train/rejected": -12.324955940246582, "step": 1416 }, { "epoch": 0.7, "learning_rate": 9.119657164307375e-07, "loss": 0.0002, "step": 1417 }, { "epoch": 0.7, "logps_train/chosen": -62.044334411621094, "logps_train/ref_chosen": -62.8125, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -251.3399658203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0762305036187172, "rewards_train/margins": 12.459151722490788, "rewards_train/rejected": -12.38292121887207, "step": 1417 }, { "epoch": 0.7, "learning_rate": 9.118118627703622e-07, "loss": 0.0003, "step": 1418 }, { "epoch": 0.7, "logps_train/chosen": -67.40447998046875, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -256.0220947265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.23932525515556335, "rewards_train/margins": 12.551411896944046, "rewards_train/rejected": -12.79073715209961, "step": 1418 }, { "epoch": 0.7, "learning_rate": 9.116578877874334e-07, "loss": 0.0001, "step": 1419 }, { "epoch": 0.7, "logps_train/chosen": -66.47798156738281, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -254.9536895751953, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.24193930625915527, "rewards_train/margins": 12.621007204055786, "rewards_train/rejected": -12.862946510314941, "step": 1419 }, { "epoch": 0.7, "learning_rate": 9.115037915273129e-07, "loss": 0.0005, "step": 1420 }, { "epoch": 0.7, "logps_train/chosen": -65.51487731933594, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -132.375, "logps_train/rejected": -258.69317626953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.06032903492450714, "rewards_train/margins": 12.693221643567085, "rewards_train/rejected": -12.632892608642578, "step": 1420 }, { "epoch": 0.7, "learning_rate": 9.113495740353989e-07, "loss": 0.0002, "step": 1421 }, { "epoch": 0.7, "logps_train/chosen": -60.760841369628906, "logps_train/ref_chosen": -61.0625, "logps_train/ref_rejected": -121.8125, "logps_train/rejected": -240.6943359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.030507519841194153, "rewards_train/margins": 11.91849561035633, "rewards_train/rejected": -11.887988090515137, "step": 1421 }, { "epoch": 0.7, "learning_rate": 9.111952353571246e-07, "loss": 0.0002, "step": 1422 }, { "epoch": 0.7, "logps_train/chosen": -67.01731872558594, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -245.66510009765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3564678430557251, "rewards_train/margins": 11.931673407554626, "rewards_train/rejected": -12.288141250610352, "step": 1422 }, { "epoch": 0.7, "learning_rate": 9.110407755379595e-07, "loss": 0.0004, "step": 1423 }, { "epoch": 0.7, "logps_train/chosen": -61.92820739746094, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -246.68170166015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.141993448138237, "rewards_train/margins": 12.161435052752495, "rewards_train/rejected": -12.019441604614258, "step": 1423 }, { "epoch": 0.7, "learning_rate": 9.108861946234081e-07, "loss": 0.0006, "step": 1424 }, { "epoch": 0.7, "logps_train/chosen": -67.30711364746094, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -249.34774780273438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.13998828828334808, "rewards_train/margins": 11.916711464524269, "rewards_train/rejected": -12.056699752807617, "step": 1424 }, { "epoch": 0.7, "learning_rate": 9.107314926590112e-07, "loss": 0.0007, "step": 1425 }, { "epoch": 0.7, "logps_train/chosen": -67.00265502929688, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -130.625, "logps_train/rejected": -256.9155578613281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.25251150131225586, "rewards_train/margins": 12.374005794525146, "rewards_train/rejected": -12.626517295837402, "step": 1425 }, { "epoch": 0.7, "learning_rate": 9.105766696903452e-07, "loss": 0.0002, "step": 1426 }, { "epoch": 0.7, "logps_train/chosen": -63.43783950805664, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -253.28579711914062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.025210291147232056, "rewards_train/margins": 12.708086878061295, "rewards_train/rejected": -12.682876586914062, "step": 1426 }, { "epoch": 0.7, "learning_rate": 9.104217257630218e-07, "loss": 0.0007, "step": 1427 }, { "epoch": 0.7, "logps_train/chosen": -64.54647064208984, "logps_train/ref_chosen": -62.84375, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -252.50318908691406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1690022498369217, "rewards_train/margins": 12.51491193473339, "rewards_train/rejected": -12.683914184570312, "step": 1427 }, { "epoch": 0.7, "learning_rate": 9.102666609226884e-07, "loss": 0.0012, "step": 1428 }, { "epoch": 0.7, "logps_train/chosen": -64.82524871826172, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -249.79733276367188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.20772023499011993, "rewards_train/margins": 12.372111842036247, "rewards_train/rejected": -12.579832077026367, "step": 1428 }, { "epoch": 0.7, "learning_rate": 9.101114752150285e-07, "loss": 0.0002, "step": 1429 }, { "epoch": 0.7, "logps_train/chosen": -68.26631927490234, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -250.23760986328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.27936673164367676, "rewards_train/margins": 12.116562604904175, "rewards_train/rejected": -12.395929336547852, "step": 1429 }, { "epoch": 0.7, "learning_rate": 9.099561686857605e-07, "loss": 0.0002, "step": 1430 }, { "epoch": 0.7, "logps_train/chosen": -63.959388732910156, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -251.72763061523438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.07831206917762756, "rewards_train/margins": 12.085025638341904, "rewards_train/rejected": -12.163337707519531, "step": 1430 }, { "epoch": 0.7, "learning_rate": 9.09800741380639e-07, "loss": 0.0003, "step": 1431 }, { "epoch": 0.7, "logps_train/chosen": -65.73603820800781, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -250.30987548828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.06901389360427856, "rewards_train/margins": 12.392150580883026, "rewards_train/rejected": -12.461164474487305, "step": 1431 }, { "epoch": 0.71, "learning_rate": 9.096451933454539e-07, "loss": 0.0001, "step": 1432 }, { "epoch": 0.71, "logps_train/chosen": -64.28309631347656, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -245.71298217773438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.040980786085128784, "rewards_train/margins": 12.006783038377762, "rewards_train/rejected": -12.04776382446289, "step": 1432 }, { "epoch": 0.71, "learning_rate": 9.094895246260306e-07, "loss": 0.0002, "step": 1433 }, { "epoch": 0.71, "logps_train/chosen": -66.17672729492188, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -245.12001037597656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2786100506782532, "rewards_train/margins": 11.968107402324677, "rewards_train/rejected": -12.24671745300293, "step": 1433 }, { "epoch": 0.71, "learning_rate": 9.093337352682302e-07, "loss": 0.0003, "step": 1434 }, { "epoch": 0.71, "logps_train/chosen": -61.1033935546875, "logps_train/ref_chosen": -61.5, "logps_train/ref_rejected": -121.9375, "logps_train/rejected": -236.4736328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.04097917675971985, "rewards_train/margins": 11.49327352643013, "rewards_train/rejected": -11.45229434967041, "step": 1434 }, { "epoch": 0.71, "learning_rate": 9.091778253179494e-07, "loss": 0.0011, "step": 1435 }, { "epoch": 0.71, "logps_train/chosen": -62.760643005371094, "logps_train/ref_chosen": -61.53125, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -246.0069580078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.12376941740512848, "rewards_train/margins": 12.092063292860985, "rewards_train/rejected": -12.215832710266113, "step": 1435 }, { "epoch": 0.71, "learning_rate": 9.090217948211201e-07, "loss": 0.0003, "step": 1436 }, { "epoch": 0.71, "logps_train/chosen": -65.2040023803711, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -254.70843505859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.047763641923666, "rewards_train/margins": 12.734914597123861, "rewards_train/rejected": -12.687150955200195, "step": 1436 }, { "epoch": 0.71, "learning_rate": 9.088656438237101e-07, "loss": 0.0001, "step": 1437 }, { "epoch": 0.71, "logps_train/chosen": -62.512855529785156, "logps_train/ref_chosen": -61.96875, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -250.0372772216797, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.05597323924303055, "rewards_train/margins": 12.395605854690075, "rewards_train/rejected": -12.451579093933105, "step": 1437 }, { "epoch": 0.71, "learning_rate": 9.087093723717225e-07, "loss": 0.0004, "step": 1438 }, { "epoch": 0.71, "logps_train/chosen": -70.55065155029297, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -252.0746307373047, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4145866930484772, "rewards_train/margins": 11.942974418401718, "rewards_train/rejected": -12.357561111450195, "step": 1438 }, { "epoch": 0.71, "learning_rate": 9.08552980511196e-07, "loss": 0.0005, "step": 1439 }, { "epoch": 0.71, "logps_train/chosen": -67.54180908203125, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -134.5, "logps_train/rejected": -270.2536315917969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.11858505755662918, "rewards_train/margins": 13.452922396361828, "rewards_train/rejected": -13.571507453918457, "step": 1439 }, { "epoch": 0.71, "learning_rate": 9.083964682882046e-07, "loss": 0.0002, "step": 1440 }, { "epoch": 0.71, "logps_train/chosen": -68.11670684814453, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -246.65847778320312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4899420738220215, "rewards_train/margins": 11.94734239578247, "rewards_train/rejected": -12.437284469604492, "step": 1440 }, { "epoch": 0.71, "learning_rate": 9.082398357488578e-07, "loss": 0.0008, "step": 1441 }, { "epoch": 0.71, "logps_train/chosen": -64.1840591430664, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -249.75645446777344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.07929454743862152, "rewards_train/margins": 12.377210274338722, "rewards_train/rejected": -12.456504821777344, "step": 1441 }, { "epoch": 0.71, "learning_rate": 9.080830829393006e-07, "loss": 0.0002, "step": 1442 }, { "epoch": 0.71, "logps_train/chosen": -66.68499755859375, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -252.47854614257812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0012633651494979858, "rewards_train/margins": 12.389268174767494, "rewards_train/rejected": -12.390531539916992, "step": 1442 }, { "epoch": 0.71, "learning_rate": 9.079262099057138e-07, "loss": 0.0001, "step": 1443 }, { "epoch": 0.71, "logps_train/chosen": -64.9578857421875, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -249.1388397216797, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.13367897272109985, "rewards_train/margins": 12.438065946102142, "rewards_train/rejected": -12.571744918823242, "step": 1443 }, { "epoch": 0.71, "learning_rate": 9.077692166943129e-07, "loss": 0.0001, "step": 1444 }, { "epoch": 0.71, "logps_train/chosen": -67.49667358398438, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -131.25, "logps_train/rejected": -262.1951904296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.14751853048801422, "rewards_train/margins": 12.951688393950462, "rewards_train/rejected": -13.099206924438477, "step": 1444 }, { "epoch": 0.71, "learning_rate": 9.076121033513491e-07, "loss": 0.0012, "step": 1445 }, { "epoch": 0.71, "logps_train/chosen": -68.93507385253906, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -260.60333251953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.18379124999046326, "rewards_train/margins": 12.975762277841568, "rewards_train/rejected": -13.159553527832031, "step": 1445 }, { "epoch": 0.71, "learning_rate": 9.074548699231092e-07, "loss": 0.0001, "step": 1446 }, { "epoch": 0.71, "logps_train/chosen": -67.12911987304688, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -255.33595275878906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2289278209209442, "rewards_train/margins": 12.660184651613235, "rewards_train/rejected": -12.88911247253418, "step": 1446 }, { "epoch": 0.71, "learning_rate": 9.072975164559153e-07, "loss": 0.0001, "step": 1447 }, { "epoch": 0.71, "logps_train/chosen": -64.76792907714844, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -249.66793823242188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0847521647810936, "rewards_train/margins": 12.237900651991367, "rewards_train/rejected": -12.322652816772461, "step": 1447 }, { "epoch": 0.71, "learning_rate": 9.071400429961246e-07, "loss": 0.0001, "step": 1448 }, { "epoch": 0.71, "logps_train/chosen": -64.45829772949219, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -122.6875, "logps_train/rejected": -248.72933959960938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.021464437246322632, "rewards_train/margins": 12.585162073373795, "rewards_train/rejected": -12.606626510620117, "step": 1448 }, { "epoch": 0.71, "learning_rate": 9.069824495901299e-07, "loss": 0.0001, "step": 1449 }, { "epoch": 0.71, "logps_train/chosen": -66.45903015136719, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -252.27581787109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.19238707423210144, "rewards_train/margins": 12.515761882066727, "rewards_train/rejected": -12.708148956298828, "step": 1449 }, { "epoch": 0.71, "learning_rate": 9.068247362843595e-07, "loss": 0.0002, "step": 1450 }, { "epoch": 0.71, "logps_train/chosen": -61.20458221435547, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -247.36859130859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2783212959766388, "rewards_train/margins": 12.370991736650467, "rewards_train/rejected": -12.092670440673828, "step": 1450 }, { "epoch": 0.71, "learning_rate": 9.066669031252766e-07, "loss": 0.0013, "step": 1451 }, { "epoch": 0.71, "logps_train/chosen": -67.46989440917969, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -257.8282470703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3416188955307007, "rewards_train/margins": 12.869040131568909, "rewards_train/rejected": -13.21065902709961, "step": 1451 }, { "epoch": 0.71, "learning_rate": 9.065089501593799e-07, "loss": 0.0004, "step": 1452 }, { "epoch": 0.71, "logps_train/chosen": -65.66732025146484, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -133.125, "logps_train/rejected": -264.02374267578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.056477975100278854, "rewards_train/margins": 13.034372854977846, "rewards_train/rejected": -13.090850830078125, "step": 1452 }, { "epoch": 0.72, "learning_rate": 9.063508774332035e-07, "loss": 0.0, "step": 1453 }, { "epoch": 0.72, "logps_train/chosen": -65.77672576904297, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -253.27418518066406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.006116664037108421, "rewards_train/margins": 12.617811953648925, "rewards_train/rejected": -12.611695289611816, "step": 1453 }, { "epoch": 0.72, "learning_rate": 9.061926849933165e-07, "loss": 0.0005, "step": 1454 }, { "epoch": 0.72, "logps_train/chosen": -69.66067504882812, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -258.89306640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3412629961967468, "rewards_train/margins": 12.520994007587433, "rewards_train/rejected": -12.86225700378418, "step": 1454 }, { "epoch": 0.72, "learning_rate": 9.060343728863238e-07, "loss": 0.0014, "step": 1455 }, { "epoch": 0.72, "logps_train/chosen": -66.92425537109375, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -257.6450500488281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.11952458322048187, "rewards_train/margins": 12.86890734732151, "rewards_train/rejected": -12.988431930541992, "step": 1455 }, { "epoch": 0.72, "learning_rate": 9.05875941158865e-07, "loss": 0.0003, "step": 1456 }, { "epoch": 0.72, "logps_train/chosen": -68.39321899414062, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -245.751708984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3556305170059204, "rewards_train/margins": 11.719541907310486, "rewards_train/rejected": -12.075172424316406, "step": 1456 }, { "epoch": 0.72, "learning_rate": 9.057173898576152e-07, "loss": 0.0004, "step": 1457 }, { "epoch": 0.72, "logps_train/chosen": -66.95905303955078, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -256.1402282714844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3171449601650238, "rewards_train/margins": 12.480959326028824, "rewards_train/rejected": -12.798104286193848, "step": 1457 }, { "epoch": 0.72, "learning_rate": 9.055587190292846e-07, "loss": 0.0002, "step": 1458 }, { "epoch": 0.72, "logps_train/chosen": -62.719207763671875, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -240.97799682617188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.017581313848495483, "rewards_train/margins": 11.593896239995956, "rewards_train/rejected": -11.576314926147461, "step": 1458 }, { "epoch": 0.72, "learning_rate": 9.053999287206187e-07, "loss": 0.0004, "step": 1459 }, { "epoch": 0.72, "logps_train/chosen": -66.5545654296875, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -256.9779968261719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1656130850315094, "rewards_train/margins": 12.671003431081772, "rewards_train/rejected": -12.836616516113281, "step": 1459 }, { "epoch": 0.72, "learning_rate": 9.052410189783983e-07, "loss": 0.0003, "step": 1460 }, { "epoch": 0.72, "logps_train/chosen": -66.34363555908203, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -118.5625, "logps_train/rejected": -241.88247680664062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.29993972182273865, "rewards_train/margins": 12.02922573685646, "rewards_train/rejected": -12.3291654586792, "step": 1460 }, { "epoch": 0.72, "learning_rate": 9.050819898494393e-07, "loss": 0.0004, "step": 1461 }, { "epoch": 0.72, "logps_train/chosen": -67.74528503417969, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -259.13409423828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4184732437133789, "rewards_train/margins": 12.928727149963379, "rewards_train/rejected": -13.347200393676758, "step": 1461 }, { "epoch": 0.72, "learning_rate": 9.049228413805926e-07, "loss": 0.0002, "step": 1462 }, { "epoch": 0.72, "logps_train/chosen": -66.12956237792969, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -255.2470245361328, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.048551466315984726, "rewards_train/margins": 12.60525331273675, "rewards_train/rejected": -12.653804779052734, "step": 1462 }, { "epoch": 0.72, "learning_rate": 9.047635736187445e-07, "loss": 0.0001, "step": 1463 }, { "epoch": 0.72, "logps_train/chosen": -64.5491943359375, "logps_train/ref_chosen": -62.84375, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -251.69029235839844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1710820496082306, "rewards_train/margins": 12.38740012049675, "rewards_train/rejected": -12.55848217010498, "step": 1463 }, { "epoch": 0.72, "learning_rate": 9.046041866108165e-07, "loss": 0.0002, "step": 1464 }, { "epoch": 0.72, "logps_train/chosen": -69.900146484375, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -251.0653839111328, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.37560999393463135, "rewards_train/margins": 12.145870566368103, "rewards_train/rejected": -12.521480560302734, "step": 1464 }, { "epoch": 0.72, "learning_rate": 9.044446804037649e-07, "loss": 0.0008, "step": 1465 }, { "epoch": 0.72, "logps_train/chosen": -64.33474731445312, "logps_train/ref_chosen": -63.59375, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -250.29168701171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0735134705901146, "rewards_train/margins": 12.59344819933176, "rewards_train/rejected": -12.666961669921875, "step": 1465 }, { "epoch": 0.72, "learning_rate": 9.042850550445813e-07, "loss": 0.0005, "step": 1466 }, { "epoch": 0.72, "logps_train/chosen": -64.0755844116211, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -246.50921630859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1454002559185028, "rewards_train/margins": 12.057620793581009, "rewards_train/rejected": -12.203021049499512, "step": 1466 }, { "epoch": 0.72, "learning_rate": 9.041253105802925e-07, "loss": 0.0002, "step": 1467 }, { "epoch": 0.72, "logps_train/chosen": -63.376678466796875, "logps_train/ref_chosen": -61.78125, "logps_train/ref_rejected": -122.1875, "logps_train/rejected": -245.95806884765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1598357856273651, "rewards_train/margins": 12.214683562517166, "rewards_train/rejected": -12.374519348144531, "step": 1467 }, { "epoch": 0.72, "learning_rate": 9.039654470579603e-07, "loss": 0.0004, "step": 1468 }, { "epoch": 0.72, "logps_train/chosen": -66.57090759277344, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -252.66876220703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1582142412662506, "rewards_train/margins": 12.57165178656578, "rewards_train/rejected": -12.729866027832031, "step": 1468 }, { "epoch": 0.72, "learning_rate": 9.038054645246814e-07, "loss": 0.0001, "step": 1469 }, { "epoch": 0.72, "logps_train/chosen": -67.0206527709961, "logps_train/ref_chosen": -60.96875, "logps_train/ref_rejected": -120.8125, "logps_train/rejected": -245.72122192382812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.604555606842041, "rewards_train/margins": 11.888076305389404, "rewards_train/rejected": -12.492631912231445, "step": 1469 }, { "epoch": 0.72, "learning_rate": 9.03645363027588e-07, "loss": 0.0006, "step": 1470 }, { "epoch": 0.72, "logps_train/chosen": -65.50638580322266, "logps_train/ref_chosen": -61.84375, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -247.60671997070312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.36484748125076294, "rewards_train/margins": 12.094360053539276, "rewards_train/rejected": -12.459207534790039, "step": 1470 }, { "epoch": 0.72, "learning_rate": 9.03485142613847e-07, "loss": 0.0002, "step": 1471 }, { "epoch": 0.72, "logps_train/chosen": -71.46206665039062, "logps_train/ref_chosen": -67.1875, "logps_train/ref_rejected": -130.875, "logps_train/rejected": -258.19219970703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4267728626728058, "rewards_train/margins": 12.310514658689499, "rewards_train/rejected": -12.737287521362305, "step": 1471 }, { "epoch": 0.72, "learning_rate": 9.033248033306601e-07, "loss": 0.0002, "step": 1472 }, { "epoch": 0.72, "logps_train/chosen": -65.62368774414062, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -122.6875, "logps_train/rejected": -251.8861083984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2308252453804016, "rewards_train/margins": 12.688987910747528, "rewards_train/rejected": -12.91981315612793, "step": 1472 }, { "epoch": 0.73, "learning_rate": 9.031643452252648e-07, "loss": 0.0006, "step": 1473 }, { "epoch": 0.73, "logps_train/chosen": -65.32966613769531, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -254.40890502929688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.15996861457824707, "rewards_train/margins": 12.737271070480347, "rewards_train/rejected": -12.897239685058594, "step": 1473 }, { "epoch": 0.73, "learning_rate": 9.03003768344933e-07, "loss": 0.0002, "step": 1474 }, { "epoch": 0.73, "logps_train/chosen": -66.2157211303711, "logps_train/ref_chosen": -62.0625, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -243.35733032226562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4143458604812622, "rewards_train/margins": 11.821287989616394, "rewards_train/rejected": -12.235633850097656, "step": 1474 }, { "epoch": 0.73, "learning_rate": 9.028430727369715e-07, "loss": 0.0004, "step": 1475 }, { "epoch": 0.73, "logps_train/chosen": -64.79251861572266, "logps_train/ref_chosen": -63.59375, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -261.4591369628906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1196327656507492, "rewards_train/margins": 13.097864106297493, "rewards_train/rejected": -13.217496871948242, "step": 1475 }, { "epoch": 0.73, "learning_rate": 9.026822584487226e-07, "loss": 0.0, "step": 1476 }, { "epoch": 0.73, "logps_train/chosen": -64.6769027709961, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -246.43067932128906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.09771949797868729, "rewards_train/margins": 12.322107009589672, "rewards_train/rejected": -12.41982650756836, "step": 1476 }, { "epoch": 0.73, "learning_rate": 9.025213255275632e-07, "loss": 0.0008, "step": 1477 }, { "epoch": 0.73, "logps_train/chosen": -62.971710205078125, "logps_train/ref_chosen": -62.46875, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -254.06320190429688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.05146753787994385, "rewards_train/margins": 12.655439734458923, "rewards_train/rejected": -12.706907272338867, "step": 1477 }, { "epoch": 0.73, "learning_rate": 9.023602740209051e-07, "loss": 0.0003, "step": 1478 }, { "epoch": 0.73, "logps_train/chosen": -66.62962341308594, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -249.11764526367188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.11303587257862091, "rewards_train/margins": 12.213182732462883, "rewards_train/rejected": -12.326218605041504, "step": 1478 }, { "epoch": 0.73, "learning_rate": 9.021991039761951e-07, "loss": 0.0006, "step": 1479 }, { "epoch": 0.73, "logps_train/chosen": -66.58538818359375, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -246.976318359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3085876703262329, "rewards_train/margins": 12.111847281455994, "rewards_train/rejected": -12.420434951782227, "step": 1479 }, { "epoch": 0.73, "learning_rate": 9.020378154409154e-07, "loss": 0.0003, "step": 1480 }, { "epoch": 0.73, "logps_train/chosen": -66.02182006835938, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -253.35792541503906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.018277544528245926, "rewards_train/margins": 12.613933939486742, "rewards_train/rejected": -12.595656394958496, "step": 1480 }, { "epoch": 0.73, "learning_rate": 9.018764084625822e-07, "loss": 0.0001, "step": 1481 }, { "epoch": 0.73, "logps_train/chosen": -69.5230941772461, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -259.25494384765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4516257345676422, "rewards_train/margins": 12.786125272512436, "rewards_train/rejected": -13.237751007080078, "step": 1481 }, { "epoch": 0.73, "learning_rate": 9.017148830887474e-07, "loss": 0.0002, "step": 1482 }, { "epoch": 0.73, "logps_train/chosen": -73.07781982421875, "logps_train/ref_chosen": -69.9375, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -263.6356506347656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.31173673272132874, "rewards_train/margins": 13.101242393255234, "rewards_train/rejected": -13.412979125976562, "step": 1482 }, { "epoch": 0.73, "learning_rate": 9.015532393669973e-07, "loss": 0.0003, "step": 1483 }, { "epoch": 0.73, "logps_train/chosen": -66.6446533203125, "logps_train/ref_chosen": -67.25, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -260.75775146484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.05916746333241463, "rewards_train/margins": 13.309356290847063, "rewards_train/rejected": -13.250188827514648, "step": 1483 }, { "epoch": 0.73, "learning_rate": 9.013914773449535e-07, "loss": 0.0, "step": 1484 }, { "epoch": 0.73, "logps_train/chosen": -66.83952331542969, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -249.45663452148438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.15338566899299622, "rewards_train/margins": 11.985539883375168, "rewards_train/rejected": -12.138925552368164, "step": 1484 }, { "epoch": 0.73, "learning_rate": 9.012295970702717e-07, "loss": 0.0002, "step": 1485 }, { "epoch": 0.73, "logps_train/chosen": -66.81410217285156, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -256.3327331542969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2236216515302658, "rewards_train/margins": 12.775665953755379, "rewards_train/rejected": -12.999287605285645, "step": 1485 }, { "epoch": 0.73, "learning_rate": 9.010675985906434e-07, "loss": 0.0001, "step": 1486 }, { "epoch": 0.73, "logps_train/chosen": -63.70793151855469, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -250.48358154296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.02372260019183159, "rewards_train/margins": 12.535769511014223, "rewards_train/rejected": -12.559492111206055, "step": 1486 }, { "epoch": 0.73, "learning_rate": 9.009054819537943e-07, "loss": 0.0001, "step": 1487 }, { "epoch": 0.73, "logps_train/chosen": -64.06642150878906, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -256.52203369140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.09438267350196838, "rewards_train/margins": 13.028714567422867, "rewards_train/rejected": -12.934331893920898, "step": 1487 }, { "epoch": 0.73, "learning_rate": 9.007432472074847e-07, "loss": 0.0, "step": 1488 }, { "epoch": 0.73, "logps_train/chosen": -67.80776977539062, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -253.43067932128906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.14298421144485474, "rewards_train/margins": 12.293052852153778, "rewards_train/rejected": -12.436037063598633, "step": 1488 }, { "epoch": 0.73, "learning_rate": 9.005808943995105e-07, "loss": 0.0049, "step": 1489 }, { "epoch": 0.73, "logps_train/chosen": -69.10928344726562, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -257.96240234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.23290136456489563, "rewards_train/margins": 12.580819338560104, "rewards_train/rejected": -12.813720703125, "step": 1489 }, { "epoch": 0.73, "learning_rate": 9.004184235777019e-07, "loss": 0.0001, "step": 1490 }, { "epoch": 0.73, "logps_train/chosen": -70.55911254882812, "logps_train/ref_chosen": -67.6875, "logps_train/ref_rejected": -132.5, "logps_train/rejected": -262.30682373046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2843781113624573, "rewards_train/margins": 12.700551927089691, "rewards_train/rejected": -12.984930038452148, "step": 1490 }, { "epoch": 0.73, "learning_rate": 9.002558347899237e-07, "loss": 0.0001, "step": 1491 }, { "epoch": 0.73, "logps_train/chosen": -64.85087585449219, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -254.33718872070312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.06213851273059845, "rewards_train/margins": 12.79472641646862, "rewards_train/rejected": -12.856864929199219, "step": 1491 }, { "epoch": 0.73, "learning_rate": 9.000931280840758e-07, "loss": 0.0001, "step": 1492 }, { "epoch": 0.73, "logps_train/chosen": -65.78953552246094, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -252.257568359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.13644862174987793, "rewards_train/margins": 12.453126192092896, "rewards_train/rejected": -12.589574813842773, "step": 1492 }, { "epoch": 0.74, "learning_rate": 8.999303035080925e-07, "loss": 0.0003, "step": 1493 }, { "epoch": 0.74, "logps_train/chosen": -62.63602066040039, "logps_train/ref_chosen": -61.3125, "logps_train/ref_rejected": -119.8125, "logps_train/rejected": -240.91458129882812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.13093605637550354, "rewards_train/margins": 11.978343576192856, "rewards_train/rejected": -12.10927963256836, "step": 1493 }, { "epoch": 0.74, "learning_rate": 8.997673611099432e-07, "loss": 0.0008, "step": 1494 }, { "epoch": 0.74, "logps_train/chosen": -65.34292602539062, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -251.03306579589844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1675139218568802, "rewards_train/margins": 12.708515241742134, "rewards_train/rejected": -12.541001319885254, "step": 1494 }, { "epoch": 0.74, "learning_rate": 8.996043009376319e-07, "loss": 0.0, "step": 1495 }, { "epoch": 0.74, "logps_train/chosen": -65.39027404785156, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -256.475341796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2255021035671234, "rewards_train/margins": 12.79634752869606, "rewards_train/rejected": -13.021849632263184, "step": 1495 }, { "epoch": 0.74, "learning_rate": 8.99441123039197e-07, "loss": 0.0002, "step": 1496 }, { "epoch": 0.74, "logps_train/chosen": -64.39570617675781, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -252.77407836914062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.07498040050268173, "rewards_train/margins": 12.683540008962154, "rewards_train/rejected": -12.608559608459473, "step": 1496 }, { "epoch": 0.74, "learning_rate": 8.99277827462712e-07, "loss": 0.0001, "step": 1497 }, { "epoch": 0.74, "logps_train/chosen": -66.2522964477539, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -258.225341796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.10647942870855331, "rewards_train/margins": 12.80843947082758, "rewards_train/rejected": -12.914918899536133, "step": 1497 }, { "epoch": 0.74, "learning_rate": 8.991144142562846e-07, "loss": 0.0001, "step": 1498 }, { "epoch": 0.74, "logps_train/chosen": -70.9778060913086, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -121.875, "logps_train/rejected": -250.31593322753906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5540304183959961, "rewards_train/margins": 12.28752326965332, "rewards_train/rejected": -12.841553688049316, "step": 1498 }, { "epoch": 0.74, "learning_rate": 8.989508834680579e-07, "loss": 0.0002, "step": 1499 }, { "epoch": 0.74, "logps_train/chosen": -61.20507049560547, "logps_train/ref_chosen": -60.5625, "logps_train/ref_rejected": -120.9375, "logps_train/rejected": -243.7655487060547, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.06325571238994598, "rewards_train/margins": 12.220818117260933, "rewards_train/rejected": -12.284073829650879, "step": 1499 }, { "epoch": 0.74, "learning_rate": 8.987872351462087e-07, "loss": 0.0006, "step": 1500 }, { "epoch": 0.74, "logps_train/chosen": -69.61882019042969, "logps_train/ref_chosen": -67.1875, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -257.3067626953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.24142304062843323, "rewards_train/margins": 12.826949685811996, "rewards_train/rejected": -13.06837272644043, "step": 1500 }, { "epoch": 0.74, "learning_rate": 8.986234693389492e-07, "loss": 0.0001, "step": 1501 }, { "epoch": 0.74, "logps_train/chosen": -67.02327728271484, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -243.51095581054688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.338509202003479, "rewards_train/margins": 11.722058653831482, "rewards_train/rejected": -12.060567855834961, "step": 1501 }, { "epoch": 0.74, "learning_rate": 8.984595860945259e-07, "loss": 0.0006, "step": 1502 }, { "epoch": 0.74, "logps_train/chosen": -63.915489196777344, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -254.40655517578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.044332314282655716, "rewards_train/margins": 12.889585684984922, "rewards_train/rejected": -12.933917999267578, "step": 1502 }, { "epoch": 0.74, "learning_rate": 8.982955854612196e-07, "loss": 0.0002, "step": 1503 }, { "epoch": 0.74, "logps_train/chosen": -68.83575439453125, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -258.3515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.46477657556533813, "rewards_train/margins": 12.550997197628021, "rewards_train/rejected": -13.01577377319336, "step": 1503 }, { "epoch": 0.74, "learning_rate": 8.981314674873464e-07, "loss": 0.0002, "step": 1504 }, { "epoch": 0.74, "logps_train/chosen": -70.76016998291016, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -253.12173461914062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4233066439628601, "rewards_train/margins": 12.546044170856476, "rewards_train/rejected": -12.969350814819336, "step": 1504 }, { "epoch": 0.74, "learning_rate": 8.979672322212564e-07, "loss": 0.0003, "step": 1505 }, { "epoch": 0.74, "logps_train/chosen": -69.84294128417969, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -259.4582214355469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.36876657605171204, "rewards_train/margins": 12.838287562131882, "rewards_train/rejected": -13.207054138183594, "step": 1505 }, { "epoch": 0.74, "learning_rate": 8.978028797113342e-07, "loss": 0.0003, "step": 1506 }, { "epoch": 0.74, "logps_train/chosen": -66.82098388671875, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -249.31666564941406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1584896445274353, "rewards_train/margins": 12.178645670413971, "rewards_train/rejected": -12.337135314941406, "step": 1506 }, { "epoch": 0.74, "learning_rate": 8.976384100059995e-07, "loss": 0.001, "step": 1507 }, { "epoch": 0.74, "logps_train/chosen": -63.81749725341797, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -253.79580688476562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.11190229654312134, "rewards_train/margins": 12.872634947299957, "rewards_train/rejected": -12.760732650756836, "step": 1507 }, { "epoch": 0.74, "learning_rate": 8.97473823153706e-07, "loss": 0.0001, "step": 1508 }, { "epoch": 0.74, "logps_train/chosen": -67.39580535888672, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -255.38455200195312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.38318416476249695, "rewards_train/margins": 12.421677857637405, "rewards_train/rejected": -12.804862022399902, "step": 1508 }, { "epoch": 0.74, "learning_rate": 8.973091192029422e-07, "loss": 0.0002, "step": 1509 }, { "epoch": 0.74, "logps_train/chosen": -60.80412673950195, "logps_train/ref_chosen": -61.59375, "logps_train/ref_rejected": -121.875, "logps_train/rejected": -246.03399658203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.08003655076026917, "rewards_train/margins": 12.49598541855812, "rewards_train/rejected": -12.415948867797852, "step": 1509 }, { "epoch": 0.74, "learning_rate": 8.971442982022309e-07, "loss": 0.0002, "step": 1510 }, { "epoch": 0.74, "logps_train/chosen": -66.04414367675781, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -253.00515747070312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2597854733467102, "rewards_train/margins": 12.689803302288055, "rewards_train/rejected": -12.949588775634766, "step": 1510 }, { "epoch": 0.74, "learning_rate": 8.969793602001295e-07, "loss": 0.0003, "step": 1511 }, { "epoch": 0.74, "logps_train/chosen": -69.07559967041016, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -132.0, "logps_train/rejected": -265.4397888183594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3024328351020813, "rewards_train/margins": 13.043107211589813, "rewards_train/rejected": -13.345540046691895, "step": 1511 }, { "epoch": 0.74, "learning_rate": 8.9681430524523e-07, "loss": 0.0002, "step": 1512 }, { "epoch": 0.74, "logps_train/chosen": -64.62472534179688, "logps_train/ref_chosen": -61.21875, "logps_train/ref_rejected": -120.8125, "logps_train/rejected": -244.51742553710938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.34015828371047974, "rewards_train/margins": 12.030676066875458, "rewards_train/rejected": -12.370834350585938, "step": 1512 }, { "epoch": 0.74, "learning_rate": 8.966491333861584e-07, "loss": 0.0015, "step": 1513 }, { "epoch": 0.74, "logps_train/chosen": -71.500732421875, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -262.21649169921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5310788154602051, "rewards_train/margins": 12.790960788726807, "rewards_train/rejected": -13.322039604187012, "step": 1513 }, { "epoch": 0.75, "learning_rate": 8.964838446715755e-07, "loss": 0.0003, "step": 1514 }, { "epoch": 0.75, "logps_train/chosen": -65.68157958984375, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -119.8125, "logps_train/rejected": -247.31390380859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3050715923309326, "rewards_train/margins": 12.445411920547485, "rewards_train/rejected": -12.750483512878418, "step": 1514 }, { "epoch": 0.75, "learning_rate": 8.963184391501768e-07, "loss": 0.0017, "step": 1515 }, { "epoch": 0.75, "logps_train/chosen": -68.84976196289062, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -134.0, "logps_train/rejected": -269.7971496582031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.28707557916641235, "rewards_train/margins": 13.28970855474472, "rewards_train/rejected": -13.576784133911133, "step": 1515 }, { "epoch": 0.75, "learning_rate": 8.961529168706916e-07, "loss": 0.0002, "step": 1516 }, { "epoch": 0.75, "logps_train/chosen": -68.52058410644531, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -269.2035827636719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3256429433822632, "rewards_train/margins": 13.56932508945465, "rewards_train/rejected": -13.894968032836914, "step": 1516 }, { "epoch": 0.75, "learning_rate": 8.95987277881884e-07, "loss": 0.0, "step": 1517 }, { "epoch": 0.75, "logps_train/chosen": -67.96012115478516, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -256.93023681640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3917641341686249, "rewards_train/margins": 12.791590243577957, "rewards_train/rejected": -13.183354377746582, "step": 1517 }, { "epoch": 0.75, "learning_rate": 8.958215222325523e-07, "loss": 0.0001, "step": 1518 }, { "epoch": 0.75, "logps_train/chosen": -66.19938659667969, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -253.09954833984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.23532050848007202, "rewards_train/margins": 12.718285143375397, "rewards_train/rejected": -12.953605651855469, "step": 1518 }, { "epoch": 0.75, "learning_rate": 8.956556499715291e-07, "loss": 0.0001, "step": 1519 }, { "epoch": 0.75, "logps_train/chosen": -67.22477722167969, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -251.6547393798828, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.19615909457206726, "rewards_train/margins": 12.396560937166214, "rewards_train/rejected": -12.592720031738281, "step": 1519 }, { "epoch": 0.75, "learning_rate": 8.954896611476818e-07, "loss": 0.0003, "step": 1520 }, { "epoch": 0.75, "logps_train/chosen": -65.5980224609375, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -258.22296142578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.24520203471183777, "rewards_train/margins": 12.989005118608475, "rewards_train/rejected": -13.234207153320312, "step": 1520 }, { "epoch": 0.75, "learning_rate": 8.953235558099114e-07, "loss": 0.0001, "step": 1521 }, { "epoch": 0.75, "logps_train/chosen": -68.71656799316406, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -259.641845703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2683850824832916, "rewards_train/margins": 12.864645808935165, "rewards_train/rejected": -13.133030891418457, "step": 1521 }, { "epoch": 0.75, "learning_rate": 8.951573340071542e-07, "loss": 0.0012, "step": 1522 }, { "epoch": 0.75, "logps_train/chosen": -66.60267639160156, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -254.07763671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.21270917356014252, "rewards_train/margins": 12.556675210595131, "rewards_train/rejected": -12.769384384155273, "step": 1522 }, { "epoch": 0.75, "learning_rate": 8.949909957883799e-07, "loss": 0.0, "step": 1523 }, { "epoch": 0.75, "logps_train/chosen": -67.22547912597656, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -252.01966857910156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.18407109379768372, "rewards_train/margins": 12.615748852491379, "rewards_train/rejected": -12.799819946289062, "step": 1523 }, { "epoch": 0.75, "learning_rate": 8.948245412025928e-07, "loss": 0.0004, "step": 1524 }, { "epoch": 0.75, "logps_train/chosen": -73.86546325683594, "logps_train/ref_chosen": -67.9375, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -262.8692321777344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5918189287185669, "rewards_train/margins": 12.69696033000946, "rewards_train/rejected": -13.288779258728027, "step": 1524 }, { "epoch": 0.75, "learning_rate": 8.946579702988318e-07, "loss": 0.0001, "step": 1525 }, { "epoch": 0.75, "logps_train/chosen": -67.81477355957031, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -246.02532958984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.18606749176979065, "rewards_train/margins": 12.144199460744858, "rewards_train/rejected": -12.330266952514648, "step": 1525 }, { "epoch": 0.75, "learning_rate": 8.944912831261697e-07, "loss": 0.0001, "step": 1526 }, { "epoch": 0.75, "logps_train/chosen": -68.66921997070312, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -264.8772277832031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3754660487174988, "rewards_train/margins": 13.377784073352814, "rewards_train/rejected": -13.753250122070312, "step": 1526 }, { "epoch": 0.75, "learning_rate": 8.943244797337136e-07, "loss": 0.0001, "step": 1527 }, { "epoch": 0.75, "logps_train/chosen": -64.54794311523438, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -248.0251007080078, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.14600493013858795, "rewards_train/margins": 12.405626997351646, "rewards_train/rejected": -12.551631927490234, "step": 1527 }, { "epoch": 0.75, "learning_rate": 8.941575601706051e-07, "loss": 0.0001, "step": 1528 }, { "epoch": 0.75, "logps_train/chosen": -64.47648620605469, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -121.5, "logps_train/rejected": -249.22128295898438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.17235606908798218, "rewards_train/margins": 12.598941385746002, "rewards_train/rejected": -12.771297454833984, "step": 1528 }, { "epoch": 0.75, "learning_rate": 8.939905244860195e-07, "loss": 0.0007, "step": 1529 }, { "epoch": 0.75, "logps_train/chosen": -68.54780578613281, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -256.91241455078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2220659852027893, "rewards_train/margins": 12.82229894399643, "rewards_train/rejected": -13.044364929199219, "step": 1529 }, { "epoch": 0.75, "learning_rate": 8.938233727291668e-07, "loss": 0.0001, "step": 1530 }, { "epoch": 0.75, "logps_train/chosen": -67.41897583007812, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -256.6914367675781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.13271725177764893, "rewards_train/margins": 12.715186953544617, "rewards_train/rejected": -12.847904205322266, "step": 1530 }, { "epoch": 0.75, "learning_rate": 8.936561049492912e-07, "loss": 0.0002, "step": 1531 }, { "epoch": 0.75, "logps_train/chosen": -64.10508728027344, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -254.22146606445312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.09273584187030792, "rewards_train/margins": 12.664616033434868, "rewards_train/rejected": -12.757351875305176, "step": 1531 }, { "epoch": 0.75, "learning_rate": 8.934887211956709e-07, "loss": 0.0003, "step": 1532 }, { "epoch": 0.75, "logps_train/chosen": -69.18826293945312, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -270.52197265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2781045436859131, "rewards_train/margins": 13.70202088356018, "rewards_train/rejected": -13.980125427246094, "step": 1532 }, { "epoch": 0.75, "learning_rate": 8.93321221517618e-07, "loss": 0.0001, "step": 1533 }, { "epoch": 0.75, "logps_train/chosen": -68.97267150878906, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -257.539306640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3625018894672394, "rewards_train/margins": 12.913354128599167, "rewards_train/rejected": -13.275856018066406, "step": 1533 }, { "epoch": 0.76, "learning_rate": 8.931536059644793e-07, "loss": 0.0004, "step": 1534 }, { "epoch": 0.76, "logps_train/chosen": -68.33045196533203, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -259.6219482421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.28133639693260193, "rewards_train/margins": 12.96684780716896, "rewards_train/rejected": -13.248184204101562, "step": 1534 }, { "epoch": 0.76, "learning_rate": 8.929858745856353e-07, "loss": 0.0001, "step": 1535 }, { "epoch": 0.76, "logps_train/chosen": -65.15540313720703, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -256.6463623046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.22335289418697357, "rewards_train/margins": 13.010571017861366, "rewards_train/rejected": -13.23392391204834, "step": 1535 }, { "epoch": 0.76, "learning_rate": 8.928180274305008e-07, "loss": 0.0, "step": 1536 }, { "epoch": 0.76, "logps_train/chosen": -69.13380432128906, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -253.84469604492188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4878917634487152, "rewards_train/margins": 12.586908727884293, "rewards_train/rejected": -13.074800491333008, "step": 1536 }, { "epoch": 0.76, "learning_rate": 8.926500645485248e-07, "loss": 0.0005, "step": 1537 }, { "epoch": 0.76, "logps_train/chosen": -67.43815612792969, "logps_train/ref_chosen": -67.75, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -264.7208251953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.03509043902158737, "rewards_train/margins": 13.432660095393658, "rewards_train/rejected": -13.39756965637207, "step": 1537 }, { "epoch": 0.76, "learning_rate": 8.924819859891905e-07, "loss": 0.0003, "step": 1538 }, { "epoch": 0.76, "logps_train/chosen": -64.53033447265625, "logps_train/ref_chosen": -62.15625, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -250.12132263183594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.23667608201503754, "rewards_train/margins": 12.46212686598301, "rewards_train/rejected": -12.698802947998047, "step": 1538 }, { "epoch": 0.76, "learning_rate": 8.923137918020145e-07, "loss": 0.0006, "step": 1539 }, { "epoch": 0.76, "logps_train/chosen": -64.623046875, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -248.18191528320312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0778808668255806, "rewards_train/margins": 12.501297943294048, "rewards_train/rejected": -12.579178810119629, "step": 1539 }, { "epoch": 0.76, "learning_rate": 8.921454820365482e-07, "loss": 0.002, "step": 1540 }, { "epoch": 0.76, "logps_train/chosen": -67.37260437011719, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -246.760009765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3506397008895874, "rewards_train/margins": 11.991279244422913, "rewards_train/rejected": -12.3419189453125, "step": 1540 }, { "epoch": 0.76, "learning_rate": 8.91977056742377e-07, "loss": 0.0005, "step": 1541 }, { "epoch": 0.76, "logps_train/chosen": -67.27761840820312, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -265.44866943359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.06985211372375488, "rewards_train/margins": 13.46300196647644, "rewards_train/rejected": -13.532854080200195, "step": 1541 }, { "epoch": 0.76, "learning_rate": 8.918085159691198e-07, "loss": 0.0001, "step": 1542 }, { "epoch": 0.76, "logps_train/chosen": -66.9200210571289, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -258.3023376464844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2589942216873169, "rewards_train/margins": 12.829151034355164, "rewards_train/rejected": -13.08814525604248, "step": 1542 }, { "epoch": 0.76, "learning_rate": 8.916398597664298e-07, "loss": 0.0001, "step": 1543 }, { "epoch": 0.76, "logps_train/chosen": -64.85670471191406, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -253.85240173339844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.01945694535970688, "rewards_train/margins": 12.707528196275234, "rewards_train/rejected": -12.688071250915527, "step": 1543 }, { "epoch": 0.76, "learning_rate": 8.914710881839946e-07, "loss": 0.0016, "step": 1544 }, { "epoch": 0.76, "logps_train/chosen": -61.91877746582031, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -245.64315795898438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.04562215879559517, "rewards_train/margins": 12.30544500425458, "rewards_train/rejected": -12.259822845458984, "step": 1544 }, { "epoch": 0.76, "learning_rate": 8.913022012715353e-07, "loss": 0.0001, "step": 1545 }, { "epoch": 0.76, "logps_train/chosen": -67.63772583007812, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -257.1455078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.27051085233688354, "rewards_train/margins": 12.600925266742706, "rewards_train/rejected": -12.87143611907959, "step": 1545 }, { "epoch": 0.76, "learning_rate": 8.911331990788072e-07, "loss": 0.0002, "step": 1546 }, { "epoch": 0.76, "logps_train/chosen": -68.0831298828125, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -252.22134399414062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1380007565021515, "rewards_train/margins": 12.42080470919609, "rewards_train/rejected": -12.558805465698242, "step": 1546 }, { "epoch": 0.76, "learning_rate": 8.909640816555991e-07, "loss": 0.0013, "step": 1547 }, { "epoch": 0.76, "logps_train/chosen": -67.39932250976562, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -256.47613525390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2879302501678467, "rewards_train/margins": 12.693570375442505, "rewards_train/rejected": -12.981500625610352, "step": 1547 }, { "epoch": 0.76, "learning_rate": 8.907948490517345e-07, "loss": 0.0004, "step": 1548 }, { "epoch": 0.76, "logps_train/chosen": -65.31584930419922, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -248.33245849609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.220403254032135, "rewards_train/margins": 12.32314532995224, "rewards_train/rejected": -12.543548583984375, "step": 1548 }, { "epoch": 0.76, "learning_rate": 8.906255013170704e-07, "loss": 0.0018, "step": 1549 }, { "epoch": 0.76, "logps_train/chosen": -67.29258728027344, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -262.30999755859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2908305525779724, "rewards_train/margins": 12.921518385410309, "rewards_train/rejected": -13.212348937988281, "step": 1549 }, { "epoch": 0.76, "learning_rate": 8.904560385014979e-07, "loss": 0.0001, "step": 1550 }, { "epoch": 0.76, "logps_train/chosen": -69.36070251464844, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -130.625, "logps_train/rejected": -267.6112060546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.36190035939216614, "rewards_train/margins": 13.330859154462814, "rewards_train/rejected": -13.69275951385498, "step": 1550 }, { "epoch": 0.76, "learning_rate": 8.902864606549415e-07, "loss": 0.0001, "step": 1551 }, { "epoch": 0.76, "logps_train/chosen": -63.96548080444336, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -251.5621795654297, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.12589356303215027, "rewards_train/margins": 12.507961302995682, "rewards_train/rejected": -12.633854866027832, "step": 1551 }, { "epoch": 0.76, "learning_rate": 8.901167678273603e-07, "loss": 0.0001, "step": 1552 }, { "epoch": 0.76, "logps_train/chosen": -66.27056121826172, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -258.9641418457031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.33608949184417725, "rewards_train/margins": 13.044699311256409, "rewards_train/rejected": -13.380788803100586, "step": 1552 }, { "epoch": 0.76, "learning_rate": 8.89946960068747e-07, "loss": 0.0001, "step": 1553 }, { "epoch": 0.76, "logps_train/chosen": -68.04460144042969, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -265.078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.10211635380983353, "rewards_train/margins": 13.54695724695921, "rewards_train/rejected": -13.649073600769043, "step": 1553 }, { "epoch": 0.77, "learning_rate": 8.897770374291279e-07, "loss": 0.0002, "step": 1554 }, { "epoch": 0.77, "logps_train/chosen": -67.63505554199219, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -258.7525939941406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.24163128435611725, "rewards_train/margins": 12.680211290717125, "rewards_train/rejected": -12.921842575073242, "step": 1554 }, { "epoch": 0.77, "learning_rate": 8.896069999585635e-07, "loss": 0.0002, "step": 1555 }, { "epoch": 0.77, "logps_train/chosen": -62.22819519042969, "logps_train/ref_chosen": -61.71875, "logps_train/ref_rejected": -121.8125, "logps_train/rejected": -244.78836059570312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.05182349681854248, "rewards_train/margins": 12.248007893562317, "rewards_train/rejected": -12.29983139038086, "step": 1555 }, { "epoch": 0.77, "learning_rate": 8.894368477071478e-07, "loss": 0.0004, "step": 1556 }, { "epoch": 0.77, "logps_train/chosen": -65.91783905029297, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -255.55152893066406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1309438943862915, "rewards_train/margins": 12.712977766990662, "rewards_train/rejected": -12.843921661376953, "step": 1556 }, { "epoch": 0.77, "learning_rate": 8.892665807250092e-07, "loss": 0.0001, "step": 1557 }, { "epoch": 0.77, "logps_train/chosen": -68.0901870727539, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -259.8149108886719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2918797731399536, "rewards_train/margins": 13.14752185344696, "rewards_train/rejected": -13.439401626586914, "step": 1557 }, { "epoch": 0.77, "learning_rate": 8.890961990623091e-07, "loss": 0.0001, "step": 1558 }, { "epoch": 0.77, "logps_train/chosen": -66.66890716552734, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -253.63082885742188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.27387309074401855, "rewards_train/margins": 12.889649629592896, "rewards_train/rejected": -13.163522720336914, "step": 1558 }, { "epoch": 0.77, "learning_rate": 8.889257027692432e-07, "loss": 0.0001, "step": 1559 }, { "epoch": 0.77, "logps_train/chosen": -67.81640625, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -259.490966796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.26708996295928955, "rewards_train/margins": 12.911840319633484, "rewards_train/rejected": -13.178930282592773, "step": 1559 }, { "epoch": 0.77, "learning_rate": 8.887550918960409e-07, "loss": 0.0011, "step": 1560 }, { "epoch": 0.77, "logps_train/chosen": -65.14179992675781, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -119.8125, "logps_train/rejected": -247.33950805664062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.30104565620422363, "rewards_train/margins": 12.454097509384155, "rewards_train/rejected": -12.755143165588379, "step": 1560 }, { "epoch": 0.77, "learning_rate": 8.885843664929653e-07, "loss": 0.0001, "step": 1561 }, { "epoch": 0.77, "logps_train/chosen": -68.40554809570312, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -123.1875, "logps_train/rejected": -250.77317810058594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3678736984729767, "rewards_train/margins": 12.39005896449089, "rewards_train/rejected": -12.757932662963867, "step": 1561 }, { "epoch": 0.77, "learning_rate": 8.884135266103133e-07, "loss": 0.0007, "step": 1562 }, { "epoch": 0.77, "logps_train/chosen": -65.2323226928711, "logps_train/ref_chosen": -61.625, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -246.21173095703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.36039024591445923, "rewards_train/margins": 12.118791043758392, "rewards_train/rejected": -12.479181289672852, "step": 1562 }, { "epoch": 0.77, "learning_rate": 8.882425722984154e-07, "loss": 0.0002, "step": 1563 }, { "epoch": 0.77, "logps_train/chosen": -68.469482421875, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -257.40460205078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.38068848848342896, "rewards_train/margins": 12.838092029094696, "rewards_train/rejected": -13.218780517578125, "step": 1563 }, { "epoch": 0.77, "learning_rate": 8.880715036076358e-07, "loss": 0.0001, "step": 1564 }, { "epoch": 0.77, "logps_train/chosen": -65.60441589355469, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -252.05911254882812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.08722350001335144, "rewards_train/margins": 12.695348292589188, "rewards_train/rejected": -12.782571792602539, "step": 1564 }, { "epoch": 0.77, "learning_rate": 8.879003205883728e-07, "loss": 0.0002, "step": 1565 }, { "epoch": 0.77, "logps_train/chosen": -64.5283203125, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -122.8125, "logps_train/rejected": -245.08560180664062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1848142296075821, "rewards_train/margins": 12.040056452155113, "rewards_train/rejected": -12.224870681762695, "step": 1565 }, { "epoch": 0.77, "learning_rate": 8.877290232910579e-07, "loss": 0.0002, "step": 1566 }, { "epoch": 0.77, "logps_train/chosen": -64.1230697631836, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -249.04989624023438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.14084658026695251, "rewards_train/margins": 12.411898285150528, "rewards_train/rejected": -12.55274486541748, "step": 1566 }, { "epoch": 0.77, "learning_rate": 8.875576117661564e-07, "loss": 0.0015, "step": 1567 }, { "epoch": 0.77, "logps_train/chosen": -71.0697250366211, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -259.14068603515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.42958006262779236, "rewards_train/margins": 12.660465866327286, "rewards_train/rejected": -13.090045928955078, "step": 1567 }, { "epoch": 0.77, "learning_rate": 8.873860860641673e-07, "loss": 0.0001, "step": 1568 }, { "epoch": 0.77, "logps_train/chosen": -64.1456527709961, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -252.23301696777344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0964212417602539, "rewards_train/margins": 12.97875690460205, "rewards_train/rejected": -12.882335662841797, "step": 1568 }, { "epoch": 0.77, "learning_rate": 8.872144462356232e-07, "loss": 0.0004, "step": 1569 }, { "epoch": 0.77, "logps_train/chosen": -68.10558319091797, "logps_train/ref_chosen": -67.0625, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -257.838623046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1064322367310524, "rewards_train/margins": 12.948232375085354, "rewards_train/rejected": -13.054664611816406, "step": 1569 }, { "epoch": 0.77, "learning_rate": 8.870426923310906e-07, "loss": 0.0001, "step": 1570 }, { "epoch": 0.77, "logps_train/chosen": -60.835227966308594, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -122.8125, "logps_train/rejected": -244.8672637939453, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.37316665053367615, "rewards_train/margins": 12.581378549337387, "rewards_train/rejected": -12.208211898803711, "step": 1570 }, { "epoch": 0.77, "learning_rate": 8.868708244011691e-07, "loss": 0.0004, "step": 1571 }, { "epoch": 0.77, "logps_train/chosen": -69.02540588378906, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -120.625, "logps_train/rejected": -246.8155517578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.41675031185150146, "rewards_train/margins": 12.202159523963928, "rewards_train/rejected": -12.61890983581543, "step": 1571 }, { "epoch": 0.77, "learning_rate": 8.866988424964921e-07, "loss": 0.0004, "step": 1572 }, { "epoch": 0.77, "logps_train/chosen": -63.47407913208008, "logps_train/ref_chosen": -61.9375, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -245.22398376464844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.15185165405273438, "rewards_train/margins": 12.010584831237793, "rewards_train/rejected": -12.162436485290527, "step": 1572 }, { "epoch": 0.77, "learning_rate": 8.865267466677269e-07, "loss": 0.0003, "step": 1573 }, { "epoch": 0.77, "logps_train/chosen": -68.4412612915039, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -256.8439025878906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4061870276927948, "rewards_train/margins": 12.562090903520584, "rewards_train/rejected": -12.968277931213379, "step": 1573 }, { "epoch": 0.77, "learning_rate": 8.863545369655739e-07, "loss": 0.0002, "step": 1574 }, { "epoch": 0.77, "logps_train/chosen": -67.13045501708984, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -252.43910217285156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.08076956868171692, "rewards_train/margins": 12.71460434794426, "rewards_train/rejected": -12.795373916625977, "step": 1574 }, { "epoch": 0.78, "learning_rate": 8.861822134407671e-07, "loss": 0.0006, "step": 1575 }, { "epoch": 0.78, "logps_train/chosen": -63.47727584838867, "logps_train/ref_chosen": -61.375, "logps_train/ref_rejected": -120.6875, "logps_train/rejected": -247.16586303710938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.211106538772583, "rewards_train/margins": 12.435167074203491, "rewards_train/rejected": -12.646273612976074, "step": 1575 }, { "epoch": 0.78, "learning_rate": 8.860097761440745e-07, "loss": 0.0003, "step": 1576 }, { "epoch": 0.78, "logps_train/chosen": -65.9197006225586, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -251.62591552734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1338404268026352, "rewards_train/margins": 12.690664425492287, "rewards_train/rejected": -12.824504852294922, "step": 1576 }, { "epoch": 0.78, "learning_rate": 8.858372251262972e-07, "loss": 0.0001, "step": 1577 }, { "epoch": 0.78, "logps_train/chosen": -64.24223327636719, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -252.92031860351562, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1869184970855713, "rewards_train/margins": 12.881091833114624, "rewards_train/rejected": -13.068010330200195, "step": 1577 }, { "epoch": 0.78, "learning_rate": 8.856645604382698e-07, "loss": 0.0003, "step": 1578 }, { "epoch": 0.78, "logps_train/chosen": -66.55511474609375, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -263.5526123046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.027594253420829773, "rewards_train/margins": 13.501995727419853, "rewards_train/rejected": -13.474401473999023, "step": 1578 }, { "epoch": 0.78, "learning_rate": 8.854917821308605e-07, "loss": 0.0001, "step": 1579 }, { "epoch": 0.78, "logps_train/chosen": -66.67630004882812, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -257.0028991699219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0077913254499435425, "rewards_train/margins": 13.041326716542244, "rewards_train/rejected": -13.049118041992188, "step": 1579 }, { "epoch": 0.78, "learning_rate": 8.853188902549708e-07, "loss": 0.001, "step": 1580 }, { "epoch": 0.78, "logps_train/chosen": -67.81077575683594, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -259.7322082519531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2375233769416809, "rewards_train/margins": 13.090580642223358, "rewards_train/rejected": -13.328104019165039, "step": 1580 }, { "epoch": 0.78, "learning_rate": 8.851458848615363e-07, "loss": 0.0002, "step": 1581 }, { "epoch": 0.78, "logps_train/chosen": -67.042236328125, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -261.4453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.11862771213054657, "rewards_train/margins": 13.193187549710274, "rewards_train/rejected": -13.31181526184082, "step": 1581 }, { "epoch": 0.78, "learning_rate": 8.849727660015252e-07, "loss": 0.0, "step": 1582 }, { "epoch": 0.78, "logps_train/chosen": -67.84698486328125, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -253.81146240234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.24163155257701874, "rewards_train/margins": 12.48971076309681, "rewards_train/rejected": -12.731342315673828, "step": 1582 }, { "epoch": 0.78, "learning_rate": 8.847995337259393e-07, "loss": 0.0003, "step": 1583 }, { "epoch": 0.78, "logps_train/chosen": -67.96025085449219, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -268.87481689453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2851123809814453, "rewards_train/margins": 13.625419616699219, "rewards_train/rejected": -13.910531997680664, "step": 1583 }, { "epoch": 0.78, "learning_rate": 8.846261880858145e-07, "loss": 0.0, "step": 1584 }, { "epoch": 0.78, "logps_train/chosen": -66.6138916015625, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -267.21124267578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.005945339798927307, "rewards_train/margins": 13.653242245316505, "rewards_train/rejected": -13.647296905517578, "step": 1584 }, { "epoch": 0.78, "learning_rate": 8.844527291322191e-07, "loss": 0.0, "step": 1585 }, { "epoch": 0.78, "logps_train/chosen": -66.8360824584961, "logps_train/ref_chosen": -67.0625, "logps_train/ref_rejected": -120.5, "logps_train/rejected": -243.87777709960938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.021909192204475403, "rewards_train/margins": 12.36271233856678, "rewards_train/rejected": -12.340803146362305, "step": 1585 }, { "epoch": 0.78, "learning_rate": 8.842791569162557e-07, "loss": 0.0029, "step": 1586 }, { "epoch": 0.78, "logps_train/chosen": -65.12299346923828, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -245.46926879882812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1687690019607544, "rewards_train/margins": 12.206186175346375, "rewards_train/rejected": -12.374955177307129, "step": 1586 }, { "epoch": 0.78, "learning_rate": 8.841054714890594e-07, "loss": 0.0021, "step": 1587 }, { "epoch": 0.78, "logps_train/chosen": -69.79930877685547, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -256.61651611328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4417470395565033, "rewards_train/margins": 12.802522331476212, "rewards_train/rejected": -13.244269371032715, "step": 1587 }, { "epoch": 0.78, "learning_rate": 8.839316729017996e-07, "loss": 0.0, "step": 1588 }, { "epoch": 0.78, "logps_train/chosen": -66.61241149902344, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -253.10841369628906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.16465915739536285, "rewards_train/margins": 12.727627143263817, "rewards_train/rejected": -12.89228630065918, "step": 1588 }, { "epoch": 0.78, "learning_rate": 8.837577612056781e-07, "loss": 0.0001, "step": 1589 }, { "epoch": 0.78, "logps_train/chosen": -66.14348602294922, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -257.71453857421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.29257097840309143, "rewards_train/margins": 12.932789891958237, "rewards_train/rejected": -13.225360870361328, "step": 1589 }, { "epoch": 0.78, "learning_rate": 8.835837364519308e-07, "loss": 0.0002, "step": 1590 }, { "epoch": 0.78, "logps_train/chosen": -66.68719482421875, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -259.83258056640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.13092653453350067, "rewards_train/margins": 13.084800317883492, "rewards_train/rejected": -13.215726852416992, "step": 1590 }, { "epoch": 0.78, "learning_rate": 8.834095986918264e-07, "loss": 0.0002, "step": 1591 }, { "epoch": 0.78, "logps_train/chosen": -63.80912399291992, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -258.1556396484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.06328525394201279, "rewards_train/margins": 13.139341928064823, "rewards_train/rejected": -13.202627182006836, "step": 1591 }, { "epoch": 0.78, "learning_rate": 8.83235347976667e-07, "loss": 0.0001, "step": 1592 }, { "epoch": 0.78, "logps_train/chosen": -63.62176513671875, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -254.7600860595703, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.008343636989593506, "rewards_train/margins": 12.967714369297028, "rewards_train/rejected": -12.976058006286621, "step": 1592 }, { "epoch": 0.78, "learning_rate": 8.830609843577881e-07, "loss": 0.0007, "step": 1593 }, { "epoch": 0.78, "logps_train/chosen": -65.70838928222656, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -252.37527465820312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.16165980696678162, "rewards_train/margins": 12.457410246133804, "rewards_train/rejected": -12.619070053100586, "step": 1593 }, { "epoch": 0.78, "learning_rate": 8.828865078865584e-07, "loss": 0.0002, "step": 1594 }, { "epoch": 0.78, "logps_train/chosen": -67.49825286865234, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -259.609619140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.22560662031173706, "rewards_train/margins": 12.851909935474396, "rewards_train/rejected": -13.077516555786133, "step": 1594 }, { "epoch": 0.79, "learning_rate": 8.827119186143799e-07, "loss": 0.0012, "step": 1595 }, { "epoch": 0.79, "logps_train/chosen": -69.10546875, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -253.2315673828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4924077093601227, "rewards_train/margins": 12.6891470849514, "rewards_train/rejected": -13.181554794311523, "step": 1595 }, { "epoch": 0.79, "learning_rate": 8.825372165926876e-07, "loss": 0.0002, "step": 1596 }, { "epoch": 0.79, "logps_train/chosen": -61.33873748779297, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -120.6875, "logps_train/rejected": -244.509033203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.16422207653522491, "rewards_train/margins": 12.543935135006905, "rewards_train/rejected": -12.37971305847168, "step": 1596 }, { "epoch": 0.79, "learning_rate": 8.823624018729501e-07, "loss": 0.0003, "step": 1597 }, { "epoch": 0.79, "logps_train/chosen": -64.15107727050781, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -249.3487548828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.012919500470161438, "rewards_train/margins": 12.412736967206001, "rewards_train/rejected": -12.39981746673584, "step": 1597 }, { "epoch": 0.79, "learning_rate": 8.82187474506669e-07, "loss": 0.0003, "step": 1598 }, { "epoch": 0.79, "logps_train/chosen": -66.762939453125, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -254.83541870117188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.17434068024158478, "rewards_train/margins": 12.75422053039074, "rewards_train/rejected": -12.928561210632324, "step": 1598 }, { "epoch": 0.79, "learning_rate": 8.82012434545379e-07, "loss": 0.0005, "step": 1599 }, { "epoch": 0.79, "logps_train/chosen": -69.47164154052734, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -256.6761779785156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.43312668800354004, "rewards_train/margins": 12.454023122787476, "rewards_train/rejected": -12.887149810791016, "step": 1599 }, { "epoch": 0.79, "learning_rate": 8.818372820406478e-07, "loss": 0.0002, "step": 1600 }, { "epoch": 0.79, "logps_train/chosen": -65.91065979003906, "logps_train/ref_chosen": -62.8125, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -253.49948120117188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3093281090259552, "rewards_train/margins": 12.451508492231369, "rewards_train/rejected": -12.760836601257324, "step": 1600 }, { "epoch": 0.79, "learning_rate": 8.816620170440772e-07, "loss": 0.0001, "step": 1601 }, { "epoch": 0.79, "logps_train/chosen": -69.2925796508789, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -257.8963317871094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.27098196744918823, "rewards_train/margins": 12.751366436481476, "rewards_train/rejected": -13.022348403930664, "step": 1601 }, { "epoch": 0.79, "learning_rate": 8.814866396073007e-07, "loss": 0.0003, "step": 1602 }, { "epoch": 0.79, "logps_train/chosen": -64.85993957519531, "logps_train/ref_chosen": -62.53125, "logps_train/ref_rejected": -118.9375, "logps_train/rejected": -245.45697021484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.23101361095905304, "rewards_train/margins": 12.419712707400322, "rewards_train/rejected": -12.650726318359375, "step": 1602 }, { "epoch": 0.79, "learning_rate": 8.81311149781986e-07, "loss": 0.0001, "step": 1603 }, { "epoch": 0.79, "logps_train/chosen": -67.49491882324219, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -263.1182861328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.06704600155353546, "rewards_train/margins": 13.477935954928398, "rewards_train/rejected": -13.544981956481934, "step": 1603 }, { "epoch": 0.79, "learning_rate": 8.811355476198336e-07, "loss": 0.0001, "step": 1604 }, { "epoch": 0.79, "logps_train/chosen": -67.10966491699219, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -250.61224365234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2973436713218689, "rewards_train/margins": 12.662757456302643, "rewards_train/rejected": -12.960101127624512, "step": 1604 }, { "epoch": 0.79, "learning_rate": 8.809598331725771e-07, "loss": 0.0003, "step": 1605 }, { "epoch": 0.79, "logps_train/chosen": -62.33851623535156, "logps_train/ref_chosen": -61.875, "logps_train/ref_rejected": -121.25, "logps_train/rejected": -248.4344482421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.047401439398527145, "rewards_train/margins": 12.674412716180086, "rewards_train/rejected": -12.721814155578613, "step": 1605 }, { "epoch": 0.79, "learning_rate": 8.807840064919831e-07, "loss": 0.0, "step": 1606 }, { "epoch": 0.79, "logps_train/chosen": -66.59042358398438, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -257.337890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.030819574370980263, "rewards_train/margins": 13.06781323812902, "rewards_train/rejected": -13.0986328125, "step": 1606 }, { "epoch": 0.79, "learning_rate": 8.806080676298515e-07, "loss": 0.0, "step": 1607 }, { "epoch": 0.79, "logps_train/chosen": -68.99031066894531, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -130.875, "logps_train/rejected": -268.2363586425781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.391559898853302, "rewards_train/margins": 13.33989018201828, "rewards_train/rejected": -13.731450080871582, "step": 1607 }, { "epoch": 0.79, "learning_rate": 8.804320166380147e-07, "loss": 0.0, "step": 1608 }, { "epoch": 0.79, "logps_train/chosen": -64.55523681640625, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -258.62176513671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.10355895757675171, "rewards_train/margins": 13.080972135066986, "rewards_train/rejected": -12.977413177490234, "step": 1608 }, { "epoch": 0.79, "learning_rate": 8.802558535683389e-07, "loss": 0.0001, "step": 1609 }, { "epoch": 0.79, "logps_train/chosen": -65.15953826904297, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -258.4322509765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.13646167516708374, "rewards_train/margins": 13.34328705072403, "rewards_train/rejected": -13.479748725891113, "step": 1609 }, { "epoch": 0.79, "learning_rate": 8.800795784727226e-07, "loss": 0.0002, "step": 1610 }, { "epoch": 0.79, "logps_train/chosen": -68.06947326660156, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -265.81610107421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3921530246734619, "rewards_train/margins": 13.32056212425232, "rewards_train/rejected": -13.712715148925781, "step": 1610 }, { "epoch": 0.79, "learning_rate": 8.799031914030979e-07, "loss": 0.0, "step": 1611 }, { "epoch": 0.79, "logps_train/chosen": -62.121864318847656, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -259.6105041503906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.22941504418849945, "rewards_train/margins": 13.558776006102562, "rewards_train/rejected": -13.329360961914062, "step": 1611 }, { "epoch": 0.79, "learning_rate": 8.797266924114295e-07, "loss": 0.0003, "step": 1612 }, { "epoch": 0.79, "logps_train/chosen": -65.72488403320312, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -266.1699523925781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.14973469078540802, "rewards_train/margins": 13.575854107737541, "rewards_train/rejected": -13.72558879852295, "step": 1612 }, { "epoch": 0.79, "learning_rate": 8.795500815497153e-07, "loss": 0.0002, "step": 1613 }, { "epoch": 0.79, "logps_train/chosen": -67.3958511352539, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -261.60552978515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.068491131067276, "rewards_train/margins": 13.31071361899376, "rewards_train/rejected": -13.379204750061035, "step": 1613 }, { "epoch": 0.79, "learning_rate": 8.793733588699858e-07, "loss": 0.0006, "step": 1614 }, { "epoch": 0.79, "logps_train/chosen": -66.05703735351562, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -254.8770294189453, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.07128006219863892, "rewards_train/margins": 13.064959943294525, "rewards_train/rejected": -13.136240005493164, "step": 1614 }, { "epoch": 0.8, "learning_rate": 8.791965244243049e-07, "loss": 0.0004, "step": 1615 }, { "epoch": 0.8, "logps_train/chosen": -69.31088256835938, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -260.2551574707031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.33191803097724915, "rewards_train/margins": 12.978657454252243, "rewards_train/rejected": -13.310575485229492, "step": 1615 }, { "epoch": 0.8, "learning_rate": 8.790195782647691e-07, "loss": 0.0005, "step": 1616 }, { "epoch": 0.8, "logps_train/chosen": -67.7252197265625, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -269.2972412109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.22188694775104523, "rewards_train/margins": 13.723120376467705, "rewards_train/rejected": -13.94500732421875, "step": 1616 }, { "epoch": 0.8, "learning_rate": 8.788425204435082e-07, "loss": 0.0001, "step": 1617 }, { "epoch": 0.8, "logps_train/chosen": -66.04203033447266, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -261.9562072753906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.013773255050182343, "rewards_train/margins": 13.31119317561388, "rewards_train/rejected": -13.324966430664062, "step": 1617 }, { "epoch": 0.8, "learning_rate": 8.786653510126844e-07, "loss": 0.0001, "step": 1618 }, { "epoch": 0.8, "logps_train/chosen": -65.18409729003906, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -256.9444274902344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.04218930006027222, "rewards_train/margins": 13.231111824512482, "rewards_train/rejected": -13.273301124572754, "step": 1618 }, { "epoch": 0.8, "learning_rate": 8.78488070024493e-07, "loss": 0.0003, "step": 1619 }, { "epoch": 0.8, "logps_train/chosen": -63.48567581176758, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -260.20526123046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.01897800713777542, "rewards_train/margins": 13.265417210757732, "rewards_train/rejected": -13.284395217895508, "step": 1619 }, { "epoch": 0.8, "learning_rate": 8.783106775311622e-07, "loss": 0.0003, "step": 1620 }, { "epoch": 0.8, "logps_train/chosen": -70.9964828491211, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -266.80316162109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.45218756794929504, "rewards_train/margins": 13.25893685221672, "rewards_train/rejected": -13.711124420166016, "step": 1620 }, { "epoch": 0.8, "learning_rate": 8.781331735849531e-07, "loss": 0.0002, "step": 1621 }, { "epoch": 0.8, "logps_train/chosen": -66.88531494140625, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -255.96820068359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3333551585674286, "rewards_train/margins": 12.97508504986763, "rewards_train/rejected": -13.308440208435059, "step": 1621 }, { "epoch": 0.8, "learning_rate": 8.779555582381593e-07, "loss": 0.0001, "step": 1622 }, { "epoch": 0.8, "logps_train/chosen": -64.70994567871094, "logps_train/ref_chosen": -61.90625, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -258.4393615722656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.28110212087631226, "rewards_train/margins": 13.192668974399567, "rewards_train/rejected": -13.473771095275879, "step": 1622 }, { "epoch": 0.8, "learning_rate": 8.777778315431079e-07, "loss": 0.0001, "step": 1623 }, { "epoch": 0.8, "logps_train/chosen": -63.80622863769531, "logps_train/ref_chosen": -61.75, "logps_train/ref_rejected": -119.125, "logps_train/rejected": -244.8932342529297, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2041090875864029, "rewards_train/margins": 12.369786366820335, "rewards_train/rejected": -12.573895454406738, "step": 1623 }, { "epoch": 0.8, "learning_rate": 8.775999935521582e-07, "loss": 0.0005, "step": 1624 }, { "epoch": 0.8, "logps_train/chosen": -71.87002563476562, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -257.40301513671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.609805703163147, "rewards_train/margins": 12.349052786827087, "rewards_train/rejected": -12.958858489990234, "step": 1624 }, { "epoch": 0.8, "learning_rate": 8.774220443177023e-07, "loss": 0.0002, "step": 1625 }, { "epoch": 0.8, "logps_train/chosen": -64.21508026123047, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -256.63116455078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0052969008684158325, "rewards_train/margins": 13.023152157664299, "rewards_train/rejected": -13.028449058532715, "step": 1625 }, { "epoch": 0.8, "learning_rate": 8.772439838921655e-07, "loss": 0.0001, "step": 1626 }, { "epoch": 0.8, "logps_train/chosen": -65.4598388671875, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -255.35183715820312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.06336664408445358, "rewards_train/margins": 13.117079980671406, "rewards_train/rejected": -13.18044662475586, "step": 1626 }, { "epoch": 0.8, "learning_rate": 8.770658123280055e-07, "loss": 0.0001, "step": 1627 }, { "epoch": 0.8, "logps_train/chosen": -65.5339126586914, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -256.70648193359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.18888920545578003, "rewards_train/margins": 13.127609550952911, "rewards_train/rejected": -13.316498756408691, "step": 1627 }, { "epoch": 0.8, "learning_rate": 8.768875296777129e-07, "loss": 0.0001, "step": 1628 }, { "epoch": 0.8, "logps_train/chosen": -63.639400482177734, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -261.596923828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.06711404770612717, "rewards_train/margins": 13.416016407310963, "rewards_train/rejected": -13.48313045501709, "step": 1628 }, { "epoch": 0.8, "learning_rate": 8.767091359938108e-07, "loss": 0.0001, "step": 1629 }, { "epoch": 0.8, "logps_train/chosen": -69.33357238769531, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -254.62530517578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.49634605646133423, "rewards_train/margins": 12.298654019832611, "rewards_train/rejected": -12.795000076293945, "step": 1629 }, { "epoch": 0.8, "learning_rate": 8.765306313288553e-07, "loss": 0.0002, "step": 1630 }, { "epoch": 0.8, "logps_train/chosen": -67.46391296386719, "logps_train/ref_chosen": -63.59375, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -260.79425048828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.38857901096343994, "rewards_train/margins": 13.144506812095642, "rewards_train/rejected": -13.533085823059082, "step": 1630 }, { "epoch": 0.8, "learning_rate": 8.763520157354351e-07, "loss": 0.0003, "step": 1631 }, { "epoch": 0.8, "logps_train/chosen": -66.62596893310547, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -257.5945739746094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.14072184264659882, "rewards_train/margins": 13.044174626469612, "rewards_train/rejected": -13.184896469116211, "step": 1631 }, { "epoch": 0.8, "learning_rate": 8.761732892661715e-07, "loss": 0.0001, "step": 1632 }, { "epoch": 0.8, "logps_train/chosen": -66.70761108398438, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -249.44558715820312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.39410051703453064, "rewards_train/margins": 12.449481636285782, "rewards_train/rejected": -12.843582153320312, "step": 1632 }, { "epoch": 0.8, "learning_rate": 8.759944519737185e-07, "loss": 0.0001, "step": 1633 }, { "epoch": 0.8, "logps_train/chosen": -70.05299377441406, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -258.02593994140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.413502037525177, "rewards_train/margins": 12.507549941539764, "rewards_train/rejected": -12.921051979064941, "step": 1633 }, { "epoch": 0.8, "learning_rate": 8.758155039107628e-07, "loss": 0.0002, "step": 1634 }, { "epoch": 0.8, "logps_train/chosen": -66.46662139892578, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -258.50738525390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.07156459242105484, "rewards_train/margins": 13.006323896348476, "rewards_train/rejected": -13.077888488769531, "step": 1634 }, { "epoch": 0.81, "learning_rate": 8.75636445130024e-07, "loss": 0.0001, "step": 1635 }, { "epoch": 0.81, "logps_train/chosen": -71.63578796386719, "logps_train/ref_chosen": -67.0625, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -260.9149169921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.45771992206573486, "rewards_train/margins": 13.109260439872742, "rewards_train/rejected": -13.566980361938477, "step": 1635 }, { "epoch": 0.81, "learning_rate": 8.754572756842536e-07, "loss": 0.0001, "step": 1636 }, { "epoch": 0.81, "logps_train/chosen": -61.92805862426758, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -120.75, "logps_train/rejected": -245.5460205078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.09835632145404816, "rewards_train/margins": 12.575859144330025, "rewards_train/rejected": -12.477502822875977, "step": 1636 }, { "epoch": 0.81, "learning_rate": 8.752779956262361e-07, "loss": 0.0001, "step": 1637 }, { "epoch": 0.81, "logps_train/chosen": -73.3646240234375, "logps_train/ref_chosen": -67.6875, "logps_train/ref_rejected": -131.25, "logps_train/rejected": -273.37255859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5701543688774109, "rewards_train/margins": 13.648252308368683, "rewards_train/rejected": -14.218406677246094, "step": 1637 }, { "epoch": 0.81, "learning_rate": 8.75098605008789e-07, "loss": 0.0, "step": 1638 }, { "epoch": 0.81, "logps_train/chosen": -71.55866241455078, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -266.6380615234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5252022743225098, "rewards_train/margins": 13.29817533493042, "rewards_train/rejected": -13.82337760925293, "step": 1638 }, { "epoch": 0.81, "learning_rate": 8.749191038847618e-07, "loss": 0.0002, "step": 1639 }, { "epoch": 0.81, "logps_train/chosen": -69.34564971923828, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -265.8583679199219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5150823593139648, "rewards_train/margins": 13.286331176757812, "rewards_train/rejected": -13.801413536071777, "step": 1639 }, { "epoch": 0.81, "learning_rate": 8.747394923070368e-07, "loss": 0.0005, "step": 1640 }, { "epoch": 0.81, "logps_train/chosen": -67.35438537597656, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -252.12063598632812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3700336813926697, "rewards_train/margins": 12.645009577274323, "rewards_train/rejected": -13.015043258666992, "step": 1640 }, { "epoch": 0.81, "learning_rate": 8.745597703285285e-07, "loss": 0.0002, "step": 1641 }, { "epoch": 0.81, "logps_train/chosen": -66.41424560546875, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -256.43731689453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.09323174506425858, "rewards_train/margins": 12.945420674979687, "rewards_train/rejected": -13.038652420043945, "step": 1641 }, { "epoch": 0.81, "learning_rate": 8.743799380021846e-07, "loss": 0.0001, "step": 1642 }, { "epoch": 0.81, "logps_train/chosen": -72.16506958007812, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -262.505859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5184111595153809, "rewards_train/margins": 13.08979082107544, "rewards_train/rejected": -13.60820198059082, "step": 1642 }, { "epoch": 0.81, "learning_rate": 8.741999953809846e-07, "loss": 0.0005, "step": 1643 }, { "epoch": 0.81, "logps_train/chosen": -66.75724792480469, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -266.55731201171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1912524402141571, "rewards_train/margins": 13.655446320772171, "rewards_train/rejected": -13.846698760986328, "step": 1643 }, { "epoch": 0.81, "learning_rate": 8.740199425179411e-07, "loss": 0.0, "step": 1644 }, { "epoch": 0.81, "logps_train/chosen": -69.19562530517578, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -260.874267578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.37053924798965454, "rewards_train/margins": 13.218741834163666, "rewards_train/rejected": -13.58928108215332, "step": 1644 }, { "epoch": 0.81, "learning_rate": 8.738397794660985e-07, "loss": 0.0001, "step": 1645 }, { "epoch": 0.81, "logps_train/chosen": -72.15922546386719, "logps_train/ref_chosen": -67.1875, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -263.98162841796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.49585437774658203, "rewards_train/margins": 13.08565902709961, "rewards_train/rejected": -13.581513404846191, "step": 1645 }, { "epoch": 0.81, "learning_rate": 8.736595062785346e-07, "loss": 0.0004, "step": 1646 }, { "epoch": 0.81, "logps_train/chosen": -64.94500732421875, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -123.3125, "logps_train/rejected": -259.08514404296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.020867742598056793, "rewards_train/margins": 13.558741174638271, "rewards_train/rejected": -13.579608917236328, "step": 1646 }, { "epoch": 0.81, "learning_rate": 8.734791230083586e-07, "loss": 0.0001, "step": 1647 }, { "epoch": 0.81, "logps_train/chosen": -65.89545440673828, "logps_train/ref_chosen": -61.8125, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -252.647705078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.40915006399154663, "rewards_train/margins": 12.614460051059723, "rewards_train/rejected": -13.02361011505127, "step": 1647 }, { "epoch": 0.81, "learning_rate": 8.732986297087129e-07, "loss": 0.0005, "step": 1648 }, { "epoch": 0.81, "logps_train/chosen": -71.75969696044922, "logps_train/ref_chosen": -67.0625, "logps_train/ref_rejected": -131.125, "logps_train/rejected": -269.718505859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.47011056542396545, "rewards_train/margins": 13.389389365911484, "rewards_train/rejected": -13.85949993133545, "step": 1648 }, { "epoch": 0.81, "learning_rate": 8.731180264327718e-07, "loss": 0.0003, "step": 1649 }, { "epoch": 0.81, "logps_train/chosen": -67.62772369384766, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -259.817626953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1697062999010086, "rewards_train/margins": 12.97358326613903, "rewards_train/rejected": -13.143289566040039, "step": 1649 }, { "epoch": 0.81, "learning_rate": 8.729373132337425e-07, "loss": 0.0002, "step": 1650 }, { "epoch": 0.81, "logps_train/chosen": -63.304527282714844, "logps_train/ref_chosen": -61.96875, "logps_train/ref_rejected": -121.875, "logps_train/rejected": -247.75997924804688, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.13230812549591064, "rewards_train/margins": 12.456825137138367, "rewards_train/rejected": -12.589133262634277, "step": 1650 }, { "epoch": 0.81, "learning_rate": 8.727564901648643e-07, "loss": 0.0001, "step": 1651 }, { "epoch": 0.81, "logps_train/chosen": -69.04475402832031, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -130.625, "logps_train/rejected": -271.37744140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.36189770698547363, "rewards_train/margins": 13.7155921459198, "rewards_train/rejected": -14.077489852905273, "step": 1651 }, { "epoch": 0.81, "learning_rate": 8.725755572794087e-07, "loss": 0.0003, "step": 1652 }, { "epoch": 0.81, "logps_train/chosen": -67.46444702148438, "logps_train/ref_chosen": -63.59375, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -256.1028137207031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3856778144836426, "rewards_train/margins": 12.709710597991943, "rewards_train/rejected": -13.095388412475586, "step": 1652 }, { "epoch": 0.81, "learning_rate": 8.723945146306799e-07, "loss": 0.0002, "step": 1653 }, { "epoch": 0.81, "logps_train/chosen": -64.02323150634766, "logps_train/ref_chosen": -62.46875, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -257.36138916015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1562294363975525, "rewards_train/margins": 12.988016664981842, "rewards_train/rejected": -13.144246101379395, "step": 1653 }, { "epoch": 0.81, "learning_rate": 8.722133622720142e-07, "loss": 0.0, "step": 1654 }, { "epoch": 0.81, "logps_train/chosen": -71.50382232666016, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -265.94927978515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5093176364898682, "rewards_train/margins": 13.258365392684937, "rewards_train/rejected": -13.767683029174805, "step": 1654 }, { "epoch": 0.81, "learning_rate": 8.720321002567805e-07, "loss": 0.0001, "step": 1655 }, { "epoch": 0.81, "logps_train/chosen": -68.04349517822266, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -133.125, "logps_train/rejected": -278.339111328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.24853892624378204, "rewards_train/margins": 14.272676512598991, "rewards_train/rejected": -14.521215438842773, "step": 1655 }, { "epoch": 0.82, "learning_rate": 8.718507286383797e-07, "loss": 0.0, "step": 1656 }, { "epoch": 0.82, "logps_train/chosen": -68.89828491210938, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -267.40081787109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.39754289388656616, "rewards_train/margins": 13.33833795785904, "rewards_train/rejected": -13.735880851745605, "step": 1656 }, { "epoch": 0.82, "learning_rate": 8.716692474702449e-07, "loss": 0.0001, "step": 1657 }, { "epoch": 0.82, "logps_train/chosen": -66.22515869140625, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -263.884521484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.24492742121219635, "rewards_train/margins": 13.37951086461544, "rewards_train/rejected": -13.624438285827637, "step": 1657 }, { "epoch": 0.82, "learning_rate": 8.71487656805842e-07, "loss": 0.0003, "step": 1658 }, { "epoch": 0.82, "logps_train/chosen": -65.13876342773438, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -257.80059814453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.12129847705364227, "rewards_train/margins": 13.113299682736397, "rewards_train/rejected": -13.234598159790039, "step": 1658 }, { "epoch": 0.82, "learning_rate": 8.713059566986688e-07, "loss": 0.0009, "step": 1659 }, { "epoch": 0.82, "logps_train/chosen": -68.92633819580078, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -264.772216796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.23408925533294678, "rewards_train/margins": 13.392202019691467, "rewards_train/rejected": -13.626291275024414, "step": 1659 }, { "epoch": 0.82, "learning_rate": 8.711241472022551e-07, "loss": 0.0002, "step": 1660 }, { "epoch": 0.82, "logps_train/chosen": -68.9860610961914, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -267.92584228515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.266135036945343, "rewards_train/margins": 13.684262454509735, "rewards_train/rejected": -13.950397491455078, "step": 1660 }, { "epoch": 0.82, "learning_rate": 8.709422283701634e-07, "loss": 0.0001, "step": 1661 }, { "epoch": 0.82, "logps_train/chosen": -70.38969421386719, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -255.82666015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5393597483634949, "rewards_train/margins": 12.719379723072052, "rewards_train/rejected": -13.258739471435547, "step": 1661 }, { "epoch": 0.82, "learning_rate": 8.707602002559883e-07, "loss": 0.0004, "step": 1662 }, { "epoch": 0.82, "logps_train/chosen": -64.8798599243164, "logps_train/ref_chosen": -63.59375, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -251.74685668945312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1293432116508484, "rewards_train/margins": 12.636648952960968, "rewards_train/rejected": -12.765992164611816, "step": 1662 }, { "epoch": 0.82, "learning_rate": 8.705780629133564e-07, "loss": 0.0002, "step": 1663 }, { "epoch": 0.82, "logps_train/chosen": -67.67143249511719, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -260.40582275390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4063032865524292, "rewards_train/margins": 12.85102665424347, "rewards_train/rejected": -13.257329940795898, "step": 1663 }, { "epoch": 0.82, "learning_rate": 8.703958163959264e-07, "loss": 0.0001, "step": 1664 }, { "epoch": 0.82, "logps_train/chosen": -62.52292251586914, "logps_train/ref_chosen": -61.96875, "logps_train/ref_rejected": -120.6875, "logps_train/rejected": -250.89498901367188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.055709850043058395, "rewards_train/margins": 12.963428486138582, "rewards_train/rejected": -13.01913833618164, "step": 1664 }, { "epoch": 0.82, "learning_rate": 8.702134607573896e-07, "loss": 0.0005, "step": 1665 }, { "epoch": 0.82, "logps_train/chosen": -72.0984115600586, "logps_train/ref_chosen": -69.125, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -260.67523193359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.30071038007736206, "rewards_train/margins": 13.03302413225174, "rewards_train/rejected": -13.333734512329102, "step": 1665 }, { "epoch": 0.82, "learning_rate": 8.700309960514692e-07, "loss": 0.0001, "step": 1666 }, { "epoch": 0.82, "logps_train/chosen": -68.9110107421875, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -264.256591796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3601437211036682, "rewards_train/margins": 13.378992974758148, "rewards_train/rejected": -13.739136695861816, "step": 1666 }, { "epoch": 0.82, "learning_rate": 8.698484223319205e-07, "loss": 0.0, "step": 1667 }, { "epoch": 0.82, "logps_train/chosen": -68.05287170410156, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -256.0704040527344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4220840334892273, "rewards_train/margins": 12.6564901471138, "rewards_train/rejected": -13.078574180603027, "step": 1667 }, { "epoch": 0.82, "learning_rate": 8.696657396525309e-07, "loss": 0.0003, "step": 1668 }, { "epoch": 0.82, "logps_train/chosen": -75.32711791992188, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -268.82586669921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9668426513671875, "rewards_train/margins": 13.163549423217773, "rewards_train/rejected": -14.130392074584961, "step": 1668 }, { "epoch": 0.82, "learning_rate": 8.6948294806712e-07, "loss": 0.0001, "step": 1669 }, { "epoch": 0.82, "logps_train/chosen": -63.41864013671875, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -123.1875, "logps_train/rejected": -254.96823120117188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.06979376822710037, "rewards_train/margins": 13.107304506003857, "rewards_train/rejected": -13.177098274230957, "step": 1669 }, { "epoch": 0.82, "learning_rate": 8.693000476295395e-07, "loss": 0.0001, "step": 1670 }, { "epoch": 0.82, "logps_train/chosen": -69.5777587890625, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -267.8048095703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5917119979858398, "rewards_train/margins": 13.709861755371094, "rewards_train/rejected": -14.301573753356934, "step": 1670 }, { "epoch": 0.82, "learning_rate": 8.691170383936729e-07, "loss": 0.0002, "step": 1671 }, { "epoch": 0.82, "logps_train/chosen": -63.71092987060547, "logps_train/ref_chosen": -60.84375, "logps_train/ref_rejected": -118.1875, "logps_train/rejected": -251.056396484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.28745028376579285, "rewards_train/margins": 13.00022080540657, "rewards_train/rejected": -13.287671089172363, "step": 1671 }, { "epoch": 0.82, "learning_rate": 8.689339204134361e-07, "loss": 0.0, "step": 1672 }, { "epoch": 0.82, "logps_train/chosen": -66.40177154541016, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -261.67681884765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1572175920009613, "rewards_train/margins": 13.461881071329117, "rewards_train/rejected": -13.619098663330078, "step": 1672 }, { "epoch": 0.82, "learning_rate": 8.687506937427769e-07, "loss": 0.0001, "step": 1673 }, { "epoch": 0.82, "logps_train/chosen": -67.89988708496094, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -268.2118835449219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.23681482672691345, "rewards_train/margins": 13.727781921625137, "rewards_train/rejected": -13.96459674835205, "step": 1673 }, { "epoch": 0.82, "learning_rate": 8.685673584356751e-07, "loss": 0.0001, "step": 1674 }, { "epoch": 0.82, "logps_train/chosen": -66.2468032836914, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -260.6365966796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2738504409790039, "rewards_train/margins": 13.32413387298584, "rewards_train/rejected": -13.597984313964844, "step": 1674 }, { "epoch": 0.82, "learning_rate": 8.683839145461425e-07, "loss": 0.0001, "step": 1675 }, { "epoch": 0.82, "logps_train/chosen": -66.30277252197266, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -255.55003356933594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.19927150011062622, "rewards_train/margins": 12.881806075572968, "rewards_train/rejected": -13.081077575683594, "step": 1675 }, { "epoch": 0.83, "learning_rate": 8.682003621282228e-07, "loss": 0.0005, "step": 1676 }, { "epoch": 0.83, "logps_train/chosen": -66.94917297363281, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -120.625, "logps_train/rejected": -251.42855834960938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3455764651298523, "rewards_train/margins": 12.733022511005402, "rewards_train/rejected": -13.078598976135254, "step": 1676 }, { "epoch": 0.83, "learning_rate": 8.680167012359921e-07, "loss": 0.0003, "step": 1677 }, { "epoch": 0.83, "logps_train/chosen": -68.45761108398438, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -254.33071899414062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.282284140586853, "rewards_train/margins": 12.795905709266663, "rewards_train/rejected": -13.078189849853516, "step": 1677 }, { "epoch": 0.83, "learning_rate": 8.678329319235577e-07, "loss": 0.0007, "step": 1678 }, { "epoch": 0.83, "logps_train/chosen": -67.99478912353516, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -265.8624267578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3819502294063568, "rewards_train/margins": 13.440522342920303, "rewards_train/rejected": -13.82247257232666, "step": 1678 }, { "epoch": 0.83, "learning_rate": 8.676490542450597e-07, "loss": 0.0, "step": 1679 }, { "epoch": 0.83, "logps_train/chosen": -68.23194122314453, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -264.99774169921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.26259851455688477, "rewards_train/margins": 13.209588527679443, "rewards_train/rejected": -13.472187042236328, "step": 1679 }, { "epoch": 0.83, "learning_rate": 8.674650682546692e-07, "loss": 0.0002, "step": 1680 }, { "epoch": 0.83, "logps_train/chosen": -65.3786849975586, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -120.875, "logps_train/rejected": -256.6036376953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.32488059997558594, "rewards_train/margins": 13.249645233154297, "rewards_train/rejected": -13.574525833129883, "step": 1680 }, { "epoch": 0.83, "learning_rate": 8.672809740065903e-07, "loss": 0.0013, "step": 1681 }, { "epoch": 0.83, "logps_train/chosen": -67.15341186523438, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -260.8511962890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.17930622398853302, "rewards_train/margins": 13.464506909251213, "rewards_train/rejected": -13.643813133239746, "step": 1681 }, { "epoch": 0.83, "learning_rate": 8.670967715550581e-07, "loss": 0.0025, "step": 1682 }, { "epoch": 0.83, "logps_train/chosen": -62.90040969848633, "logps_train/ref_chosen": -61.5, "logps_train/ref_rejected": -121.5625, "logps_train/rejected": -252.2389373779297, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.14023630321025848, "rewards_train/margins": 12.924722269177437, "rewards_train/rejected": -13.064958572387695, "step": 1682 }, { "epoch": 0.83, "learning_rate": 8.669124609543398e-07, "loss": 0.0001, "step": 1683 }, { "epoch": 0.83, "logps_train/chosen": -64.2413558959961, "logps_train/ref_chosen": -61.84375, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -263.7943420410156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2399074286222458, "rewards_train/margins": 13.478834941983223, "rewards_train/rejected": -13.718742370605469, "step": 1683 }, { "epoch": 0.83, "learning_rate": 8.667280422587349e-07, "loss": 0.0001, "step": 1684 }, { "epoch": 0.83, "logps_train/chosen": -61.134952545166016, "logps_train/ref_chosen": -60.9375, "logps_train/ref_rejected": -117.9375, "logps_train/rejected": -245.32960510253906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.01950121857225895, "rewards_train/margins": 12.718829622492194, "rewards_train/rejected": -12.738330841064453, "step": 1684 }, { "epoch": 0.83, "learning_rate": 8.66543515522574e-07, "loss": 0.0002, "step": 1685 }, { "epoch": 0.83, "logps_train/chosen": -67.98533630371094, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -260.29241943359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.35297709703445435, "rewards_train/margins": 12.901021659374237, "rewards_train/rejected": -13.253998756408691, "step": 1685 }, { "epoch": 0.83, "learning_rate": 8.6635888080022e-07, "loss": 0.0002, "step": 1686 }, { "epoch": 0.83, "logps_train/chosen": -67.65634155273438, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -267.93304443359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.22051751613616943, "rewards_train/margins": 13.625324845314026, "rewards_train/rejected": -13.845842361450195, "step": 1686 }, { "epoch": 0.83, "learning_rate": 8.661741381460676e-07, "loss": 0.0003, "step": 1687 }, { "epoch": 0.83, "logps_train/chosen": -65.12458801269531, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -263.61932373046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2738361060619354, "rewards_train/margins": 13.596495658159256, "rewards_train/rejected": -13.870331764221191, "step": 1687 }, { "epoch": 0.83, "learning_rate": 8.659892876145434e-07, "loss": 0.0001, "step": 1688 }, { "epoch": 0.83, "logps_train/chosen": -71.54185485839844, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -270.162841796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5628767609596252, "rewards_train/margins": 13.730360925197601, "rewards_train/rejected": -14.293237686157227, "step": 1688 }, { "epoch": 0.83, "learning_rate": 8.658043292601054e-07, "loss": 0.0, "step": 1689 }, { "epoch": 0.83, "logps_train/chosen": -67.80467224121094, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -255.3909454345703, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2998522222042084, "rewards_train/margins": 12.946176677942276, "rewards_train/rejected": -13.246028900146484, "step": 1689 }, { "epoch": 0.83, "learning_rate": 8.656192631372436e-07, "loss": 0.0001, "step": 1690 }, { "epoch": 0.83, "logps_train/chosen": -71.53419494628906, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -120.1875, "logps_train/rejected": -249.34149169921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7581065893173218, "rewards_train/margins": 12.155778527259827, "rewards_train/rejected": -12.913885116577148, "step": 1690 }, { "epoch": 0.83, "learning_rate": 8.654340893004799e-07, "loss": 0.0004, "step": 1691 }, { "epoch": 0.83, "logps_train/chosen": -69.8366928100586, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -259.75830078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.45564180612564087, "rewards_train/margins": 12.847141206264496, "rewards_train/rejected": -13.302783012390137, "step": 1691 }, { "epoch": 0.83, "learning_rate": 8.652488078043676e-07, "loss": 0.0003, "step": 1692 }, { "epoch": 0.83, "logps_train/chosen": -70.14627838134766, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -251.55088806152344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6050090789794922, "rewards_train/margins": 12.141682624816895, "rewards_train/rejected": -12.746691703796387, "step": 1692 }, { "epoch": 0.83, "learning_rate": 8.650634187034917e-07, "loss": 0.0003, "step": 1693 }, { "epoch": 0.83, "logps_train/chosen": -68.12706756591797, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -259.81427001953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4442500174045563, "rewards_train/margins": 12.973749250173569, "rewards_train/rejected": -13.417999267578125, "step": 1693 }, { "epoch": 0.83, "learning_rate": 8.648779220524696e-07, "loss": 0.0001, "step": 1694 }, { "epoch": 0.83, "logps_train/chosen": -62.03595733642578, "logps_train/ref_chosen": -62.53125, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -260.0355224609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.04782074689865112, "rewards_train/margins": 13.532524764537811, "rewards_train/rejected": -13.48470401763916, "step": 1694 }, { "epoch": 0.83, "learning_rate": 8.646923179059493e-07, "loss": 0.0001, "step": 1695 }, { "epoch": 0.83, "logps_train/chosen": -69.15113830566406, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -275.22412109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3143572211265564, "rewards_train/margins": 14.192677080631256, "rewards_train/rejected": -14.507034301757812, "step": 1695 }, { "epoch": 0.84, "learning_rate": 8.645066063186114e-07, "loss": 0.0001, "step": 1696 }, { "epoch": 0.84, "logps_train/chosen": -67.02777099609375, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -263.33868408203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2891058027744293, "rewards_train/margins": 13.685291856527328, "rewards_train/rejected": -13.974397659301758, "step": 1696 }, { "epoch": 0.84, "learning_rate": 8.643207873451677e-07, "loss": 0.0001, "step": 1697 }, { "epoch": 0.84, "logps_train/chosen": -72.39249420166016, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -270.4676208496094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6530191898345947, "rewards_train/margins": 13.584367513656616, "rewards_train/rejected": -14.237386703491211, "step": 1697 }, { "epoch": 0.84, "learning_rate": 8.641348610403616e-07, "loss": 0.0003, "step": 1698 }, { "epoch": 0.84, "logps_train/chosen": -67.78204345703125, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -253.5947265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3036932051181793, "rewards_train/margins": 12.80128726363182, "rewards_train/rejected": -13.10498046875, "step": 1698 }, { "epoch": 0.84, "learning_rate": 8.639488274589684e-07, "loss": 0.0002, "step": 1699 }, { "epoch": 0.84, "logps_train/chosen": -67.14045715332031, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -133.375, "logps_train/rejected": -275.56268310546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.18147774040699005, "rewards_train/margins": 14.044032856822014, "rewards_train/rejected": -14.225510597229004, "step": 1699 }, { "epoch": 0.84, "learning_rate": 8.637626866557948e-07, "loss": 0.0001, "step": 1700 }, { "epoch": 0.84, "logps_train/chosen": -69.00566864013672, "logps_train/ref_chosen": -62.0625, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -263.93328857421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6948050260543823, "rewards_train/margins": 13.327526211738586, "rewards_train/rejected": -14.022331237792969, "step": 1700 }, { "epoch": 0.84, "learning_rate": 8.635764386856792e-07, "loss": 0.0001, "step": 1701 }, { "epoch": 0.84, "logps_train/chosen": -68.22590637207031, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -257.06793212890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.33640891313552856, "rewards_train/margins": 12.874780356884003, "rewards_train/rejected": -13.211189270019531, "step": 1701 }, { "epoch": 0.84, "learning_rate": 8.633900836034913e-07, "loss": 0.0001, "step": 1702 }, { "epoch": 0.84, "logps_train/chosen": -65.6384048461914, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -259.72747802734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.33915773034095764, "rewards_train/margins": 13.246332496404648, "rewards_train/rejected": -13.585490226745605, "step": 1702 }, { "epoch": 0.84, "learning_rate": 8.632036214641327e-07, "loss": 0.0001, "step": 1703 }, { "epoch": 0.84, "logps_train/chosen": -67.76341247558594, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -260.8531799316406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.32033491134643555, "rewards_train/margins": 13.30707311630249, "rewards_train/rejected": -13.627408027648926, "step": 1703 }, { "epoch": 0.84, "learning_rate": 8.630170523225365e-07, "loss": 0.0016, "step": 1704 }, { "epoch": 0.84, "logps_train/chosen": -72.00729370117188, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -266.6075134277344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6262906789779663, "rewards_train/margins": 13.189637780189514, "rewards_train/rejected": -13.81592845916748, "step": 1704 }, { "epoch": 0.84, "learning_rate": 8.628303762336671e-07, "loss": 0.0026, "step": 1705 }, { "epoch": 0.84, "logps_train/chosen": -67.5698013305664, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -266.2326965332031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3564187288284302, "rewards_train/margins": 13.773883700370789, "rewards_train/rejected": -14.130302429199219, "step": 1705 }, { "epoch": 0.84, "learning_rate": 8.626435932525205e-07, "loss": 0.0005, "step": 1706 }, { "epoch": 0.84, "logps_train/chosen": -66.50239562988281, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -258.23126220703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.12138137221336365, "rewards_train/margins": 13.209605604410172, "rewards_train/rejected": -13.330986976623535, "step": 1706 }, { "epoch": 0.84, "learning_rate": 8.624567034341244e-07, "loss": 0.0002, "step": 1707 }, { "epoch": 0.84, "logps_train/chosen": -68.04037475585938, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -264.3802490234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.22444799542427063, "rewards_train/margins": 13.431841105222702, "rewards_train/rejected": -13.656289100646973, "step": 1707 }, { "epoch": 0.84, "learning_rate": 8.622697068335376e-07, "loss": 0.0001, "step": 1708 }, { "epoch": 0.84, "logps_train/chosen": -67.78778076171875, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -267.06646728515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3220399022102356, "rewards_train/margins": 13.5762078166008, "rewards_train/rejected": -13.898247718811035, "step": 1708 }, { "epoch": 0.84, "learning_rate": 8.620826035058507e-07, "loss": 0.0001, "step": 1709 }, { "epoch": 0.84, "logps_train/chosen": -67.08342742919922, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -268.78692626953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.37113580107688904, "rewards_train/margins": 13.78319063782692, "rewards_train/rejected": -14.154326438903809, "step": 1709 }, { "epoch": 0.84, "learning_rate": 8.618953935061857e-07, "loss": 0.0001, "step": 1710 }, { "epoch": 0.84, "logps_train/chosen": -69.8184814453125, "logps_train/ref_chosen": -62.46875, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -263.76043701171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7356570959091187, "rewards_train/margins": 13.054548859596252, "rewards_train/rejected": -13.790205955505371, "step": 1710 }, { "epoch": 0.84, "learning_rate": 8.617080768896956e-07, "loss": 0.0002, "step": 1711 }, { "epoch": 0.84, "logps_train/chosen": -64.60192108154297, "logps_train/ref_chosen": -61.09375, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -261.66912841796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.34888818860054016, "rewards_train/margins": 13.562994211912155, "rewards_train/rejected": -13.911882400512695, "step": 1711 }, { "epoch": 0.84, "learning_rate": 8.615206537115654e-07, "loss": 0.0001, "step": 1712 }, { "epoch": 0.84, "logps_train/chosen": -73.88414764404297, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -272.21966552734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7879757285118103, "rewards_train/margins": 13.526323854923248, "rewards_train/rejected": -14.314299583435059, "step": 1712 }, { "epoch": 0.84, "learning_rate": 8.613331240270111e-07, "loss": 0.0, "step": 1713 }, { "epoch": 0.84, "logps_train/chosen": -67.98495483398438, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -263.9187927246094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.537020742893219, "rewards_train/margins": 13.263356149196625, "rewards_train/rejected": -13.800376892089844, "step": 1713 }, { "epoch": 0.84, "learning_rate": 8.611454878912803e-07, "loss": 0.0002, "step": 1714 }, { "epoch": 0.84, "logps_train/chosen": -64.42462921142578, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -130.625, "logps_train/rejected": -266.5529479980469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0417165607213974, "rewards_train/margins": 13.638905510306358, "rewards_train/rejected": -13.597188949584961, "step": 1714 }, { "epoch": 0.84, "learning_rate": 8.609577453596519e-07, "loss": 0.0, "step": 1715 }, { "epoch": 0.84, "logps_train/chosen": -64.53488159179688, "logps_train/ref_chosen": -62.8125, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -260.0391540527344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1712610274553299, "rewards_train/margins": 13.329822346568108, "rewards_train/rejected": -13.501083374023438, "step": 1715 }, { "epoch": 0.84, "learning_rate": 8.60769896487436e-07, "loss": 0.0001, "step": 1716 }, { "epoch": 0.84, "logps_train/chosen": -66.6624755859375, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -261.7745361328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.31849342584609985, "rewards_train/margins": 13.62321799993515, "rewards_train/rejected": -13.94171142578125, "step": 1716 }, { "epoch": 0.85, "learning_rate": 8.605819413299742e-07, "loss": 0.0001, "step": 1717 }, { "epoch": 0.85, "logps_train/chosen": -68.63721466064453, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -265.9717102050781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3956550061702728, "rewards_train/margins": 13.44878163933754, "rewards_train/rejected": -13.844436645507812, "step": 1717 }, { "epoch": 0.85, "learning_rate": 8.603938799426394e-07, "loss": 0.0004, "step": 1718 }, { "epoch": 0.85, "logps_train/chosen": -66.91981506347656, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -261.1018371582031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.46185457706451416, "rewards_train/margins": 13.224552512168884, "rewards_train/rejected": -13.686407089233398, "step": 1718 }, { "epoch": 0.85, "learning_rate": 8.602057123808358e-07, "loss": 0.0003, "step": 1719 }, { "epoch": 0.85, "logps_train/chosen": -69.74514770507812, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -272.81414794921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.48447585105895996, "rewards_train/margins": 13.694254159927368, "rewards_train/rejected": -14.178730010986328, "step": 1719 }, { "epoch": 0.85, "learning_rate": 8.600174386999987e-07, "loss": 0.0006, "step": 1720 }, { "epoch": 0.85, "logps_train/chosen": -59.46123504638672, "logps_train/ref_chosen": -61.375, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -246.822509765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.19147415459156036, "rewards_train/margins": 12.427532389760017, "rewards_train/rejected": -12.236058235168457, "step": 1720 }, { "epoch": 0.85, "learning_rate": 8.598290589555948e-07, "loss": 0.0003, "step": 1721 }, { "epoch": 0.85, "logps_train/chosen": -69.57764434814453, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -266.9541015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.28637760877609253, "rewards_train/margins": 13.68754893541336, "rewards_train/rejected": -13.973926544189453, "step": 1721 }, { "epoch": 0.85, "learning_rate": 8.59640573203122e-07, "loss": 0.0001, "step": 1722 }, { "epoch": 0.85, "logps_train/chosen": -68.31828308105469, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -122.8125, "logps_train/rejected": -261.0229797363281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3840738832950592, "rewards_train/margins": 13.438927978277206, "rewards_train/rejected": -13.823001861572266, "step": 1722 }, { "epoch": 0.85, "learning_rate": 8.594519814981096e-07, "loss": 0.0001, "step": 1723 }, { "epoch": 0.85, "logps_train/chosen": -66.96786499023438, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -258.7241516113281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.29219695925712585, "rewards_train/margins": 13.225919991731644, "rewards_train/rejected": -13.51811695098877, "step": 1723 }, { "epoch": 0.85, "learning_rate": 8.592632838961181e-07, "loss": 0.0004, "step": 1724 }, { "epoch": 0.85, "logps_train/chosen": -65.60308837890625, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -264.9121398925781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.09507440775632858, "rewards_train/margins": 13.529342897236347, "rewards_train/rejected": -13.624417304992676, "step": 1724 }, { "epoch": 0.85, "learning_rate": 8.590744804527388e-07, "loss": 0.0, "step": 1725 }, { "epoch": 0.85, "logps_train/chosen": -69.10527038574219, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -130.375, "logps_train/rejected": -275.1793212890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3836219906806946, "rewards_train/margins": 14.101596057415009, "rewards_train/rejected": -14.485218048095703, "step": 1725 }, { "epoch": 0.85, "learning_rate": 8.588855712235947e-07, "loss": 0.0, "step": 1726 }, { "epoch": 0.85, "logps_train/chosen": -69.88825988769531, "logps_train/ref_chosen": -67.9375, "logps_train/ref_rejected": -131.75, "logps_train/rejected": -271.36083984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.19824948906898499, "rewards_train/margins": 13.766742080450058, "rewards_train/rejected": -13.964991569519043, "step": 1726 }, { "epoch": 0.85, "learning_rate": 8.586965562643395e-07, "loss": 0.0, "step": 1727 }, { "epoch": 0.85, "logps_train/chosen": -68.01469421386719, "logps_train/ref_chosen": -62.53125, "logps_train/ref_rejected": -123.1875, "logps_train/rejected": -255.38722229003906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5476118326187134, "rewards_train/margins": 12.672358870506287, "rewards_train/rejected": -13.219970703125, "step": 1727 }, { "epoch": 0.85, "learning_rate": 8.585074356306588e-07, "loss": 0.0001, "step": 1728 }, { "epoch": 0.85, "logps_train/chosen": -66.16522216796875, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -259.5301513671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.32584819197654724, "rewards_train/margins": 13.351190954446793, "rewards_train/rejected": -13.67703914642334, "step": 1728 }, { "epoch": 0.85, "learning_rate": 8.583182093782681e-07, "loss": 0.0, "step": 1729 }, { "epoch": 0.85, "logps_train/chosen": -64.862548828125, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -121.3125, "logps_train/rejected": -255.10189819335938, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1811031997203827, "rewards_train/margins": 13.198763400316238, "rewards_train/rejected": -13.379866600036621, "step": 1729 }, { "epoch": 0.85, "learning_rate": 8.58128877562915e-07, "loss": 0.0001, "step": 1730 }, { "epoch": 0.85, "logps_train/chosen": -65.1551742553711, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -267.113525390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.28206953406333923, "rewards_train/margins": 13.907114654779434, "rewards_train/rejected": -14.189184188842773, "step": 1730 }, { "epoch": 0.85, "learning_rate": 8.579394402403782e-07, "loss": 0.0002, "step": 1731 }, { "epoch": 0.85, "logps_train/chosen": -62.88520431518555, "logps_train/ref_chosen": -62.84375, "logps_train/ref_rejected": -120.1875, "logps_train/rejected": -251.74118041992188, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.004072040319442749, "rewards_train/margins": 13.148367077112198, "rewards_train/rejected": -13.15243911743164, "step": 1731 }, { "epoch": 0.85, "learning_rate": 8.57749897466467e-07, "loss": 0.0001, "step": 1732 }, { "epoch": 0.85, "logps_train/chosen": -70.83393859863281, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -272.7056884765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.47084546089172363, "rewards_train/margins": 13.948357343673706, "rewards_train/rejected": -14.41920280456543, "step": 1732 }, { "epoch": 0.85, "learning_rate": 8.57560249297022e-07, "loss": 0.0002, "step": 1733 }, { "epoch": 0.85, "logps_train/chosen": -71.77157592773438, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -270.8681945800781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5950286388397217, "rewards_train/margins": 13.52460503578186, "rewards_train/rejected": -14.119633674621582, "step": 1733 }, { "epoch": 0.85, "learning_rate": 8.573704957879148e-07, "loss": 0.0, "step": 1734 }, { "epoch": 0.85, "logps_train/chosen": -65.77400207519531, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -121.25, "logps_train/rejected": -259.5255126953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3273998498916626, "rewards_train/margins": 13.501226782798767, "rewards_train/rejected": -13.82862663269043, "step": 1734 }, { "epoch": 0.85, "learning_rate": 8.57180636995048e-07, "loss": 0.0001, "step": 1735 }, { "epoch": 0.85, "logps_train/chosen": -67.55734252929688, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -262.9955749511719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.27438652515411377, "rewards_train/margins": 13.39382255077362, "rewards_train/rejected": -13.668209075927734, "step": 1735 }, { "epoch": 0.85, "learning_rate": 8.569906729743555e-07, "loss": 0.0001, "step": 1736 }, { "epoch": 0.85, "logps_train/chosen": -65.80012512207031, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -255.4142303466797, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.15008141100406647, "rewards_train/margins": 13.083968386054039, "rewards_train/rejected": -13.234049797058105, "step": 1736 }, { "epoch": 0.86, "learning_rate": 8.568006037818018e-07, "loss": 0.0002, "step": 1737 }, { "epoch": 0.86, "logps_train/chosen": -66.71035766601562, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -118.25, "logps_train/rejected": -245.25466918945312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2922274172306061, "rewards_train/margins": 12.411021560430527, "rewards_train/rejected": -12.703248977661133, "step": 1737 }, { "epoch": 0.86, "learning_rate": 8.566104294733826e-07, "loss": 0.0001, "step": 1738 }, { "epoch": 0.86, "logps_train/chosen": -66.229248046875, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -260.6846618652344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.12907665967941284, "rewards_train/margins": 13.441245377063751, "rewards_train/rejected": -13.570322036743164, "step": 1738 }, { "epoch": 0.86, "learning_rate": 8.564201501051246e-07, "loss": 0.0, "step": 1739 }, { "epoch": 0.86, "logps_train/chosen": -67.5850830078125, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -121.125, "logps_train/rejected": -256.987548828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.23385068774223328, "rewards_train/margins": 13.351476460695267, "rewards_train/rejected": -13.5853271484375, "step": 1739 }, { "epoch": 0.86, "learning_rate": 8.562297657330852e-07, "loss": 0.0002, "step": 1740 }, { "epoch": 0.86, "logps_train/chosen": -68.28495025634766, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -264.8974609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.32092714309692383, "rewards_train/margins": 13.466474056243896, "rewards_train/rejected": -13.78740119934082, "step": 1740 }, { "epoch": 0.86, "learning_rate": 8.560392764133532e-07, "loss": 0.0023, "step": 1741 }, { "epoch": 0.86, "logps_train/chosen": -69.89515686035156, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -265.69964599609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.34683936834335327, "rewards_train/margins": 13.377226412296295, "rewards_train/rejected": -13.724065780639648, "step": 1741 }, { "epoch": 0.86, "learning_rate": 8.55848682202048e-07, "loss": 0.0001, "step": 1742 }, { "epoch": 0.86, "logps_train/chosen": -62.535499572753906, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -120.875, "logps_train/rejected": -251.18472290039062, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.016294226050376892, "rewards_train/margins": 13.018293634057045, "rewards_train/rejected": -13.034587860107422, "step": 1742 }, { "epoch": 0.86, "learning_rate": 8.556579831553197e-07, "loss": 0.0002, "step": 1743 }, { "epoch": 0.86, "logps_train/chosen": -75.88488006591797, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -269.49810791015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9388294816017151, "rewards_train/margins": 13.339593827724457, "rewards_train/rejected": -14.278423309326172, "step": 1743 }, { "epoch": 0.86, "learning_rate": 8.554671793293498e-07, "loss": 0.0, "step": 1744 }, { "epoch": 0.86, "logps_train/chosen": -68.50466918945312, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -272.1484069824219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.30159059166908264, "rewards_train/margins": 13.949478477239609, "rewards_train/rejected": -14.251069068908691, "step": 1744 }, { "epoch": 0.86, "learning_rate": 8.552762707803501e-07, "loss": 0.0001, "step": 1745 }, { "epoch": 0.86, "logps_train/chosen": -68.96019744873047, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -267.2872314453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4515857994556427, "rewards_train/margins": 13.595983475446701, "rewards_train/rejected": -14.047569274902344, "step": 1745 }, { "epoch": 0.86, "learning_rate": 8.550852575645637e-07, "loss": 0.0005, "step": 1746 }, { "epoch": 0.86, "logps_train/chosen": -67.59965515136719, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -120.875, "logps_train/rejected": -253.2520751953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.44080084562301636, "rewards_train/margins": 12.79622346162796, "rewards_train/rejected": -13.237024307250977, "step": 1746 }, { "epoch": 0.86, "learning_rate": 8.548941397382645e-07, "loss": 0.0003, "step": 1747 }, { "epoch": 0.86, "logps_train/chosen": -69.49473571777344, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -267.147216796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4841904640197754, "rewards_train/margins": 13.579844951629639, "rewards_train/rejected": -14.064035415649414, "step": 1747 }, { "epoch": 0.86, "learning_rate": 8.547029173577569e-07, "loss": 0.0002, "step": 1748 }, { "epoch": 0.86, "logps_train/chosen": -64.84026336669922, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -263.3089599609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2152271568775177, "rewards_train/margins": 13.853658646345139, "rewards_train/rejected": -14.068885803222656, "step": 1748 }, { "epoch": 0.86, "learning_rate": 8.545115904793763e-07, "loss": 0.0001, "step": 1749 }, { "epoch": 0.86, "logps_train/chosen": -68.77330017089844, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -270.11370849609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1626332700252533, "rewards_train/margins": 13.859236389398575, "rewards_train/rejected": -14.021869659423828, "step": 1749 }, { "epoch": 0.86, "learning_rate": 8.543201591594893e-07, "loss": 0.0, "step": 1750 }, { "epoch": 0.86, "logps_train/chosen": -66.6455078125, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -262.3166809082031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3562498986721039, "rewards_train/margins": 13.245436578989029, "rewards_train/rejected": -13.601686477661133, "step": 1750 }, { "epoch": 0.86, "learning_rate": 8.541286234544922e-07, "loss": 0.0009, "step": 1751 }, { "epoch": 0.86, "logps_train/chosen": -70.24008178710938, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -254.93128967285156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5411468744277954, "rewards_train/margins": 12.622537970542908, "rewards_train/rejected": -13.163684844970703, "step": 1751 }, { "epoch": 0.86, "learning_rate": 8.539369834208134e-07, "loss": 0.0004, "step": 1752 }, { "epoch": 0.86, "logps_train/chosen": -68.86663055419922, "logps_train/ref_chosen": -67.875, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -276.3409729003906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.09818647801876068, "rewards_train/margins": 14.468431487679482, "rewards_train/rejected": -14.566617965698242, "step": 1752 }, { "epoch": 0.86, "learning_rate": 8.537452391149107e-07, "loss": 0.0003, "step": 1753 }, { "epoch": 0.86, "logps_train/chosen": -68.06892395019531, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -266.16949462890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.23999793827533722, "rewards_train/margins": 13.494237825274467, "rewards_train/rejected": -13.734235763549805, "step": 1753 }, { "epoch": 0.86, "learning_rate": 8.535533905932737e-07, "loss": 0.0002, "step": 1754 }, { "epoch": 0.86, "logps_train/chosen": -64.28136444091797, "logps_train/ref_chosen": -62.5625, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -266.60723876953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1713489592075348, "rewards_train/margins": 13.803729623556137, "rewards_train/rejected": -13.975078582763672, "step": 1754 }, { "epoch": 0.86, "learning_rate": 8.533614379124221e-07, "loss": 0.0006, "step": 1755 }, { "epoch": 0.86, "logps_train/chosen": -68.91371154785156, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -120.3125, "logps_train/rejected": -258.3249206542969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.522034764289856, "rewards_train/margins": 13.275984644889832, "rewards_train/rejected": -13.798019409179688, "step": 1755 }, { "epoch": 0.86, "learning_rate": 8.531693811289066e-07, "loss": 0.0004, "step": 1756 }, { "epoch": 0.86, "logps_train/chosen": -69.13123321533203, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -264.14892578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5823131799697876, "rewards_train/margins": 13.317443251609802, "rewards_train/rejected": -13.89975643157959, "step": 1756 }, { "epoch": 0.87, "learning_rate": 8.529772202993082e-07, "loss": 0.0001, "step": 1757 }, { "epoch": 0.87, "logps_train/chosen": -67.80667114257812, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -269.52459716796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2879909574985504, "rewards_train/margins": 13.675309747457504, "rewards_train/rejected": -13.963300704956055, "step": 1757 }, { "epoch": 0.87, "learning_rate": 8.527849554802388e-07, "loss": 0.0001, "step": 1758 }, { "epoch": 0.87, "logps_train/chosen": -68.86651611328125, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -266.5604248046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2331358790397644, "rewards_train/margins": 13.75596171617508, "rewards_train/rejected": -13.989097595214844, "step": 1758 }, { "epoch": 0.87, "learning_rate": 8.525925867283412e-07, "loss": 0.0002, "step": 1759 }, { "epoch": 0.87, "logps_train/chosen": -65.58876037597656, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -267.2762145996094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.011875264346599579, "rewards_train/margins": 14.15668498724699, "rewards_train/rejected": -14.14480972290039, "step": 1759 }, { "epoch": 0.87, "learning_rate": 8.524001141002884e-07, "loss": 0.0, "step": 1760 }, { "epoch": 0.87, "logps_train/chosen": -65.60514831542969, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -269.294677734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2234056293964386, "rewards_train/margins": 13.90625974535942, "rewards_train/rejected": -14.12966537475586, "step": 1760 }, { "epoch": 0.87, "learning_rate": 8.522075376527839e-07, "loss": 0.0, "step": 1761 }, { "epoch": 0.87, "logps_train/chosen": -72.46180725097656, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -261.91802978515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8690322637557983, "rewards_train/margins": 13.011346459388733, "rewards_train/rejected": -13.880378723144531, "step": 1761 }, { "epoch": 0.87, "learning_rate": 8.520148574425623e-07, "loss": 0.0008, "step": 1762 }, { "epoch": 0.87, "logps_train/chosen": -64.86673736572266, "logps_train/ref_chosen": -63.03125, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -252.17681884765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.18210801482200623, "rewards_train/margins": 12.684598833322525, "rewards_train/rejected": -12.866706848144531, "step": 1762 }, { "epoch": 0.87, "learning_rate": 8.518220735263884e-07, "loss": 0.0004, "step": 1763 }, { "epoch": 0.87, "logps_train/chosen": -65.78463745117188, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -260.0677490234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2364477813243866, "rewards_train/margins": 13.232145816087723, "rewards_train/rejected": -13.46859359741211, "step": 1763 }, { "epoch": 0.87, "learning_rate": 8.516291859610574e-07, "loss": 0.0002, "step": 1764 }, { "epoch": 0.87, "logps_train/chosen": -68.90638732910156, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -266.22332763671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4452284276485443, "rewards_train/margins": 13.433059841394424, "rewards_train/rejected": -13.878288269042969, "step": 1764 }, { "epoch": 0.87, "learning_rate": 8.514361948033956e-07, "loss": 0.0007, "step": 1765 }, { "epoch": 0.87, "logps_train/chosen": -70.00997924804688, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -266.1181945800781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.578781247138977, "rewards_train/margins": 13.590265154838562, "rewards_train/rejected": -14.169046401977539, "step": 1765 }, { "epoch": 0.87, "learning_rate": 8.512431001102596e-07, "loss": 0.0, "step": 1766 }, { "epoch": 0.87, "logps_train/chosen": -69.90821075439453, "logps_train/ref_chosen": -61.25, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -259.7886047363281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8649911880493164, "rewards_train/margins": 12.940821647644043, "rewards_train/rejected": -13.80581283569336, "step": 1766 }, { "epoch": 0.87, "learning_rate": 8.510499019385361e-07, "loss": 0.0004, "step": 1767 }, { "epoch": 0.87, "logps_train/chosen": -66.35073852539062, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -268.7477722167969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.21690955758094788, "rewards_train/margins": 13.97002586722374, "rewards_train/rejected": -14.186935424804688, "step": 1767 }, { "epoch": 0.87, "learning_rate": 8.508566003451425e-07, "loss": 0.0004, "step": 1768 }, { "epoch": 0.87, "logps_train/chosen": -62.04189682006836, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -261.3500061035156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.12251948565244675, "rewards_train/margins": 13.536427490413189, "rewards_train/rejected": -13.413908004760742, "step": 1768 }, { "epoch": 0.87, "learning_rate": 8.506631953870271e-07, "loss": 0.0002, "step": 1769 }, { "epoch": 0.87, "logps_train/chosen": -66.9225845336914, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -261.7783508300781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.38273680210113525, "rewards_train/margins": 13.301495909690857, "rewards_train/rejected": -13.684232711791992, "step": 1769 }, { "epoch": 0.87, "learning_rate": 8.504696871211683e-07, "loss": 0.0004, "step": 1770 }, { "epoch": 0.87, "logps_train/chosen": -66.98938751220703, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -120.6875, "logps_train/rejected": -255.53477478027344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.29376283288002014, "rewards_train/margins": 13.192235320806503, "rewards_train/rejected": -13.485998153686523, "step": 1770 }, { "epoch": 0.87, "learning_rate": 8.502760756045746e-07, "loss": 0.0001, "step": 1771 }, { "epoch": 0.87, "logps_train/chosen": -68.08500671386719, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -271.1971435546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.16631346940994263, "rewards_train/margins": 14.284993827342987, "rewards_train/rejected": -14.45130729675293, "step": 1771 }, { "epoch": 0.87, "learning_rate": 8.500823608942855e-07, "loss": 0.0, "step": 1772 }, { "epoch": 0.87, "logps_train/chosen": -68.24093627929688, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -266.21527099609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3748258352279663, "rewards_train/margins": 13.520822167396545, "rewards_train/rejected": -13.895648002624512, "step": 1772 }, { "epoch": 0.87, "learning_rate": 8.498885430473706e-07, "loss": 0.0004, "step": 1773 }, { "epoch": 0.87, "logps_train/chosen": -64.95022583007812, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -255.2075958251953, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1946314126253128, "rewards_train/margins": 13.049665614962578, "rewards_train/rejected": -13.24429702758789, "step": 1773 }, { "epoch": 0.87, "learning_rate": 8.496946221209299e-07, "loss": 0.0001, "step": 1774 }, { "epoch": 0.87, "logps_train/chosen": -66.3644790649414, "logps_train/ref_chosen": -61.78125, "logps_train/ref_rejected": -118.6875, "logps_train/rejected": -247.49803161621094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.45885977149009705, "rewards_train/margins": 12.421948105096817, "rewards_train/rejected": -12.880807876586914, "step": 1774 }, { "epoch": 0.87, "learning_rate": 8.495005981720939e-07, "loss": 0.0011, "step": 1775 }, { "epoch": 0.87, "logps_train/chosen": -69.20033264160156, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -119.375, "logps_train/rejected": -257.13458251953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6301403641700745, "rewards_train/margins": 13.14322942495346, "rewards_train/rejected": -13.773369789123535, "step": 1775 }, { "epoch": 0.87, "learning_rate": 8.493064712580233e-07, "loss": 0.0003, "step": 1776 }, { "epoch": 0.87, "logps_train/chosen": -66.31275939941406, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -265.6539306640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2120865136384964, "rewards_train/margins": 13.741295978426933, "rewards_train/rejected": -13.95338249206543, "step": 1776 }, { "epoch": 0.87, "learning_rate": 8.491122414359093e-07, "loss": 0.0001, "step": 1777 }, { "epoch": 0.87, "logps_train/chosen": -70.12886047363281, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -262.120849609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6459909677505493, "rewards_train/margins": 13.276349425315857, "rewards_train/rejected": -13.922340393066406, "step": 1777 }, { "epoch": 0.88, "learning_rate": 8.489179087629732e-07, "loss": 0.0003, "step": 1778 }, { "epoch": 0.88, "logps_train/chosen": -70.77316284179688, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -268.6342468261719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6283912658691406, "rewards_train/margins": 13.555198669433594, "rewards_train/rejected": -14.183589935302734, "step": 1778 }, { "epoch": 0.88, "learning_rate": 8.487234732964668e-07, "loss": 0.0, "step": 1779 }, { "epoch": 0.88, "logps_train/chosen": -68.56964111328125, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -262.2734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4934147000312805, "rewards_train/margins": 13.407754123210907, "rewards_train/rejected": -13.901168823242188, "step": 1779 }, { "epoch": 0.88, "learning_rate": 8.48528935093672e-07, "loss": 0.0002, "step": 1780 }, { "epoch": 0.88, "logps_train/chosen": -65.0457763671875, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -257.187744140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.11356183141469955, "rewards_train/margins": 13.232116498053074, "rewards_train/rejected": -13.345678329467773, "step": 1780 }, { "epoch": 0.88, "learning_rate": 8.483342942119013e-07, "loss": 0.0006, "step": 1781 }, { "epoch": 0.88, "logps_train/chosen": -68.24098205566406, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -265.34368896484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.46233102679252625, "rewards_train/margins": 13.580731183290482, "rewards_train/rejected": -14.043062210083008, "step": 1781 }, { "epoch": 0.88, "learning_rate": 8.48139550708497e-07, "loss": 0.0001, "step": 1782 }, { "epoch": 0.88, "logps_train/chosen": -64.8314208984375, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -264.26934814453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1701045036315918, "rewards_train/margins": 13.68749475479126, "rewards_train/rejected": -13.857599258422852, "step": 1782 }, { "epoch": 0.88, "learning_rate": 8.479447046408317e-07, "loss": 0.0003, "step": 1783 }, { "epoch": 0.88, "logps_train/chosen": -68.12831115722656, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -272.92193603515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2186412215232849, "rewards_train/margins": 14.061491072177887, "rewards_train/rejected": -14.280132293701172, "step": 1783 }, { "epoch": 0.88, "learning_rate": 8.477497560663087e-07, "loss": 0.0, "step": 1784 }, { "epoch": 0.88, "logps_train/chosen": -67.6736068725586, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -263.485107421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.45954829454421997, "rewards_train/margins": 13.469872176647186, "rewards_train/rejected": -13.929420471191406, "step": 1784 }, { "epoch": 0.88, "learning_rate": 8.47554705042361e-07, "loss": 0.0003, "step": 1785 }, { "epoch": 0.88, "logps_train/chosen": -68.38152313232422, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -266.152099609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4826836884021759, "rewards_train/margins": 13.500590771436691, "rewards_train/rejected": -13.983274459838867, "step": 1785 }, { "epoch": 0.88, "learning_rate": 8.47359551626452e-07, "loss": 0.0001, "step": 1786 }, { "epoch": 0.88, "logps_train/chosen": -71.83332824707031, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -272.6778869628906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5617024898529053, "rewards_train/margins": 13.923959016799927, "rewards_train/rejected": -14.485661506652832, "step": 1786 }, { "epoch": 0.88, "learning_rate": 8.471642958760752e-07, "loss": 0.0004, "step": 1787 }, { "epoch": 0.88, "logps_train/chosen": -68.40398406982422, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -265.46881103515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2929866313934326, "rewards_train/margins": 13.676452875137329, "rewards_train/rejected": -13.969439506530762, "step": 1787 }, { "epoch": 0.88, "learning_rate": 8.469689378487543e-07, "loss": 0.0001, "step": 1788 }, { "epoch": 0.88, "logps_train/chosen": -67.14180755615234, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -266.553466796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.32965946197509766, "rewards_train/margins": 13.810843467712402, "rewards_train/rejected": -14.1405029296875, "step": 1788 }, { "epoch": 0.88, "learning_rate": 8.46773477602043e-07, "loss": 0.0002, "step": 1789 }, { "epoch": 0.88, "logps_train/chosen": -69.56817626953125, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -266.23431396484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5263983011245728, "rewards_train/margins": 13.649768233299255, "rewards_train/rejected": -14.176166534423828, "step": 1789 }, { "epoch": 0.88, "learning_rate": 8.465779151935251e-07, "loss": 0.0001, "step": 1790 }, { "epoch": 0.88, "logps_train/chosen": -66.86258697509766, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -264.4004821777344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3331337571144104, "rewards_train/margins": 13.4978808760643, "rewards_train/rejected": -13.831014633178711, "step": 1790 }, { "epoch": 0.88, "learning_rate": 8.46382250680815e-07, "loss": 0.0002, "step": 1791 }, { "epoch": 0.88, "logps_train/chosen": -67.94058227539062, "logps_train/ref_chosen": -62.875, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -271.93853759765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5066069960594177, "rewards_train/margins": 13.82069593667984, "rewards_train/rejected": -14.327302932739258, "step": 1791 }, { "epoch": 0.88, "learning_rate": 8.461864841215565e-07, "loss": 0.0, "step": 1792 }, { "epoch": 0.88, "logps_train/chosen": -71.57164001464844, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -272.82757568359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5946638584136963, "rewards_train/margins": 13.785459280014038, "rewards_train/rejected": -14.380123138427734, "step": 1792 }, { "epoch": 0.88, "learning_rate": 8.459906155734238e-07, "loss": 0.0, "step": 1793 }, { "epoch": 0.88, "logps_train/chosen": -66.17691040039062, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -123.3125, "logps_train/rejected": -261.71380615234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.26842373609542847, "rewards_train/margins": 13.56916743516922, "rewards_train/rejected": -13.837591171264648, "step": 1793 }, { "epoch": 0.88, "learning_rate": 8.457946450941209e-07, "loss": 0.0003, "step": 1794 }, { "epoch": 0.88, "logps_train/chosen": -63.74036407470703, "logps_train/ref_chosen": -63.59375, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -260.76690673828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.016077488660812378, "rewards_train/margins": 13.434928447008133, "rewards_train/rejected": -13.451005935668945, "step": 1794 }, { "epoch": 0.88, "learning_rate": 8.455985727413823e-07, "loss": 0.0004, "step": 1795 }, { "epoch": 0.88, "logps_train/chosen": -72.06825256347656, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -271.6756286621094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.747742772102356, "rewards_train/margins": 13.693158984184265, "rewards_train/rejected": -14.440901756286621, "step": 1795 }, { "epoch": 0.88, "learning_rate": 8.454023985729724e-07, "loss": 0.0001, "step": 1796 }, { "epoch": 0.88, "logps_train/chosen": -66.63544464111328, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -269.88433837890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2372746616601944, "rewards_train/margins": 13.921668514609337, "rewards_train/rejected": -14.158943176269531, "step": 1796 }, { "epoch": 0.88, "learning_rate": 8.452061226466849e-07, "loss": 0.0001, "step": 1797 }, { "epoch": 0.88, "logps_train/chosen": -67.39105987548828, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -123.1875, "logps_train/rejected": -262.0298767089844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2917429804801941, "rewards_train/margins": 13.590297043323517, "rewards_train/rejected": -13.882040023803711, "step": 1797 }, { "epoch": 0.89, "learning_rate": 8.450097450203444e-07, "loss": 0.0002, "step": 1798 }, { "epoch": 0.89, "logps_train/chosen": -73.72822570800781, "logps_train/ref_chosen": -68.5625, "logps_train/ref_rejected": -133.125, "logps_train/rejected": -280.93499755859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5154981017112732, "rewards_train/margins": 14.260619223117828, "rewards_train/rejected": -14.776117324829102, "step": 1798 }, { "epoch": 0.89, "learning_rate": 8.448132657518049e-07, "loss": 0.0001, "step": 1799 }, { "epoch": 0.89, "logps_train/chosen": -66.888427734375, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -255.9225311279297, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3792227804660797, "rewards_train/margins": 12.912153333425522, "rewards_train/rejected": -13.291376113891602, "step": 1799 }, { "epoch": 0.89, "learning_rate": 8.446166848989507e-07, "loss": 0.0006, "step": 1800 }, { "epoch": 0.89, "logps_train/chosen": -70.0891342163086, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -272.1455078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.46176984906196594, "rewards_train/margins": 13.988277643918991, "rewards_train/rejected": -14.450047492980957, "step": 1800 }, { "epoch": 0.89, "learning_rate": 8.444200025196957e-07, "loss": 0.0001, "step": 1801 }, { "epoch": 0.89, "logps_train/chosen": -72.03190612792969, "logps_train/ref_chosen": -67.0625, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -272.9035339355469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4953784942626953, "rewards_train/margins": 13.921293258666992, "rewards_train/rejected": -14.416671752929688, "step": 1801 }, { "epoch": 0.89, "learning_rate": 8.442232186719839e-07, "loss": 0.0002, "step": 1802 }, { "epoch": 0.89, "logps_train/chosen": -68.89669799804688, "logps_train/ref_chosen": -67.375, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -272.2379150390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.15475735068321228, "rewards_train/margins": 14.109368473291397, "rewards_train/rejected": -14.26412582397461, "step": 1802 }, { "epoch": 0.89, "learning_rate": 8.440263334137892e-07, "loss": 0.0, "step": 1803 }, { "epoch": 0.89, "logps_train/chosen": -68.33747863769531, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -267.8548583984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4065505266189575, "rewards_train/margins": 13.757742762565613, "rewards_train/rejected": -14.16429328918457, "step": 1803 }, { "epoch": 0.89, "learning_rate": 8.43829346803115e-07, "loss": 0.0, "step": 1804 }, { "epoch": 0.89, "logps_train/chosen": -73.01261901855469, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -260.37872314453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8107348680496216, "rewards_train/margins": 12.940709948539734, "rewards_train/rejected": -13.751444816589355, "step": 1804 }, { "epoch": 0.89, "learning_rate": 8.436322588979954e-07, "loss": 0.0002, "step": 1805 }, { "epoch": 0.89, "logps_train/chosen": -70.91474914550781, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -275.0940246582031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6014609336853027, "rewards_train/margins": 14.028058528900146, "rewards_train/rejected": -14.62951946258545, "step": 1805 }, { "epoch": 0.89, "learning_rate": 8.434350697564936e-07, "loss": 0.0, "step": 1806 }, { "epoch": 0.89, "logps_train/chosen": -71.00487518310547, "logps_train/ref_chosen": -67.8125, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -274.04718017578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3166983723640442, "rewards_train/margins": 14.056669890880585, "rewards_train/rejected": -14.373368263244629, "step": 1806 }, { "epoch": 0.89, "learning_rate": 8.432377794367027e-07, "loss": 0.0001, "step": 1807 }, { "epoch": 0.89, "logps_train/chosen": -67.62711334228516, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -271.0042419433594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.16764241456985474, "rewards_train/margins": 14.07897299528122, "rewards_train/rejected": -14.246615409851074, "step": 1807 }, { "epoch": 0.89, "learning_rate": 8.430403879967459e-07, "loss": 0.0, "step": 1808 }, { "epoch": 0.89, "logps_train/chosen": -70.6846694946289, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -269.77069091796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6384373903274536, "rewards_train/margins": 13.460020899772644, "rewards_train/rejected": -14.098458290100098, "step": 1808 }, { "epoch": 0.89, "learning_rate": 8.428428954947762e-07, "loss": 0.0003, "step": 1809 }, { "epoch": 0.89, "logps_train/chosen": -71.51506042480469, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -269.97943115234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5419847965240479, "rewards_train/margins": 13.812988996505737, "rewards_train/rejected": -14.354973793029785, "step": 1809 }, { "epoch": 0.89, "learning_rate": 8.426453019889759e-07, "loss": 0.0002, "step": 1810 }, { "epoch": 0.89, "logps_train/chosen": -71.6019515991211, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -263.162841796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7278708219528198, "rewards_train/margins": 13.16360867023468, "rewards_train/rejected": -13.8914794921875, "step": 1810 }, { "epoch": 0.89, "learning_rate": 8.424476075375576e-07, "loss": 0.0004, "step": 1811 }, { "epoch": 0.89, "logps_train/chosen": -65.09092712402344, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -122.0625, "logps_train/rejected": -258.2159729003906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1786240190267563, "rewards_train/margins": 13.437701359391212, "rewards_train/rejected": -13.616325378417969, "step": 1811 }, { "epoch": 0.89, "learning_rate": 8.422498121987634e-07, "loss": 0.0001, "step": 1812 }, { "epoch": 0.89, "logps_train/chosen": -64.71759796142578, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -258.2425537109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.12478680908679962, "rewards_train/margins": 13.083062693476677, "rewards_train/rejected": -13.207849502563477, "step": 1812 }, { "epoch": 0.89, "learning_rate": 8.42051916030865e-07, "loss": 0.0001, "step": 1813 }, { "epoch": 0.89, "logps_train/chosen": -69.85362243652344, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -271.5792541503906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6643180251121521, "rewards_train/margins": 13.634330809116364, "rewards_train/rejected": -14.298648834228516, "step": 1813 }, { "epoch": 0.89, "learning_rate": 8.418539190921642e-07, "loss": 0.0002, "step": 1814 }, { "epoch": 0.89, "logps_train/chosen": -70.22811889648438, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -268.197998046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5451754927635193, "rewards_train/margins": 13.491956770420074, "rewards_train/rejected": -14.037132263183594, "step": 1814 }, { "epoch": 0.89, "learning_rate": 8.416558214409919e-07, "loss": 0.0002, "step": 1815 }, { "epoch": 0.89, "logps_train/chosen": -69.22737121582031, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -268.66241455078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7107252478599548, "rewards_train/margins": 13.631296694278717, "rewards_train/rejected": -14.342021942138672, "step": 1815 }, { "epoch": 0.89, "learning_rate": 8.414576231357093e-07, "loss": 0.0001, "step": 1816 }, { "epoch": 0.89, "logps_train/chosen": -69.96597290039062, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -271.73956298828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3603184223175049, "rewards_train/margins": 13.962660551071167, "rewards_train/rejected": -14.322978973388672, "step": 1816 }, { "epoch": 0.89, "learning_rate": 8.412593242347069e-07, "loss": 0.0, "step": 1817 }, { "epoch": 0.89, "logps_train/chosen": -74.37701416015625, "logps_train/ref_chosen": -67.5, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -278.7275085449219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6853087544441223, "rewards_train/margins": 14.367814719676971, "rewards_train/rejected": -15.053123474121094, "step": 1817 }, { "epoch": 0.9, "learning_rate": 8.410609247964048e-07, "loss": 0.0001, "step": 1818 }, { "epoch": 0.9, "logps_train/chosen": -67.0068359375, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -267.516845703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.25358909368515015, "rewards_train/margins": 13.93217808008194, "rewards_train/rejected": -14.18576717376709, "step": 1818 }, { "epoch": 0.9, "learning_rate": 8.408624248792528e-07, "loss": 0.0002, "step": 1819 }, { "epoch": 0.9, "logps_train/chosen": -69.66943359375, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -267.551513671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3740226924419403, "rewards_train/margins": 13.563794881105423, "rewards_train/rejected": -13.937817573547363, "step": 1819 }, { "epoch": 0.9, "learning_rate": 8.406638245417304e-07, "loss": 0.0005, "step": 1820 }, { "epoch": 0.9, "logps_train/chosen": -72.32209777832031, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -271.56414794921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.64300137758255, "rewards_train/margins": 14.061119258403778, "rewards_train/rejected": -14.704120635986328, "step": 1820 }, { "epoch": 0.9, "learning_rate": 8.404651238423468e-07, "loss": 0.0001, "step": 1821 }, { "epoch": 0.9, "logps_train/chosen": -70.9705810546875, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -271.9715576171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6951043605804443, "rewards_train/margins": 13.818945169448853, "rewards_train/rejected": -14.514049530029297, "step": 1821 }, { "epoch": 0.9, "learning_rate": 8.402663228396401e-07, "loss": 0.0007, "step": 1822 }, { "epoch": 0.9, "logps_train/chosen": -64.1184310913086, "logps_train/ref_chosen": -61.40625, "logps_train/ref_rejected": -116.75, "logps_train/rejected": -251.67050170898438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.26975324749946594, "rewards_train/margins": 13.219269961118698, "rewards_train/rejected": -13.489023208618164, "step": 1822 }, { "epoch": 0.9, "learning_rate": 8.400674215921786e-07, "loss": 0.0001, "step": 1823 }, { "epoch": 0.9, "logps_train/chosen": -73.31594848632812, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -271.07476806640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8145051002502441, "rewards_train/margins": 13.436134815216064, "rewards_train/rejected": -14.250639915466309, "step": 1823 }, { "epoch": 0.9, "learning_rate": 8.398684201585602e-07, "loss": 0.0001, "step": 1824 }, { "epoch": 0.9, "logps_train/chosen": -67.21986389160156, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -257.74609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.26627373695373535, "rewards_train/margins": 13.246810674667358, "rewards_train/rejected": -13.513084411621094, "step": 1824 }, { "epoch": 0.9, "learning_rate": 8.396693185974118e-07, "loss": 0.0001, "step": 1825 }, { "epoch": 0.9, "logps_train/chosen": -68.9330825805664, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -261.65087890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5425270795822144, "rewards_train/margins": 13.284430623054504, "rewards_train/rejected": -13.826957702636719, "step": 1825 }, { "epoch": 0.9, "learning_rate": 8.3947011696739e-07, "loss": 0.0005, "step": 1826 }, { "epoch": 0.9, "logps_train/chosen": -66.17716217041016, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -266.9318542480469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.15082140266895294, "rewards_train/margins": 13.769121453166008, "rewards_train/rejected": -13.919942855834961, "step": 1826 }, { "epoch": 0.9, "learning_rate": 8.392708153271813e-07, "loss": 0.0001, "step": 1827 }, { "epoch": 0.9, "logps_train/chosen": -75.5296859741211, "logps_train/ref_chosen": -69.4375, "logps_train/ref_rejected": -132.25, "logps_train/rejected": -281.10467529296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6107323169708252, "rewards_train/margins": 14.275225400924683, "rewards_train/rejected": -14.885957717895508, "step": 1827 }, { "epoch": 0.9, "learning_rate": 8.390714137355008e-07, "loss": 0.0001, "step": 1828 }, { "epoch": 0.9, "logps_train/chosen": -66.92826843261719, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -261.8515930175781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.23191380500793457, "rewards_train/margins": 13.49606966972351, "rewards_train/rejected": -13.727983474731445, "step": 1828 }, { "epoch": 0.9, "learning_rate": 8.388719122510941e-07, "loss": 0.0, "step": 1829 }, { "epoch": 0.9, "logps_train/chosen": -65.69127655029297, "logps_train/ref_chosen": -60.9375, "logps_train/ref_rejected": -120.6875, "logps_train/rejected": -256.326416015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4754754304885864, "rewards_train/margins": 13.091490626335144, "rewards_train/rejected": -13.56696605682373, "step": 1829 }, { "epoch": 0.9, "learning_rate": 8.386723109327354e-07, "loss": 0.0002, "step": 1830 }, { "epoch": 0.9, "logps_train/chosen": -64.9903564453125, "logps_train/ref_chosen": -61.75, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -262.1202697753906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3236454427242279, "rewards_train/margins": 13.394143253564835, "rewards_train/rejected": -13.717788696289062, "step": 1830 }, { "epoch": 0.9, "learning_rate": 8.384726098392285e-07, "loss": 0.0, "step": 1831 }, { "epoch": 0.9, "logps_train/chosen": -69.75401306152344, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -120.375, "logps_train/rejected": -257.1343994140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5031356811523438, "rewards_train/margins": 13.16889762878418, "rewards_train/rejected": -13.672033309936523, "step": 1831 }, { "epoch": 0.9, "learning_rate": 8.382728090294068e-07, "loss": 0.0003, "step": 1832 }, { "epoch": 0.9, "logps_train/chosen": -68.91921997070312, "logps_train/ref_chosen": -63.59375, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -259.6290283203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5312773585319519, "rewards_train/margins": 13.290999710559845, "rewards_train/rejected": -13.822277069091797, "step": 1832 }, { "epoch": 0.9, "learning_rate": 8.380729085621329e-07, "loss": 0.0002, "step": 1833 }, { "epoch": 0.9, "logps_train/chosen": -67.5821304321289, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -268.4890441894531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1803814023733139, "rewards_train/margins": 14.047136679291725, "rewards_train/rejected": -14.227518081665039, "step": 1833 }, { "epoch": 0.9, "learning_rate": 8.378729084962992e-07, "loss": 0.0001, "step": 1834 }, { "epoch": 0.9, "logps_train/chosen": -70.06034851074219, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -267.5252380371094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.40300804376602173, "rewards_train/margins": 13.624612271785736, "rewards_train/rejected": -14.027620315551758, "step": 1834 }, { "epoch": 0.9, "learning_rate": 8.376728088908266e-07, "loss": 0.0001, "step": 1835 }, { "epoch": 0.9, "logps_train/chosen": -65.97395324707031, "logps_train/ref_chosen": -62.03125, "logps_train/ref_rejected": -119.8125, "logps_train/rejected": -254.61135864257812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3937332332134247, "rewards_train/margins": 13.087080746889114, "rewards_train/rejected": -13.480813980102539, "step": 1835 }, { "epoch": 0.9, "learning_rate": 8.374726098046659e-07, "loss": 0.0001, "step": 1836 }, { "epoch": 0.9, "logps_train/chosen": -64.39569091796875, "logps_train/ref_chosen": -63.15625, "logps_train/ref_rejected": -122.8125, "logps_train/rejected": -262.69036865234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.12501835823059082, "rewards_train/margins": 13.862963438034058, "rewards_train/rejected": -13.987981796264648, "step": 1836 }, { "epoch": 0.9, "learning_rate": 8.372723112967973e-07, "loss": 0.0001, "step": 1837 }, { "epoch": 0.9, "logps_train/chosen": -68.30194854736328, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -270.5714416503906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3990424871444702, "rewards_train/margins": 14.056686043739319, "rewards_train/rejected": -14.455728530883789, "step": 1837 }, { "epoch": 0.9, "learning_rate": 8.370719134262298e-07, "loss": 0.0001, "step": 1838 }, { "epoch": 0.9, "logps_train/chosen": -66.84381866455078, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -260.4574279785156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.30259472131729126, "rewards_train/margins": 13.253647267818451, "rewards_train/rejected": -13.556241989135742, "step": 1838 }, { "epoch": 0.91, "learning_rate": 8.368714162520023e-07, "loss": 0.0001, "step": 1839 }, { "epoch": 0.91, "logps_train/chosen": -71.99496459960938, "logps_train/ref_chosen": -67.1875, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -272.83172607421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4826756417751312, "rewards_train/margins": 13.78311339020729, "rewards_train/rejected": -14.265789031982422, "step": 1839 }, { "epoch": 0.91, "learning_rate": 8.366708198331825e-07, "loss": 0.0005, "step": 1840 }, { "epoch": 0.91, "logps_train/chosen": -72.893798828125, "logps_train/ref_chosen": -67.4375, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -269.640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5480219125747681, "rewards_train/margins": 13.605395674705505, "rewards_train/rejected": -14.153417587280273, "step": 1840 }, { "epoch": 0.91, "learning_rate": 8.364701242288672e-07, "loss": 0.0, "step": 1841 }, { "epoch": 0.91, "logps_train/chosen": -66.78074645996094, "logps_train/ref_chosen": -62.28125, "logps_train/ref_rejected": -122.3125, "logps_train/rejected": -259.05419921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4500468373298645, "rewards_train/margins": 13.222757995128632, "rewards_train/rejected": -13.672804832458496, "step": 1841 }, { "epoch": 0.91, "learning_rate": 8.362693294981828e-07, "loss": 0.0001, "step": 1842 }, { "epoch": 0.91, "logps_train/chosen": -69.34305572509766, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -269.7205810546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5529086589813232, "rewards_train/margins": 13.603232622146606, "rewards_train/rejected": -14.15614128112793, "step": 1842 }, { "epoch": 0.91, "learning_rate": 8.360684357002852e-07, "loss": 0.0001, "step": 1843 }, { "epoch": 0.91, "logps_train/chosen": -68.19279479980469, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -263.40020751953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4640549421310425, "rewards_train/margins": 13.470154881477356, "rewards_train/rejected": -13.934209823608398, "step": 1843 }, { "epoch": 0.91, "learning_rate": 8.358674428943585e-07, "loss": 0.0001, "step": 1844 }, { "epoch": 0.91, "logps_train/chosen": -68.6237564086914, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -272.3193054199219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4145244359970093, "rewards_train/margins": 13.928049683570862, "rewards_train/rejected": -14.342574119567871, "step": 1844 }, { "epoch": 0.91, "learning_rate": 8.356663511396168e-07, "loss": 0.0, "step": 1845 }, { "epoch": 0.91, "logps_train/chosen": -67.03670501708984, "logps_train/ref_chosen": -61.59375, "logps_train/ref_rejected": -122.3125, "logps_train/rejected": -261.1767272949219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5453206300735474, "rewards_train/margins": 13.34197986125946, "rewards_train/rejected": -13.887300491333008, "step": 1845 }, { "epoch": 0.91, "learning_rate": 8.354651604953032e-07, "loss": 0.0001, "step": 1846 }, { "epoch": 0.91, "logps_train/chosen": -71.93861389160156, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -267.045654296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5171770453453064, "rewards_train/margins": 13.408580362796783, "rewards_train/rejected": -13.92575740814209, "step": 1846 }, { "epoch": 0.91, "learning_rate": 8.352638710206894e-07, "loss": 0.0001, "step": 1847 }, { "epoch": 0.91, "logps_train/chosen": -72.76221466064453, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -132.125, "logps_train/rejected": -280.5289306640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6159188747406006, "rewards_train/margins": 14.218615770339966, "rewards_train/rejected": -14.834534645080566, "step": 1847 }, { "epoch": 0.91, "learning_rate": 8.350624827750771e-07, "loss": 0.0002, "step": 1848 }, { "epoch": 0.91, "logps_train/chosen": -68.89056396484375, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -122.8125, "logps_train/rejected": -263.8060607910156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.45687854290008545, "rewards_train/margins": 13.64096462726593, "rewards_train/rejected": -14.097843170166016, "step": 1848 }, { "epoch": 0.91, "learning_rate": 8.348609958177963e-07, "loss": 0.0003, "step": 1849 }, { "epoch": 0.91, "logps_train/chosen": -70.61148834228516, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -135.0, "logps_train/rejected": -283.9881286621094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4092447757720947, "rewards_train/margins": 14.493669271469116, "rewards_train/rejected": -14.902914047241211, "step": 1849 }, { "epoch": 0.91, "learning_rate": 8.346594102082067e-07, "loss": 0.0001, "step": 1850 }, { "epoch": 0.91, "logps_train/chosen": -67.35589599609375, "logps_train/ref_chosen": -62.84375, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -269.09954833984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4499448537826538, "rewards_train/margins": 13.897217392921448, "rewards_train/rejected": -14.347162246704102, "step": 1850 }, { "epoch": 0.91, "learning_rate": 8.344577260056969e-07, "loss": 0.0001, "step": 1851 }, { "epoch": 0.91, "logps_train/chosen": -67.98210906982422, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -268.4560546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3055844306945801, "rewards_train/margins": 13.84944486618042, "rewards_train/rejected": -14.155029296875, "step": 1851 }, { "epoch": 0.91, "learning_rate": 8.342559432696837e-07, "loss": 0.0, "step": 1852 }, { "epoch": 0.91, "logps_train/chosen": -67.83920288085938, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -260.5623474121094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3401951193809509, "rewards_train/margins": 13.61687046289444, "rewards_train/rejected": -13.95706558227539, "step": 1852 }, { "epoch": 0.91, "learning_rate": 8.340540620596144e-07, "loss": 0.0001, "step": 1853 }, { "epoch": 0.91, "logps_train/chosen": -68.20278930664062, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -267.42413330078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.34020036458969116, "rewards_train/margins": 13.729850828647614, "rewards_train/rejected": -14.070051193237305, "step": 1853 }, { "epoch": 0.91, "learning_rate": 8.338520824349643e-07, "loss": 0.0004, "step": 1854 }, { "epoch": 0.91, "logps_train/chosen": -74.9783935546875, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -277.07489013671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8373907208442688, "rewards_train/margins": 13.776836574077606, "rewards_train/rejected": -14.614227294921875, "step": 1854 }, { "epoch": 0.91, "learning_rate": 8.336500044552379e-07, "loss": 0.0, "step": 1855 }, { "epoch": 0.91, "logps_train/chosen": -70.0873031616211, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -273.44482421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5237208604812622, "rewards_train/margins": 13.862069964408875, "rewards_train/rejected": -14.385790824890137, "step": 1855 }, { "epoch": 0.91, "learning_rate": 8.334478281799689e-07, "loss": 0.0001, "step": 1856 }, { "epoch": 0.91, "logps_train/chosen": -65.62765502929688, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -276.20782470703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.11042146384716034, "rewards_train/margins": 14.565535262227058, "rewards_train/rejected": -14.675956726074219, "step": 1856 }, { "epoch": 0.91, "learning_rate": 8.332455536687195e-07, "loss": 0.0001, "step": 1857 }, { "epoch": 0.91, "logps_train/chosen": -69.22393035888672, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -263.68603515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5538873076438904, "rewards_train/margins": 13.51691347360611, "rewards_train/rejected": -14.07080078125, "step": 1857 }, { "epoch": 0.91, "learning_rate": 8.330431809810814e-07, "loss": 0.0001, "step": 1858 }, { "epoch": 0.91, "logps_train/chosen": -62.59782791137695, "logps_train/ref_chosen": -61.5625, "logps_train/ref_rejected": -122.0625, "logps_train/rejected": -252.09133911132812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.10411898791790009, "rewards_train/margins": 12.900181129574776, "rewards_train/rejected": -13.004300117492676, "step": 1858 }, { "epoch": 0.92, "learning_rate": 8.32840710176675e-07, "loss": 0.0002, "step": 1859 }, { "epoch": 0.92, "logps_train/chosen": -69.99505615234375, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -269.06591796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3600529134273529, "rewards_train/margins": 13.88813892006874, "rewards_train/rejected": -14.248191833496094, "step": 1859 }, { "epoch": 0.92, "learning_rate": 8.326381413151494e-07, "loss": 0.0, "step": 1860 }, { "epoch": 0.92, "logps_train/chosen": -70.64596557617188, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -266.94232177734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5595179796218872, "rewards_train/margins": 13.39467704296112, "rewards_train/rejected": -13.954195022583008, "step": 1860 }, { "epoch": 0.92, "learning_rate": 8.324354744561828e-07, "loss": 0.0001, "step": 1861 }, { "epoch": 0.92, "logps_train/chosen": -77.27056121826172, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -268.8397216796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1255908012390137, "rewards_train/margins": 13.456136226654053, "rewards_train/rejected": -14.581727027893066, "step": 1861 }, { "epoch": 0.92, "learning_rate": 8.322327096594821e-07, "loss": 0.0004, "step": 1862 }, { "epoch": 0.92, "logps_train/chosen": -73.15238952636719, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -272.6630859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5994669795036316, "rewards_train/margins": 14.086374580860138, "rewards_train/rejected": -14.68584156036377, "step": 1862 }, { "epoch": 0.92, "learning_rate": 8.320298469847835e-07, "loss": 0.0, "step": 1863 }, { "epoch": 0.92, "logps_train/chosen": -73.61489868164062, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -130.625, "logps_train/rejected": -276.22955322265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7184237837791443, "rewards_train/margins": 13.838319838047028, "rewards_train/rejected": -14.556743621826172, "step": 1863 }, { "epoch": 0.92, "learning_rate": 8.318268864918514e-07, "loss": 0.0001, "step": 1864 }, { "epoch": 0.92, "logps_train/chosen": -69.42318725585938, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -267.60198974609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5293302536010742, "rewards_train/margins": 13.465049743652344, "rewards_train/rejected": -13.994379997253418, "step": 1864 }, { "epoch": 0.92, "learning_rate": 8.316238282404795e-07, "loss": 0.0026, "step": 1865 }, { "epoch": 0.92, "logps_train/chosen": -68.48358154296875, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -121.375, "logps_train/rejected": -262.06744384765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5480645895004272, "rewards_train/margins": 13.52225649356842, "rewards_train/rejected": -14.070321083068848, "step": 1865 }, { "epoch": 0.92, "learning_rate": 8.314206722904901e-07, "loss": 0.0003, "step": 1866 }, { "epoch": 0.92, "logps_train/chosen": -73.1315689086914, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -131.5, "logps_train/rejected": -278.4473876953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7971901893615723, "rewards_train/margins": 13.888856410980225, "rewards_train/rejected": -14.686046600341797, "step": 1866 }, { "epoch": 0.92, "learning_rate": 8.312174187017342e-07, "loss": 0.0, "step": 1867 }, { "epoch": 0.92, "logps_train/chosen": -68.59623718261719, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -130.375, "logps_train/rejected": -276.1847229003906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3054244816303253, "rewards_train/margins": 14.27124997973442, "rewards_train/rejected": -14.576674461364746, "step": 1867 }, { "epoch": 0.92, "learning_rate": 8.310140675340917e-07, "loss": 0.0002, "step": 1868 }, { "epoch": 0.92, "logps_train/chosen": -69.79537200927734, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -272.1798400878906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6325642466545105, "rewards_train/margins": 13.943476021289825, "rewards_train/rejected": -14.576040267944336, "step": 1868 }, { "epoch": 0.92, "learning_rate": 8.308106188474715e-07, "loss": 0.0, "step": 1869 }, { "epoch": 0.92, "logps_train/chosen": -68.90299987792969, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -267.7018127441406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.29479169845581055, "rewards_train/margins": 14.069430828094482, "rewards_train/rejected": -14.364222526550293, "step": 1869 }, { "epoch": 0.92, "learning_rate": 8.306070727018108e-07, "loss": 0.0, "step": 1870 }, { "epoch": 0.92, "logps_train/chosen": -71.96607208251953, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -267.40625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6287848949432373, "rewards_train/margins": 13.462232828140259, "rewards_train/rejected": -14.091017723083496, "step": 1870 }, { "epoch": 0.92, "learning_rate": 8.304034291570756e-07, "loss": 0.0002, "step": 1871 }, { "epoch": 0.92, "logps_train/chosen": -72.49855041503906, "logps_train/ref_chosen": -67.75, "logps_train/ref_rejected": -134.25, "logps_train/rejected": -285.5660095214844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4787614047527313, "rewards_train/margins": 14.659773141145706, "rewards_train/rejected": -15.138534545898438, "step": 1871 }, { "epoch": 0.92, "learning_rate": 8.301996882732605e-07, "loss": 0.0, "step": 1872 }, { "epoch": 0.92, "logps_train/chosen": -65.8093490600586, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -266.6019287109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3069605231285095, "rewards_train/margins": 13.779209673404694, "rewards_train/rejected": -14.086170196533203, "step": 1872 }, { "epoch": 0.92, "learning_rate": 8.299958501103892e-07, "loss": 0.0001, "step": 1873 }, { "epoch": 0.92, "logps_train/chosen": -68.19824981689453, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -270.42999267578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.16821354627609253, "rewards_train/margins": 14.096757233142853, "rewards_train/rejected": -14.264970779418945, "step": 1873 }, { "epoch": 0.92, "learning_rate": 8.297919147285137e-07, "loss": 0.0001, "step": 1874 }, { "epoch": 0.92, "logps_train/chosen": -71.36050415039062, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -271.3824462890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7171543836593628, "rewards_train/margins": 13.894971013069153, "rewards_train/rejected": -14.612125396728516, "step": 1874 }, { "epoch": 0.92, "learning_rate": 8.295878821877149e-07, "loss": 0.0001, "step": 1875 }, { "epoch": 0.92, "logps_train/chosen": -67.50663757324219, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -267.08148193359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.33044934272766113, "rewards_train/margins": 13.837270498275757, "rewards_train/rejected": -14.167719841003418, "step": 1875 }, { "epoch": 0.92, "learning_rate": 8.29383752548102e-07, "loss": 0.0, "step": 1876 }, { "epoch": 0.92, "logps_train/chosen": -66.35254669189453, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -275.1755676269531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.18515737354755402, "rewards_train/margins": 14.395877286791801, "rewards_train/rejected": -14.581034660339355, "step": 1876 }, { "epoch": 0.92, "learning_rate": 8.291795258698128e-07, "loss": 0.0, "step": 1877 }, { "epoch": 0.92, "logps_train/chosen": -66.43156433105469, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -265.5531311035156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.24325403571128845, "rewards_train/margins": 13.73857370018959, "rewards_train/rejected": -13.981827735900879, "step": 1877 }, { "epoch": 0.92, "learning_rate": 8.289752022130143e-07, "loss": 0.0001, "step": 1878 }, { "epoch": 0.92, "logps_train/chosen": -68.654052734375, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -266.31671142578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.35581067204475403, "rewards_train/margins": 13.54309794306755, "rewards_train/rejected": -13.898908615112305, "step": 1878 }, { "epoch": 0.93, "learning_rate": 8.287707816379013e-07, "loss": 0.0001, "step": 1879 }, { "epoch": 0.93, "logps_train/chosen": -64.243896484375, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -265.5997314453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0684322640299797, "rewards_train/margins": 14.061902590095997, "rewards_train/rejected": -14.130334854125977, "step": 1879 }, { "epoch": 0.93, "learning_rate": 8.285662642046976e-07, "loss": 0.0002, "step": 1880 }, { "epoch": 0.93, "logps_train/chosen": -68.03602600097656, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -278.14239501953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2995988428592682, "rewards_train/margins": 14.505414813756943, "rewards_train/rejected": -14.805013656616211, "step": 1880 }, { "epoch": 0.93, "learning_rate": 8.283616499736553e-07, "loss": 0.0, "step": 1881 }, { "epoch": 0.93, "logps_train/chosen": -72.17794799804688, "logps_train/ref_chosen": -68.4375, "logps_train/ref_rejected": -134.625, "logps_train/rejected": -280.15191650390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3746798634529114, "rewards_train/margins": 14.177428901195526, "rewards_train/rejected": -14.552108764648438, "step": 1881 }, { "epoch": 0.93, "learning_rate": 8.281569390050551e-07, "loss": 0.0001, "step": 1882 }, { "epoch": 0.93, "logps_train/chosen": -65.9361572265625, "logps_train/ref_chosen": -62.1875, "logps_train/ref_rejected": -121.25, "logps_train/rejected": -257.69219970703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3758912682533264, "rewards_train/margins": 13.269694745540619, "rewards_train/rejected": -13.645586013793945, "step": 1882 }, { "epoch": 0.93, "learning_rate": 8.279521313592065e-07, "loss": 0.0003, "step": 1883 }, { "epoch": 0.93, "logps_train/chosen": -68.38832092285156, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -262.17449951171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4513314962387085, "rewards_train/margins": 13.090925812721252, "rewards_train/rejected": -13.542257308959961, "step": 1883 }, { "epoch": 0.93, "learning_rate": 8.277472270964469e-07, "loss": 0.0001, "step": 1884 }, { "epoch": 0.93, "logps_train/chosen": -67.96833038330078, "logps_train/ref_chosen": -62.59375, "logps_train/ref_rejected": -120.625, "logps_train/rejected": -257.022216796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5370181798934937, "rewards_train/margins": 13.10119116306305, "rewards_train/rejected": -13.638209342956543, "step": 1884 }, { "epoch": 0.93, "learning_rate": 8.275422262771429e-07, "loss": 0.0002, "step": 1885 }, { "epoch": 0.93, "logps_train/chosen": -68.15579223632812, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -265.0005187988281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.26816779375076294, "rewards_train/margins": 13.533691108226776, "rewards_train/rejected": -13.801858901977539, "step": 1885 }, { "epoch": 0.93, "learning_rate": 8.273371289616888e-07, "loss": 0.0, "step": 1886 }, { "epoch": 0.93, "logps_train/chosen": -69.58099365234375, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -273.7020263671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4105410575866699, "rewards_train/margins": 14.11561918258667, "rewards_train/rejected": -14.52616024017334, "step": 1886 }, { "epoch": 0.93, "learning_rate": 8.271319352105076e-07, "loss": 0.0002, "step": 1887 }, { "epoch": 0.93, "logps_train/chosen": -71.91165161132812, "logps_train/ref_chosen": -66.9375, "logps_train/ref_rejected": -123.1875, "logps_train/rejected": -264.63092041015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.49492478370666504, "rewards_train/margins": 13.64849066734314, "rewards_train/rejected": -14.143415451049805, "step": 1887 }, { "epoch": 0.93, "learning_rate": 8.269266450840509e-07, "loss": 0.0004, "step": 1888 }, { "epoch": 0.93, "logps_train/chosen": -70.80026245117188, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -277.2255554199219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4593229293823242, "rewards_train/margins": 14.32045841217041, "rewards_train/rejected": -14.779781341552734, "step": 1888 }, { "epoch": 0.93, "learning_rate": 8.267212586427985e-07, "loss": 0.0, "step": 1889 }, { "epoch": 0.93, "logps_train/chosen": -66.20347595214844, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -265.1166687011719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.24769139289855957, "rewards_train/margins": 13.846007108688354, "rewards_train/rejected": -14.093698501586914, "step": 1889 }, { "epoch": 0.93, "learning_rate": 8.265157759472587e-07, "loss": 0.0004, "step": 1890 }, { "epoch": 0.93, "logps_train/chosen": -76.89933013916016, "logps_train/ref_chosen": -68.8125, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -276.47491455078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8087315559387207, "rewards_train/margins": 13.768057346343994, "rewards_train/rejected": -14.576788902282715, "step": 1890 }, { "epoch": 0.93, "learning_rate": 8.263101970579683e-07, "loss": 0.0, "step": 1891 }, { "epoch": 0.93, "logps_train/chosen": -64.36958312988281, "logps_train/ref_chosen": -61.75, "logps_train/ref_rejected": -118.6875, "logps_train/rejected": -254.63775634765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2633739411830902, "rewards_train/margins": 13.330188184976578, "rewards_train/rejected": -13.593562126159668, "step": 1891 }, { "epoch": 0.93, "learning_rate": 8.261045220354916e-07, "loss": 0.0002, "step": 1892 }, { "epoch": 0.93, "logps_train/chosen": -65.7306137084961, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -265.6072998046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.22892078757286072, "rewards_train/margins": 13.835716396570206, "rewards_train/rejected": -14.064637184143066, "step": 1892 }, { "epoch": 0.93, "learning_rate": 8.258987509404225e-07, "loss": 0.0001, "step": 1893 }, { "epoch": 0.93, "logps_train/chosen": -70.37761688232422, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -269.0863342285156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.43949514627456665, "rewards_train/margins": 13.595602929592133, "rewards_train/rejected": -14.0350980758667, "step": 1893 }, { "epoch": 0.93, "learning_rate": 8.256928838333821e-07, "loss": 0.0001, "step": 1894 }, { "epoch": 0.93, "logps_train/chosen": -68.04913330078125, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -270.18218994140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1347476840019226, "rewards_train/margins": 14.233225643634796, "rewards_train/rejected": -14.367973327636719, "step": 1894 }, { "epoch": 0.93, "learning_rate": 8.254869207750206e-07, "loss": 0.0001, "step": 1895 }, { "epoch": 0.93, "logps_train/chosen": -67.36788940429688, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -265.7074279785156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.33273667097091675, "rewards_train/margins": 13.545623123645782, "rewards_train/rejected": -13.8783597946167, "step": 1895 }, { "epoch": 0.93, "learning_rate": 8.252808618260158e-07, "loss": 0.0025, "step": 1896 }, { "epoch": 0.93, "logps_train/chosen": -65.23896026611328, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -270.4723205566406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.21798813343048096, "rewards_train/margins": 14.216841578483582, "rewards_train/rejected": -14.434829711914062, "step": 1896 }, { "epoch": 0.93, "learning_rate": 8.250747070470743e-07, "loss": 0.0002, "step": 1897 }, { "epoch": 0.93, "logps_train/chosen": -74.69175720214844, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -121.125, "logps_train/rejected": -264.4610900878906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0401721000671387, "rewards_train/margins": 13.29436445236206, "rewards_train/rejected": -14.3345365524292, "step": 1897 }, { "epoch": 0.93, "learning_rate": 8.248684564989304e-07, "loss": 0.0003, "step": 1898 }, { "epoch": 0.93, "logps_train/chosen": -71.69093322753906, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -121.875, "logps_train/rejected": -257.4170837402344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7021985054016113, "rewards_train/margins": 12.85240125656128, "rewards_train/rejected": -13.55459976196289, "step": 1898 }, { "epoch": 0.94, "learning_rate": 8.246621102423472e-07, "loss": 0.0009, "step": 1899 }, { "epoch": 0.94, "logps_train/chosen": -66.80999755859375, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -258.4145812988281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.36054104566574097, "rewards_train/margins": 13.305625259876251, "rewards_train/rejected": -13.666166305541992, "step": 1899 }, { "epoch": 0.94, "learning_rate": 8.244556683381157e-07, "loss": 0.0002, "step": 1900 }, { "epoch": 0.94, "logps_train/chosen": -68.69253540039062, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -259.68621826171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4025542736053467, "rewards_train/margins": 13.263966798782349, "rewards_train/rejected": -13.666521072387695, "step": 1900 }, { "epoch": 0.94, "learning_rate": 8.242491308470548e-07, "loss": 0.0002, "step": 1901 }, { "epoch": 0.94, "logps_train/chosen": -67.78977966308594, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -271.8150939941406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.24645864963531494, "rewards_train/margins": 14.255265593528748, "rewards_train/rejected": -14.501724243164062, "step": 1901 }, { "epoch": 0.94, "learning_rate": 8.240424978300119e-07, "loss": 0.0001, "step": 1902 }, { "epoch": 0.94, "logps_train/chosen": -68.71397399902344, "logps_train/ref_chosen": -63.15625, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -266.03143310546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5545516610145569, "rewards_train/margins": 13.697519719600677, "rewards_train/rejected": -14.252071380615234, "step": 1902 }, { "epoch": 0.94, "learning_rate": 8.238357693478628e-07, "loss": 0.0011, "step": 1903 }, { "epoch": 0.94, "logps_train/chosen": -68.21514892578125, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -267.528076171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3013004660606384, "rewards_train/margins": 13.882563173770905, "rewards_train/rejected": -14.183863639831543, "step": 1903 }, { "epoch": 0.94, "learning_rate": 8.236289454615107e-07, "loss": 0.0, "step": 1904 }, { "epoch": 0.94, "logps_train/chosen": -68.11083984375, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -121.8125, "logps_train/rejected": -257.86529541015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3429689407348633, "rewards_train/margins": 13.259332656860352, "rewards_train/rejected": -13.602301597595215, "step": 1904 }, { "epoch": 0.94, "learning_rate": 8.234220262318876e-07, "loss": 0.0, "step": 1905 }, { "epoch": 0.94, "logps_train/chosen": -65.68891906738281, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -263.26434326171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1760203242301941, "rewards_train/margins": 13.493283569812775, "rewards_train/rejected": -13.669303894042969, "step": 1905 }, { "epoch": 0.94, "learning_rate": 8.232150117199533e-07, "loss": 0.0003, "step": 1906 }, { "epoch": 0.94, "logps_train/chosen": -72.53862762451172, "logps_train/ref_chosen": -68.8125, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -269.2220764160156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.37232014536857605, "rewards_train/margins": 13.887438803911209, "rewards_train/rejected": -14.259758949279785, "step": 1906 }, { "epoch": 0.94, "learning_rate": 8.230079019866953e-07, "loss": 0.0004, "step": 1907 }, { "epoch": 0.94, "logps_train/chosen": -64.034912109375, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -277.02020263671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.11154751479625702, "rewards_train/margins": 14.787738755345345, "rewards_train/rejected": -14.899286270141602, "step": 1907 }, { "epoch": 0.94, "learning_rate": 8.228006970931301e-07, "loss": 0.0, "step": 1908 }, { "epoch": 0.94, "logps_train/chosen": -70.09321594238281, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -271.99847412109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6153761148452759, "rewards_train/margins": 13.936423659324646, "rewards_train/rejected": -14.551799774169922, "step": 1908 }, { "epoch": 0.94, "learning_rate": 8.22593397100301e-07, "loss": 0.0002, "step": 1909 }, { "epoch": 0.94, "logps_train/chosen": -68.48588562011719, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -121.8125, "logps_train/rejected": -256.763916015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.42319801449775696, "rewards_train/margins": 13.0699402987957, "rewards_train/rejected": -13.493138313293457, "step": 1909 }, { "epoch": 0.94, "learning_rate": 8.223860020692805e-07, "loss": 0.0004, "step": 1910 }, { "epoch": 0.94, "logps_train/chosen": -65.69003295898438, "logps_train/ref_chosen": -63.03125, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -272.6568603515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.26382699608802795, "rewards_train/margins": 14.467973083257675, "rewards_train/rejected": -14.731800079345703, "step": 1910 }, { "epoch": 0.94, "learning_rate": 8.221785120611685e-07, "loss": 0.0, "step": 1911 }, { "epoch": 0.94, "logps_train/chosen": -66.16224670410156, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -263.182373046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.28272807598114014, "rewards_train/margins": 13.218664288520813, "rewards_train/rejected": -13.501392364501953, "step": 1911 }, { "epoch": 0.94, "learning_rate": 8.219709271370928e-07, "loss": 0.0001, "step": 1912 }, { "epoch": 0.94, "logps_train/chosen": -70.14130401611328, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -278.4808349609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4408881962299347, "rewards_train/margins": 14.407048434019089, "rewards_train/rejected": -14.847936630249023, "step": 1912 }, { "epoch": 0.94, "learning_rate": 8.217632473582095e-07, "loss": 0.0011, "step": 1913 }, { "epoch": 0.94, "logps_train/chosen": -70.33969116210938, "logps_train/ref_chosen": -67.375, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -271.3626708984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2953943908214569, "rewards_train/margins": 14.234137088060379, "rewards_train/rejected": -14.529531478881836, "step": 1913 }, { "epoch": 0.94, "learning_rate": 8.215554727857021e-07, "loss": 0.0006, "step": 1914 }, { "epoch": 0.94, "logps_train/chosen": -67.65107727050781, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -271.46868896484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.21330034732818604, "rewards_train/margins": 14.271801352500916, "rewards_train/rejected": -14.485101699829102, "step": 1914 }, { "epoch": 0.94, "learning_rate": 8.213476034807826e-07, "loss": 0.0001, "step": 1915 }, { "epoch": 0.94, "logps_train/chosen": -67.93525695800781, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -271.6691589355469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5034376382827759, "rewards_train/margins": 14.187355399131775, "rewards_train/rejected": -14.69079303741455, "step": 1915 }, { "epoch": 0.94, "learning_rate": 8.21139639504691e-07, "loss": 0.0001, "step": 1916 }, { "epoch": 0.94, "logps_train/chosen": -69.91203308105469, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -274.01593017578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.507462203502655, "rewards_train/margins": 13.926944077014923, "rewards_train/rejected": -14.434406280517578, "step": 1916 }, { "epoch": 0.94, "learning_rate": 8.209315809186945e-07, "loss": 0.0, "step": 1917 }, { "epoch": 0.94, "logps_train/chosen": -66.01060485839844, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -119.5625, "logps_train/rejected": -257.91827392578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.09656854718923569, "rewards_train/margins": 13.73695806413889, "rewards_train/rejected": -13.833526611328125, "step": 1917 }, { "epoch": 0.94, "learning_rate": 8.207234277840889e-07, "loss": 0.0, "step": 1918 }, { "epoch": 0.94, "logps_train/chosen": -71.32988739013672, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -276.6741638183594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5926330089569092, "rewards_train/margins": 14.335427522659302, "rewards_train/rejected": -14.928060531616211, "step": 1918 }, { "epoch": 0.94, "learning_rate": 8.20515180162197e-07, "loss": 0.0, "step": 1919 }, { "epoch": 0.94, "logps_train/chosen": -69.65507507324219, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -274.9286804199219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5054494738578796, "rewards_train/margins": 14.109000980854034, "rewards_train/rejected": -14.614450454711914, "step": 1919 }, { "epoch": 0.95, "learning_rate": 8.203068381143704e-07, "loss": 0.0001, "step": 1920 }, { "epoch": 0.95, "logps_train/chosen": -65.5202407836914, "logps_train/ref_chosen": -61.65625, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -264.90142822265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.38722866773605347, "rewards_train/margins": 13.937387764453888, "rewards_train/rejected": -14.324616432189941, "step": 1920 }, { "epoch": 0.95, "learning_rate": 8.200984017019879e-07, "loss": 0.0002, "step": 1921 }, { "epoch": 0.95, "logps_train/chosen": -66.0267562866211, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -265.61529541015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.23885732889175415, "rewards_train/margins": 13.81241887807846, "rewards_train/rejected": -14.051276206970215, "step": 1921 }, { "epoch": 0.95, "learning_rate": 8.198898709864563e-07, "loss": 0.0001, "step": 1922 }, { "epoch": 0.95, "logps_train/chosen": -73.07386016845703, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -132.625, "logps_train/rejected": -288.04656982421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7149543762207031, "rewards_train/margins": 14.825834274291992, "rewards_train/rejected": -15.540788650512695, "step": 1922 }, { "epoch": 0.95, "learning_rate": 8.196812460292104e-07, "loss": 0.0, "step": 1923 }, { "epoch": 0.95, "logps_train/chosen": -66.04203033447266, "logps_train/ref_chosen": -61.6875, "logps_train/ref_rejected": -120.9375, "logps_train/rejected": -263.19879150390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4367714524269104, "rewards_train/margins": 13.785744607448578, "rewards_train/rejected": -14.222516059875488, "step": 1923 }, { "epoch": 0.95, "learning_rate": 8.194725268917119e-07, "loss": 0.0001, "step": 1924 }, { "epoch": 0.95, "logps_train/chosen": -65.24076843261719, "logps_train/ref_chosen": -63.03125, "logps_train/ref_rejected": -120.125, "logps_train/rejected": -257.16912841796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.22119668126106262, "rewards_train/margins": 13.482582539319992, "rewards_train/rejected": -13.703779220581055, "step": 1924 }, { "epoch": 0.95, "learning_rate": 8.192637136354515e-07, "loss": 0.0, "step": 1925 }, { "epoch": 0.95, "logps_train/chosen": -67.76044464111328, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -261.62542724609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.33712828159332275, "rewards_train/margins": 13.379418730735779, "rewards_train/rejected": -13.716547012329102, "step": 1925 }, { "epoch": 0.95, "learning_rate": 8.190548063219467e-07, "loss": 0.0034, "step": 1926 }, { "epoch": 0.95, "logps_train/chosen": -69.70466613769531, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -263.3667907714844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5366290807723999, "rewards_train/margins": 13.33535349369049, "rewards_train/rejected": -13.87198257446289, "step": 1926 }, { "epoch": 0.95, "learning_rate": 8.188458050127431e-07, "loss": 0.0002, "step": 1927 }, { "epoch": 0.95, "logps_train/chosen": -68.540771484375, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -266.06103515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4001955986022949, "rewards_train/margins": 13.522264957427979, "rewards_train/rejected": -13.922460556030273, "step": 1927 }, { "epoch": 0.95, "learning_rate": 8.186367097694138e-07, "loss": 0.0002, "step": 1928 }, { "epoch": 0.95, "logps_train/chosen": -69.8957290649414, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -275.2976989746094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.35978761315345764, "rewards_train/margins": 14.513049453496933, "rewards_train/rejected": -14.87283706665039, "step": 1928 }, { "epoch": 0.95, "learning_rate": 8.184275206535598e-07, "loss": 0.0, "step": 1929 }, { "epoch": 0.95, "logps_train/chosen": -68.02710723876953, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -264.99139404296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.41281813383102417, "rewards_train/margins": 13.548917591571808, "rewards_train/rejected": -13.961735725402832, "step": 1929 }, { "epoch": 0.95, "learning_rate": 8.182182377268095e-07, "loss": 0.0, "step": 1930 }, { "epoch": 0.95, "logps_train/chosen": -63.686466217041016, "logps_train/ref_chosen": -60.65625, "logps_train/ref_rejected": -116.8125, "logps_train/rejected": -248.69583129882812, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3027530908584595, "rewards_train/margins": 12.887972235679626, "rewards_train/rejected": -13.190725326538086, "step": 1930 }, { "epoch": 0.95, "learning_rate": 8.180088610508189e-07, "loss": 0.0002, "step": 1931 }, { "epoch": 0.95, "logps_train/chosen": -66.77162170410156, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -120.375, "logps_train/rejected": -258.29498291015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.38077476620674133, "rewards_train/margins": 13.410637587308884, "rewards_train/rejected": -13.791412353515625, "step": 1931 }, { "epoch": 0.95, "learning_rate": 8.177993906872722e-07, "loss": 0.0001, "step": 1932 }, { "epoch": 0.95, "logps_train/chosen": -70.73765563964844, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -266.57806396484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7226917743682861, "rewards_train/margins": 13.605965375900269, "rewards_train/rejected": -14.328657150268555, "step": 1932 }, { "epoch": 0.95, "learning_rate": 8.175898266978805e-07, "loss": 0.0003, "step": 1933 }, { "epoch": 0.95, "logps_train/chosen": -74.03375244140625, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -280.3594665527344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.714752733707428, "rewards_train/margins": 14.37324470281601, "rewards_train/rejected": -15.087997436523438, "step": 1933 }, { "epoch": 0.95, "learning_rate": 8.173801691443829e-07, "loss": 0.0, "step": 1934 }, { "epoch": 0.95, "logps_train/chosen": -69.02428436279297, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -118.875, "logps_train/rejected": -256.9813232421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.557506799697876, "rewards_train/margins": 13.252979040145874, "rewards_train/rejected": -13.81048583984375, "step": 1934 }, { "epoch": 0.95, "learning_rate": 8.171704180885457e-07, "loss": 0.0002, "step": 1935 }, { "epoch": 0.95, "logps_train/chosen": -63.822654724121094, "logps_train/ref_chosen": -60.4375, "logps_train/ref_rejected": -118.625, "logps_train/rejected": -251.27352905273438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3389308452606201, "rewards_train/margins": 12.92440915107727, "rewards_train/rejected": -13.26333999633789, "step": 1935 }, { "epoch": 0.95, "learning_rate": 8.169605735921632e-07, "loss": 0.0005, "step": 1936 }, { "epoch": 0.95, "logps_train/chosen": -67.81852722167969, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -272.21234130859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4289235472679138, "rewards_train/margins": 14.039868414402008, "rewards_train/rejected": -14.468791961669922, "step": 1936 }, { "epoch": 0.95, "learning_rate": 8.16750635717057e-07, "loss": 0.0, "step": 1937 }, { "epoch": 0.95, "logps_train/chosen": -69.89257049560547, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -269.1326599121094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4559561014175415, "rewards_train/margins": 13.809460043907166, "rewards_train/rejected": -14.265416145324707, "step": 1937 }, { "epoch": 0.95, "learning_rate": 8.165406045250762e-07, "loss": 0.0001, "step": 1938 }, { "epoch": 0.95, "logps_train/chosen": -65.1786117553711, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -267.51934814453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2693750560283661, "rewards_train/margins": 14.226212292909622, "rewards_train/rejected": -14.495587348937988, "step": 1938 }, { "epoch": 0.95, "learning_rate": 8.163304800780974e-07, "loss": 0.0001, "step": 1939 }, { "epoch": 0.95, "logps_train/chosen": -69.14126586914062, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -267.7548828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.42438027262687683, "rewards_train/margins": 13.79720214009285, "rewards_train/rejected": -14.221582412719727, "step": 1939 }, { "epoch": 0.96, "learning_rate": 8.161202624380246e-07, "loss": 0.0, "step": 1940 }, { "epoch": 0.96, "logps_train/chosen": -71.53675079345703, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -268.1686706542969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5696905255317688, "rewards_train/margins": 14.074814021587372, "rewards_train/rejected": -14.64450454711914, "step": 1940 }, { "epoch": 0.96, "learning_rate": 8.159099516667893e-07, "loss": 0.0, "step": 1941 }, { "epoch": 0.96, "logps_train/chosen": -69.75274658203125, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -273.19970703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5738595128059387, "rewards_train/margins": 14.091520965099335, "rewards_train/rejected": -14.665380477905273, "step": 1941 }, { "epoch": 0.96, "learning_rate": 8.156995478263507e-07, "loss": 0.0002, "step": 1942 }, { "epoch": 0.96, "logps_train/chosen": -68.94612121582031, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -272.2665710449219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.33025670051574707, "rewards_train/margins": 14.247328519821167, "rewards_train/rejected": -14.577585220336914, "step": 1942 }, { "epoch": 0.96, "learning_rate": 8.15489050978695e-07, "loss": 0.0001, "step": 1943 }, { "epoch": 0.96, "logps_train/chosen": -68.66838836669922, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -270.28240966796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.40472936630249023, "rewards_train/margins": 13.677122592926025, "rewards_train/rejected": -14.081851959228516, "step": 1943 }, { "epoch": 0.96, "learning_rate": 8.152784611858359e-07, "loss": 0.0001, "step": 1944 }, { "epoch": 0.96, "logps_train/chosen": -70.56482696533203, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -266.33551025390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4721567630767822, "rewards_train/margins": 13.622283697128296, "rewards_train/rejected": -14.094440460205078, "step": 1944 }, { "epoch": 0.96, "learning_rate": 8.150677785098148e-07, "loss": 0.0001, "step": 1945 }, { "epoch": 0.96, "logps_train/chosen": -70.00338745117188, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -274.1121520996094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5295383930206299, "rewards_train/margins": 13.967418432235718, "rewards_train/rejected": -14.496956825256348, "step": 1945 }, { "epoch": 0.96, "learning_rate": 8.148570030127002e-07, "loss": 0.0005, "step": 1946 }, { "epoch": 0.96, "logps_train/chosen": -70.26213073730469, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -276.70489501953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.507218599319458, "rewards_train/margins": 14.368643522262573, "rewards_train/rejected": -14.875862121582031, "step": 1946 }, { "epoch": 0.96, "learning_rate": 8.146461347565877e-07, "loss": 0.0028, "step": 1947 }, { "epoch": 0.96, "logps_train/chosen": -67.84532165527344, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -263.43206787109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.39747124910354614, "rewards_train/margins": 13.308821856975555, "rewards_train/rejected": -13.706293106079102, "step": 1947 }, { "epoch": 0.96, "learning_rate": 8.14435173803601e-07, "loss": 0.0006, "step": 1948 }, { "epoch": 0.96, "logps_train/chosen": -74.66816711425781, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -131.875, "logps_train/rejected": -281.090576171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8469435572624207, "rewards_train/margins": 14.072858154773712, "rewards_train/rejected": -14.919801712036133, "step": 1948 }, { "epoch": 0.96, "learning_rate": 8.142241202158904e-07, "loss": 0.0, "step": 1949 }, { "epoch": 0.96, "logps_train/chosen": -69.28721618652344, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -122.3125, "logps_train/rejected": -265.7753601074219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5753523707389832, "rewards_train/margins": 13.768686592578888, "rewards_train/rejected": -14.344038963317871, "step": 1949 }, { "epoch": 0.96, "learning_rate": 8.140129740556335e-07, "loss": 0.0, "step": 1950 }, { "epoch": 0.96, "logps_train/chosen": -63.029659271240234, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -120.625, "logps_train/rejected": -253.31912231445312, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.07291248440742493, "rewards_train/margins": 13.195277899503708, "rewards_train/rejected": -13.268190383911133, "step": 1950 }, { "epoch": 0.96, "learning_rate": 8.138017353850356e-07, "loss": 0.0002, "step": 1951 }, { "epoch": 0.96, "logps_train/chosen": -62.42676544189453, "logps_train/ref_chosen": -61.8125, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -269.90093994140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.06201237440109253, "rewards_train/margins": 14.520171463489532, "rewards_train/rejected": -14.582183837890625, "step": 1951 }, { "epoch": 0.96, "learning_rate": 8.135904042663289e-07, "loss": 0.0001, "step": 1952 }, { "epoch": 0.96, "logps_train/chosen": -64.9752426147461, "logps_train/ref_chosen": -62.8125, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -268.6538391113281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.21622541546821594, "rewards_train/margins": 13.972742289304733, "rewards_train/rejected": -14.18896770477295, "step": 1952 }, { "epoch": 0.96, "learning_rate": 8.133789807617733e-07, "loss": 0.0001, "step": 1953 }, { "epoch": 0.96, "logps_train/chosen": -70.14733123779297, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -122.6875, "logps_train/rejected": -271.76116943359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6046257019042969, "rewards_train/margins": 14.300642013549805, "rewards_train/rejected": -14.905267715454102, "step": 1953 }, { "epoch": 0.96, "learning_rate": 8.131674649336553e-07, "loss": 0.0002, "step": 1954 }, { "epoch": 0.96, "logps_train/chosen": -70.80659484863281, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -269.9317626953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4413529932498932, "rewards_train/margins": 13.858220905065536, "rewards_train/rejected": -14.29957389831543, "step": 1954 }, { "epoch": 0.96, "learning_rate": 8.129558568442889e-07, "loss": 0.0002, "step": 1955 }, { "epoch": 0.96, "logps_train/chosen": -70.04736328125, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -272.73333740234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4634764790534973, "rewards_train/margins": 14.417182624340057, "rewards_train/rejected": -14.880659103393555, "step": 1955 }, { "epoch": 0.96, "learning_rate": 8.127441565560153e-07, "loss": 0.0, "step": 1956 }, { "epoch": 0.96, "logps_train/chosen": -72.59475708007812, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -276.80865478515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7808628678321838, "rewards_train/margins": 14.152593553066254, "rewards_train/rejected": -14.933456420898438, "step": 1956 }, { "epoch": 0.96, "learning_rate": 8.125323641312029e-07, "loss": 0.0001, "step": 1957 }, { "epoch": 0.96, "logps_train/chosen": -71.34718322753906, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -270.0149841308594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.600318193435669, "rewards_train/margins": 13.779499769210815, "rewards_train/rejected": -14.379817962646484, "step": 1957 }, { "epoch": 0.96, "learning_rate": 8.12320479632247e-07, "loss": 0.0001, "step": 1958 }, { "epoch": 0.96, "logps_train/chosen": -67.37410736083984, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -270.94287109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2794031500816345, "rewards_train/margins": 14.097306787967682, "rewards_train/rejected": -14.376709938049316, "step": 1958 }, { "epoch": 0.96, "learning_rate": 8.121085031215705e-07, "loss": 0.0002, "step": 1959 }, { "epoch": 0.96, "logps_train/chosen": -72.40103149414062, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -273.4503479003906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7368077039718628, "rewards_train/margins": 13.901684880256653, "rewards_train/rejected": -14.638492584228516, "step": 1959 }, { "epoch": 0.97, "learning_rate": 8.118964346616228e-07, "loss": 0.0001, "step": 1960 }, { "epoch": 0.97, "logps_train/chosen": -70.9595718383789, "logps_train/ref_chosen": -63.59375, "logps_train/ref_rejected": -122.1875, "logps_train/rejected": -264.68170166015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7367775440216064, "rewards_train/margins": 13.513084173202515, "rewards_train/rejected": -14.249861717224121, "step": 1960 }, { "epoch": 0.97, "learning_rate": 8.11684274314881e-07, "loss": 0.0, "step": 1961 }, { "epoch": 0.97, "logps_train/chosen": -66.71100616455078, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -259.381591796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.25103163719177246, "rewards_train/margins": 13.436101198196411, "rewards_train/rejected": -13.687132835388184, "step": 1961 }, { "epoch": 0.97, "learning_rate": 8.114720221438487e-07, "loss": 0.0001, "step": 1962 }, { "epoch": 0.97, "logps_train/chosen": -69.17792510986328, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -267.576416015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.38366174697875977, "rewards_train/margins": 13.827590465545654, "rewards_train/rejected": -14.211252212524414, "step": 1962 }, { "epoch": 0.97, "learning_rate": 8.11259678211057e-07, "loss": 0.0006, "step": 1963 }, { "epoch": 0.97, "logps_train/chosen": -70.90762329101562, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -270.60894775390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5651761293411255, "rewards_train/margins": 14.004995226860046, "rewards_train/rejected": -14.570171356201172, "step": 1963 }, { "epoch": 0.97, "learning_rate": 8.110472425790639e-07, "loss": 0.0, "step": 1964 }, { "epoch": 0.97, "logps_train/chosen": -73.09940338134766, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -272.21826171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8678501844406128, "rewards_train/margins": 13.962864995002747, "rewards_train/rejected": -14.83071517944336, "step": 1964 }, { "epoch": 0.97, "learning_rate": 8.108347153104541e-07, "loss": 0.0001, "step": 1965 }, { "epoch": 0.97, "logps_train/chosen": -66.00888061523438, "logps_train/ref_chosen": -61.40625, "logps_train/ref_rejected": -120.75, "logps_train/rejected": -256.7513732910156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.46055617928504944, "rewards_train/margins": 13.14055809378624, "rewards_train/rejected": -13.601114273071289, "step": 1965 }, { "epoch": 0.97, "learning_rate": 8.106220964678398e-07, "loss": 0.0002, "step": 1966 }, { "epoch": 0.97, "logps_train/chosen": -71.81351470947266, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -271.3092041015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7505651712417603, "rewards_train/margins": 13.900670409202576, "rewards_train/rejected": -14.651235580444336, "step": 1966 }, { "epoch": 0.97, "learning_rate": 8.104093861138599e-07, "loss": 0.0001, "step": 1967 }, { "epoch": 0.97, "logps_train/chosen": -68.6290054321289, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -274.1444091796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.37466782331466675, "rewards_train/margins": 14.377568542957306, "rewards_train/rejected": -14.752236366271973, "step": 1967 }, { "epoch": 0.97, "learning_rate": 8.101965843111802e-07, "loss": 0.0002, "step": 1968 }, { "epoch": 0.97, "logps_train/chosen": -71.13990783691406, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -278.2403564453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6135027408599854, "rewards_train/margins": 14.215807676315308, "rewards_train/rejected": -14.829310417175293, "step": 1968 }, { "epoch": 0.97, "learning_rate": 8.099836911224937e-07, "loss": 0.0001, "step": 1969 }, { "epoch": 0.97, "logps_train/chosen": -69.6527099609375, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -265.2523193359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4679567515850067, "rewards_train/margins": 13.58500936627388, "rewards_train/rejected": -14.052966117858887, "step": 1969 }, { "epoch": 0.97, "learning_rate": 8.0977070661052e-07, "loss": 0.0001, "step": 1970 }, { "epoch": 0.97, "logps_train/chosen": -70.69869995117188, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -272.89434814453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6044409275054932, "rewards_train/margins": 14.078840017318726, "rewards_train/rejected": -14.683280944824219, "step": 1970 }, { "epoch": 0.97, "learning_rate": 8.09557630838006e-07, "loss": 0.0, "step": 1971 }, { "epoch": 0.97, "logps_train/chosen": -70.56104278564453, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -271.4461669921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5793465375900269, "rewards_train/margins": 13.908777356147766, "rewards_train/rejected": -14.488123893737793, "step": 1971 }, { "epoch": 0.97, "learning_rate": 8.093444638677252e-07, "loss": 0.0005, "step": 1972 }, { "epoch": 0.97, "logps_train/chosen": -70.18095397949219, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -274.0172119140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3707318902015686, "rewards_train/margins": 14.305304944515228, "rewards_train/rejected": -14.676036834716797, "step": 1972 }, { "epoch": 0.97, "learning_rate": 8.091312057624777e-07, "loss": 0.0002, "step": 1973 }, { "epoch": 0.97, "logps_train/chosen": -71.65863037109375, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -131.125, "logps_train/rejected": -283.9958190917969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4654240608215332, "rewards_train/margins": 14.826198101043701, "rewards_train/rejected": -15.291622161865234, "step": 1973 }, { "epoch": 0.97, "learning_rate": 8.089178565850912e-07, "loss": 0.0001, "step": 1974 }, { "epoch": 0.97, "logps_train/chosen": -71.3785400390625, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -279.557373046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.562463641166687, "rewards_train/margins": 14.744300961494446, "rewards_train/rejected": -15.306764602661133, "step": 1974 }, { "epoch": 0.97, "learning_rate": 8.087044163984195e-07, "loss": 0.0, "step": 1975 }, { "epoch": 0.97, "logps_train/chosen": -70.18797302246094, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -269.7105407714844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5838854312896729, "rewards_train/margins": 13.893908262252808, "rewards_train/rejected": -14.47779369354248, "step": 1975 }, { "epoch": 0.97, "learning_rate": 8.084908852653438e-07, "loss": 0.0001, "step": 1976 }, { "epoch": 0.97, "logps_train/chosen": -69.22419738769531, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -273.63140869140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4228590130805969, "rewards_train/margins": 14.301316440105438, "rewards_train/rejected": -14.724175453186035, "step": 1976 }, { "epoch": 0.97, "learning_rate": 8.082772632487717e-07, "loss": 0.0001, "step": 1977 }, { "epoch": 0.97, "logps_train/chosen": -72.13079833984375, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -281.7481384277344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7831965684890747, "rewards_train/margins": 14.37550437450409, "rewards_train/rejected": -15.158700942993164, "step": 1977 }, { "epoch": 0.97, "learning_rate": 8.080635504116376e-07, "loss": 0.0001, "step": 1978 }, { "epoch": 0.97, "logps_train/chosen": -71.9104995727539, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -267.9494934082031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8801124095916748, "rewards_train/margins": 13.447747468948364, "rewards_train/rejected": -14.327859878540039, "step": 1978 }, { "epoch": 0.97, "learning_rate": 8.078497468169029e-07, "loss": 0.0, "step": 1979 }, { "epoch": 0.97, "logps_train/chosen": -75.0500717163086, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -273.42718505859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8069603443145752, "rewards_train/margins": 13.703919172286987, "rewards_train/rejected": -14.510879516601562, "step": 1979 }, { "epoch": 0.97, "learning_rate": 8.076358525275556e-07, "loss": 0.0001, "step": 1980 }, { "epoch": 0.97, "logps_train/chosen": -67.68736267089844, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -280.66253662109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3071156144142151, "rewards_train/margins": 14.96587747335434, "rewards_train/rejected": -15.272993087768555, "step": 1980 }, { "epoch": 0.98, "learning_rate": 8.074218676066101e-07, "loss": 0.0, "step": 1981 }, { "epoch": 0.98, "logps_train/chosen": -67.50411987304688, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -127.5625, "logps_train/rejected": -276.5825500488281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.35194945335388184, "rewards_train/margins": 14.552204370498657, "rewards_train/rejected": -14.904153823852539, "step": 1981 }, { "epoch": 0.98, "learning_rate": 8.072077921171079e-07, "loss": 0.0001, "step": 1982 }, { "epoch": 0.98, "logps_train/chosen": -65.50726318359375, "logps_train/ref_chosen": -62.53125, "logps_train/ref_rejected": -119.5625, "logps_train/rejected": -258.0847473144531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.29769885540008545, "rewards_train/margins": 13.555160641670227, "rewards_train/rejected": -13.852859497070312, "step": 1982 }, { "epoch": 0.98, "learning_rate": 8.069936261221173e-07, "loss": 0.0004, "step": 1983 }, { "epoch": 0.98, "logps_train/chosen": -65.05686950683594, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -119.0, "logps_train/rejected": -257.36651611328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.21115605533123016, "rewards_train/margins": 13.623201206326485, "rewards_train/rejected": -13.834357261657715, "step": 1983 }, { "epoch": 0.98, "learning_rate": 8.067793696847329e-07, "loss": 0.0001, "step": 1984 }, { "epoch": 0.98, "logps_train/chosen": -68.48875427246094, "logps_train/ref_chosen": -61.5625, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -269.07489013671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6915030479431152, "rewards_train/margins": 13.84264612197876, "rewards_train/rejected": -14.534149169921875, "step": 1984 }, { "epoch": 0.98, "learning_rate": 8.06565022868076e-07, "loss": 0.0001, "step": 1985 }, { "epoch": 0.98, "logps_train/chosen": -66.13015747070312, "logps_train/ref_chosen": -61.125, "logps_train/ref_rejected": -120.875, "logps_train/rejected": -255.2604522705078, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5005640983581543, "rewards_train/margins": 12.93900728225708, "rewards_train/rejected": -13.439571380615234, "step": 1985 }, { "epoch": 0.98, "learning_rate": 8.06350585735295e-07, "loss": 0.0004, "step": 1986 }, { "epoch": 0.98, "logps_train/chosen": -68.41044616699219, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -267.651611328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.41321253776550293, "rewards_train/margins": 13.942181825637817, "rewards_train/rejected": -14.35539436340332, "step": 1986 }, { "epoch": 0.98, "learning_rate": 8.061360583495642e-07, "loss": 0.0002, "step": 1987 }, { "epoch": 0.98, "logps_train/chosen": -75.56538391113281, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -278.08172607421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9071736335754395, "rewards_train/margins": 13.99313497543335, "rewards_train/rejected": -14.900308609008789, "step": 1987 }, { "epoch": 0.98, "learning_rate": 8.059214407740848e-07, "loss": 0.0001, "step": 1988 }, { "epoch": 0.98, "logps_train/chosen": -70.22577667236328, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -269.1455993652344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6463567018508911, "rewards_train/margins": 13.920108675956726, "rewards_train/rejected": -14.566465377807617, "step": 1988 }, { "epoch": 0.98, "learning_rate": 8.057067330720846e-07, "loss": 0.0001, "step": 1989 }, { "epoch": 0.98, "logps_train/chosen": -70.32505798339844, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -275.8402099609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6467143297195435, "rewards_train/margins": 14.324122309684753, "rewards_train/rejected": -14.970836639404297, "step": 1989 }, { "epoch": 0.98, "learning_rate": 8.054919353068181e-07, "loss": 0.0001, "step": 1990 }, { "epoch": 0.98, "logps_train/chosen": -70.08824920654297, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -278.4375915527344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6689810752868652, "rewards_train/margins": 14.364864826202393, "rewards_train/rejected": -15.033845901489258, "step": 1990 }, { "epoch": 0.98, "learning_rate": 8.05277047541566e-07, "loss": 0.0001, "step": 1991 }, { "epoch": 0.98, "logps_train/chosen": -70.04922485351562, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -278.70947265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6397371292114258, "rewards_train/margins": 14.48267650604248, "rewards_train/rejected": -15.122413635253906, "step": 1991 }, { "epoch": 0.98, "learning_rate": 8.050620698396359e-07, "loss": 0.0, "step": 1992 }, { "epoch": 0.98, "logps_train/chosen": -68.42216491699219, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -275.1529541015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.35627901554107666, "rewards_train/margins": 14.44681203365326, "rewards_train/rejected": -14.803091049194336, "step": 1992 }, { "epoch": 0.98, "learning_rate": 8.048470022643615e-07, "loss": 0.0, "step": 1993 }, { "epoch": 0.98, "logps_train/chosen": -71.4851303100586, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -269.5706787109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6650658845901489, "rewards_train/margins": 13.436141848564148, "rewards_train/rejected": -14.101207733154297, "step": 1993 }, { "epoch": 0.98, "learning_rate": 8.04631844879103e-07, "loss": 0.0008, "step": 1994 }, { "epoch": 0.98, "logps_train/chosen": -66.60574340820312, "logps_train/ref_chosen": -62.0, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -270.0957336425781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4593053162097931, "rewards_train/margins": 14.038159817457199, "rewards_train/rejected": -14.497465133666992, "step": 1994 }, { "epoch": 0.98, "learning_rate": 8.044165977472475e-07, "loss": 0.0, "step": 1995 }, { "epoch": 0.98, "logps_train/chosen": -72.31806945800781, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -273.19775390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5866892337799072, "rewards_train/margins": 14.06677794456482, "rewards_train/rejected": -14.653467178344727, "step": 1995 }, { "epoch": 0.98, "learning_rate": 8.042012609322083e-07, "loss": 0.0001, "step": 1996 }, { "epoch": 0.98, "logps_train/chosen": -69.28742218017578, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -263.7333679199219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5585519075393677, "rewards_train/margins": 13.541348338127136, "rewards_train/rejected": -14.099900245666504, "step": 1996 }, { "epoch": 0.98, "learning_rate": 8.039858344974249e-07, "loss": 0.0, "step": 1997 }, { "epoch": 0.98, "logps_train/chosen": -65.00040435791016, "logps_train/ref_chosen": -61.90625, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -272.4922790527344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3093177378177643, "rewards_train/margins": 14.339566081762314, "rewards_train/rejected": -14.648883819580078, "step": 1997 }, { "epoch": 0.98, "learning_rate": 8.037703185063633e-07, "loss": 0.0001, "step": 1998 }, { "epoch": 0.98, "logps_train/chosen": -70.52037048339844, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -267.82122802734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6688336133956909, "rewards_train/margins": 13.763388752937317, "rewards_train/rejected": -14.432222366333008, "step": 1998 }, { "epoch": 0.98, "learning_rate": 8.035547130225163e-07, "loss": 0.0001, "step": 1999 }, { "epoch": 0.98, "logps_train/chosen": -73.63667297363281, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -280.06927490234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7958935499191284, "rewards_train/margins": 14.42523968219757, "rewards_train/rejected": -15.2211332321167, "step": 1999 }, { "epoch": 0.98, "learning_rate": 8.033390181094024e-07, "loss": 0.0, "step": 2000 }, { "epoch": 0.98, "logps_train/chosen": -71.43444061279297, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -114.875, "logps_train/rejected": -251.89503479003906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7317745685577393, "rewards_train/margins": 12.970619916915894, "rewards_train/rejected": -13.702394485473633, "step": 2000 }, { "epoch": 0.99, "learning_rate": 8.031232338305669e-07, "loss": 0.0028, "step": 2001 }, { "epoch": 0.99, "logps_train/chosen": -69.53590393066406, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -270.7543640136719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6813734769821167, "rewards_train/margins": 14.194843411445618, "rewards_train/rejected": -14.876216888427734, "step": 2001 }, { "epoch": 0.99, "learning_rate": 8.029073602495815e-07, "loss": 0.0004, "step": 2002 }, { "epoch": 0.99, "logps_train/chosen": -67.87731170654297, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -121.5625, "logps_train/rejected": -267.82977294921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4647332727909088, "rewards_train/margins": 14.163848727941513, "rewards_train/rejected": -14.628582000732422, "step": 2002 }, { "epoch": 0.99, "learning_rate": 8.026913974300436e-07, "loss": 0.0001, "step": 2003 }, { "epoch": 0.99, "logps_train/chosen": -64.88819885253906, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -274.4691467285156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1216321736574173, "rewards_train/margins": 14.472694799304008, "rewards_train/rejected": -14.594326972961426, "step": 2003 }, { "epoch": 0.99, "learning_rate": 8.024753454355776e-07, "loss": 0.0001, "step": 2004 }, { "epoch": 0.99, "logps_train/chosen": -71.67898559570312, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -269.64404296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5966581106185913, "rewards_train/margins": 13.909251809120178, "rewards_train/rejected": -14.50590991973877, "step": 2004 }, { "epoch": 0.99, "learning_rate": 8.022592043298337e-07, "loss": 0.0001, "step": 2005 }, { "epoch": 0.99, "logps_train/chosen": -71.48957824707031, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -131.25, "logps_train/rejected": -286.31939697265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6294759511947632, "rewards_train/margins": 14.872971177101135, "rewards_train/rejected": -15.502447128295898, "step": 2005 }, { "epoch": 0.99, "learning_rate": 8.020429741764889e-07, "loss": 0.0001, "step": 2006 }, { "epoch": 0.99, "logps_train/chosen": -74.541259765625, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -278.7948303222656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0532469749450684, "rewards_train/margins": 14.242250919342041, "rewards_train/rejected": -15.29549789428711, "step": 2006 }, { "epoch": 0.99, "learning_rate": 8.018266550392456e-07, "loss": 0.0, "step": 2007 }, { "epoch": 0.99, "logps_train/chosen": -68.73155975341797, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -277.3477783203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.38565582036972046, "rewards_train/margins": 14.530469477176666, "rewards_train/rejected": -14.916125297546387, "step": 2007 }, { "epoch": 0.99, "learning_rate": 8.016102469818331e-07, "loss": 0.0002, "step": 2008 }, { "epoch": 0.99, "logps_train/chosen": -67.8422622680664, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -274.0979919433594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.45859187841415405, "rewards_train/margins": 14.475863993167877, "rewards_train/rejected": -14.934455871582031, "step": 2008 }, { "epoch": 0.99, "learning_rate": 8.013937500680067e-07, "loss": 0.0, "step": 2009 }, { "epoch": 0.99, "logps_train/chosen": -73.28622436523438, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -272.6673889160156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8076748847961426, "rewards_train/margins": 13.993879795074463, "rewards_train/rejected": -14.801554679870605, "step": 2009 }, { "epoch": 0.99, "learning_rate": 8.011771643615477e-07, "loss": 0.0007, "step": 2010 }, { "epoch": 0.99, "logps_train/chosen": -67.9999008178711, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -267.14581298828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.44388633966445923, "rewards_train/margins": 13.970551908016205, "rewards_train/rejected": -14.414438247680664, "step": 2010 }, { "epoch": 0.99, "learning_rate": 8.009604899262638e-07, "loss": 0.0002, "step": 2011 }, { "epoch": 0.99, "logps_train/chosen": -71.87486267089844, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -275.52801513671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7793321013450623, "rewards_train/margins": 14.069369375705719, "rewards_train/rejected": -14.848701477050781, "step": 2011 }, { "epoch": 0.99, "learning_rate": 8.007437268259889e-07, "loss": 0.0002, "step": 2012 }, { "epoch": 0.99, "logps_train/chosen": -71.3834228515625, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -130.875, "logps_train/rejected": -280.83154296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7516242265701294, "rewards_train/margins": 14.24886405467987, "rewards_train/rejected": -15.00048828125, "step": 2012 }, { "epoch": 0.99, "learning_rate": 8.005268751245826e-07, "loss": 0.0001, "step": 2013 }, { "epoch": 0.99, "logps_train/chosen": -70.88502502441406, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -264.6651611328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.725123405456543, "rewards_train/margins": 13.428014755249023, "rewards_train/rejected": -14.153138160705566, "step": 2013 }, { "epoch": 0.99, "learning_rate": 8.003099348859312e-07, "loss": 0.0009, "step": 2014 }, { "epoch": 0.99, "logps_train/chosen": -69.88540649414062, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -130.375, "logps_train/rejected": -273.58648681640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.38712507486343384, "rewards_train/margins": 13.928359925746918, "rewards_train/rejected": -14.315485000610352, "step": 2014 }, { "epoch": 0.99, "learning_rate": 8.000929061739463e-07, "loss": 0.0003, "step": 2015 }, { "epoch": 0.99, "logps_train/chosen": -70.78284454345703, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -274.6882019042969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6275034546852112, "rewards_train/margins": 14.10869973897934, "rewards_train/rejected": -14.73620319366455, "step": 2015 }, { "epoch": 0.99, "learning_rate": 7.998757890525665e-07, "loss": 0.0001, "step": 2016 }, { "epoch": 0.99, "logps_train/chosen": -69.9221420288086, "logps_train/ref_chosen": -67.9375, "logps_train/ref_rejected": -130.875, "logps_train/rejected": -282.39117431640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.19782963395118713, "rewards_train/margins": 14.946951478719711, "rewards_train/rejected": -15.144781112670898, "step": 2016 }, { "epoch": 0.99, "learning_rate": 7.996585835857555e-07, "loss": 0.0001, "step": 2017 }, { "epoch": 0.99, "logps_train/chosen": -66.33613586425781, "logps_train/ref_chosen": -61.625, "logps_train/ref_rejected": -117.6875, "logps_train/rejected": -253.60995483398438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.47238314151763916, "rewards_train/margins": 13.118299841880798, "rewards_train/rejected": -13.590682983398438, "step": 2017 }, { "epoch": 0.99, "learning_rate": 7.99441289837504e-07, "loss": 0.0036, "step": 2018 }, { "epoch": 0.99, "logps_train/chosen": -70.99080657958984, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -278.9940185546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.48360246419906616, "rewards_train/margins": 14.41736227273941, "rewards_train/rejected": -14.900964736938477, "step": 2018 }, { "epoch": 0.99, "learning_rate": 7.992239078718278e-07, "loss": 0.0, "step": 2019 }, { "epoch": 0.99, "logps_train/chosen": -71.84947204589844, "logps_train/ref_chosen": -67.3125, "logps_train/ref_rejected": -131.25, "logps_train/rejected": -287.271240234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.45482078194618225, "rewards_train/margins": 15.149257510900497, "rewards_train/rejected": -15.60407829284668, "step": 2019 }, { "epoch": 0.99, "learning_rate": 7.990064377527692e-07, "loss": 0.0003, "step": 2020 }, { "epoch": 0.99, "logps_train/chosen": -68.70990753173828, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -274.3914794921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5451855063438416, "rewards_train/margins": 14.523454248905182, "rewards_train/rejected": -15.068639755249023, "step": 2020 }, { "epoch": 1.0, "learning_rate": 7.987888795443966e-07, "loss": 0.0, "step": 2021 }, { "epoch": 1.0, "logps_train/chosen": -72.2466812133789, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -277.84478759765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5973727703094482, "rewards_train/margins": 14.26200795173645, "rewards_train/rejected": -14.859380722045898, "step": 2021 }, { "epoch": 1.0, "learning_rate": 7.985712333108039e-07, "loss": 0.0, "step": 2022 }, { "epoch": 1.0, "logps_train/chosen": -67.35400390625, "logps_train/ref_chosen": -62.59375, "logps_train/ref_rejected": -119.5, "logps_train/rejected": -259.279541015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4750484228134155, "rewards_train/margins": 13.502761483192444, "rewards_train/rejected": -13.97780990600586, "step": 2022 }, { "epoch": 1.0, "learning_rate": 7.983534991161112e-07, "loss": 0.0002, "step": 2023 }, { "epoch": 1.0, "logps_train/chosen": -64.39552307128906, "logps_train/ref_chosen": -61.15625, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -266.04425048828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3227555751800537, "rewards_train/margins": 13.877028703689575, "rewards_train/rejected": -14.199784278869629, "step": 2023 }, { "epoch": 1.0, "learning_rate": 7.981356770244643e-07, "loss": 0.0001, "step": 2024 }, { "epoch": 1.0, "logps_train/chosen": -67.76290893554688, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -265.86053466796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3692108690738678, "rewards_train/margins": 13.927000373601913, "rewards_train/rejected": -14.296211242675781, "step": 2024 }, { "epoch": 1.0, "learning_rate": 7.979177671000352e-07, "loss": 0.0004, "step": 2025 }, { "epoch": 1.0, "logps_train/chosen": -66.2415771484375, "logps_train/ref_chosen": -61.375, "logps_train/ref_rejected": -121.5, "logps_train/rejected": -263.18695068359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4866582751274109, "rewards_train/margins": 13.678374111652374, "rewards_train/rejected": -14.165032386779785, "step": 2025 }, { "epoch": 1.0, "learning_rate": 7.976997694070218e-07, "loss": 0.0, "step": 2026 }, { "epoch": 1.0, "logps_train/chosen": -71.21742248535156, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -267.5501403808594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6864888072013855, "rewards_train/margins": 13.498506844043732, "rewards_train/rejected": -14.184995651245117, "step": 2026 }, { "epoch": 1.0, "learning_rate": 7.974816840096474e-07, "loss": 0.0025, "step": 2027 }, { "epoch": 1.0, "logps_train/chosen": -74.18881225585938, "logps_train/ref_chosen": -69.6875, "logps_train/ref_rejected": -135.875, "logps_train/rejected": -291.64703369140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4497411251068115, "rewards_train/margins": 15.12404465675354, "rewards_train/rejected": -15.573785781860352, "step": 2027 }, { "epoch": 1.0, "learning_rate": 7.972635109721616e-07, "loss": 0.0001, "step": 2028 }, { "epoch": 1.0, "logps_train/chosen": -65.82121276855469, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -269.762451171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.04886035621166229, "rewards_train/margins": 14.368953511118889, "rewards_train/rejected": -14.320093154907227, "step": 2028 }, { "epoch": 1.0, "learning_rate": 7.970452503588397e-07, "loss": 0.0, "step": 2029 }, { "epoch": 1.0, "logps_train/chosen": -67.98855590820312, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -272.62249755859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.329958975315094, "rewards_train/margins": 14.172135293483734, "rewards_train/rejected": -14.502094268798828, "step": 2029 }, { "epoch": 1.0, "learning_rate": 7.968269022339824e-07, "loss": 0.0, "step": 2030 }, { "epoch": 1.0, "logps_train/chosen": -69.15369415283203, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -131.625, "logps_train/rejected": -279.12957763671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.20348024368286133, "rewards_train/margins": 14.54873514175415, "rewards_train/rejected": -14.752215385437012, "step": 2030 }, { "epoch": 1.0, "learning_rate": 7.966084666619169e-07, "loss": 0.0, "step": 2031 }, { "epoch": 1.0, "logps_train/chosen": -67.99313354492188, "logps_train/ref_chosen": -67.5, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -277.93939208984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.04838597774505615, "rewards_train/margins": 15.035592675209045, "rewards_train/rejected": -15.083978652954102, "step": 2031 }, { "epoch": 1.0, "learning_rate": 7.963899437069959e-07, "loss": 0.0, "step": 2032 }, { "epoch": 1.0, "logps_train/chosen": -67.38388061523438, "logps_train/ref_chosen": -62.8125, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -272.8568420410156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4565522074699402, "rewards_train/margins": 14.192219078540802, "rewards_train/rejected": -14.648771286010742, "step": 2032 }, { "epoch": 1.0, "learning_rate": 7.961713334335972e-07, "loss": 0.0001, "step": 2033 }, { "epoch": 1.0, "logps_train/chosen": -70.58831787109375, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -268.99310302734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6236028671264648, "rewards_train/margins": 13.978928565979004, "rewards_train/rejected": -14.602531433105469, "step": 2033 }, { "epoch": 1.0, "learning_rate": 7.959526359061253e-07, "loss": 0.0001, "step": 2034 }, { "epoch": 1.0, "logps_train/chosen": -66.57659149169922, "logps_train/ref_chosen": -62.65625, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -276.240478515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.39144861698150635, "rewards_train/margins": 14.41209065914154, "rewards_train/rejected": -14.803539276123047, "step": 2034 }, { "epoch": 1.0, "learning_rate": 7.957338511890099e-07, "loss": 0.0, "step": 2035 }, { "epoch": 1.0, "logps_train/chosen": -73.62957763671875, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -277.34771728515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7278022766113281, "rewards_train/margins": 14.351158142089844, "rewards_train/rejected": -15.078960418701172, "step": 2035 }, { "epoch": 1.0, "learning_rate": 7.955149793467062e-07, "loss": 0.0001, "step": 2036 }, { "epoch": 1.0, "logps_train/chosen": -66.47325897216797, "logps_train/ref_chosen": -62.09375, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -265.3789978027344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.43848854303359985, "rewards_train/margins": 13.650974690914154, "rewards_train/rejected": -14.089463233947754, "step": 2036 }, { "epoch": 1.0, "learning_rate": 7.952960204436957e-07, "loss": 0.0, "step": 2037 }, { "epoch": 1.0, "logps_train/chosen": -74.65016174316406, "logps_train/ref_chosen": -68.4375, "logps_train/ref_rejected": -132.875, "logps_train/rejected": -280.75787353515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6191664934158325, "rewards_train/margins": 14.165018916130066, "rewards_train/rejected": -14.784185409545898, "step": 2037 }, { "epoch": 1.0, "learning_rate": 7.95076974544485e-07, "loss": 0.0001, "step": 2038 }, { "epoch": 1.0, "logps_train/chosen": -69.7919921875, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -275.0008544921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.34306663274765015, "rewards_train/margins": 14.468933641910553, "rewards_train/rejected": -14.812000274658203, "step": 2038 }, { "epoch": 1.0, "learning_rate": 7.948578417136065e-07, "loss": 0.0, "step": 2039 }, { "epoch": 1.0, "logps_train/chosen": -67.05047607421875, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -263.0897521972656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.21632686257362366, "rewards_train/margins": 13.858323901891708, "rewards_train/rejected": -14.074650764465332, "step": 2039 }, { "epoch": 1.0, "learning_rate": 7.94638622015618e-07, "loss": 0.0002, "step": 2040 }, { "epoch": 1.0, "logps_train/chosen": -71.0953369140625, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -283.8428955078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.45108675956726074, "rewards_train/margins": 15.051562547683716, "rewards_train/rejected": -15.502649307250977, "step": 2040 }, { "epoch": 1.0, "learning_rate": 7.944193155151035e-07, "loss": 0.0, "step": 2041 }, { "epoch": 1.0, "logps_train/chosen": -73.92986297607422, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -268.6395568847656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7963554859161377, "rewards_train/margins": 13.436008214950562, "rewards_train/rejected": -14.2323637008667, "step": 2041 }, { "epoch": 1.01, "learning_rate": 7.94199922276672e-07, "loss": 0.0002, "step": 2042 }, { "epoch": 1.01, "logps_train/chosen": -73.0815200805664, "logps_train/ref_chosen": -68.75, "logps_train/ref_rejected": -132.5, "logps_train/rejected": -280.7962646484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4313449561595917, "rewards_train/margins": 14.403553038835526, "rewards_train/rejected": -14.834897994995117, "step": 2042 }, { "epoch": 1.01, "learning_rate": 7.93980442364958e-07, "loss": 0.0, "step": 2043 }, { "epoch": 1.01, "logps_train/chosen": -65.80908966064453, "logps_train/ref_chosen": -62.59375, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -271.449462890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3219490647315979, "rewards_train/margins": 14.123631417751312, "rewards_train/rejected": -14.44558048248291, "step": 2043 }, { "epoch": 1.01, "learning_rate": 7.937608758446222e-07, "loss": 0.0001, "step": 2044 }, { "epoch": 1.01, "logps_train/chosen": -73.69696807861328, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -277.7273254394531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8140814900398254, "rewards_train/margins": 14.389509856700897, "rewards_train/rejected": -15.203591346740723, "step": 2044 }, { "epoch": 1.01, "learning_rate": 7.935412227803501e-07, "loss": 0.0, "step": 2045 }, { "epoch": 1.01, "logps_train/chosen": -69.1105728149414, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -279.23590087890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.30461227893829346, "rewards_train/margins": 14.771321177482605, "rewards_train/rejected": -15.075933456420898, "step": 2045 }, { "epoch": 1.01, "learning_rate": 7.933214832368529e-07, "loss": 0.0, "step": 2046 }, { "epoch": 1.01, "logps_train/chosen": -65.1551742553711, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -269.2724609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.24798814952373505, "rewards_train/margins": 14.139367654919624, "rewards_train/rejected": -14.38735580444336, "step": 2046 }, { "epoch": 1.01, "learning_rate": 7.931016572788675e-07, "loss": 0.0, "step": 2047 }, { "epoch": 1.01, "logps_train/chosen": -68.67803955078125, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -271.6611328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.377765417098999, "rewards_train/margins": 14.296993494033813, "rewards_train/rejected": -14.674758911132812, "step": 2047 }, { "epoch": 1.01, "learning_rate": 7.928817449711562e-07, "loss": 0.0001, "step": 2048 }, { "epoch": 1.01, "logps_train/chosen": -69.38228607177734, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -282.7930603027344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4763147830963135, "rewards_train/margins": 14.753381490707397, "rewards_train/rejected": -15.229696273803711, "step": 2048 }, { "epoch": 1.01, "learning_rate": 7.926617463785065e-07, "loss": 0.0, "step": 2049 }, { "epoch": 1.01, "logps_train/chosen": -67.04660034179688, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -121.5, "logps_train/rejected": -266.7639465332031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.28219878673553467, "rewards_train/margins": 14.24390423297882, "rewards_train/rejected": -14.526103019714355, "step": 2049 }, { "epoch": 1.01, "learning_rate": 7.924416615657315e-07, "loss": 0.0002, "step": 2050 }, { "epoch": 1.01, "logps_train/chosen": -71.93721008300781, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -274.1287841796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6880564093589783, "rewards_train/margins": 14.057734072208405, "rewards_train/rejected": -14.745790481567383, "step": 2050 }, { "epoch": 1.01, "learning_rate": 7.922214905976697e-07, "loss": 0.0004, "step": 2051 }, { "epoch": 1.01, "logps_train/chosen": -64.62847137451172, "logps_train/ref_chosen": -61.5, "logps_train/ref_rejected": -119.625, "logps_train/rejected": -258.7218933105469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3137749135494232, "rewards_train/margins": 13.596744686365128, "rewards_train/rejected": -13.91051959991455, "step": 2051 }, { "epoch": 1.01, "learning_rate": 7.920012335391848e-07, "loss": 0.0001, "step": 2052 }, { "epoch": 1.01, "logps_train/chosen": -72.60322570800781, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -274.08428955078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7293654680252075, "rewards_train/margins": 13.969201922416687, "rewards_train/rejected": -14.698567390441895, "step": 2052 }, { "epoch": 1.01, "learning_rate": 7.917808904551662e-07, "loss": 0.0001, "step": 2053 }, { "epoch": 1.01, "logps_train/chosen": -66.91646575927734, "logps_train/ref_chosen": -62.46875, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -271.4964599609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.44316065311431885, "rewards_train/margins": 14.061709761619568, "rewards_train/rejected": -14.504870414733887, "step": 2053 }, { "epoch": 1.01, "learning_rate": 7.915604614105285e-07, "loss": 0.0001, "step": 2054 }, { "epoch": 1.01, "logps_train/chosen": -75.36700439453125, "logps_train/ref_chosen": -66.9375, "logps_train/ref_rejected": -131.25, "logps_train/rejected": -286.1429748535156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8457334041595459, "rewards_train/margins": 14.649423837661743, "rewards_train/rejected": -15.495157241821289, "step": 2054 }, { "epoch": 1.01, "learning_rate": 7.913399464702113e-07, "loss": 0.0001, "step": 2055 }, { "epoch": 1.01, "logps_train/chosen": -63.22613525390625, "logps_train/ref_chosen": -60.96875, "logps_train/ref_rejected": -122.125, "logps_train/rejected": -259.0081787109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.22698378562927246, "rewards_train/margins": 13.462748765945435, "rewards_train/rejected": -13.689732551574707, "step": 2055 }, { "epoch": 1.01, "learning_rate": 7.911193456991802e-07, "loss": 0.0002, "step": 2056 }, { "epoch": 1.01, "logps_train/chosen": -67.9413833618164, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -281.8485412597656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4379862844944, "rewards_train/margins": 15.015322774648666, "rewards_train/rejected": -15.453309059143066, "step": 2056 }, { "epoch": 1.01, "learning_rate": 7.908986591624252e-07, "loss": 0.0, "step": 2057 }, { "epoch": 1.01, "logps_train/chosen": -72.21430969238281, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -270.992431640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.574604868888855, "rewards_train/margins": 14.161940693855286, "rewards_train/rejected": -14.73654556274414, "step": 2057 }, { "epoch": 1.01, "learning_rate": 7.906778869249626e-07, "loss": 0.0, "step": 2058 }, { "epoch": 1.01, "logps_train/chosen": -70.68687438964844, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -132.625, "logps_train/rejected": -282.97589111328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.47664642333984375, "rewards_train/margins": 14.558052062988281, "rewards_train/rejected": -15.034698486328125, "step": 2058 }, { "epoch": 1.01, "learning_rate": 7.904570290518329e-07, "loss": 0.0, "step": 2059 }, { "epoch": 1.01, "logps_train/chosen": -69.1263656616211, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -269.80487060546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6475487947463989, "rewards_train/margins": 14.15618121623993, "rewards_train/rejected": -14.803730010986328, "step": 2059 }, { "epoch": 1.01, "learning_rate": 7.902360856081025e-07, "loss": 0.0002, "step": 2060 }, { "epoch": 1.01, "logps_train/chosen": -70.06541442871094, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -132.0, "logps_train/rejected": -285.7191162109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3724101781845093, "rewards_train/margins": 14.992372155189514, "rewards_train/rejected": -15.364782333374023, "step": 2060 }, { "epoch": 1.01, "learning_rate": 7.900150566588628e-07, "loss": 0.0002, "step": 2061 }, { "epoch": 1.01, "logps_train/chosen": -71.10238647460938, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -274.4147644042969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5137059688568115, "rewards_train/margins": 14.128553628921509, "rewards_train/rejected": -14.64225959777832, "step": 2061 }, { "epoch": 1.02, "learning_rate": 7.897939422692306e-07, "loss": 0.0, "step": 2062 }, { "epoch": 1.02, "logps_train/chosen": -73.06391906738281, "logps_train/ref_chosen": -67.8125, "logps_train/ref_rejected": -132.25, "logps_train/rejected": -287.6861572265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.524653971195221, "rewards_train/margins": 15.017208516597748, "rewards_train/rejected": -15.541862487792969, "step": 2062 }, { "epoch": 1.02, "learning_rate": 7.895727425043475e-07, "loss": 0.0, "step": 2063 }, { "epoch": 1.02, "logps_train/chosen": -66.04226684570312, "logps_train/ref_chosen": -62.78125, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -278.3106689453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3266139626502991, "rewards_train/margins": 14.790489614009857, "rewards_train/rejected": -15.117103576660156, "step": 2063 }, { "epoch": 1.02, "learning_rate": 7.893514574293804e-07, "loss": 0.0002, "step": 2064 }, { "epoch": 1.02, "logps_train/chosen": -63.096900939941406, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -266.23553466796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.04264932870864868, "rewards_train/margins": 14.039206445217133, "rewards_train/rejected": -14.081855773925781, "step": 2064 }, { "epoch": 1.02, "learning_rate": 7.891300871095215e-07, "loss": 0.0001, "step": 2065 }, { "epoch": 1.02, "logps_train/chosen": -68.8123779296875, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -260.6327819824219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5424193739891052, "rewards_train/margins": 13.257674276828766, "rewards_train/rejected": -13.800093650817871, "step": 2065 }, { "epoch": 1.02, "learning_rate": 7.88908631609988e-07, "loss": 0.0001, "step": 2066 }, { "epoch": 1.02, "logps_train/chosen": -68.66499328613281, "logps_train/ref_chosen": -61.8125, "logps_train/ref_rejected": -119.25, "logps_train/rejected": -260.9136047363281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6864209771156311, "rewards_train/margins": 13.480867803096771, "rewards_train/rejected": -14.167288780212402, "step": 2066 }, { "epoch": 1.02, "learning_rate": 7.886870909960222e-07, "loss": 0.0001, "step": 2067 }, { "epoch": 1.02, "logps_train/chosen": -69.6841049194336, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -271.0723876953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4974139332771301, "rewards_train/margins": 14.10401600599289, "rewards_train/rejected": -14.60142993927002, "step": 2067 }, { "epoch": 1.02, "learning_rate": 7.884654653328913e-07, "loss": 0.0, "step": 2068 }, { "epoch": 1.02, "logps_train/chosen": -64.10549926757812, "logps_train/ref_chosen": -60.6875, "logps_train/ref_rejected": -122.4375, "logps_train/rejected": -264.2439270019531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3424346446990967, "rewards_train/margins": 13.836451768875122, "rewards_train/rejected": -14.178886413574219, "step": 2068 }, { "epoch": 1.02, "learning_rate": 7.882437546858878e-07, "loss": 0.0001, "step": 2069 }, { "epoch": 1.02, "logps_train/chosen": -68.30276489257812, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -275.31884765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4287630617618561, "rewards_train/margins": 14.46845468878746, "rewards_train/rejected": -14.897217750549316, "step": 2069 }, { "epoch": 1.02, "learning_rate": 7.880219591203292e-07, "loss": 0.0, "step": 2070 }, { "epoch": 1.02, "logps_train/chosen": -71.0097427368164, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -280.10015869140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5430639386177063, "rewards_train/margins": 14.541271388530731, "rewards_train/rejected": -15.084335327148438, "step": 2070 }, { "epoch": 1.02, "learning_rate": 7.87800078701558e-07, "loss": 0.0001, "step": 2071 }, { "epoch": 1.02, "logps_train/chosen": -70.26461791992188, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -274.74468994140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.47431308031082153, "rewards_train/margins": 14.233552634716034, "rewards_train/rejected": -14.707865715026855, "step": 2071 }, { "epoch": 1.02, "learning_rate": 7.875781134949416e-07, "loss": 0.0002, "step": 2072 }, { "epoch": 1.02, "logps_train/chosen": -65.67142486572266, "logps_train/ref_chosen": -61.5, "logps_train/ref_rejected": -120.75, "logps_train/rejected": -260.19891357421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.41850942373275757, "rewards_train/margins": 13.52316004037857, "rewards_train/rejected": -13.941669464111328, "step": 2072 }, { "epoch": 1.02, "learning_rate": 7.873560635658723e-07, "loss": 0.0005, "step": 2073 }, { "epoch": 1.02, "logps_train/chosen": -68.4810562133789, "logps_train/ref_chosen": -62.8125, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -266.7819519042969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5681251287460327, "rewards_train/margins": 13.794786095619202, "rewards_train/rejected": -14.362911224365234, "step": 2073 }, { "epoch": 1.02, "learning_rate": 7.871339289797679e-07, "loss": 0.0003, "step": 2074 }, { "epoch": 1.02, "logps_train/chosen": -70.76115417480469, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -133.125, "logps_train/rejected": -291.09765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5115152597427368, "rewards_train/margins": 15.282042860984802, "rewards_train/rejected": -15.793558120727539, "step": 2074 }, { "epoch": 1.02, "learning_rate": 7.869117098020704e-07, "loss": 0.0, "step": 2075 }, { "epoch": 1.02, "logps_train/chosen": -72.4889144897461, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -278.809326171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9487940073013306, "rewards_train/margins": 14.101184248924255, "rewards_train/rejected": -15.049978256225586, "step": 2075 }, { "epoch": 1.02, "learning_rate": 7.866894060982472e-07, "loss": 0.0, "step": 2076 }, { "epoch": 1.02, "logps_train/chosen": -71.83372497558594, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -272.4573669433594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6531474590301514, "rewards_train/margins": 14.038047075271606, "rewards_train/rejected": -14.691194534301758, "step": 2076 }, { "epoch": 1.02, "learning_rate": 7.864670179337903e-07, "loss": 0.0003, "step": 2077 }, { "epoch": 1.02, "logps_train/chosen": -70.08995056152344, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -274.12786865234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3939557671546936, "rewards_train/margins": 14.219615399837494, "rewards_train/rejected": -14.613571166992188, "step": 2077 }, { "epoch": 1.02, "learning_rate": 7.86244545374217e-07, "loss": 0.0001, "step": 2078 }, { "epoch": 1.02, "logps_train/chosen": -69.65383911132812, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -275.11785888671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6076444983482361, "rewards_train/margins": 14.385489046573639, "rewards_train/rejected": -14.993133544921875, "step": 2078 }, { "epoch": 1.02, "learning_rate": 7.860219884850692e-07, "loss": 0.0, "step": 2079 }, { "epoch": 1.02, "logps_train/chosen": -73.6356201171875, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -272.90386962890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8897829651832581, "rewards_train/margins": 13.92570298910141, "rewards_train/rejected": -14.815485954284668, "step": 2079 }, { "epoch": 1.02, "learning_rate": 7.857993473319136e-07, "loss": 0.0001, "step": 2080 }, { "epoch": 1.02, "logps_train/chosen": -75.04682922363281, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -287.850830078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.896821916103363, "rewards_train/margins": 14.900175154209137, "rewards_train/rejected": -15.7969970703125, "step": 2080 }, { "epoch": 1.02, "learning_rate": 7.855766219803417e-07, "loss": 0.0001, "step": 2081 }, { "epoch": 1.02, "logps_train/chosen": -67.7115478515625, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -271.2366027832031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3001105487346649, "rewards_train/margins": 14.431556969881058, "rewards_train/rejected": -14.731667518615723, "step": 2081 }, { "epoch": 1.03, "learning_rate": 7.8535381249597e-07, "loss": 0.0, "step": 2082 }, { "epoch": 1.03, "logps_train/chosen": -67.71617126464844, "logps_train/ref_chosen": -62.5625, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -277.277587890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5174176692962646, "rewards_train/margins": 14.660046815872192, "rewards_train/rejected": -15.177464485168457, "step": 2082 }, { "epoch": 1.03, "learning_rate": 7.851309189444396e-07, "loss": 0.0, "step": 2083 }, { "epoch": 1.03, "logps_train/chosen": -67.77412414550781, "logps_train/ref_chosen": -62.09375, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -273.6763610839844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5701859593391418, "rewards_train/margins": 14.293055236339569, "rewards_train/rejected": -14.863241195678711, "step": 2083 }, { "epoch": 1.03, "learning_rate": 7.849079413914164e-07, "loss": 0.0, "step": 2084 }, { "epoch": 1.03, "logps_train/chosen": -75.00360107421875, "logps_train/ref_chosen": -68.25, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -285.21624755859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6748226881027222, "rewards_train/margins": 14.746169209480286, "rewards_train/rejected": -15.420991897583008, "step": 2084 }, { "epoch": 1.03, "learning_rate": 7.846848799025914e-07, "loss": 0.0, "step": 2085 }, { "epoch": 1.03, "logps_train/chosen": -68.60459899902344, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -121.8125, "logps_train/rejected": -264.760009765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5876567363739014, "rewards_train/margins": 13.708656549453735, "rewards_train/rejected": -14.296313285827637, "step": 2085 }, { "epoch": 1.03, "learning_rate": 7.844617345436795e-07, "loss": 0.0004, "step": 2086 }, { "epoch": 1.03, "logps_train/chosen": -69.1516342163086, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -279.697998046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.38420629501342773, "rewards_train/margins": 14.785398960113525, "rewards_train/rejected": -15.169605255126953, "step": 2086 }, { "epoch": 1.03, "learning_rate": 7.842385053804213e-07, "loss": 0.0, "step": 2087 }, { "epoch": 1.03, "logps_train/chosen": -66.81232452392578, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -275.21173095703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.22290733456611633, "rewards_train/margins": 14.610764235258102, "rewards_train/rejected": -14.833671569824219, "step": 2087 }, { "epoch": 1.03, "learning_rate": 7.840151924785815e-07, "loss": 0.0001, "step": 2088 }, { "epoch": 1.03, "logps_train/chosen": -73.94554901123047, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -281.1990966796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.864378809928894, "rewards_train/margins": 14.611195683479309, "rewards_train/rejected": -15.475574493408203, "step": 2088 }, { "epoch": 1.03, "learning_rate": 7.837917959039494e-07, "loss": 0.0, "step": 2089 }, { "epoch": 1.03, "logps_train/chosen": -65.39088439941406, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -265.31396484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.21003586053848267, "rewards_train/margins": 13.876145780086517, "rewards_train/rejected": -14.086181640625, "step": 2089 }, { "epoch": 1.03, "learning_rate": 7.835683157223393e-07, "loss": 0.0003, "step": 2090 }, { "epoch": 1.03, "logps_train/chosen": -71.4622573852539, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -282.51861572265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5424657464027405, "rewards_train/margins": 14.788299977779388, "rewards_train/rejected": -15.330765724182129, "step": 2090 }, { "epoch": 1.03, "learning_rate": 7.833447519995899e-07, "loss": 0.0, "step": 2091 }, { "epoch": 1.03, "logps_train/chosen": -73.00332641601562, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -278.9966735839844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7371006608009338, "rewards_train/margins": 14.552508294582367, "rewards_train/rejected": -15.2896089553833, "step": 2091 }, { "epoch": 1.03, "learning_rate": 7.831211048015648e-07, "loss": 0.0, "step": 2092 }, { "epoch": 1.03, "logps_train/chosen": -69.35911560058594, "logps_train/ref_chosen": -63.59375, "logps_train/ref_rejected": -121.1875, "logps_train/rejected": -272.6388854980469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5762439966201782, "rewards_train/margins": 14.571092963218689, "rewards_train/rejected": -15.147336959838867, "step": 2092 }, { "epoch": 1.03, "learning_rate": 7.828973741941516e-07, "loss": 0.0003, "step": 2093 }, { "epoch": 1.03, "logps_train/chosen": -68.84938049316406, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -121.9375, "logps_train/rejected": -266.6551818847656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5327408313751221, "rewards_train/margins": 13.937415838241577, "rewards_train/rejected": -14.4701566696167, "step": 2093 }, { "epoch": 1.03, "learning_rate": 7.826735602432632e-07, "loss": 0.0009, "step": 2094 }, { "epoch": 1.03, "logps_train/chosen": -72.47228240966797, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -132.125, "logps_train/rejected": -288.5685119628906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7566033601760864, "rewards_train/margins": 14.893560290336609, "rewards_train/rejected": -15.650163650512695, "step": 2094 }, { "epoch": 1.03, "learning_rate": 7.824496630148364e-07, "loss": 0.0001, "step": 2095 }, { "epoch": 1.03, "logps_train/chosen": -70.70433044433594, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -283.3510437011719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5339587330818176, "rewards_train/margins": 14.88268917798996, "rewards_train/rejected": -15.416647911071777, "step": 2095 }, { "epoch": 1.03, "learning_rate": 7.82225682574833e-07, "loss": 0.0, "step": 2096 }, { "epoch": 1.03, "logps_train/chosen": -68.04135131835938, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -122.1875, "logps_train/rejected": -265.8819274902344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.41785579919815063, "rewards_train/margins": 13.95339149236679, "rewards_train/rejected": -14.371247291564941, "step": 2096 }, { "epoch": 1.03, "learning_rate": 7.820016189892391e-07, "loss": 0.0002, "step": 2097 }, { "epoch": 1.03, "logps_train/chosen": -70.83512878417969, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -120.6875, "logps_train/rejected": -260.4759826660156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7265307307243347, "rewards_train/margins": 13.253783524036407, "rewards_train/rejected": -13.980314254760742, "step": 2097 }, { "epoch": 1.03, "learning_rate": 7.817774723240656e-07, "loss": 0.0001, "step": 2098 }, { "epoch": 1.03, "logps_train/chosen": -72.77765655517578, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -273.1829833984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.045734167098999, "rewards_train/margins": 13.855180025100708, "rewards_train/rejected": -14.900914192199707, "step": 2098 }, { "epoch": 1.03, "learning_rate": 7.815532426453471e-07, "loss": 0.0005, "step": 2099 }, { "epoch": 1.03, "logps_train/chosen": -72.16216278076172, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -276.87872314453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7365288138389587, "rewards_train/margins": 14.24782806634903, "rewards_train/rejected": -14.984356880187988, "step": 2099 }, { "epoch": 1.03, "learning_rate": 7.813289300191434e-07, "loss": 0.0002, "step": 2100 }, { "epoch": 1.03, "logps_train/chosen": -68.25205993652344, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -271.44744873046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3796497881412506, "rewards_train/margins": 14.321441978216171, "rewards_train/rejected": -14.701091766357422, "step": 2100 }, { "epoch": 1.03, "learning_rate": 7.811045345115388e-07, "loss": 0.0001, "step": 2101 }, { "epoch": 1.03, "logps_train/chosen": -69.79418182373047, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -269.2944641113281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4833733141422272, "rewards_train/margins": 13.884061187505722, "rewards_train/rejected": -14.36743450164795, "step": 2101 }, { "epoch": 1.03, "learning_rate": 7.808800561886415e-07, "loss": 0.0002, "step": 2102 }, { "epoch": 1.03, "logps_train/chosen": -70.3338623046875, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -270.81072998046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6720091700553894, "rewards_train/margins": 14.037190735340118, "rewards_train/rejected": -14.709199905395508, "step": 2102 }, { "epoch": 1.04, "learning_rate": 7.806554951165843e-07, "loss": 0.0, "step": 2103 }, { "epoch": 1.04, "logps_train/chosen": -69.98858642578125, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -122.0625, "logps_train/rejected": -269.9502868652344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.784552276134491, "rewards_train/margins": 14.004519760608673, "rewards_train/rejected": -14.789072036743164, "step": 2103 }, { "epoch": 1.04, "learning_rate": 7.804308513615243e-07, "loss": 0.0001, "step": 2104 }, { "epoch": 1.04, "logps_train/chosen": -69.31436157226562, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -272.6512451171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4664458632469177, "rewards_train/margins": 14.31347280740738, "rewards_train/rejected": -14.779918670654297, "step": 2104 }, { "epoch": 1.04, "learning_rate": 7.802061249896434e-07, "loss": 0.0001, "step": 2105 }, { "epoch": 1.04, "logps_train/chosen": -67.15548706054688, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -121.6875, "logps_train/rejected": -261.52655029296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.17668141424655914, "rewards_train/margins": 13.806883916258812, "rewards_train/rejected": -13.983565330505371, "step": 2105 }, { "epoch": 1.04, "learning_rate": 7.799813160671471e-07, "loss": 0.0001, "step": 2106 }, { "epoch": 1.04, "logps_train/chosen": -70.2767333984375, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -279.30670166015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5501340627670288, "rewards_train/margins": 14.634050011634827, "rewards_train/rejected": -15.184184074401855, "step": 2106 }, { "epoch": 1.04, "learning_rate": 7.797564246602661e-07, "loss": 0.0, "step": 2107 }, { "epoch": 1.04, "logps_train/chosen": -68.16631317138672, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -275.47552490234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3351859152317047, "rewards_train/margins": 14.515197843313217, "rewards_train/rejected": -14.850383758544922, "step": 2107 }, { "epoch": 1.04, "learning_rate": 7.795314508352546e-07, "loss": 0.0, "step": 2108 }, { "epoch": 1.04, "logps_train/chosen": -68.960693359375, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -275.0086669921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5325922966003418, "rewards_train/margins": 14.273788928985596, "rewards_train/rejected": -14.806381225585938, "step": 2108 }, { "epoch": 1.04, "learning_rate": 7.793063946583913e-07, "loss": 0.0001, "step": 2109 }, { "epoch": 1.04, "logps_train/chosen": -69.78101348876953, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -122.4375, "logps_train/rejected": -267.528564453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6514413356781006, "rewards_train/margins": 13.85922646522522, "rewards_train/rejected": -14.51066780090332, "step": 2109 }, { "epoch": 1.04, "learning_rate": 7.790812561959797e-07, "loss": 0.0021, "step": 2110 }, { "epoch": 1.04, "logps_train/chosen": -68.83575439453125, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -272.7232971191406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5081360936164856, "rewards_train/margins": 14.31546276807785, "rewards_train/rejected": -14.823598861694336, "step": 2110 }, { "epoch": 1.04, "learning_rate": 7.788560355143465e-07, "loss": 0.0001, "step": 2111 }, { "epoch": 1.04, "logps_train/chosen": -67.89836883544922, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -266.6068420410156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3705986738204956, "rewards_train/margins": 14.02763283252716, "rewards_train/rejected": -14.398231506347656, "step": 2111 }, { "epoch": 1.04, "learning_rate": 7.786307326798439e-07, "loss": 0.0002, "step": 2112 }, { "epoch": 1.04, "logps_train/chosen": -73.27362823486328, "logps_train/ref_chosen": -67.1875, "logps_train/ref_rejected": -131.625, "logps_train/rejected": -282.5904541015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6062691807746887, "rewards_train/margins": 14.483440101146698, "rewards_train/rejected": -15.089709281921387, "step": 2112 }, { "epoch": 1.04, "learning_rate": 7.784053477588472e-07, "loss": 0.0001, "step": 2113 }, { "epoch": 1.04, "logps_train/chosen": -67.8175277709961, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -270.12432861328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5550434589385986, "rewards_train/margins": 14.095574140548706, "rewards_train/rejected": -14.650617599487305, "step": 2113 }, { "epoch": 1.04, "learning_rate": 7.781798808177564e-07, "loss": 0.0003, "step": 2114 }, { "epoch": 1.04, "logps_train/chosen": -69.86941528320312, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -279.06964111328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5024687647819519, "rewards_train/margins": 14.672268211841583, "rewards_train/rejected": -15.174736976623535, "step": 2114 }, { "epoch": 1.04, "learning_rate": 7.779543319229958e-07, "loss": 0.0006, "step": 2115 }, { "epoch": 1.04, "logps_train/chosen": -65.25885009765625, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -279.22308349609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.16182224452495575, "rewards_train/margins": 15.214099958539009, "rewards_train/rejected": -15.375922203063965, "step": 2115 }, { "epoch": 1.04, "learning_rate": 7.777287011410132e-07, "loss": 0.0, "step": 2116 }, { "epoch": 1.04, "logps_train/chosen": -68.287109375, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -268.7557678222656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.42758768796920776, "rewards_train/margins": 14.109609425067902, "rewards_train/rejected": -14.53719711303711, "step": 2116 }, { "epoch": 1.04, "learning_rate": 7.775029885382813e-07, "loss": 0.0, "step": 2117 }, { "epoch": 1.04, "logps_train/chosen": -69.72555541992188, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -271.1637268066406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.43661782145500183, "rewards_train/margins": 14.14396384358406, "rewards_train/rejected": -14.580581665039062, "step": 2117 }, { "epoch": 1.04, "learning_rate": 7.772771941812965e-07, "loss": 0.0002, "step": 2118 }, { "epoch": 1.04, "logps_train/chosen": -68.33045959472656, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -272.4327087402344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.45362716913223267, "rewards_train/margins": 14.445550382137299, "rewards_train/rejected": -14.899177551269531, "step": 2118 }, { "epoch": 1.04, "learning_rate": 7.770513181365792e-07, "loss": 0.0001, "step": 2119 }, { "epoch": 1.04, "logps_train/chosen": -74.3531494140625, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -273.44317626953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.936633288860321, "rewards_train/margins": 13.943766415119171, "rewards_train/rejected": -14.880399703979492, "step": 2119 }, { "epoch": 1.04, "learning_rate": 7.768253604706743e-07, "loss": 0.0002, "step": 2120 }, { "epoch": 1.04, "logps_train/chosen": -66.12690734863281, "logps_train/ref_chosen": -62.84375, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -267.4786376953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.32689905166625977, "rewards_train/margins": 14.07579755783081, "rewards_train/rejected": -14.40269660949707, "step": 2120 }, { "epoch": 1.04, "learning_rate": 7.765993212501501e-07, "loss": 0.0003, "step": 2121 }, { "epoch": 1.04, "logps_train/chosen": -67.16130065917969, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -120.25, "logps_train/rejected": -267.74774169921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4803396761417389, "rewards_train/margins": 14.269580215215683, "rewards_train/rejected": -14.749919891357422, "step": 2121 }, { "epoch": 1.04, "learning_rate": 7.763732005415993e-07, "loss": 0.0, "step": 2122 }, { "epoch": 1.04, "logps_train/chosen": -71.68844604492188, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -275.9435729980469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6640102863311768, "rewards_train/margins": 14.239331007003784, "rewards_train/rejected": -14.903341293334961, "step": 2122 }, { "epoch": 1.05, "learning_rate": 7.761469984116388e-07, "loss": 0.0001, "step": 2123 }, { "epoch": 1.05, "logps_train/chosen": -65.59587097167969, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -271.82269287109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1979179084300995, "rewards_train/margins": 14.559110671281815, "rewards_train/rejected": -14.757028579711914, "step": 2123 }, { "epoch": 1.05, "learning_rate": 7.759207149269093e-07, "loss": 0.0, "step": 2124 }, { "epoch": 1.05, "logps_train/chosen": -67.51579284667969, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -269.7127685546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.34322935342788696, "rewards_train/margins": 14.15763658285141, "rewards_train/rejected": -14.500865936279297, "step": 2124 }, { "epoch": 1.05, "learning_rate": 7.756943501540754e-07, "loss": 0.0, "step": 2125 }, { "epoch": 1.05, "logps_train/chosen": -70.01943969726562, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -268.4726867675781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.33597737550735474, "rewards_train/margins": 13.9788219332695, "rewards_train/rejected": -14.314799308776855, "step": 2125 }, { "epoch": 1.05, "learning_rate": 7.754679041598256e-07, "loss": 0.0, "step": 2126 }, { "epoch": 1.05, "logps_train/chosen": -70.53466033935547, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -268.14324951171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5495111346244812, "rewards_train/margins": 13.939817249774933, "rewards_train/rejected": -14.489328384399414, "step": 2126 }, { "epoch": 1.05, "learning_rate": 7.752413770108723e-07, "loss": 0.0001, "step": 2127 }, { "epoch": 1.05, "logps_train/chosen": -69.9388427734375, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -275.61773681640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6879271268844604, "rewards_train/margins": 14.353288769721985, "rewards_train/rejected": -15.041215896606445, "step": 2127 }, { "epoch": 1.05, "learning_rate": 7.750147687739521e-07, "loss": 0.0001, "step": 2128 }, { "epoch": 1.05, "logps_train/chosen": -71.47776794433594, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -280.8486328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8152816295623779, "rewards_train/margins": 14.819485902786255, "rewards_train/rejected": -15.634767532348633, "step": 2128 }, { "epoch": 1.05, "learning_rate": 7.747880795158252e-07, "loss": 0.0, "step": 2129 }, { "epoch": 1.05, "logps_train/chosen": -73.34039306640625, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -271.91192626953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7994197010993958, "rewards_train/margins": 13.705934226512909, "rewards_train/rejected": -14.505353927612305, "step": 2129 }, { "epoch": 1.05, "learning_rate": 7.745613093032761e-07, "loss": 0.0, "step": 2130 }, { "epoch": 1.05, "logps_train/chosen": -70.06968688964844, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -275.54058837890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4194934070110321, "rewards_train/margins": 14.621186524629593, "rewards_train/rejected": -15.040679931640625, "step": 2130 }, { "epoch": 1.05, "learning_rate": 7.743344582031124e-07, "loss": 0.0001, "step": 2131 }, { "epoch": 1.05, "logps_train/chosen": -70.54854583740234, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -271.158203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5286824107170105, "rewards_train/margins": 13.93601256608963, "rewards_train/rejected": -14.46469497680664, "step": 2131 }, { "epoch": 1.05, "learning_rate": 7.741075262821664e-07, "loss": 0.0004, "step": 2132 }, { "epoch": 1.05, "logps_train/chosen": -69.06425476074219, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -272.2401123046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4492231607437134, "rewards_train/margins": 14.10008180141449, "rewards_train/rejected": -14.549304962158203, "step": 2132 }, { "epoch": 1.05, "learning_rate": 7.738805136072933e-07, "loss": 0.0, "step": 2133 }, { "epoch": 1.05, "logps_train/chosen": -69.36630249023438, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -283.22564697265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4026464819908142, "rewards_train/margins": 15.003707468509674, "rewards_train/rejected": -15.406353950500488, "step": 2133 }, { "epoch": 1.05, "learning_rate": 7.73653420245373e-07, "loss": 0.0, "step": 2134 }, { "epoch": 1.05, "logps_train/chosen": -71.59491729736328, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -276.8739318847656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7610054016113281, "rewards_train/margins": 14.449092864990234, "rewards_train/rejected": -15.210098266601562, "step": 2134 }, { "epoch": 1.05, "learning_rate": 7.734262462633084e-07, "loss": 0.0001, "step": 2135 }, { "epoch": 1.05, "logps_train/chosen": -68.92898559570312, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -279.9677429199219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.46169722080230713, "rewards_train/margins": 14.74313485622406, "rewards_train/rejected": -15.204832077026367, "step": 2135 }, { "epoch": 1.05, "learning_rate": 7.731989917280266e-07, "loss": 0.0001, "step": 2136 }, { "epoch": 1.05, "logps_train/chosen": -67.88386535644531, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -122.1875, "logps_train/rejected": -267.3867492675781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4604575037956238, "rewards_train/margins": 14.059027969837189, "rewards_train/rejected": -14.519485473632812, "step": 2136 }, { "epoch": 1.05, "learning_rate": 7.729716567064786e-07, "loss": 0.0002, "step": 2137 }, { "epoch": 1.05, "logps_train/chosen": -72.37164306640625, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -131.25, "logps_train/rejected": -286.0143127441406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5688545107841492, "rewards_train/margins": 14.91348534822464, "rewards_train/rejected": -15.482339859008789, "step": 2137 }, { "epoch": 1.05, "learning_rate": 7.727442412656384e-07, "loss": 0.0003, "step": 2138 }, { "epoch": 1.05, "logps_train/chosen": -69.03767395019531, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -275.419921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4407792091369629, "rewards_train/margins": 14.556092739105225, "rewards_train/rejected": -14.996871948242188, "step": 2138 }, { "epoch": 1.05, "learning_rate": 7.725167454725043e-07, "loss": 0.0007, "step": 2139 }, { "epoch": 1.05, "logps_train/chosen": -65.84811401367188, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -270.77545166015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3590788245201111, "rewards_train/margins": 14.37286239862442, "rewards_train/rejected": -14.731941223144531, "step": 2139 }, { "epoch": 1.05, "learning_rate": 7.722891693940983e-07, "loss": 0.0001, "step": 2140 }, { "epoch": 1.05, "logps_train/chosen": -66.577392578125, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -274.5051574707031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3050532341003418, "rewards_train/margins": 14.410990238189697, "rewards_train/rejected": -14.716043472290039, "step": 2140 }, { "epoch": 1.05, "learning_rate": 7.720615130974654e-07, "loss": 0.0001, "step": 2141 }, { "epoch": 1.05, "logps_train/chosen": -67.19596099853516, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -264.5924377441406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.30702272057533264, "rewards_train/margins": 13.80314764380455, "rewards_train/rejected": -14.110170364379883, "step": 2141 }, { "epoch": 1.05, "learning_rate": 7.71833776649675e-07, "loss": 0.0, "step": 2142 }, { "epoch": 1.05, "logps_train/chosen": -69.56468200683594, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -120.0625, "logps_train/rejected": -263.60467529296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.724851131439209, "rewards_train/margins": 13.631661891937256, "rewards_train/rejected": -14.356513023376465, "step": 2142 }, { "epoch": 1.06, "learning_rate": 7.716059601178197e-07, "loss": 0.0001, "step": 2143 }, { "epoch": 1.06, "logps_train/chosen": -72.15292358398438, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -133.25, "logps_train/rejected": -295.4969177246094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5165621638298035, "rewards_train/margins": 15.708226501941681, "rewards_train/rejected": -16.224788665771484, "step": 2143 }, { "epoch": 1.06, "learning_rate": 7.713780635690161e-07, "loss": 0.0, "step": 2144 }, { "epoch": 1.06, "logps_train/chosen": -71.58599090576172, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -123.1875, "logps_train/rejected": -273.664794921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6723198294639587, "rewards_train/margins": 14.373359262943268, "rewards_train/rejected": -15.045679092407227, "step": 2144 }, { "epoch": 1.06, "learning_rate": 7.711500870704035e-07, "loss": 0.0008, "step": 2145 }, { "epoch": 1.06, "logps_train/chosen": -71.56194305419922, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -274.68756103515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.47816669940948486, "rewards_train/margins": 14.203773379325867, "rewards_train/rejected": -14.681940078735352, "step": 2145 }, { "epoch": 1.06, "learning_rate": 7.709220306891457e-07, "loss": 0.0, "step": 2146 }, { "epoch": 1.06, "logps_train/chosen": -67.97944641113281, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -273.3721923828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4332965016365051, "rewards_train/margins": 14.390786826610565, "rewards_train/rejected": -14.82408332824707, "step": 2146 }, { "epoch": 1.06, "learning_rate": 7.706938944924293e-07, "loss": 0.0002, "step": 2147 }, { "epoch": 1.06, "logps_train/chosen": -69.53817749023438, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -278.15948486328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4544040858745575, "rewards_train/margins": 14.747286587953568, "rewards_train/rejected": -15.201690673828125, "step": 2147 }, { "epoch": 1.06, "learning_rate": 7.704656785474652e-07, "loss": 0.0001, "step": 2148 }, { "epoch": 1.06, "logps_train/chosen": -66.92849731445312, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -274.76708984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.21721448004245758, "rewards_train/margins": 14.289232358336449, "rewards_train/rejected": -14.506446838378906, "step": 2148 }, { "epoch": 1.06, "learning_rate": 7.702373829214872e-07, "loss": 0.0002, "step": 2149 }, { "epoch": 1.06, "logps_train/chosen": -75.85780334472656, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -271.8516845703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9778703451156616, "rewards_train/margins": 13.692650437355042, "rewards_train/rejected": -14.670520782470703, "step": 2149 }, { "epoch": 1.06, "learning_rate": 7.700090076817528e-07, "loss": 0.0001, "step": 2150 }, { "epoch": 1.06, "logps_train/chosen": -66.01493835449219, "logps_train/ref_chosen": -62.53125, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -275.174560546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3491494655609131, "rewards_train/margins": 14.454826593399048, "rewards_train/rejected": -14.803976058959961, "step": 2150 }, { "epoch": 1.06, "learning_rate": 7.697805528955425e-07, "loss": 0.0002, "step": 2151 }, { "epoch": 1.06, "logps_train/chosen": -71.82884979248047, "logps_train/ref_chosen": -67.5, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -272.8580627441406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.43132251501083374, "rewards_train/margins": 14.076602518558502, "rewards_train/rejected": -14.507925033569336, "step": 2151 }, { "epoch": 1.06, "learning_rate": 7.695520186301612e-07, "loss": 0.0003, "step": 2152 }, { "epoch": 1.06, "logps_train/chosen": -73.87030029296875, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -276.3402099609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7285584211349487, "rewards_train/margins": 14.449260830879211, "rewards_train/rejected": -15.17781925201416, "step": 2152 }, { "epoch": 1.06, "learning_rate": 7.693234049529362e-07, "loss": 0.0001, "step": 2153 }, { "epoch": 1.06, "logps_train/chosen": -69.79986572265625, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -282.75823974609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6304985880851746, "rewards_train/margins": 14.88428908586502, "rewards_train/rejected": -15.514787673950195, "step": 2153 }, { "epoch": 1.06, "learning_rate": 7.690947119312188e-07, "loss": 0.0, "step": 2154 }, { "epoch": 1.06, "logps_train/chosen": -66.30612182617188, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -270.06427001953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.300338476896286, "rewards_train/margins": 14.412973672151566, "rewards_train/rejected": -14.713312149047852, "step": 2154 }, { "epoch": 1.06, "learning_rate": 7.688659396323833e-07, "loss": 0.0005, "step": 2155 }, { "epoch": 1.06, "logps_train/chosen": -65.00347137451172, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -266.14996337890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.10884290933609009, "rewards_train/margins": 14.132811486721039, "rewards_train/rejected": -14.241654396057129, "step": 2155 }, { "epoch": 1.06, "learning_rate": 7.68637088123828e-07, "loss": 0.0002, "step": 2156 }, { "epoch": 1.06, "logps_train/chosen": -71.90449523925781, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -117.4375, "logps_train/rejected": -260.9521789550781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8592973351478577, "rewards_train/margins": 13.493441998958588, "rewards_train/rejected": -14.352739334106445, "step": 2156 }, { "epoch": 1.06, "learning_rate": 7.684081574729737e-07, "loss": 0.0006, "step": 2157 }, { "epoch": 1.06, "logps_train/chosen": -71.77278137207031, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -281.28204345703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5916821956634521, "rewards_train/margins": 14.698728799819946, "rewards_train/rejected": -15.290410995483398, "step": 2157 }, { "epoch": 1.06, "learning_rate": 7.68179147747265e-07, "loss": 0.0, "step": 2158 }, { "epoch": 1.06, "logps_train/chosen": -69.66326141357422, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -275.4652099609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.403142511844635, "rewards_train/margins": 14.478924214839935, "rewards_train/rejected": -14.88206672668457, "step": 2158 }, { "epoch": 1.06, "learning_rate": 7.679500590141699e-07, "loss": 0.0, "step": 2159 }, { "epoch": 1.06, "logps_train/chosen": -69.58531188964844, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -120.0625, "logps_train/rejected": -268.64642333984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5848493576049805, "rewards_train/margins": 14.275494575500488, "rewards_train/rejected": -14.860343933105469, "step": 2159 }, { "epoch": 1.06, "learning_rate": 7.677208913411792e-07, "loss": 0.0, "step": 2160 }, { "epoch": 1.06, "logps_train/chosen": -68.89625549316406, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -121.5, "logps_train/rejected": -267.97125244140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5225845575332642, "rewards_train/margins": 14.126589179039001, "rewards_train/rejected": -14.649173736572266, "step": 2160 }, { "epoch": 1.06, "learning_rate": 7.674916447958075e-07, "loss": 0.0002, "step": 2161 }, { "epoch": 1.06, "logps_train/chosen": -69.51495361328125, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -278.76788330078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2942679226398468, "rewards_train/margins": 14.701662749052048, "rewards_train/rejected": -14.995930671691895, "step": 2161 }, { "epoch": 1.06, "learning_rate": 7.672623194455923e-07, "loss": 0.0, "step": 2162 }, { "epoch": 1.06, "logps_train/chosen": -70.61734771728516, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -120.875, "logps_train/rejected": -267.712890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4912758469581604, "rewards_train/margins": 14.192565858364105, "rewards_train/rejected": -14.683841705322266, "step": 2162 }, { "epoch": 1.06, "learning_rate": 7.670329153580942e-07, "loss": 0.0001, "step": 2163 }, { "epoch": 1.06, "logps_train/chosen": -69.39836120605469, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -268.1368713378906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2852464020252228, "rewards_train/margins": 14.027219265699387, "rewards_train/rejected": -14.31246566772461, "step": 2163 }, { "epoch": 1.07, "learning_rate": 7.668034326008975e-07, "loss": 0.0001, "step": 2164 }, { "epoch": 1.07, "logps_train/chosen": -65.4171142578125, "logps_train/ref_chosen": -60.65625, "logps_train/ref_rejected": -119.3125, "logps_train/rejected": -252.8903350830078, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4772586226463318, "rewards_train/margins": 12.880867063999176, "rewards_train/rejected": -13.358125686645508, "step": 2164 }, { "epoch": 1.07, "learning_rate": 7.665738712416093e-07, "loss": 0.0005, "step": 2165 }, { "epoch": 1.07, "logps_train/chosen": -76.052978515625, "logps_train/ref_chosen": -68.25, "logps_train/ref_rejected": -131.25, "logps_train/rejected": -285.87841796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7779537439346313, "rewards_train/margins": 14.682449698448181, "rewards_train/rejected": -15.460403442382812, "step": 2165 }, { "epoch": 1.07, "learning_rate": 7.663442313478597e-07, "loss": 0.0, "step": 2166 }, { "epoch": 1.07, "logps_train/chosen": -70.63798522949219, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -281.6455993652344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5250290036201477, "rewards_train/margins": 15.060721576213837, "rewards_train/rejected": -15.585750579833984, "step": 2166 }, { "epoch": 1.07, "learning_rate": 7.661145129873024e-07, "loss": 0.0, "step": 2167 }, { "epoch": 1.07, "logps_train/chosen": -69.7549057006836, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -268.4823303222656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5297390818595886, "rewards_train/margins": 13.98987990617752, "rewards_train/rejected": -14.51961898803711, "step": 2167 }, { "epoch": 1.07, "learning_rate": 7.658847162276138e-07, "loss": 0.0, "step": 2168 }, { "epoch": 1.07, "logps_train/chosen": -71.43487548828125, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -277.4527282714844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5035704970359802, "rewards_train/margins": 14.466164648532867, "rewards_train/rejected": -14.969735145568848, "step": 2168 }, { "epoch": 1.07, "learning_rate": 7.656548411364938e-07, "loss": 0.0, "step": 2169 }, { "epoch": 1.07, "logps_train/chosen": -70.05133056640625, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -282.5501403808594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5193414688110352, "rewards_train/margins": 15.084794044494629, "rewards_train/rejected": -15.604135513305664, "step": 2169 }, { "epoch": 1.07, "learning_rate": 7.654248877816651e-07, "loss": 0.0, "step": 2170 }, { "epoch": 1.07, "logps_train/chosen": -70.29098510742188, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -280.8968505859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.45854178071022034, "rewards_train/margins": 14.739929288625717, "rewards_train/rejected": -15.198471069335938, "step": 2170 }, { "epoch": 1.07, "learning_rate": 7.651948562308733e-07, "loss": 0.0, "step": 2171 }, { "epoch": 1.07, "logps_train/chosen": -69.12000274658203, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -273.0585021972656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6794809103012085, "rewards_train/margins": 14.243069291114807, "rewards_train/rejected": -14.922550201416016, "step": 2171 }, { "epoch": 1.07, "learning_rate": 7.649647465518874e-07, "loss": 0.0003, "step": 2172 }, { "epoch": 1.07, "logps_train/chosen": -72.80693817138672, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -279.3570556640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8364067673683167, "rewards_train/margins": 14.544269502162933, "rewards_train/rejected": -15.38067626953125, "step": 2172 }, { "epoch": 1.07, "learning_rate": 7.647345588124992e-07, "loss": 0.0, "step": 2173 }, { "epoch": 1.07, "logps_train/chosen": -74.79164123535156, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -270.52532958984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.859633207321167, "rewards_train/margins": 13.9443199634552, "rewards_train/rejected": -14.803953170776367, "step": 2173 }, { "epoch": 1.07, "learning_rate": 7.645042930805237e-07, "loss": 0.0001, "step": 2174 }, { "epoch": 1.07, "logps_train/chosen": -70.890625, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -281.746337890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.556250274181366, "rewards_train/margins": 14.596214592456818, "rewards_train/rejected": -15.152464866638184, "step": 2174 }, { "epoch": 1.07, "learning_rate": 7.642739494237985e-07, "loss": 0.0001, "step": 2175 }, { "epoch": 1.07, "logps_train/chosen": -68.52561950683594, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -277.12542724609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.47297203540802, "rewards_train/margins": 14.717598795890808, "rewards_train/rejected": -15.190570831298828, "step": 2175 }, { "epoch": 1.07, "learning_rate": 7.640435279101847e-07, "loss": 0.0001, "step": 2176 }, { "epoch": 1.07, "logps_train/chosen": -71.70915222167969, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -287.6055908203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5407878160476685, "rewards_train/margins": 15.38402545452118, "rewards_train/rejected": -15.924813270568848, "step": 2176 }, { "epoch": 1.07, "learning_rate": 7.638130286075658e-07, "loss": 0.0, "step": 2177 }, { "epoch": 1.07, "logps_train/chosen": -71.5400390625, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -282.40106201171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8171381950378418, "rewards_train/margins": 14.491032123565674, "rewards_train/rejected": -15.308170318603516, "step": 2177 }, { "epoch": 1.07, "learning_rate": 7.635824515838484e-07, "loss": 0.0001, "step": 2178 }, { "epoch": 1.07, "logps_train/chosen": -73.19953918457031, "logps_train/ref_chosen": -68.25, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -288.97412109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.49617502093315125, "rewards_train/margins": 15.436100751161575, "rewards_train/rejected": -15.932275772094727, "step": 2178 }, { "epoch": 1.07, "learning_rate": 7.633517969069625e-07, "loss": 0.0, "step": 2179 }, { "epoch": 1.07, "logps_train/chosen": -73.30648803710938, "logps_train/ref_chosen": -67.4375, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -271.7459411621094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5844576358795166, "rewards_train/margins": 14.006837606430054, "rewards_train/rejected": -14.59129524230957, "step": 2179 }, { "epoch": 1.07, "learning_rate": 7.631210646448601e-07, "loss": 0.0001, "step": 2180 }, { "epoch": 1.07, "logps_train/chosen": -68.9017333984375, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -279.54608154296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5054076910018921, "rewards_train/margins": 15.044514298439026, "rewards_train/rejected": -15.549921989440918, "step": 2180 }, { "epoch": 1.07, "learning_rate": 7.628902548655163e-07, "loss": 0.0001, "step": 2181 }, { "epoch": 1.07, "logps_train/chosen": -72.08824157714844, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -279.759033203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8072615265846252, "rewards_train/margins": 14.626550614833832, "rewards_train/rejected": -15.433812141418457, "step": 2181 }, { "epoch": 1.07, "learning_rate": 7.626593676369297e-07, "loss": 0.0001, "step": 2182 }, { "epoch": 1.07, "logps_train/chosen": -72.17149353027344, "logps_train/ref_chosen": -67.5625, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -277.592529296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.46168094873428345, "rewards_train/margins": 14.392300069332123, "rewards_train/rejected": -14.853981018066406, "step": 2182 }, { "epoch": 1.07, "learning_rate": 7.62428403027121e-07, "loss": 0.0, "step": 2183 }, { "epoch": 1.07, "logps_train/chosen": -69.5045166015625, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -120.875, "logps_train/rejected": -265.7475280761719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.48482614755630493, "rewards_train/margins": 14.00584501028061, "rewards_train/rejected": -14.490671157836914, "step": 2183 }, { "epoch": 1.08, "learning_rate": 7.62197361104134e-07, "loss": 0.0001, "step": 2184 }, { "epoch": 1.08, "logps_train/chosen": -72.27840423583984, "logps_train/ref_chosen": -67.1875, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -278.6341552734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5080167055130005, "rewards_train/margins": 14.538699984550476, "rewards_train/rejected": -15.046716690063477, "step": 2184 }, { "epoch": 1.08, "learning_rate": 7.619662419360352e-07, "loss": 0.0, "step": 2185 }, { "epoch": 1.08, "logps_train/chosen": -66.94548797607422, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -267.67645263671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.13273286819458008, "rewards_train/margins": 14.213428020477295, "rewards_train/rejected": -14.346160888671875, "step": 2185 }, { "epoch": 1.08, "learning_rate": 7.617350455909137e-07, "loss": 0.0, "step": 2186 }, { "epoch": 1.08, "logps_train/chosen": -70.88790893554688, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -122.3125, "logps_train/rejected": -272.5223388671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6173552870750427, "rewards_train/margins": 14.407436430454254, "rewards_train/rejected": -15.024791717529297, "step": 2186 }, { "epoch": 1.08, "learning_rate": 7.615037721368817e-07, "loss": 0.0, "step": 2187 }, { "epoch": 1.08, "logps_train/chosen": -66.02040100097656, "logps_train/ref_chosen": -62.28125, "logps_train/ref_rejected": -121.3125, "logps_train/rejected": -262.7447509765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.37379294633865356, "rewards_train/margins": 13.771875083446503, "rewards_train/rejected": -14.145668029785156, "step": 2187 }, { "epoch": 1.08, "learning_rate": 7.612724216420741e-07, "loss": 0.0002, "step": 2188 }, { "epoch": 1.08, "logps_train/chosen": -72.69943237304688, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -281.6195373535156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7773165702819824, "rewards_train/margins": 14.771159648895264, "rewards_train/rejected": -15.548476219177246, "step": 2188 }, { "epoch": 1.08, "learning_rate": 7.610409941746478e-07, "loss": 0.0, "step": 2189 }, { "epoch": 1.08, "logps_train/chosen": -72.18375396728516, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -269.00177001953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7024577260017395, "rewards_train/margins": 14.023893058300018, "rewards_train/rejected": -14.726350784301758, "step": 2189 }, { "epoch": 1.08, "learning_rate": 7.608094898027833e-07, "loss": 0.0005, "step": 2190 }, { "epoch": 1.08, "logps_train/chosen": -71.40280151367188, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -278.910400390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8332482576370239, "rewards_train/margins": 14.543777346611023, "rewards_train/rejected": -15.377025604248047, "step": 2190 }, { "epoch": 1.08, "learning_rate": 7.605779085946831e-07, "loss": 0.0001, "step": 2191 }, { "epoch": 1.08, "logps_train/chosen": -72.30628204345703, "logps_train/ref_chosen": -67.25, "logps_train/ref_rejected": -131.375, "logps_train/rejected": -289.3616943359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5019660592079163, "rewards_train/margins": 15.298847615718842, "rewards_train/rejected": -15.800813674926758, "step": 2191 }, { "epoch": 1.08, "learning_rate": 7.603462506185727e-07, "loss": 0.0, "step": 2192 }, { "epoch": 1.08, "logps_train/chosen": -72.46143341064453, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -280.2769775390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8769048452377319, "rewards_train/margins": 14.356066346168518, "rewards_train/rejected": -15.23297119140625, "step": 2192 }, { "epoch": 1.08, "learning_rate": 7.601145159427003e-07, "loss": 0.0, "step": 2193 }, { "epoch": 1.08, "logps_train/chosen": -69.56297302246094, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -288.19134521484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5398424863815308, "rewards_train/margins": 15.322017788887024, "rewards_train/rejected": -15.861860275268555, "step": 2193 }, { "epoch": 1.08, "learning_rate": 7.598827046353362e-07, "loss": 0.0, "step": 2194 }, { "epoch": 1.08, "logps_train/chosen": -72.42207336425781, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -281.07275390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.732197642326355, "rewards_train/margins": 14.909451603889465, "rewards_train/rejected": -15.64164924621582, "step": 2194 }, { "epoch": 1.08, "learning_rate": 7.596508167647738e-07, "loss": 0.0, "step": 2195 }, { "epoch": 1.08, "logps_train/chosen": -69.91871643066406, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -280.4757080078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.30168619751930237, "rewards_train/margins": 15.286900609731674, "rewards_train/rejected": -15.588586807250977, "step": 2195 }, { "epoch": 1.08, "learning_rate": 7.594188523993286e-07, "loss": 0.0001, "step": 2196 }, { "epoch": 1.08, "logps_train/chosen": -68.92344665527344, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -279.7660217285156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.46021583676338196, "rewards_train/margins": 14.725174635648727, "rewards_train/rejected": -15.18539047241211, "step": 2196 }, { "epoch": 1.08, "learning_rate": 7.59186811607339e-07, "loss": 0.0001, "step": 2197 }, { "epoch": 1.08, "logps_train/chosen": -70.0698471069336, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -284.0069580078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6001485586166382, "rewards_train/margins": 15.106504082679749, "rewards_train/rejected": -15.706652641296387, "step": 2197 }, { "epoch": 1.08, "learning_rate": 7.589546944571656e-07, "loss": 0.0001, "step": 2198 }, { "epoch": 1.08, "logps_train/chosen": -70.51915740966797, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -274.999267578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6483998894691467, "rewards_train/margins": 14.386882245540619, "rewards_train/rejected": -15.035282135009766, "step": 2198 }, { "epoch": 1.08, "learning_rate": 7.58722501017192e-07, "loss": 0.0002, "step": 2199 }, { "epoch": 1.08, "logps_train/chosen": -70.56800079345703, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -277.40521240234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.67672199010849, "rewards_train/margins": 14.349540293216705, "rewards_train/rejected": -15.026262283325195, "step": 2199 }, { "epoch": 1.08, "learning_rate": 7.584902313558239e-07, "loss": 0.0, "step": 2200 }, { "epoch": 1.08, "logps_train/chosen": -68.58882904052734, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -277.6842041015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.419918417930603, "rewards_train/margins": 14.598990082740784, "rewards_train/rejected": -15.018908500671387, "step": 2200 }, { "epoch": 1.08, "learning_rate": 7.582578855414894e-07, "loss": 0.0001, "step": 2201 }, { "epoch": 1.08, "logps_train/chosen": -74.846923828125, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -278.99700927734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8797122836112976, "rewards_train/margins": 14.480049908161163, "rewards_train/rejected": -15.359762191772461, "step": 2201 }, { "epoch": 1.08, "learning_rate": 7.58025463642639e-07, "loss": 0.0, "step": 2202 }, { "epoch": 1.08, "logps_train/chosen": -67.6390380859375, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -272.3719482421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.44688740372657776, "rewards_train/margins": 14.452417939901352, "rewards_train/rejected": -14.89930534362793, "step": 2202 }, { "epoch": 1.08, "learning_rate": 7.577929657277461e-07, "loss": 0.0001, "step": 2203 }, { "epoch": 1.08, "logps_train/chosen": -70.62510681152344, "logps_train/ref_chosen": -63.15625, "logps_train/ref_rejected": -122.125, "logps_train/rejected": -274.9326171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7463003396987915, "rewards_train/margins": 14.535634398460388, "rewards_train/rejected": -15.28193473815918, "step": 2203 }, { "epoch": 1.09, "learning_rate": 7.575603918653057e-07, "loss": 0.0002, "step": 2204 }, { "epoch": 1.09, "logps_train/chosen": -70.25328063964844, "logps_train/ref_chosen": -61.75, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -277.54779052734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8494986891746521, "rewards_train/margins": 14.5489302277565, "rewards_train/rejected": -15.398428916931152, "step": 2204 }, { "epoch": 1.09, "learning_rate": 7.573277421238362e-07, "loss": 0.0005, "step": 2205 }, { "epoch": 1.09, "logps_train/chosen": -69.84519958496094, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -121.25, "logps_train/rejected": -268.70904541015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6789538264274597, "rewards_train/margins": 14.064410507678986, "rewards_train/rejected": -14.743364334106445, "step": 2205 }, { "epoch": 1.09, "learning_rate": 7.570950165718774e-07, "loss": 0.0001, "step": 2206 }, { "epoch": 1.09, "logps_train/chosen": -68.71348571777344, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -122.125, "logps_train/rejected": -264.96533203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.49134308099746704, "rewards_train/margins": 13.78878539800644, "rewards_train/rejected": -14.280128479003906, "step": 2206 }, { "epoch": 1.09, "learning_rate": 7.568622152779919e-07, "loss": 0.0, "step": 2207 }, { "epoch": 1.09, "logps_train/chosen": -69.92044067382812, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -272.17156982421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.60468989610672, "rewards_train/margins": 14.094546973705292, "rewards_train/rejected": -14.699236869812012, "step": 2207 }, { "epoch": 1.09, "learning_rate": 7.566293383107644e-07, "loss": 0.0, "step": 2208 }, { "epoch": 1.09, "logps_train/chosen": -68.37895965576172, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -272.77490234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5175346732139587, "rewards_train/margins": 14.340619623661041, "rewards_train/rejected": -14.858154296875, "step": 2208 }, { "epoch": 1.09, "learning_rate": 7.563963857388023e-07, "loss": 0.0001, "step": 2209 }, { "epoch": 1.09, "logps_train/chosen": -71.35768127441406, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -274.23529052734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5789319276809692, "rewards_train/margins": 14.419596552848816, "rewards_train/rejected": -14.998528480529785, "step": 2209 }, { "epoch": 1.09, "learning_rate": 7.56163357630735e-07, "loss": 0.0001, "step": 2210 }, { "epoch": 1.09, "logps_train/chosen": -73.19003295898438, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -266.51776123046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8378511667251587, "rewards_train/margins": 13.71060311794281, "rewards_train/rejected": -14.548454284667969, "step": 2210 }, { "epoch": 1.09, "learning_rate": 7.559302540552137e-07, "loss": 0.0001, "step": 2211 }, { "epoch": 1.09, "logps_train/chosen": -67.38278198242188, "logps_train/ref_chosen": -61.6875, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -271.6300048828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.567770779132843, "rewards_train/margins": 14.303776919841766, "rewards_train/rejected": -14.87154769897461, "step": 2211 }, { "epoch": 1.09, "learning_rate": 7.556970750809127e-07, "loss": 0.0002, "step": 2212 }, { "epoch": 1.09, "logps_train/chosen": -74.32904052734375, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -279.6029052734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7464296817779541, "rewards_train/margins": 14.608684301376343, "rewards_train/rejected": -15.355113983154297, "step": 2212 }, { "epoch": 1.09, "learning_rate": 7.55463820776528e-07, "loss": 0.0001, "step": 2213 }, { "epoch": 1.09, "logps_train/chosen": -74.90849304199219, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -270.1986083984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.022635817527771, "rewards_train/margins": 13.794149994850159, "rewards_train/rejected": -14.81678581237793, "step": 2213 }, { "epoch": 1.09, "learning_rate": 7.552304912107781e-07, "loss": 0.0001, "step": 2214 }, { "epoch": 1.09, "logps_train/chosen": -69.68788146972656, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -120.8125, "logps_train/rejected": -268.5453186035156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5842177867889404, "rewards_train/margins": 14.187013864517212, "rewards_train/rejected": -14.771231651306152, "step": 2214 }, { "epoch": 1.09, "learning_rate": 7.549970864524029e-07, "loss": 0.0, "step": 2215 }, { "epoch": 1.09, "logps_train/chosen": -74.81436920166016, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -278.85980224609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9985755681991577, "rewards_train/margins": 14.159374833106995, "rewards_train/rejected": -15.157950401306152, "step": 2215 }, { "epoch": 1.09, "learning_rate": 7.547636065701657e-07, "loss": 0.0001, "step": 2216 }, { "epoch": 1.09, "logps_train/chosen": -72.94219970703125, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -281.3216247558594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7685847282409668, "rewards_train/margins": 14.54731798171997, "rewards_train/rejected": -15.315902709960938, "step": 2216 }, { "epoch": 1.09, "learning_rate": 7.545300516328508e-07, "loss": 0.0002, "step": 2217 }, { "epoch": 1.09, "logps_train/chosen": -69.11250305175781, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -280.1650390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4002160131931305, "rewards_train/margins": 14.92595586180687, "rewards_train/rejected": -15.326171875, "step": 2217 }, { "epoch": 1.09, "learning_rate": 7.542964217092651e-07, "loss": 0.0005, "step": 2218 }, { "epoch": 1.09, "logps_train/chosen": -73.31383514404297, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -280.2087097167969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8479853272438049, "rewards_train/margins": 14.483530938625336, "rewards_train/rejected": -15.33151626586914, "step": 2218 }, { "epoch": 1.09, "learning_rate": 7.540627168682376e-07, "loss": 0.0009, "step": 2219 }, { "epoch": 1.09, "logps_train/chosen": -74.76052856445312, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -285.18585205078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.888260006904602, "rewards_train/margins": 14.81030547618866, "rewards_train/rejected": -15.698565483093262, "step": 2219 }, { "epoch": 1.09, "learning_rate": 7.538289371786195e-07, "loss": 0.0, "step": 2220 }, { "epoch": 1.09, "logps_train/chosen": -67.99710083007812, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -121.25, "logps_train/rejected": -266.8016357421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4564484655857086, "rewards_train/margins": 14.097543805837631, "rewards_train/rejected": -14.55399227142334, "step": 2220 }, { "epoch": 1.09, "learning_rate": 7.535950827092836e-07, "loss": 0.0002, "step": 2221 }, { "epoch": 1.09, "logps_train/chosen": -70.75743103027344, "logps_train/ref_chosen": -61.9375, "logps_train/ref_rejected": -121.375, "logps_train/rejected": -268.021484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8820419311523438, "rewards_train/margins": 13.778407096862793, "rewards_train/rejected": -14.660449028015137, "step": 2221 }, { "epoch": 1.09, "learning_rate": 7.533611535291254e-07, "loss": 0.0002, "step": 2222 }, { "epoch": 1.09, "logps_train/chosen": -70.46266174316406, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -285.36578369140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.51799476146698, "rewards_train/margins": 15.180596470832825, "rewards_train/rejected": -15.698591232299805, "step": 2222 }, { "epoch": 1.09, "learning_rate": 7.531271497070614e-07, "loss": 0.0004, "step": 2223 }, { "epoch": 1.09, "logps_train/chosen": -72.23442077636719, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -277.22125244140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8403370976448059, "rewards_train/margins": 14.113526046276093, "rewards_train/rejected": -14.953863143920898, "step": 2223 }, { "epoch": 1.1, "learning_rate": 7.528930713120311e-07, "loss": 0.0, "step": 2224 }, { "epoch": 1.1, "logps_train/chosen": -72.835693359375, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -280.488037109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8365965485572815, "rewards_train/margins": 14.912548005580902, "rewards_train/rejected": -15.749144554138184, "step": 2224 }, { "epoch": 1.1, "learning_rate": 7.526589184129956e-07, "loss": 0.0, "step": 2225 }, { "epoch": 1.1, "logps_train/chosen": -68.46849060058594, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -280.6704406738281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6210677623748779, "rewards_train/margins": 14.818585634231567, "rewards_train/rejected": -15.439653396606445, "step": 2225 }, { "epoch": 1.1, "learning_rate": 7.52424691078938e-07, "loss": 0.0001, "step": 2226 }, { "epoch": 1.1, "logps_train/chosen": -69.87110900878906, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -269.2632751464844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5866464376449585, "rewards_train/margins": 13.771029114723206, "rewards_train/rejected": -14.357675552368164, "step": 2226 }, { "epoch": 1.1, "learning_rate": 7.521903893788631e-07, "loss": 0.0003, "step": 2227 }, { "epoch": 1.1, "logps_train/chosen": -70.72782897949219, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -288.830322265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.512529194355011, "rewards_train/margins": 15.459175288677216, "rewards_train/rejected": -15.971704483032227, "step": 2227 }, { "epoch": 1.1, "learning_rate": 7.519560133817977e-07, "loss": 0.0001, "step": 2228 }, { "epoch": 1.1, "logps_train/chosen": -71.79474639892578, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -272.7740173339844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8693917989730835, "rewards_train/margins": 14.08178961277008, "rewards_train/rejected": -14.951181411743164, "step": 2228 }, { "epoch": 1.1, "learning_rate": 7.517215631567905e-07, "loss": 0.0002, "step": 2229 }, { "epoch": 1.1, "logps_train/chosen": -72.34339904785156, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -283.1607666015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7137346267700195, "rewards_train/margins": 15.12802505493164, "rewards_train/rejected": -15.84175968170166, "step": 2229 }, { "epoch": 1.1, "learning_rate": 7.514870387729124e-07, "loss": 0.0002, "step": 2230 }, { "epoch": 1.1, "logps_train/chosen": -68.74620056152344, "logps_train/ref_chosen": -62.875, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -285.4573974609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5887801647186279, "rewards_train/margins": 15.126782655715942, "rewards_train/rejected": -15.71556282043457, "step": 2230 }, { "epoch": 1.1, "learning_rate": 7.512524402992555e-07, "loss": 0.0, "step": 2231 }, { "epoch": 1.1, "logps_train/chosen": -71.41651153564453, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -275.66949462890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7314461469650269, "rewards_train/margins": 14.475152134895325, "rewards_train/rejected": -15.206598281860352, "step": 2231 }, { "epoch": 1.1, "learning_rate": 7.510177678049345e-07, "loss": 0.0006, "step": 2232 }, { "epoch": 1.1, "logps_train/chosen": -71.89450073242188, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -273.7486572265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7343473434448242, "rewards_train/margins": 14.196086883544922, "rewards_train/rejected": -14.930434226989746, "step": 2232 }, { "epoch": 1.1, "learning_rate": 7.507830213590851e-07, "loss": 0.0001, "step": 2233 }, { "epoch": 1.1, "logps_train/chosen": -67.00636291503906, "logps_train/ref_chosen": -62.53125, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -274.09808349609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4476091265678406, "rewards_train/margins": 14.761663258075714, "rewards_train/rejected": -15.209272384643555, "step": 2233 }, { "epoch": 1.1, "learning_rate": 7.505482010308657e-07, "loss": 0.001, "step": 2234 }, { "epoch": 1.1, "logps_train/chosen": -69.1450424194336, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -282.3679504394531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5383809804916382, "rewards_train/margins": 15.02224314212799, "rewards_train/rejected": -15.560624122619629, "step": 2234 }, { "epoch": 1.1, "learning_rate": 7.503133068894553e-07, "loss": 0.0, "step": 2235 }, { "epoch": 1.1, "logps_train/chosen": -70.89030456542969, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -276.28363037109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5173993110656738, "rewards_train/margins": 14.705498218536377, "rewards_train/rejected": -15.22289752960205, "step": 2235 }, { "epoch": 1.1, "learning_rate": 7.500783390040557e-07, "loss": 0.0, "step": 2236 }, { "epoch": 1.1, "logps_train/chosen": -66.04206848144531, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -279.93988037109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.17649734020233154, "rewards_train/margins": 15.37998878955841, "rewards_train/rejected": -15.556486129760742, "step": 2236 }, { "epoch": 1.1, "learning_rate": 7.498432974438902e-07, "loss": 0.0, "step": 2237 }, { "epoch": 1.1, "logps_train/chosen": -71.62730407714844, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -121.5625, "logps_train/rejected": -272.59881591796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.646373450756073, "rewards_train/margins": 14.458773910999298, "rewards_train/rejected": -15.105147361755371, "step": 2237 }, { "epoch": 1.1, "learning_rate": 7.496081822782031e-07, "loss": 0.0, "step": 2238 }, { "epoch": 1.1, "logps_train/chosen": -69.51130676269531, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -276.26409912109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5950266718864441, "rewards_train/margins": 14.283044159412384, "rewards_train/rejected": -14.878070831298828, "step": 2238 }, { "epoch": 1.1, "learning_rate": 7.493729935762613e-07, "loss": 0.0001, "step": 2239 }, { "epoch": 1.1, "logps_train/chosen": -76.25648498535156, "logps_train/ref_chosen": -67.375, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -284.1793212890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8879050612449646, "rewards_train/margins": 14.916939795017242, "rewards_train/rejected": -15.804844856262207, "step": 2239 }, { "epoch": 1.1, "learning_rate": 7.49137731407353e-07, "loss": 0.0, "step": 2240 }, { "epoch": 1.1, "logps_train/chosen": -70.81278991699219, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -284.92999267578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6421677470207214, "rewards_train/margins": 15.140478432178497, "rewards_train/rejected": -15.782646179199219, "step": 2240 }, { "epoch": 1.1, "learning_rate": 7.489023958407877e-07, "loss": 0.0001, "step": 2241 }, { "epoch": 1.1, "logps_train/chosen": -73.06562805175781, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -279.6502990722656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.992842435836792, "rewards_train/margins": 14.54367184638977, "rewards_train/rejected": -15.536514282226562, "step": 2241 }, { "epoch": 1.1, "learning_rate": 7.486669869458973e-07, "loss": 0.0001, "step": 2242 }, { "epoch": 1.1, "logps_train/chosen": -71.00332641601562, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -273.79327392578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6870514154434204, "rewards_train/margins": 14.346571326255798, "rewards_train/rejected": -15.033622741699219, "step": 2242 }, { "epoch": 1.1, "learning_rate": 7.484315047920344e-07, "loss": 0.0002, "step": 2243 }, { "epoch": 1.1, "logps_train/chosen": -71.80990600585938, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -282.6310729980469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6412445306777954, "rewards_train/margins": 14.821863532066345, "rewards_train/rejected": -15.46310806274414, "step": 2243 }, { "epoch": 1.1, "learning_rate": 7.481959494485739e-07, "loss": 0.0, "step": 2244 }, { "epoch": 1.1, "logps_train/chosen": -71.93135070800781, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -281.35736083984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6411335468292236, "rewards_train/margins": 14.975754499435425, "rewards_train/rejected": -15.616888046264648, "step": 2244 }, { "epoch": 1.11, "learning_rate": 7.47960320984912e-07, "loss": 0.0, "step": 2245 }, { "epoch": 1.11, "logps_train/chosen": -68.57263946533203, "logps_train/ref_chosen": -61.5, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -277.82562255859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7087775468826294, "rewards_train/margins": 14.63642966747284, "rewards_train/rejected": -15.345207214355469, "step": 2245 }, { "epoch": 1.11, "learning_rate": 7.477246194704662e-07, "loss": 0.0001, "step": 2246 }, { "epoch": 1.11, "logps_train/chosen": -74.91294860839844, "logps_train/ref_chosen": -67.375, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -280.54327392578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7518414258956909, "rewards_train/margins": 14.674555897712708, "rewards_train/rejected": -15.426397323608398, "step": 2246 }, { "epoch": 1.11, "learning_rate": 7.47488844974676e-07, "loss": 0.0, "step": 2247 }, { "epoch": 1.11, "logps_train/chosen": -70.5159683227539, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -276.35162353515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6058448553085327, "rewards_train/margins": 14.69406282901764, "rewards_train/rejected": -15.299907684326172, "step": 2247 }, { "epoch": 1.11, "learning_rate": 7.472529975670022e-07, "loss": 0.0003, "step": 2248 }, { "epoch": 1.11, "logps_train/chosen": -72.38945007324219, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -276.92156982421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8146287202835083, "rewards_train/margins": 14.615318179130554, "rewards_train/rejected": -15.429946899414062, "step": 2248 }, { "epoch": 1.11, "learning_rate": 7.470170773169267e-07, "loss": 0.0, "step": 2249 }, { "epoch": 1.11, "logps_train/chosen": -72.11994934082031, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -278.2578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6534501314163208, "rewards_train/margins": 14.711151003837585, "rewards_train/rejected": -15.364601135253906, "step": 2249 }, { "epoch": 1.11, "learning_rate": 7.467810842939534e-07, "loss": 0.0003, "step": 2250 }, { "epoch": 1.11, "logps_train/chosen": -74.57415771484375, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -282.501708984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9586364030838013, "rewards_train/margins": 14.451594233512878, "rewards_train/rejected": -15.41023063659668, "step": 2250 }, { "epoch": 1.11, "learning_rate": 7.465450185676078e-07, "loss": 0.0, "step": 2251 }, { "epoch": 1.11, "logps_train/chosen": -68.06483459472656, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -119.5625, "logps_train/rejected": -269.78106689453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.439296692609787, "rewards_train/margins": 14.581486731767654, "rewards_train/rejected": -15.020783424377441, "step": 2251 }, { "epoch": 1.11, "learning_rate": 7.463088802074358e-07, "loss": 0.0, "step": 2252 }, { "epoch": 1.11, "logps_train/chosen": -69.23683166503906, "logps_train/ref_chosen": -61.96875, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -277.69903564453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7278335094451904, "rewards_train/margins": 14.398514986038208, "rewards_train/rejected": -15.126348495483398, "step": 2252 }, { "epoch": 1.11, "learning_rate": 7.460726692830056e-07, "loss": 0.0001, "step": 2253 }, { "epoch": 1.11, "logps_train/chosen": -67.70323181152344, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -275.91339111328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3303331136703491, "rewards_train/margins": 14.77848494052887, "rewards_train/rejected": -15.108818054199219, "step": 2253 }, { "epoch": 1.11, "learning_rate": 7.458363858639068e-07, "loss": 0.0, "step": 2254 }, { "epoch": 1.11, "logps_train/chosen": -72.70547485351562, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -279.2589111328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6006264686584473, "rewards_train/margins": 14.646214962005615, "rewards_train/rejected": -15.246841430664062, "step": 2254 }, { "epoch": 1.11, "learning_rate": 7.456000300197496e-07, "loss": 0.0, "step": 2255 }, { "epoch": 1.11, "logps_train/chosen": -74.0413589477539, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -280.4693603515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.981919527053833, "rewards_train/margins": 14.351736783981323, "rewards_train/rejected": -15.333656311035156, "step": 2255 }, { "epoch": 1.11, "learning_rate": 7.453636018201665e-07, "loss": 0.0012, "step": 2256 }, { "epoch": 1.11, "logps_train/chosen": -74.45530700683594, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -267.8249816894531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9733134508132935, "rewards_train/margins": 13.662456393241882, "rewards_train/rejected": -14.635769844055176, "step": 2256 }, { "epoch": 1.11, "learning_rate": 7.451271013348108e-07, "loss": 0.0002, "step": 2257 }, { "epoch": 1.11, "logps_train/chosen": -67.30029296875, "logps_train/ref_chosen": -62.46875, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -283.5140380859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4839838743209839, "rewards_train/margins": 15.36570942401886, "rewards_train/rejected": -15.849693298339844, "step": 2257 }, { "epoch": 1.11, "learning_rate": 7.448905286333568e-07, "loss": 0.0001, "step": 2258 }, { "epoch": 1.11, "logps_train/chosen": -71.04457092285156, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -286.7279052734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.68004310131073, "rewards_train/margins": 15.1457759141922, "rewards_train/rejected": -15.82581901550293, "step": 2258 }, { "epoch": 1.11, "learning_rate": 7.446538837855005e-07, "loss": 0.0, "step": 2259 }, { "epoch": 1.11, "logps_train/chosen": -70.30387115478516, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -280.798095703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5908849239349365, "rewards_train/margins": 15.000545740127563, "rewards_train/rejected": -15.5914306640625, "step": 2259 }, { "epoch": 1.11, "learning_rate": 7.444171668609594e-07, "loss": 0.0001, "step": 2260 }, { "epoch": 1.11, "logps_train/chosen": -69.98192596435547, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -281.09014892578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6514639854431152, "rewards_train/margins": 14.840363025665283, "rewards_train/rejected": -15.491827011108398, "step": 2260 }, { "epoch": 1.11, "learning_rate": 7.441803779294716e-07, "loss": 0.0, "step": 2261 }, { "epoch": 1.11, "logps_train/chosen": -68.11151885986328, "logps_train/ref_chosen": -62.40625, "logps_train/ref_rejected": -121.1875, "logps_train/rejected": -266.7720031738281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5696970820426941, "rewards_train/margins": 13.987190544605255, "rewards_train/rejected": -14.55688762664795, "step": 2261 }, { "epoch": 1.11, "learning_rate": 7.439435170607966e-07, "loss": 0.0008, "step": 2262 }, { "epoch": 1.11, "logps_train/chosen": -65.77572631835938, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -270.07403564453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3534024655818939, "rewards_train/margins": 14.481735855340958, "rewards_train/rejected": -14.835138320922852, "step": 2262 }, { "epoch": 1.11, "learning_rate": 7.437065843247157e-07, "loss": 0.0001, "step": 2263 }, { "epoch": 1.11, "logps_train/chosen": -70.0626449584961, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -276.64776611328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4574611186981201, "rewards_train/margins": 14.634267091751099, "rewards_train/rejected": -15.091728210449219, "step": 2263 }, { "epoch": 1.11, "learning_rate": 7.434695797910303e-07, "loss": 0.0, "step": 2264 }, { "epoch": 1.11, "logps_train/chosen": -72.44622802734375, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -274.0853271484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7237733006477356, "rewards_train/margins": 14.295500457286835, "rewards_train/rejected": -15.01927375793457, "step": 2264 }, { "epoch": 1.12, "learning_rate": 7.43232503529564e-07, "loss": 0.0001, "step": 2265 }, { "epoch": 1.12, "logps_train/chosen": -68.16519165039062, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -278.21685791015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4467431902885437, "rewards_train/margins": 15.102241337299347, "rewards_train/rejected": -15.54898452758789, "step": 2265 }, { "epoch": 1.12, "learning_rate": 7.429953556101609e-07, "loss": 0.0003, "step": 2266 }, { "epoch": 1.12, "logps_train/chosen": -72.13525390625, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -287.0906982421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7200683355331421, "rewards_train/margins": 15.266443848609924, "rewards_train/rejected": -15.986512184143066, "step": 2266 }, { "epoch": 1.12, "learning_rate": 7.427581361026862e-07, "loss": 0.0, "step": 2267 }, { "epoch": 1.12, "logps_train/chosen": -70.56802368164062, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -272.210205078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5606594681739807, "rewards_train/margins": 14.318956315517426, "rewards_train/rejected": -14.879615783691406, "step": 2267 }, { "epoch": 1.12, "learning_rate": 7.425208450770266e-07, "loss": 0.0001, "step": 2268 }, { "epoch": 1.12, "logps_train/chosen": -59.13425827026367, "logps_train/ref_chosen": -59.53125, "logps_train/ref_rejected": -119.0, "logps_train/rejected": -265.6329650878906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.038966741412878036, "rewards_train/margins": 14.702117528766394, "rewards_train/rejected": -14.663150787353516, "step": 2268 }, { "epoch": 1.12, "learning_rate": 7.422834826030897e-07, "loss": 0.0007, "step": 2269 }, { "epoch": 1.12, "logps_train/chosen": -72.10305786132812, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -277.941162109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.713235080242157, "rewards_train/margins": 14.531809628009796, "rewards_train/rejected": -15.245044708251953, "step": 2269 }, { "epoch": 1.12, "learning_rate": 7.420460487508038e-07, "loss": 0.0001, "step": 2270 }, { "epoch": 1.12, "logps_train/chosen": -70.84701538085938, "logps_train/ref_chosen": -62.28125, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -282.61798095703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8555271625518799, "rewards_train/margins": 14.796557188034058, "rewards_train/rejected": -15.652084350585938, "step": 2270 }, { "epoch": 1.12, "learning_rate": 7.418085435901188e-07, "loss": 0.0001, "step": 2271 }, { "epoch": 1.12, "logps_train/chosen": -71.15400695800781, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -271.125244140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5476269125938416, "rewards_train/margins": 14.191266596317291, "rewards_train/rejected": -14.738893508911133, "step": 2271 }, { "epoch": 1.12, "learning_rate": 7.415709671910052e-07, "loss": 0.0002, "step": 2272 }, { "epoch": 1.12, "logps_train/chosen": -69.25524139404297, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -119.875, "logps_train/rejected": -262.905029296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6642442345619202, "rewards_train/margins": 13.640322148799896, "rewards_train/rejected": -14.304566383361816, "step": 2272 }, { "epoch": 1.12, "learning_rate": 7.413333196234543e-07, "loss": 0.0015, "step": 2273 }, { "epoch": 1.12, "logps_train/chosen": -72.02830505371094, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -283.845947265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7488260269165039, "rewards_train/margins": 14.870780944824219, "rewards_train/rejected": -15.619606971740723, "step": 2273 }, { "epoch": 1.12, "learning_rate": 7.410956009574794e-07, "loss": 0.0001, "step": 2274 }, { "epoch": 1.12, "logps_train/chosen": -71.47956848144531, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -288.374267578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6711008548736572, "rewards_train/margins": 15.24381709098816, "rewards_train/rejected": -15.914917945861816, "step": 2274 }, { "epoch": 1.12, "learning_rate": 7.408578112631135e-07, "loss": 0.0012, "step": 2275 }, { "epoch": 1.12, "logps_train/chosen": -63.35173416137695, "logps_train/ref_chosen": -60.71875, "logps_train/ref_rejected": -122.125, "logps_train/rejected": -273.10699462890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2632012963294983, "rewards_train/margins": 14.834510266780853, "rewards_train/rejected": -15.097711563110352, "step": 2275 }, { "epoch": 1.12, "learning_rate": 7.406199506104109e-07, "loss": 0.0, "step": 2276 }, { "epoch": 1.12, "logps_train/chosen": -71.93191528320312, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -280.65118408203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9302027821540833, "rewards_train/margins": 14.650542914867401, "rewards_train/rejected": -15.580745697021484, "step": 2276 }, { "epoch": 1.12, "learning_rate": 7.403820190694474e-07, "loss": 0.0, "step": 2277 }, { "epoch": 1.12, "logps_train/chosen": -71.54429626464844, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -123.3125, "logps_train/rejected": -276.57684326171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6501330733299255, "rewards_train/margins": 14.675373136997223, "rewards_train/rejected": -15.325506210327148, "step": 2277 }, { "epoch": 1.12, "learning_rate": 7.40144016710319e-07, "loss": 0.0007, "step": 2278 }, { "epoch": 1.12, "logps_train/chosen": -72.74596405029297, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -287.978515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7508655786514282, "rewards_train/margins": 15.132240653038025, "rewards_train/rejected": -15.883106231689453, "step": 2278 }, { "epoch": 1.12, "learning_rate": 7.399059436031427e-07, "loss": 0.0, "step": 2279 }, { "epoch": 1.12, "logps_train/chosen": -71.128662109375, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -284.1257629394531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5803462266921997, "rewards_train/margins": 14.878568530082703, "rewards_train/rejected": -15.458914756774902, "step": 2279 }, { "epoch": 1.12, "learning_rate": 7.396677998180569e-07, "loss": 0.0, "step": 2280 }, { "epoch": 1.12, "logps_train/chosen": -76.53071594238281, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -287.4246520996094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.155707597732544, "rewards_train/margins": 15.03734278678894, "rewards_train/rejected": -16.193050384521484, "step": 2280 }, { "epoch": 1.12, "learning_rate": 7.394295854252199e-07, "loss": 0.0, "step": 2281 }, { "epoch": 1.12, "logps_train/chosen": -67.11407470703125, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -275.50494384765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4354307949542999, "rewards_train/margins": 14.830102652311325, "rewards_train/rejected": -15.265533447265625, "step": 2281 }, { "epoch": 1.12, "learning_rate": 7.391913004948114e-07, "loss": 0.0, "step": 2282 }, { "epoch": 1.12, "logps_train/chosen": -70.85362243652344, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -277.78131103515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6967873573303223, "rewards_train/margins": 14.729003429412842, "rewards_train/rejected": -15.425790786743164, "step": 2282 }, { "epoch": 1.12, "learning_rate": 7.389529450970317e-07, "loss": 0.0, "step": 2283 }, { "epoch": 1.12, "logps_train/chosen": -66.04490661621094, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -277.4815673828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.24116072058677673, "rewards_train/margins": 15.052603393793106, "rewards_train/rejected": -15.293764114379883, "step": 2283 }, { "epoch": 1.12, "learning_rate": 7.387145193021021e-07, "loss": 0.0001, "step": 2284 }, { "epoch": 1.12, "logps_train/chosen": -67.65123748779297, "logps_train/ref_chosen": -60.5, "logps_train/ref_rejected": -118.9375, "logps_train/rejected": -267.7978210449219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7149046063423157, "rewards_train/margins": 14.171859920024872, "rewards_train/rejected": -14.886764526367188, "step": 2284 }, { "epoch": 1.13, "learning_rate": 7.384760231802641e-07, "loss": 0.0, "step": 2285 }, { "epoch": 1.13, "logps_train/chosen": -72.16278076171875, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -284.59332275390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7373234629631042, "rewards_train/margins": 14.863805115222931, "rewards_train/rejected": -15.601128578186035, "step": 2285 }, { "epoch": 1.13, "learning_rate": 7.382374568017809e-07, "loss": 0.0001, "step": 2286 }, { "epoch": 1.13, "logps_train/chosen": -70.2422103881836, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -283.68292236328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5161148905754089, "rewards_train/margins": 14.999445259571075, "rewards_train/rejected": -15.515560150146484, "step": 2286 }, { "epoch": 1.13, "learning_rate": 7.379988202369349e-07, "loss": 0.0, "step": 2287 }, { "epoch": 1.13, "logps_train/chosen": -70.81028747558594, "logps_train/ref_chosen": -63.03125, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -280.12237548828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7776834964752197, "rewards_train/margins": 14.715413808822632, "rewards_train/rejected": -15.493097305297852, "step": 2287 }, { "epoch": 1.13, "learning_rate": 7.377601135560309e-07, "loss": 0.0, "step": 2288 }, { "epoch": 1.13, "logps_train/chosen": -73.55203247070312, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -281.82568359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7750281095504761, "rewards_train/margins": 14.749778866767883, "rewards_train/rejected": -15.52480697631836, "step": 2288 }, { "epoch": 1.13, "learning_rate": 7.375213368293928e-07, "loss": 0.0001, "step": 2289 }, { "epoch": 1.13, "logps_train/chosen": -73.34832000732422, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -283.0087890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9325861930847168, "rewards_train/margins": 14.801394939422607, "rewards_train/rejected": -15.733981132507324, "step": 2289 }, { "epoch": 1.13, "learning_rate": 7.372824901273663e-07, "loss": 0.0001, "step": 2290 }, { "epoch": 1.13, "logps_train/chosen": -71.44342041015625, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -281.05181884765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6792055368423462, "rewards_train/margins": 14.854782462120056, "rewards_train/rejected": -15.533987998962402, "step": 2290 }, { "epoch": 1.13, "learning_rate": 7.37043573520317e-07, "loss": 0.0001, "step": 2291 }, { "epoch": 1.13, "logps_train/chosen": -64.1827163696289, "logps_train/ref_chosen": -60.21875, "logps_train/ref_rejected": -119.0625, "logps_train/rejected": -268.45184326171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.39732393622398376, "rewards_train/margins": 14.54249158501625, "rewards_train/rejected": -14.939815521240234, "step": 2291 }, { "epoch": 1.13, "learning_rate": 7.368045870786314e-07, "loss": 0.0, "step": 2292 }, { "epoch": 1.13, "logps_train/chosen": -71.4689712524414, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -290.45184326171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7558813095092773, "rewards_train/margins": 15.432421684265137, "rewards_train/rejected": -16.188302993774414, "step": 2292 }, { "epoch": 1.13, "learning_rate": 7.365655308727166e-07, "loss": 0.0, "step": 2293 }, { "epoch": 1.13, "logps_train/chosen": -70.86931610107422, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -275.7926025390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6082206964492798, "rewards_train/margins": 14.42743718624115, "rewards_train/rejected": -15.03565788269043, "step": 2293 }, { "epoch": 1.13, "learning_rate": 7.363264049730001e-07, "loss": 0.0005, "step": 2294 }, { "epoch": 1.13, "logps_train/chosen": -74.4278564453125, "logps_train/ref_chosen": -67.5, "logps_train/ref_rejected": -133.5, "logps_train/rejected": -295.5027160644531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6898564100265503, "rewards_train/margins": 15.509730458259583, "rewards_train/rejected": -16.199586868286133, "step": 2294 }, { "epoch": 1.13, "learning_rate": 7.360872094499301e-07, "loss": 0.0, "step": 2295 }, { "epoch": 1.13, "logps_train/chosen": -69.63147735595703, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -281.887939453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5126839876174927, "rewards_train/margins": 14.866832613945007, "rewards_train/rejected": -15.3795166015625, "step": 2295 }, { "epoch": 1.13, "learning_rate": 7.358479443739752e-07, "loss": 0.0001, "step": 2296 }, { "epoch": 1.13, "logps_train/chosen": -71.65388488769531, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -275.4734802246094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6900466680526733, "rewards_train/margins": 14.508424401283264, "rewards_train/rejected": -15.198471069335938, "step": 2296 }, { "epoch": 1.13, "learning_rate": 7.356086098156242e-07, "loss": 0.0, "step": 2297 }, { "epoch": 1.13, "logps_train/chosen": -71.58405303955078, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -291.9715881347656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5498603582382202, "rewards_train/margins": 15.596908211708069, "rewards_train/rejected": -16.14676856994629, "step": 2297 }, { "epoch": 1.13, "learning_rate": 7.353692058453871e-07, "loss": 0.0, "step": 2298 }, { "epoch": 1.13, "logps_train/chosen": -72.42015075683594, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -276.1289367675781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7847394943237305, "rewards_train/margins": 14.301251411437988, "rewards_train/rejected": -15.085990905761719, "step": 2298 }, { "epoch": 1.13, "learning_rate": 7.351297325337935e-07, "loss": 0.0001, "step": 2299 }, { "epoch": 1.13, "logps_train/chosen": -74.18803405761719, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -285.54486083984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8159719705581665, "rewards_train/margins": 14.891345381736755, "rewards_train/rejected": -15.707317352294922, "step": 2299 }, { "epoch": 1.13, "learning_rate": 7.348901899513944e-07, "loss": 0.0, "step": 2300 }, { "epoch": 1.13, "logps_train/chosen": -73.70377349853516, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -278.39422607421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7609049081802368, "rewards_train/margins": 14.550493597984314, "rewards_train/rejected": -15.31139850616455, "step": 2300 }, { "epoch": 1.13, "learning_rate": 7.346505781687603e-07, "loss": 0.0001, "step": 2301 }, { "epoch": 1.13, "logps_train/chosen": -70.14781188964844, "logps_train/ref_chosen": -63.03125, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -286.7031555175781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7133162021636963, "rewards_train/margins": 15.234537839889526, "rewards_train/rejected": -15.947854042053223, "step": 2301 }, { "epoch": 1.13, "learning_rate": 7.344108972564825e-07, "loss": 0.0, "step": 2302 }, { "epoch": 1.13, "logps_train/chosen": -74.07457733154297, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -278.55401611328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8972036242485046, "rewards_train/margins": 14.233392536640167, "rewards_train/rejected": -15.130596160888672, "step": 2302 }, { "epoch": 1.13, "learning_rate": 7.341711472851725e-07, "loss": 0.0, "step": 2303 }, { "epoch": 1.13, "logps_train/chosen": -72.27471923828125, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -119.625, "logps_train/rejected": -271.2926025390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8608219623565674, "rewards_train/margins": 14.302376508712769, "rewards_train/rejected": -15.163198471069336, "step": 2303 }, { "epoch": 1.13, "learning_rate": 7.339313283254625e-07, "loss": 0.0001, "step": 2304 }, { "epoch": 1.13, "logps_train/chosen": -67.22948455810547, "logps_train/ref_chosen": -61.71875, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -282.90179443359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.551805853843689, "rewards_train/margins": 15.307417511940002, "rewards_train/rejected": -15.859223365783691, "step": 2304 }, { "epoch": 1.13, "learning_rate": 7.336914404480044e-07, "loss": 0.0001, "step": 2305 }, { "epoch": 1.13, "logps_train/chosen": -73.08060455322266, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -283.45159912109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6356968283653259, "rewards_train/margins": 14.805800974369049, "rewards_train/rejected": -15.441497802734375, "step": 2305 }, { "epoch": 1.14, "learning_rate": 7.334514837234713e-07, "loss": 0.0, "step": 2306 }, { "epoch": 1.14, "logps_train/chosen": -70.88560485839844, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -277.12396240234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6196640133857727, "rewards_train/margins": 14.605961978435516, "rewards_train/rejected": -15.225625991821289, "step": 2306 }, { "epoch": 1.14, "learning_rate": 7.332114582225559e-07, "loss": 0.0002, "step": 2307 }, { "epoch": 1.14, "logps_train/chosen": -67.90585327148438, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -272.35406494140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4367767870426178, "rewards_train/margins": 14.387205451726913, "rewards_train/rejected": -14.823982238769531, "step": 2307 }, { "epoch": 1.14, "learning_rate": 7.32971364015971e-07, "loss": 0.0004, "step": 2308 }, { "epoch": 1.14, "logps_train/chosen": -70.87212371826172, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -122.3125, "logps_train/rejected": -275.3751525878906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7909718751907349, "rewards_train/margins": 14.519101977348328, "rewards_train/rejected": -15.310073852539062, "step": 2308 }, { "epoch": 1.14, "learning_rate": 7.327312011744504e-07, "loss": 0.0001, "step": 2309 }, { "epoch": 1.14, "logps_train/chosen": -71.8904800415039, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -285.81494140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7308933734893799, "rewards_train/margins": 14.902748823165894, "rewards_train/rejected": -15.633642196655273, "step": 2309 }, { "epoch": 1.14, "learning_rate": 7.324909697687474e-07, "loss": 0.0001, "step": 2310 }, { "epoch": 1.14, "logps_train/chosen": -65.88786315917969, "logps_train/ref_chosen": -63.03125, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -270.25677490234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.28634440898895264, "rewards_train/margins": 14.638507008552551, "rewards_train/rejected": -14.924851417541504, "step": 2310 }, { "epoch": 1.14, "learning_rate": 7.32250669869636e-07, "loss": 0.0, "step": 2311 }, { "epoch": 1.14, "logps_train/chosen": -68.6171875, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -123.3125, "logps_train/rejected": -274.33349609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.39609378576278687, "rewards_train/margins": 14.70932537317276, "rewards_train/rejected": -15.105419158935547, "step": 2311 }, { "epoch": 1.14, "learning_rate": 7.320103015479101e-07, "loss": 0.0, "step": 2312 }, { "epoch": 1.14, "logps_train/chosen": -68.80941009521484, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -121.0625, "logps_train/rejected": -266.607177734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5469563603401184, "rewards_train/margins": 14.004436194896698, "rewards_train/rejected": -14.551392555236816, "step": 2312 }, { "epoch": 1.14, "learning_rate": 7.317698648743837e-07, "loss": 0.0018, "step": 2313 }, { "epoch": 1.14, "logps_train/chosen": -67.79460144042969, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -283.49737548828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5543619990348816, "rewards_train/margins": 15.22056895494461, "rewards_train/rejected": -15.774930953979492, "step": 2313 }, { "epoch": 1.14, "learning_rate": 7.315293599198913e-07, "loss": 0.0001, "step": 2314 }, { "epoch": 1.14, "logps_train/chosen": -69.5904541015625, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -270.98968505859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5265265107154846, "rewards_train/margins": 14.40823358297348, "rewards_train/rejected": -14.934760093688965, "step": 2314 }, { "epoch": 1.14, "learning_rate": 7.312887867552872e-07, "loss": 0.0, "step": 2315 }, { "epoch": 1.14, "logps_train/chosen": -72.66888427734375, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -285.9112548828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6519719362258911, "rewards_train/margins": 15.189395785331726, "rewards_train/rejected": -15.841367721557617, "step": 2315 }, { "epoch": 1.14, "learning_rate": 7.310481454514457e-07, "loss": 0.0001, "step": 2316 }, { "epoch": 1.14, "logps_train/chosen": -73.9284439086914, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -274.1373596191406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7488499879837036, "rewards_train/margins": 14.363765597343445, "rewards_train/rejected": -15.112615585327148, "step": 2316 }, { "epoch": 1.14, "learning_rate": 7.308074360792616e-07, "loss": 0.0002, "step": 2317 }, { "epoch": 1.14, "logps_train/chosen": -71.19242858886719, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -288.5409240722656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6297900676727295, "rewards_train/margins": 15.415709733963013, "rewards_train/rejected": -16.045499801635742, "step": 2317 }, { "epoch": 1.14, "learning_rate": 7.305666587096493e-07, "loss": 0.0001, "step": 2318 }, { "epoch": 1.14, "logps_train/chosen": -69.36248016357422, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -275.656494140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6657402515411377, "rewards_train/margins": 14.603281736373901, "rewards_train/rejected": -15.269021987915039, "step": 2318 }, { "epoch": 1.14, "learning_rate": 7.303258134135436e-07, "loss": 0.0, "step": 2319 }, { "epoch": 1.14, "logps_train/chosen": -71.83955383300781, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -281.97039794921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7974802255630493, "rewards_train/margins": 14.935691237449646, "rewards_train/rejected": -15.733171463012695, "step": 2319 }, { "epoch": 1.14, "learning_rate": 7.30084900261899e-07, "loss": 0.0001, "step": 2320 }, { "epoch": 1.14, "logps_train/chosen": -74.68571472167969, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -133.625, "logps_train/rejected": -290.99615478515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7964039444923401, "rewards_train/margins": 14.940273225307465, "rewards_train/rejected": -15.736677169799805, "step": 2320 }, { "epoch": 1.14, "learning_rate": 7.298439193256904e-07, "loss": 0.0, "step": 2321 }, { "epoch": 1.14, "logps_train/chosen": -69.81825256347656, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -278.046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6007709503173828, "rewards_train/margins": 14.71133804321289, "rewards_train/rejected": -15.312108993530273, "step": 2321 }, { "epoch": 1.14, "learning_rate": 7.296028706759121e-07, "loss": 0.0, "step": 2322 }, { "epoch": 1.14, "logps_train/chosen": -66.8011474609375, "logps_train/ref_chosen": -63.15625, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -275.0551452636719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.36600279808044434, "rewards_train/margins": 14.682090044021606, "rewards_train/rejected": -15.04809284210205, "step": 2322 }, { "epoch": 1.14, "learning_rate": 7.293617543835788e-07, "loss": 0.0, "step": 2323 }, { "epoch": 1.14, "logps_train/chosen": -70.55070495605469, "logps_train/ref_chosen": -62.34375, "logps_train/ref_rejected": -117.0, "logps_train/rejected": -261.83203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8210867643356323, "rewards_train/margins": 13.662164807319641, "rewards_train/rejected": -14.483251571655273, "step": 2323 }, { "epoch": 1.14, "learning_rate": 7.291205705197251e-07, "loss": 0.0005, "step": 2324 }, { "epoch": 1.14, "logps_train/chosen": -74.83404541015625, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -279.500244140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9933412671089172, "rewards_train/margins": 14.441934764385223, "rewards_train/rejected": -15.43527603149414, "step": 2324 }, { "epoch": 1.14, "learning_rate": 7.28879319155405e-07, "loss": 0.0001, "step": 2325 }, { "epoch": 1.14, "logps_train/chosen": -69.67081451416016, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -284.59747314453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5632727146148682, "rewards_train/margins": 15.37191367149353, "rewards_train/rejected": -15.935186386108398, "step": 2325 }, { "epoch": 1.15, "learning_rate": 7.286380003616933e-07, "loss": 0.0, "step": 2326 }, { "epoch": 1.15, "logps_train/chosen": -71.14557647705078, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -130.625, "logps_train/rejected": -289.674072265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5689517259597778, "rewards_train/margins": 15.337811350822449, "rewards_train/rejected": -15.906763076782227, "step": 2326 }, { "epoch": 1.15, "learning_rate": 7.283966142096839e-07, "loss": 0.0, "step": 2327 }, { "epoch": 1.15, "logps_train/chosen": -69.75985717773438, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -276.81256103515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7088468074798584, "rewards_train/margins": 14.54980206489563, "rewards_train/rejected": -15.258648872375488, "step": 2327 }, { "epoch": 1.15, "learning_rate": 7.281551607704907e-07, "loss": 0.0001, "step": 2328 }, { "epoch": 1.15, "logps_train/chosen": -68.76895141601562, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -120.625, "logps_train/rejected": -266.90350341796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.518496036529541, "rewards_train/margins": 14.105689525604248, "rewards_train/rejected": -14.624185562133789, "step": 2328 }, { "epoch": 1.15, "learning_rate": 7.279136401152476e-07, "loss": 0.0001, "step": 2329 }, { "epoch": 1.15, "logps_train/chosen": -74.10177612304688, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -282.77935791015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8967019319534302, "rewards_train/margins": 14.954379916191101, "rewards_train/rejected": -15.851081848144531, "step": 2329 }, { "epoch": 1.15, "learning_rate": 7.276720523151082e-07, "loss": 0.0001, "step": 2330 }, { "epoch": 1.15, "logps_train/chosen": -75.90365600585938, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -280.6441345214844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0934414863586426, "rewards_train/margins": 14.662769794464111, "rewards_train/rejected": -15.756211280822754, "step": 2330 }, { "epoch": 1.15, "learning_rate": 7.274303974412459e-07, "loss": 0.0, "step": 2331 }, { "epoch": 1.15, "logps_train/chosen": -69.13009643554688, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -119.625, "logps_train/rejected": -269.4461669921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6617888808250427, "rewards_train/margins": 14.32042795419693, "rewards_train/rejected": -14.982216835021973, "step": 2331 }, { "epoch": 1.15, "learning_rate": 7.27188675564854e-07, "loss": 0.0001, "step": 2332 }, { "epoch": 1.15, "logps_train/chosen": -67.73653411865234, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -272.8976745605469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.307442843914032, "rewards_train/margins": 14.732130825519562, "rewards_train/rejected": -15.039573669433594, "step": 2332 }, { "epoch": 1.15, "learning_rate": 7.269468867571452e-07, "loss": 0.0001, "step": 2333 }, { "epoch": 1.15, "logps_train/chosen": -74.4638671875, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -282.609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9632323980331421, "rewards_train/margins": 14.667725205421448, "rewards_train/rejected": -15.63095760345459, "step": 2333 }, { "epoch": 1.15, "learning_rate": 7.267050310893521e-07, "loss": 0.0, "step": 2334 }, { "epoch": 1.15, "logps_train/chosen": -70.85366821289062, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -277.53387451171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5706698894500732, "rewards_train/margins": 14.738480806350708, "rewards_train/rejected": -15.309150695800781, "step": 2334 }, { "epoch": 1.15, "learning_rate": 7.264631086327272e-07, "loss": 0.0, "step": 2335 }, { "epoch": 1.15, "logps_train/chosen": -69.73835754394531, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -288.7955322265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5195384621620178, "rewards_train/margins": 15.566654622554779, "rewards_train/rejected": -16.086193084716797, "step": 2335 }, { "epoch": 1.15, "learning_rate": 7.262211194585422e-07, "loss": 0.0, "step": 2336 }, { "epoch": 1.15, "logps_train/chosen": -73.29293823242188, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -285.79840087890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8554166555404663, "rewards_train/margins": 15.100887894630432, "rewards_train/rejected": -15.956304550170898, "step": 2336 }, { "epoch": 1.15, "learning_rate": 7.259790636380892e-07, "loss": 0.0001, "step": 2337 }, { "epoch": 1.15, "logps_train/chosen": -76.44635009765625, "logps_train/ref_chosen": -67.625, "logps_train/ref_rejected": -132.75, "logps_train/rejected": -296.04638671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8805476427078247, "rewards_train/margins": 15.452803492546082, "rewards_train/rejected": -16.333351135253906, "step": 2337 }, { "epoch": 1.15, "learning_rate": 7.25736941242679e-07, "loss": 0.0001, "step": 2338 }, { "epoch": 1.15, "logps_train/chosen": -70.80522155761719, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -282.113525390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6638229489326477, "rewards_train/margins": 14.849579989910126, "rewards_train/rejected": -15.513402938842773, "step": 2338 }, { "epoch": 1.15, "learning_rate": 7.254947523436426e-07, "loss": 0.0, "step": 2339 }, { "epoch": 1.15, "logps_train/chosen": -71.23629760742188, "logps_train/ref_chosen": -62.59375, "logps_train/ref_rejected": -122.3125, "logps_train/rejected": -274.22515869140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8656715750694275, "rewards_train/margins": 14.326716959476471, "rewards_train/rejected": -15.192388534545898, "step": 2339 }, { "epoch": 1.15, "learning_rate": 7.252524970123308e-07, "loss": 0.0004, "step": 2340 }, { "epoch": 1.15, "logps_train/chosen": -69.47981262207031, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -282.99554443359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4739578366279602, "rewards_train/margins": 15.047471225261688, "rewards_train/rejected": -15.521429061889648, "step": 2340 }, { "epoch": 1.15, "learning_rate": 7.250101753201133e-07, "loss": 0.0, "step": 2341 }, { "epoch": 1.15, "logps_train/chosen": -80.56787109375, "logps_train/ref_chosen": -70.0625, "logps_train/ref_rejected": -135.875, "logps_train/rejected": -309.020263671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.051757574081421, "rewards_train/margins": 16.26540780067444, "rewards_train/rejected": -17.31716537475586, "step": 2341 }, { "epoch": 1.15, "learning_rate": 7.247677873383799e-07, "loss": 0.0, "step": 2342 }, { "epoch": 1.15, "logps_train/chosen": -73.03956604003906, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -273.258544921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.909180760383606, "rewards_train/margins": 14.022534251213074, "rewards_train/rejected": -14.93171501159668, "step": 2342 }, { "epoch": 1.15, "learning_rate": 7.245253331385397e-07, "loss": 0.0018, "step": 2343 }, { "epoch": 1.15, "logps_train/chosen": -75.41520690917969, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -287.5024719238281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9871751070022583, "rewards_train/margins": 14.805944323539734, "rewards_train/rejected": -15.793119430541992, "step": 2343 }, { "epoch": 1.15, "learning_rate": 7.242828127920213e-07, "loss": 0.0, "step": 2344 }, { "epoch": 1.15, "logps_train/chosen": -69.26347351074219, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -119.25, "logps_train/rejected": -267.3881530761719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7007611393928528, "rewards_train/margins": 14.115447700023651, "rewards_train/rejected": -14.816208839416504, "step": 2344 }, { "epoch": 1.15, "learning_rate": 7.240402263702727e-07, "loss": 0.0001, "step": 2345 }, { "epoch": 1.15, "logps_train/chosen": -71.88348388671875, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -277.9432678222656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8689641952514648, "rewards_train/margins": 14.828389167785645, "rewards_train/rejected": -15.69735336303711, "step": 2345 }, { "epoch": 1.16, "learning_rate": 7.237975739447617e-07, "loss": 0.0006, "step": 2346 }, { "epoch": 1.16, "logps_train/chosen": -74.32278442382812, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -131.5, "logps_train/rejected": -294.199462890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.798684298992157, "rewards_train/margins": 15.47262841463089, "rewards_train/rejected": -16.271312713623047, "step": 2346 }, { "epoch": 1.16, "learning_rate": 7.235548555869754e-07, "loss": 0.0, "step": 2347 }, { "epoch": 1.16, "logps_train/chosen": -73.0059814453125, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -281.44573974609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9681271910667419, "rewards_train/margins": 14.715608656406403, "rewards_train/rejected": -15.683735847473145, "step": 2347 }, { "epoch": 1.16, "learning_rate": 7.2331207136842e-07, "loss": 0.0003, "step": 2348 }, { "epoch": 1.16, "logps_train/chosen": -65.23524475097656, "logps_train/ref_chosen": -61.90625, "logps_train/ref_rejected": -119.0, "logps_train/rejected": -273.75091552734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.33368074893951416, "rewards_train/margins": 15.142482161521912, "rewards_train/rejected": -15.476162910461426, "step": 2348 }, { "epoch": 1.16, "learning_rate": 7.230692213606217e-07, "loss": 0.0001, "step": 2349 }, { "epoch": 1.16, "logps_train/chosen": -71.88778686523438, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -276.7582702636719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7714205384254456, "rewards_train/margins": 14.283947885036469, "rewards_train/rejected": -15.055368423461914, "step": 2349 }, { "epoch": 1.16, "learning_rate": 7.228263056351254e-07, "loss": 0.0001, "step": 2350 }, { "epoch": 1.16, "logps_train/chosen": -74.24337768554688, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -121.6875, "logps_train/rejected": -278.5361328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8858118653297424, "rewards_train/margins": 14.8013476729393, "rewards_train/rejected": -15.687159538269043, "step": 2350 }, { "epoch": 1.16, "learning_rate": 7.22583324263496e-07, "loss": 0.0001, "step": 2351 }, { "epoch": 1.16, "logps_train/chosen": -70.7813720703125, "logps_train/ref_chosen": -62.5625, "logps_train/ref_rejected": -120.625, "logps_train/rejected": -269.7436828613281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8219848871231079, "rewards_train/margins": 14.089834570884705, "rewards_train/rejected": -14.911819458007812, "step": 2351 }, { "epoch": 1.16, "learning_rate": 7.223402773173171e-07, "loss": 0.0004, "step": 2352 }, { "epoch": 1.16, "logps_train/chosen": -67.10749816894531, "logps_train/ref_chosen": -61.59375, "logps_train/ref_rejected": -122.1875, "logps_train/rejected": -274.58258056640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5497150421142578, "rewards_train/margins": 14.69135570526123, "rewards_train/rejected": -15.241070747375488, "step": 2352 }, { "epoch": 1.16, "learning_rate": 7.220971648681925e-07, "loss": 0.0, "step": 2353 }, { "epoch": 1.16, "logps_train/chosen": -70.84980773925781, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -120.3125, "logps_train/rejected": -270.06304931640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8526082634925842, "rewards_train/margins": 14.122643887996674, "rewards_train/rejected": -14.975252151489258, "step": 2353 }, { "epoch": 1.16, "learning_rate": 7.218539869877444e-07, "loss": 0.0001, "step": 2354 }, { "epoch": 1.16, "logps_train/chosen": -72.20243072509766, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -280.94500732421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7872350215911865, "rewards_train/margins": 14.731386423110962, "rewards_train/rejected": -15.518621444702148, "step": 2354 }, { "epoch": 1.16, "learning_rate": 7.216107437476147e-07, "loss": 0.0, "step": 2355 }, { "epoch": 1.16, "logps_train/chosen": -71.1007308959961, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -282.2903747558594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6006252765655518, "rewards_train/margins": 15.001655340194702, "rewards_train/rejected": -15.602280616760254, "step": 2355 }, { "epoch": 1.16, "learning_rate": 7.213674352194645e-07, "loss": 0.0001, "step": 2356 }, { "epoch": 1.16, "logps_train/chosen": -70.7799072265625, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -291.59130859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6173946857452393, "rewards_train/margins": 15.92181658744812, "rewards_train/rejected": -16.53921127319336, "step": 2356 }, { "epoch": 1.16, "learning_rate": 7.211240614749739e-07, "loss": 0.0, "step": 2357 }, { "epoch": 1.16, "logps_train/chosen": -75.71122741699219, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -285.24853515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.079667568206787, "rewards_train/margins": 14.578245639801025, "rewards_train/rejected": -15.657913208007812, "step": 2357 }, { "epoch": 1.16, "learning_rate": 7.20880622585843e-07, "loss": 0.0001, "step": 2358 }, { "epoch": 1.16, "logps_train/chosen": -66.65554809570312, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -277.07159423828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.21282067894935608, "rewards_train/margins": 15.058011382818222, "rewards_train/rejected": -15.270832061767578, "step": 2358 }, { "epoch": 1.16, "learning_rate": 7.206371186237903e-07, "loss": 0.0, "step": 2359 }, { "epoch": 1.16, "logps_train/chosen": -68.07386779785156, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -131.125, "logps_train/rejected": -289.1302490234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.273743599653244, "rewards_train/margins": 15.533078223466873, "rewards_train/rejected": -15.806821823120117, "step": 2359 }, { "epoch": 1.16, "learning_rate": 7.203935496605535e-07, "loss": 0.0, "step": 2360 }, { "epoch": 1.16, "logps_train/chosen": -71.16109466552734, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -274.42950439453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7057086229324341, "rewards_train/margins": 14.32591426372528, "rewards_train/rejected": -15.031622886657715, "step": 2360 }, { "epoch": 1.16, "learning_rate": 7.201499157678899e-07, "loss": 0.0, "step": 2361 }, { "epoch": 1.16, "logps_train/chosen": -72.62297821044922, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -284.1302795410156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9620539546012878, "rewards_train/margins": 14.892674744129181, "rewards_train/rejected": -15.854728698730469, "step": 2361 }, { "epoch": 1.16, "learning_rate": 7.199062170175754e-07, "loss": 0.0, "step": 2362 }, { "epoch": 1.16, "logps_train/chosen": -67.32316589355469, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -121.9375, "logps_train/rejected": -269.9388427734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.39657384157180786, "rewards_train/margins": 14.401461780071259, "rewards_train/rejected": -14.798035621643066, "step": 2362 }, { "epoch": 1.16, "learning_rate": 7.196624534814055e-07, "loss": 0.0001, "step": 2363 }, { "epoch": 1.16, "logps_train/chosen": -72.60253143310547, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -294.85479736328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6808586120605469, "rewards_train/margins": 15.93069076538086, "rewards_train/rejected": -16.611549377441406, "step": 2363 }, { "epoch": 1.16, "learning_rate": 7.194186252311947e-07, "loss": 0.0, "step": 2364 }, { "epoch": 1.16, "logps_train/chosen": -67.6727294921875, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -282.32281494140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3368285298347473, "rewards_train/margins": 15.258538901805878, "rewards_train/rejected": -15.595367431640625, "step": 2364 }, { "epoch": 1.16, "learning_rate": 7.191747323387764e-07, "loss": 0.0, "step": 2365 }, { "epoch": 1.16, "logps_train/chosen": -76.69371032714844, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -279.25390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1734240055084229, "rewards_train/margins": 14.30763030052185, "rewards_train/rejected": -15.481054306030273, "step": 2365 }, { "epoch": 1.16, "learning_rate": 7.18930774876003e-07, "loss": 0.0007, "step": 2366 }, { "epoch": 1.16, "logps_train/chosen": -70.30463409423828, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -274.45068359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5458195805549622, "rewards_train/margins": 14.21736592054367, "rewards_train/rejected": -14.763185501098633, "step": 2366 }, { "epoch": 1.17, "learning_rate": 7.186867529147459e-07, "loss": 0.0011, "step": 2367 }, { "epoch": 1.17, "logps_train/chosen": -75.61881256103516, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -281.65869140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.862515926361084, "rewards_train/margins": 14.443684101104736, "rewards_train/rejected": -15.30620002746582, "step": 2367 }, { "epoch": 1.17, "learning_rate": 7.184426665268956e-07, "loss": 0.0, "step": 2368 }, { "epoch": 1.17, "logps_train/chosen": -70.21858215332031, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -286.57305908203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6682208180427551, "rewards_train/margins": 15.276924788951874, "rewards_train/rejected": -15.945145606994629, "step": 2368 }, { "epoch": 1.17, "learning_rate": 7.18198515784362e-07, "loss": 0.0001, "step": 2369 }, { "epoch": 1.17, "logps_train/chosen": -69.66424560546875, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -284.19830322265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4667174816131592, "rewards_train/margins": 15.044081926345825, "rewards_train/rejected": -15.510799407958984, "step": 2369 }, { "epoch": 1.17, "learning_rate": 7.179543007590734e-07, "loss": 0.0004, "step": 2370 }, { "epoch": 1.17, "logps_train/chosen": -74.48277282714844, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -289.716064453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0788682699203491, "rewards_train/margins": 15.156262040138245, "rewards_train/rejected": -16.235130310058594, "step": 2370 }, { "epoch": 1.17, "learning_rate": 7.177100215229767e-07, "loss": 0.0, "step": 2371 }, { "epoch": 1.17, "logps_train/chosen": -68.52561950683594, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -121.8125, "logps_train/rejected": -271.2193603515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5759995579719543, "rewards_train/margins": 14.361267745494843, "rewards_train/rejected": -14.937267303466797, "step": 2371 }, { "epoch": 1.17, "learning_rate": 7.17465678148039e-07, "loss": 0.0, "step": 2372 }, { "epoch": 1.17, "logps_train/chosen": -70.02850341796875, "logps_train/ref_chosen": -61.8125, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -276.9837646484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8217220902442932, "rewards_train/margins": 14.456635415554047, "rewards_train/rejected": -15.27835750579834, "step": 2372 }, { "epoch": 1.17, "learning_rate": 7.172212707062448e-07, "loss": 0.0, "step": 2373 }, { "epoch": 1.17, "logps_train/chosen": -73.09906005859375, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -286.5843505859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9597600698471069, "rewards_train/margins": 15.37396490573883, "rewards_train/rejected": -16.333724975585938, "step": 2373 }, { "epoch": 1.17, "learning_rate": 7.169767992695986e-07, "loss": 0.0, "step": 2374 }, { "epoch": 1.17, "logps_train/chosen": -68.85154724121094, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -284.87548828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4738266170024872, "rewards_train/margins": 15.194826871156693, "rewards_train/rejected": -15.66865348815918, "step": 2374 }, { "epoch": 1.17, "learning_rate": 7.167322639101234e-07, "loss": 0.0001, "step": 2375 }, { "epoch": 1.17, "logps_train/chosen": -72.45935821533203, "logps_train/ref_chosen": -66.9375, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -291.78350830078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5537972450256348, "rewards_train/margins": 15.618695735931396, "rewards_train/rejected": -16.17249298095703, "step": 2375 }, { "epoch": 1.17, "learning_rate": 7.164876646998606e-07, "loss": 0.0, "step": 2376 }, { "epoch": 1.17, "logps_train/chosen": -71.06521606445312, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -118.4375, "logps_train/rejected": -263.8818359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6821075677871704, "rewards_train/margins": 13.861740469932556, "rewards_train/rejected": -14.543848037719727, "step": 2376 }, { "epoch": 1.17, "learning_rate": 7.162430017108711e-07, "loss": 0.0001, "step": 2377 }, { "epoch": 1.17, "logps_train/chosen": -70.34053039550781, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -281.9397888183594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3485543727874756, "rewards_train/margins": 15.042007684707642, "rewards_train/rejected": -15.390562057495117, "step": 2377 }, { "epoch": 1.17, "learning_rate": 7.15998275015234e-07, "loss": 0.0001, "step": 2378 }, { "epoch": 1.17, "logps_train/chosen": -69.22685241699219, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -278.0310974121094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.42751920223236084, "rewards_train/margins": 14.538089394569397, "rewards_train/rejected": -14.965608596801758, "step": 2378 }, { "epoch": 1.17, "learning_rate": 7.157534846850477e-07, "loss": 0.0, "step": 2379 }, { "epoch": 1.17, "logps_train/chosen": -72.41006469726562, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -281.90277099609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7243562340736389, "rewards_train/margins": 15.091701924800873, "rewards_train/rejected": -15.816058158874512, "step": 2379 }, { "epoch": 1.17, "learning_rate": 7.155086307924288e-07, "loss": 0.0, "step": 2380 }, { "epoch": 1.17, "logps_train/chosen": -69.36536407470703, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -281.7513122558594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6193488836288452, "rewards_train/margins": 15.02819311618805, "rewards_train/rejected": -15.647541999816895, "step": 2380 }, { "epoch": 1.17, "learning_rate": 7.152637134095131e-07, "loss": 0.0001, "step": 2381 }, { "epoch": 1.17, "logps_train/chosen": -71.24702453613281, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -285.8194580078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7938427925109863, "rewards_train/margins": 15.23390531539917, "rewards_train/rejected": -16.027748107910156, "step": 2381 }, { "epoch": 1.17, "learning_rate": 7.15018732608455e-07, "loss": 0.0, "step": 2382 }, { "epoch": 1.17, "logps_train/chosen": -68.94247436523438, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -275.7169494628906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6196380853652954, "rewards_train/margins": 14.474761366844177, "rewards_train/rejected": -15.094399452209473, "step": 2382 }, { "epoch": 1.17, "learning_rate": 7.147736884614273e-07, "loss": 0.0001, "step": 2383 }, { "epoch": 1.17, "logps_train/chosen": -72.54161834716797, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -279.708251953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9092890620231628, "rewards_train/margins": 14.558412849903107, "rewards_train/rejected": -15.46770191192627, "step": 2383 }, { "epoch": 1.17, "learning_rate": 7.145285810406218e-07, "loss": 0.0, "step": 2384 }, { "epoch": 1.17, "logps_train/chosen": -71.00434875488281, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -279.8510437011719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6820263862609863, "rewards_train/margins": 14.628908634185791, "rewards_train/rejected": -15.310935020446777, "step": 2384 }, { "epoch": 1.17, "learning_rate": 7.142834104182488e-07, "loss": 0.0001, "step": 2385 }, { "epoch": 1.17, "logps_train/chosen": -73.02030944824219, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -122.125, "logps_train/rejected": -282.6715393066406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8349405527114868, "rewards_train/margins": 15.219468474388123, "rewards_train/rejected": -16.05440902709961, "step": 2385 }, { "epoch": 1.17, "learning_rate": 7.14038176666537e-07, "loss": 0.0, "step": 2386 }, { "epoch": 1.17, "logps_train/chosen": -72.60436248779297, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -292.40557861328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6575063467025757, "rewards_train/margins": 15.703463196754456, "rewards_train/rejected": -16.36096954345703, "step": 2386 }, { "epoch": 1.18, "learning_rate": 7.137928798577342e-07, "loss": 0.0, "step": 2387 }, { "epoch": 1.18, "logps_train/chosen": -69.06719207763672, "logps_train/ref_chosen": -61.84375, "logps_train/ref_rejected": -120.6875, "logps_train/rejected": -274.110107421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7211238145828247, "rewards_train/margins": 14.620795130729675, "rewards_train/rejected": -15.3419189453125, "step": 2387 }, { "epoch": 1.18, "learning_rate": 7.135475200641062e-07, "loss": 0.0001, "step": 2388 }, { "epoch": 1.18, "logps_train/chosen": -74.98165893554688, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -285.48236083984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.108907699584961, "rewards_train/margins": 14.923704147338867, "rewards_train/rejected": -16.032611846923828, "step": 2388 }, { "epoch": 1.18, "learning_rate": 7.13302097357938e-07, "loss": 0.0, "step": 2389 }, { "epoch": 1.18, "logps_train/chosen": -73.78428649902344, "logps_train/ref_chosen": -67.875, "logps_train/ref_rejected": -132.25, "logps_train/rejected": -294.01177978515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5925401449203491, "rewards_train/margins": 15.581589341163635, "rewards_train/rejected": -16.174129486083984, "step": 2389 }, { "epoch": 1.18, "learning_rate": 7.130566118115326e-07, "loss": 0.0, "step": 2390 }, { "epoch": 1.18, "logps_train/chosen": -73.39619445800781, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -288.4359130859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9738965034484863, "rewards_train/margins": 15.125163555145264, "rewards_train/rejected": -16.09906005859375, "step": 2390 }, { "epoch": 1.18, "learning_rate": 7.128110634972115e-07, "loss": 0.0, "step": 2391 }, { "epoch": 1.18, "logps_train/chosen": -67.81394958496094, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -282.650390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4113752245903015, "rewards_train/margins": 15.371143162250519, "rewards_train/rejected": -15.78251838684082, "step": 2391 }, { "epoch": 1.18, "learning_rate": 7.125654524873152e-07, "loss": 0.0001, "step": 2392 }, { "epoch": 1.18, "logps_train/chosen": -70.16570281982422, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -281.8570861816406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5703792572021484, "rewards_train/margins": 15.09701919555664, "rewards_train/rejected": -15.667398452758789, "step": 2392 }, { "epoch": 1.18, "learning_rate": 7.123197788542024e-07, "loss": 0.0003, "step": 2393 }, { "epoch": 1.18, "logps_train/chosen": -74.36748504638672, "logps_train/ref_chosen": -67.1875, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -290.039794921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7169728875160217, "rewards_train/margins": 15.44423621892929, "rewards_train/rejected": -16.161209106445312, "step": 2393 }, { "epoch": 1.18, "learning_rate": 7.120740426702499e-07, "loss": 0.0, "step": 2394 }, { "epoch": 1.18, "logps_train/chosen": -71.43714904785156, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -287.93841552734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7971088886260986, "rewards_train/margins": 15.35952639579773, "rewards_train/rejected": -16.156635284423828, "step": 2394 }, { "epoch": 1.18, "learning_rate": 7.118282440078534e-07, "loss": 0.0, "step": 2395 }, { "epoch": 1.18, "logps_train/chosen": -68.43275451660156, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -282.70452880859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.46202564239501953, "rewards_train/margins": 15.232159614562988, "rewards_train/rejected": -15.694185256958008, "step": 2395 }, { "epoch": 1.18, "learning_rate": 7.115823829394271e-07, "loss": 0.0003, "step": 2396 }, { "epoch": 1.18, "logps_train/chosen": -65.21598052978516, "logps_train/ref_chosen": -62.8125, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -276.2829284667969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.23912742733955383, "rewards_train/margins": 14.84033653140068, "rewards_train/rejected": -15.079463958740234, "step": 2396 }, { "epoch": 1.18, "learning_rate": 7.113364595374027e-07, "loss": 0.0, "step": 2397 }, { "epoch": 1.18, "logps_train/chosen": -69.96894073486328, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -279.4437255859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7237982749938965, "rewards_train/margins": 14.712274074554443, "rewards_train/rejected": -15.43607234954834, "step": 2397 }, { "epoch": 1.18, "learning_rate": 7.110904738742316e-07, "loss": 0.0, "step": 2398 }, { "epoch": 1.18, "logps_train/chosen": -73.39910125732422, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -278.31597900390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.916985273361206, "rewards_train/margins": 14.574864625930786, "rewards_train/rejected": -15.491849899291992, "step": 2398 }, { "epoch": 1.18, "learning_rate": 7.108444260223824e-07, "loss": 0.0001, "step": 2399 }, { "epoch": 1.18, "logps_train/chosen": -76.51071166992188, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -286.93414306640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0681850910186768, "rewards_train/margins": 14.722496747970581, "rewards_train/rejected": -15.790681838989258, "step": 2399 }, { "epoch": 1.18, "learning_rate": 7.105983160543424e-07, "loss": 0.0, "step": 2400 }, { "epoch": 1.18, "logps_train/chosen": -78.04925537109375, "logps_train/ref_chosen": -67.625, "logps_train/ref_rejected": -132.375, "logps_train/rejected": -300.8280029296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0427188873291016, "rewards_train/margins": 15.806097030639648, "rewards_train/rejected": -16.84881591796875, "step": 2400 }, { "epoch": 1.18, "learning_rate": 7.103521440426177e-07, "loss": 0.0, "step": 2401 }, { "epoch": 1.18, "logps_train/chosen": -71.0684814453125, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -280.4013977050781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.526330828666687, "rewards_train/margins": 14.980752110481262, "rewards_train/rejected": -15.50708293914795, "step": 2401 }, { "epoch": 1.18, "learning_rate": 7.101059100597317e-07, "loss": 0.0001, "step": 2402 }, { "epoch": 1.18, "logps_train/chosen": -74.43219757080078, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -282.86102294921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1790108680725098, "rewards_train/margins": 14.475059032440186, "rewards_train/rejected": -15.654069900512695, "step": 2402 }, { "epoch": 1.18, "learning_rate": 7.098596141782271e-07, "loss": 0.0001, "step": 2403 }, { "epoch": 1.18, "logps_train/chosen": -74.57954406738281, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -282.81353759765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.873823344707489, "rewards_train/margins": 14.911436855793, "rewards_train/rejected": -15.785260200500488, "step": 2403 }, { "epoch": 1.18, "learning_rate": 7.096132564706639e-07, "loss": 0.0001, "step": 2404 }, { "epoch": 1.18, "logps_train/chosen": -62.804168701171875, "logps_train/ref_chosen": -59.34375, "logps_train/ref_rejected": -119.625, "logps_train/rejected": -266.204345703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3467983603477478, "rewards_train/margins": 14.310355722904205, "rewards_train/rejected": -14.657154083251953, "step": 2404 }, { "epoch": 1.18, "learning_rate": 7.093668370096209e-07, "loss": 0.0001, "step": 2405 }, { "epoch": 1.18, "logps_train/chosen": -72.82101440429688, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -286.3399353027344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.832247793674469, "rewards_train/margins": 15.060438096523285, "rewards_train/rejected": -15.892685890197754, "step": 2405 }, { "epoch": 1.18, "learning_rate": 7.09120355867695e-07, "loss": 0.0, "step": 2406 }, { "epoch": 1.18, "logps_train/chosen": -70.46338653564453, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -282.822998046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4158698618412018, "rewards_train/margins": 15.248658031225204, "rewards_train/rejected": -15.664527893066406, "step": 2406 }, { "epoch": 1.19, "learning_rate": 7.088738131175012e-07, "loss": 0.0, "step": 2407 }, { "epoch": 1.19, "logps_train/chosen": -74.2623062133789, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -279.22332763671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8984231948852539, "rewards_train/margins": 14.576544761657715, "rewards_train/rejected": -15.474967956542969, "step": 2407 }, { "epoch": 1.19, "learning_rate": 7.086272088316728e-07, "loss": 0.0, "step": 2408 }, { "epoch": 1.19, "logps_train/chosen": -74.21875, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -287.6986083984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9813968539237976, "rewards_train/margins": 15.053110897541046, "rewards_train/rejected": -16.034507751464844, "step": 2408 }, { "epoch": 1.19, "learning_rate": 7.083805430828607e-07, "loss": 0.0, "step": 2409 }, { "epoch": 1.19, "logps_train/chosen": -72.25247955322266, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -295.05810546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7222938537597656, "rewards_train/margins": 15.899236679077148, "rewards_train/rejected": -16.621530532836914, "step": 2409 }, { "epoch": 1.19, "learning_rate": 7.081338159437345e-07, "loss": 0.0001, "step": 2410 }, { "epoch": 1.19, "logps_train/chosen": -69.96208190917969, "logps_train/ref_chosen": -63.59375, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -278.0647888183594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.637931227684021, "rewards_train/margins": 14.443451523780823, "rewards_train/rejected": -15.081382751464844, "step": 2410 }, { "epoch": 1.19, "learning_rate": 7.078870274869817e-07, "loss": 0.0001, "step": 2411 }, { "epoch": 1.19, "logps_train/chosen": -75.16677856445312, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -286.6671142578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9851830005645752, "rewards_train/margins": 15.183090925216675, "rewards_train/rejected": -16.16827392578125, "step": 2411 }, { "epoch": 1.19, "learning_rate": 7.076401777853078e-07, "loss": 0.0, "step": 2412 }, { "epoch": 1.19, "logps_train/chosen": -75.589599609375, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -292.7376708984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9076903462409973, "rewards_train/margins": 15.677993476390839, "rewards_train/rejected": -16.585683822631836, "step": 2412 }, { "epoch": 1.19, "learning_rate": 7.073932669114366e-07, "loss": 0.0003, "step": 2413 }, { "epoch": 1.19, "logps_train/chosen": -71.25704956054688, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -285.354736328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6240936517715454, "rewards_train/margins": 15.080667853355408, "rewards_train/rejected": -15.704761505126953, "step": 2413 }, { "epoch": 1.19, "learning_rate": 7.071462949381093e-07, "loss": 0.0003, "step": 2414 }, { "epoch": 1.19, "logps_train/chosen": -73.84395599365234, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -281.82061767578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9335650205612183, "rewards_train/margins": 14.540243268013, "rewards_train/rejected": -15.473808288574219, "step": 2414 }, { "epoch": 1.19, "learning_rate": 7.068992619380858e-07, "loss": 0.0, "step": 2415 }, { "epoch": 1.19, "logps_train/chosen": -75.37254333496094, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -284.6201171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.066208839416504, "rewards_train/margins": 14.86450481414795, "rewards_train/rejected": -15.930713653564453, "step": 2415 }, { "epoch": 1.19, "learning_rate": 7.066521679841437e-07, "loss": 0.0001, "step": 2416 }, { "epoch": 1.19, "logps_train/chosen": -72.61799621582031, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -289.6405334472656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6996414065361023, "rewards_train/margins": 15.60664826631546, "rewards_train/rejected": -16.306289672851562, "step": 2416 }, { "epoch": 1.19, "learning_rate": 7.064050131490784e-07, "loss": 0.0, "step": 2417 }, { "epoch": 1.19, "logps_train/chosen": -70.64212799072266, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -280.3502197265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6871620416641235, "rewards_train/margins": 14.970322966575623, "rewards_train/rejected": -15.657485008239746, "step": 2417 }, { "epoch": 1.19, "learning_rate": 7.061577975057036e-07, "loss": 0.0002, "step": 2418 }, { "epoch": 1.19, "logps_train/chosen": -66.16636657714844, "logps_train/ref_chosen": -61.34375, "logps_train/ref_rejected": -118.625, "logps_train/rejected": -263.60943603515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4811878204345703, "rewards_train/margins": 14.019743919372559, "rewards_train/rejected": -14.500931739807129, "step": 2418 }, { "epoch": 1.19, "learning_rate": 7.059105211268504e-07, "loss": 0.0003, "step": 2419 }, { "epoch": 1.19, "logps_train/chosen": -74.82186889648438, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -288.5759582519531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.074960708618164, "rewards_train/margins": 15.253046035766602, "rewards_train/rejected": -16.328006744384766, "step": 2419 }, { "epoch": 1.19, "learning_rate": 7.056631840853683e-07, "loss": 0.0, "step": 2420 }, { "epoch": 1.19, "logps_train/chosen": -63.146095275878906, "logps_train/ref_chosen": -60.5625, "logps_train/ref_rejected": -115.875, "logps_train/rejected": -255.73593139648438, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2587258815765381, "rewards_train/margins": 13.727806329727173, "rewards_train/rejected": -13.986532211303711, "step": 2420 }, { "epoch": 1.19, "learning_rate": 7.054157864541244e-07, "loss": 0.0, "step": 2421 }, { "epoch": 1.19, "logps_train/chosen": -72.72183227539062, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -289.9092712402344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6950345635414124, "rewards_train/margins": 15.582271039485931, "rewards_train/rejected": -16.277305603027344, "step": 2421 }, { "epoch": 1.19, "learning_rate": 7.051683283060037e-07, "loss": 0.0, "step": 2422 }, { "epoch": 1.19, "logps_train/chosen": -69.9371337890625, "logps_train/ref_chosen": -62.875, "logps_train/ref_rejected": -120.9375, "logps_train/rejected": -271.5059814453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7064089775085449, "rewards_train/margins": 14.352929592132568, "rewards_train/rejected": -15.059338569641113, "step": 2422 }, { "epoch": 1.19, "learning_rate": 7.04920809713909e-07, "loss": 0.0, "step": 2423 }, { "epoch": 1.19, "logps_train/chosen": -67.99967956542969, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -286.191162109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.28273090720176697, "rewards_train/margins": 15.337456852197647, "rewards_train/rejected": -15.620187759399414, "step": 2423 }, { "epoch": 1.19, "learning_rate": 7.046732307507611e-07, "loss": 0.0001, "step": 2424 }, { "epoch": 1.19, "logps_train/chosen": -72.3753662109375, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -285.0180358886719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.572741687297821, "rewards_train/margins": 15.273398220539093, "rewards_train/rejected": -15.846139907836914, "step": 2424 }, { "epoch": 1.19, "learning_rate": 7.044255914894983e-07, "loss": 0.0001, "step": 2425 }, { "epoch": 1.19, "logps_train/chosen": -71.07425689697266, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -273.3782958984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.687479555606842, "rewards_train/margins": 14.226427495479584, "rewards_train/rejected": -14.913907051086426, "step": 2425 }, { "epoch": 1.19, "learning_rate": 7.041778920030768e-07, "loss": 0.0001, "step": 2426 }, { "epoch": 1.19, "logps_train/chosen": -71.26480102539062, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -282.16845703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6288720965385437, "rewards_train/margins": 14.887679874897003, "rewards_train/rejected": -15.516551971435547, "step": 2426 }, { "epoch": 1.19, "learning_rate": 7.039301323644706e-07, "loss": 0.0, "step": 2427 }, { "epoch": 1.19, "logps_train/chosen": -71.69293212890625, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -122.4375, "logps_train/rejected": -272.00909423828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5706603527069092, "rewards_train/margins": 14.384645700454712, "rewards_train/rejected": -14.955306053161621, "step": 2427 }, { "epoch": 1.2, "learning_rate": 7.036823126466715e-07, "loss": 0.0001, "step": 2428 }, { "epoch": 1.2, "logps_train/chosen": -73.92760467529297, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -295.1048583984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8931506276130676, "rewards_train/margins": 16.01802033185959, "rewards_train/rejected": -16.911170959472656, "step": 2428 }, { "epoch": 1.2, "learning_rate": 7.034344329226884e-07, "loss": 0.0, "step": 2429 }, { "epoch": 1.2, "logps_train/chosen": -74.07157897949219, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -285.40533447265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.004570484161377, "rewards_train/margins": 14.997390270233154, "rewards_train/rejected": -16.00196075439453, "step": 2429 }, { "epoch": 1.2, "learning_rate": 7.031864932655488e-07, "loss": 0.0003, "step": 2430 }, { "epoch": 1.2, "logps_train/chosen": -74.22283935546875, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -284.0797119140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8761905431747437, "rewards_train/margins": 14.834128975868225, "rewards_train/rejected": -15.710319519042969, "step": 2430 }, { "epoch": 1.2, "learning_rate": 7.029384937482971e-07, "loss": 0.0, "step": 2431 }, { "epoch": 1.2, "logps_train/chosen": -67.93321228027344, "logps_train/ref_chosen": -61.53125, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -280.2730712890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.639805793762207, "rewards_train/margins": 15.126565933227539, "rewards_train/rejected": -15.766371726989746, "step": 2431 }, { "epoch": 1.2, "learning_rate": 7.026904344439959e-07, "loss": 0.0001, "step": 2432 }, { "epoch": 1.2, "logps_train/chosen": -77.27887725830078, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -285.7761535644531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2838938236236572, "rewards_train/margins": 14.763934850692749, "rewards_train/rejected": -16.047828674316406, "step": 2432 }, { "epoch": 1.2, "learning_rate": 7.024423154257249e-07, "loss": 0.0002, "step": 2433 }, { "epoch": 1.2, "logps_train/chosen": -75.41804504394531, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -131.5, "logps_train/rejected": -296.94512939453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8276201486587524, "rewards_train/margins": 15.721825957298279, "rewards_train/rejected": -16.54944610595703, "step": 2433 }, { "epoch": 1.2, "learning_rate": 7.021941367665819e-07, "loss": 0.0, "step": 2434 }, { "epoch": 1.2, "logps_train/chosen": -73.5607681274414, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -288.3109130859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7512913346290588, "rewards_train/margins": 15.21495908498764, "rewards_train/rejected": -15.9662504196167, "step": 2434 }, { "epoch": 1.2, "learning_rate": 7.019458985396817e-07, "loss": 0.0, "step": 2435 }, { "epoch": 1.2, "logps_train/chosen": -74.58055877685547, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -284.1965637207031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.085228681564331, "rewards_train/margins": 14.656206369400024, "rewards_train/rejected": -15.741435050964355, "step": 2435 }, { "epoch": 1.2, "learning_rate": 7.016976008181571e-07, "loss": 0.0001, "step": 2436 }, { "epoch": 1.2, "logps_train/chosen": -72.49406433105469, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -278.48150634765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.801505982875824, "rewards_train/margins": 14.718665182590485, "rewards_train/rejected": -15.520171165466309, "step": 2436 }, { "epoch": 1.2, "learning_rate": 7.014492436751585e-07, "loss": 0.0001, "step": 2437 }, { "epoch": 1.2, "logps_train/chosen": -73.12096405029297, "logps_train/ref_chosen": -67.25, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -287.9063415527344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5885121822357178, "rewards_train/margins": 15.23249363899231, "rewards_train/rejected": -15.821005821228027, "step": 2437 }, { "epoch": 1.2, "learning_rate": 7.01200827183853e-07, "loss": 0.0, "step": 2438 }, { "epoch": 1.2, "logps_train/chosen": -74.79228973388672, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -283.06787109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8535944223403931, "rewards_train/margins": 14.868674635887146, "rewards_train/rejected": -15.722269058227539, "step": 2438 }, { "epoch": 1.2, "learning_rate": 7.009523514174265e-07, "loss": 0.0004, "step": 2439 }, { "epoch": 1.2, "logps_train/chosen": -67.88212585449219, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -276.17578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.40207940340042114, "rewards_train/margins": 14.894064128398895, "rewards_train/rejected": -15.296143531799316, "step": 2439 }, { "epoch": 1.2, "learning_rate": 7.007038164490812e-07, "loss": 0.0004, "step": 2440 }, { "epoch": 1.2, "logps_train/chosen": -66.54306030273438, "logps_train/ref_chosen": -62.0, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -272.9015808105469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4553309381008148, "rewards_train/margins": 14.660315424203873, "rewards_train/rejected": -15.115646362304688, "step": 2440 }, { "epoch": 1.2, "learning_rate": 7.004552223520372e-07, "loss": 0.0, "step": 2441 }, { "epoch": 1.2, "logps_train/chosen": -74.61837768554688, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -289.1297912597656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9730188250541687, "rewards_train/margins": 15.1397665143013, "rewards_train/rejected": -16.11278533935547, "step": 2441 }, { "epoch": 1.2, "learning_rate": 7.002065691995322e-07, "loss": 0.0, "step": 2442 }, { "epoch": 1.2, "logps_train/chosen": -67.19312286376953, "logps_train/ref_chosen": -62.28125, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -285.28955078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4908457398414612, "rewards_train/margins": 15.498659074306488, "rewards_train/rejected": -15.98950481414795, "step": 2442 }, { "epoch": 1.2, "learning_rate": 6.999578570648208e-07, "loss": 0.0001, "step": 2443 }, { "epoch": 1.2, "logps_train/chosen": -72.783203125, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -295.2987060546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.679784893989563, "rewards_train/margins": 15.860536456108093, "rewards_train/rejected": -16.540321350097656, "step": 2443 }, { "epoch": 1.2, "learning_rate": 6.997090860211756e-07, "loss": 0.0001, "step": 2444 }, { "epoch": 1.2, "logps_train/chosen": -68.31181335449219, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -122.0625, "logps_train/rejected": -272.87127685546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.40774381160736084, "rewards_train/margins": 14.67215883731842, "rewards_train/rejected": -15.079902648925781, "step": 2444 }, { "epoch": 1.2, "learning_rate": 6.99460256141886e-07, "loss": 0.0002, "step": 2445 }, { "epoch": 1.2, "logps_train/chosen": -74.05066680908203, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -293.09197998046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9257701635360718, "rewards_train/margins": 15.46497118473053, "rewards_train/rejected": -16.3907413482666, "step": 2445 }, { "epoch": 1.2, "learning_rate": 6.99211367500259e-07, "loss": 0.0, "step": 2446 }, { "epoch": 1.2, "logps_train/chosen": -74.79711151123047, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -298.51556396484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0072503089904785, "rewards_train/margins": 15.77106237411499, "rewards_train/rejected": -16.77831268310547, "step": 2446 }, { "epoch": 1.2, "learning_rate": 6.989624201696189e-07, "loss": 0.0, "step": 2447 }, { "epoch": 1.2, "logps_train/chosen": -75.108642578125, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -280.0037841796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1485600471496582, "rewards_train/margins": 14.599524021148682, "rewards_train/rejected": -15.74808406829834, "step": 2447 }, { "epoch": 1.21, "learning_rate": 6.987134142233074e-07, "loss": 0.0001, "step": 2448 }, { "epoch": 1.21, "logps_train/chosen": -70.997314453125, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -282.67578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6140621900558472, "rewards_train/margins": 14.933594822883606, "rewards_train/rejected": -15.547657012939453, "step": 2448 }, { "epoch": 1.21, "learning_rate": 6.98464349734683e-07, "loss": 0.0, "step": 2449 }, { "epoch": 1.21, "logps_train/chosen": -69.87431335449219, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -122.4375, "logps_train/rejected": -279.3543395996094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6255902647972107, "rewards_train/margins": 15.066778242588043, "rewards_train/rejected": -15.692368507385254, "step": 2449 }, { "epoch": 1.21, "learning_rate": 6.982152267771221e-07, "loss": 0.0004, "step": 2450 }, { "epoch": 1.21, "logps_train/chosen": -73.98992919921875, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -279.087158203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0372745990753174, "rewards_train/margins": 14.503376722335815, "rewards_train/rejected": -15.540651321411133, "step": 2450 }, { "epoch": 1.21, "learning_rate": 6.979660454240179e-07, "loss": 0.0, "step": 2451 }, { "epoch": 1.21, "logps_train/chosen": -71.02766418457031, "logps_train/ref_chosen": -61.5625, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -275.4976806640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9479330778121948, "rewards_train/margins": 14.315409779548645, "rewards_train/rejected": -15.26334285736084, "step": 2451 }, { "epoch": 1.21, "learning_rate": 6.977168057487808e-07, "loss": 0.0001, "step": 2452 }, { "epoch": 1.21, "logps_train/chosen": -74.94308471679688, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -294.3530578613281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8313199281692505, "rewards_train/margins": 15.642461657524109, "rewards_train/rejected": -16.47378158569336, "step": 2452 }, { "epoch": 1.21, "learning_rate": 6.974675078248386e-07, "loss": 0.0001, "step": 2453 }, { "epoch": 1.21, "logps_train/chosen": -65.87467956542969, "logps_train/ref_chosen": -61.625, "logps_train/ref_rejected": -119.375, "logps_train/rejected": -271.0870361328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.42526066303253174, "rewards_train/margins": 14.743943095207214, "rewards_train/rejected": -15.169203758239746, "step": 2453 }, { "epoch": 1.21, "learning_rate": 6.972181517256362e-07, "loss": 0.0002, "step": 2454 }, { "epoch": 1.21, "logps_train/chosen": -73.7163314819336, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -277.2717590332031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9900412559509277, "rewards_train/margins": 14.288991451263428, "rewards_train/rejected": -15.279032707214355, "step": 2454 }, { "epoch": 1.21, "learning_rate": 6.969687375246354e-07, "loss": 0.0001, "step": 2455 }, { "epoch": 1.21, "logps_train/chosen": -72.05681610107422, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -121.125, "logps_train/rejected": -275.20111083984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8628103733062744, "rewards_train/margins": 14.540504217147827, "rewards_train/rejected": -15.403314590454102, "step": 2455 }, { "epoch": 1.21, "learning_rate": 6.967192652953156e-07, "loss": 0.0, "step": 2456 }, { "epoch": 1.21, "logps_train/chosen": -69.02661895751953, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -282.4598083496094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3847416937351227, "rewards_train/margins": 15.307479947805405, "rewards_train/rejected": -15.692221641540527, "step": 2456 }, { "epoch": 1.21, "learning_rate": 6.964697351111728e-07, "loss": 0.0, "step": 2457 }, { "epoch": 1.21, "logps_train/chosen": -74.35479736328125, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -292.9845886230469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7782045602798462, "rewards_train/margins": 15.690860152244568, "rewards_train/rejected": -16.469064712524414, "step": 2457 }, { "epoch": 1.21, "learning_rate": 6.962201470457202e-07, "loss": 0.0, "step": 2458 }, { "epoch": 1.21, "logps_train/chosen": -70.61522674560547, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -290.02935791015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6620596647262573, "rewards_train/margins": 15.682281613349915, "rewards_train/rejected": -16.344341278076172, "step": 2458 }, { "epoch": 1.21, "learning_rate": 6.959705011724883e-07, "loss": 0.0, "step": 2459 }, { "epoch": 1.21, "logps_train/chosen": -72.44625854492188, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -283.7913818359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.893014669418335, "rewards_train/margins": 15.025478601455688, "rewards_train/rejected": -15.918493270874023, "step": 2459 }, { "epoch": 1.21, "learning_rate": 6.957207975650245e-07, "loss": 0.0, "step": 2460 }, { "epoch": 1.21, "logps_train/chosen": -67.69987487792969, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -274.3184509277344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.38946962356567383, "rewards_train/margins": 14.728166103363037, "rewards_train/rejected": -15.117635726928711, "step": 2460 }, { "epoch": 1.21, "learning_rate": 6.954710362968929e-07, "loss": 0.0003, "step": 2461 }, { "epoch": 1.21, "logps_train/chosen": -68.26786804199219, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -280.6605529785156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5533251762390137, "rewards_train/margins": 14.865366458892822, "rewards_train/rejected": -15.418691635131836, "step": 2461 }, { "epoch": 1.21, "learning_rate": 6.95221217441675e-07, "loss": 0.0, "step": 2462 }, { "epoch": 1.21, "logps_train/chosen": -75.90589904785156, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -272.87786865234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0848281383514404, "rewards_train/margins": 13.764482736587524, "rewards_train/rejected": -14.849310874938965, "step": 2462 }, { "epoch": 1.21, "learning_rate": 6.949713410729692e-07, "loss": 0.0005, "step": 2463 }, { "epoch": 1.21, "logps_train/chosen": -76.89222717285156, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -289.65087890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3219859600067139, "rewards_train/margins": 14.799550294876099, "rewards_train/rejected": -16.121536254882812, "step": 2463 }, { "epoch": 1.21, "learning_rate": 6.947214072643906e-07, "loss": 0.0001, "step": 2464 }, { "epoch": 1.21, "logps_train/chosen": -70.81559753417969, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -119.125, "logps_train/rejected": -273.48162841796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5903971195220947, "rewards_train/margins": 14.84487509727478, "rewards_train/rejected": -15.435272216796875, "step": 2464 }, { "epoch": 1.21, "learning_rate": 6.944714160895716e-07, "loss": 0.0, "step": 2465 }, { "epoch": 1.21, "logps_train/chosen": -70.92713928222656, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -118.6875, "logps_train/rejected": -270.4383544921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7867563962936401, "rewards_train/margins": 14.387986302375793, "rewards_train/rejected": -15.174742698669434, "step": 2465 }, { "epoch": 1.21, "learning_rate": 6.942213676221611e-07, "loss": 0.0005, "step": 2466 }, { "epoch": 1.21, "logps_train/chosen": -69.41365051269531, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -121.375, "logps_train/rejected": -276.88427734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6734452247619629, "rewards_train/margins": 14.880658626556396, "rewards_train/rejected": -15.55410385131836, "step": 2466 }, { "epoch": 1.21, "learning_rate": 6.939712619358251e-07, "loss": 0.0001, "step": 2467 }, { "epoch": 1.21, "logps_train/chosen": -62.3973388671875, "logps_train/ref_chosen": -61.125, "logps_train/ref_rejected": -116.8125, "logps_train/rejected": -259.6336364746094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.12574465572834015, "rewards_train/margins": 14.158614322543144, "rewards_train/rejected": -14.284358978271484, "step": 2467 }, { "epoch": 1.22, "learning_rate": 6.937210991042466e-07, "loss": 0.0, "step": 2468 }, { "epoch": 1.22, "logps_train/chosen": -76.76914978027344, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -121.1875, "logps_train/rejected": -273.40911865234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2475199699401855, "rewards_train/margins": 13.971417903900146, "rewards_train/rejected": -15.218937873840332, "step": 2468 }, { "epoch": 1.22, "learning_rate": 6.93470879201125e-07, "loss": 0.0001, "step": 2469 }, { "epoch": 1.22, "logps_train/chosen": -72.30998229980469, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -130.375, "logps_train/rejected": -288.94293212890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5967698097229004, "rewards_train/margins": 15.263882160186768, "rewards_train/rejected": -15.860651969909668, "step": 2469 }, { "epoch": 1.22, "learning_rate": 6.932206023001771e-07, "loss": 0.0001, "step": 2470 }, { "epoch": 1.22, "logps_train/chosen": -74.41956329345703, "logps_train/ref_chosen": -68.1875, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -301.14617919921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6241827011108398, "rewards_train/margins": 16.417195320129395, "rewards_train/rejected": -17.041378021240234, "step": 2470 }, { "epoch": 1.22, "learning_rate": 6.929702684751358e-07, "loss": 0.0, "step": 2471 }, { "epoch": 1.22, "logps_train/chosen": -71.20355224609375, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -290.04144287109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6863960027694702, "rewards_train/margins": 15.614232659339905, "rewards_train/rejected": -16.300628662109375, "step": 2471 }, { "epoch": 1.22, "learning_rate": 6.927198777997513e-07, "loss": 0.0001, "step": 2472 }, { "epoch": 1.22, "logps_train/chosen": -72.40635681152344, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -270.5889892578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8529894948005676, "rewards_train/margins": 14.060208022594452, "rewards_train/rejected": -14.91319751739502, "step": 2472 }, { "epoch": 1.22, "learning_rate": 6.924694303477902e-07, "loss": 0.0002, "step": 2473 }, { "epoch": 1.22, "logps_train/chosen": -72.4700698852539, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -131.5, "logps_train/rejected": -298.4856872558594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5924660563468933, "rewards_train/margins": 16.10278159379959, "rewards_train/rejected": -16.695247650146484, "step": 2473 }, { "epoch": 1.22, "learning_rate": 6.922189261930364e-07, "loss": 0.0, "step": 2474 }, { "epoch": 1.22, "logps_train/chosen": -72.44583129882812, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -120.5625, "logps_train/rejected": -271.7110900878906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8349151611328125, "rewards_train/margins": 14.281457901000977, "rewards_train/rejected": -15.116373062133789, "step": 2474 }, { "epoch": 1.22, "learning_rate": 6.919683654092899e-07, "loss": 0.0, "step": 2475 }, { "epoch": 1.22, "logps_train/chosen": -69.78936767578125, "logps_train/ref_chosen": -62.53125, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -283.13531494140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7271301746368408, "rewards_train/margins": 15.22146201133728, "rewards_train/rejected": -15.948592185974121, "step": 2475 }, { "epoch": 1.22, "learning_rate": 6.917177480703677e-07, "loss": 0.0, "step": 2476 }, { "epoch": 1.22, "logps_train/chosen": -69.22969818115234, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -276.58154296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7096643447875977, "rewards_train/margins": 14.649271965026855, "rewards_train/rejected": -15.358936309814453, "step": 2476 }, { "epoch": 1.22, "learning_rate": 6.914670742501031e-07, "loss": 0.0001, "step": 2477 }, { "epoch": 1.22, "logps_train/chosen": -69.34870910644531, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -276.23876953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.612800121307373, "rewards_train/margins": 14.609805583953857, "rewards_train/rejected": -15.22260570526123, "step": 2477 }, { "epoch": 1.22, "learning_rate": 6.912163440223461e-07, "loss": 0.0001, "step": 2478 }, { "epoch": 1.22, "logps_train/chosen": -76.60222625732422, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -294.4090576171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0167168378829956, "rewards_train/margins": 15.560610890388489, "rewards_train/rejected": -16.577327728271484, "step": 2478 }, { "epoch": 1.22, "learning_rate": 6.909655574609644e-07, "loss": 0.0, "step": 2479 }, { "epoch": 1.22, "logps_train/chosen": -72.94445037841797, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -289.2442932128906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9192012548446655, "rewards_train/margins": 15.33833658695221, "rewards_train/rejected": -16.257537841796875, "step": 2479 }, { "epoch": 1.22, "learning_rate": 6.907147146398408e-07, "loss": 0.0, "step": 2480 }, { "epoch": 1.22, "logps_train/chosen": -72.78431701660156, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -281.3547058105469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6521139740943909, "rewards_train/margins": 15.13228315114975, "rewards_train/rejected": -15.78439712524414, "step": 2480 }, { "epoch": 1.22, "learning_rate": 6.904638156328753e-07, "loss": 0.0002, "step": 2481 }, { "epoch": 1.22, "logps_train/chosen": -77.32432556152344, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -121.125, "logps_train/rejected": -280.20794677734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.391806960105896, "rewards_train/margins": 14.512435555458069, "rewards_train/rejected": -15.904242515563965, "step": 2481 }, { "epoch": 1.22, "learning_rate": 6.902128605139844e-07, "loss": 0.0003, "step": 2482 }, { "epoch": 1.22, "logps_train/chosen": -75.09152221679688, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -291.47821044921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1412322521209717, "rewards_train/margins": 15.531686067581177, "rewards_train/rejected": -16.67291831970215, "step": 2482 }, { "epoch": 1.22, "learning_rate": 6.899618493571013e-07, "loss": 0.0002, "step": 2483 }, { "epoch": 1.22, "logps_train/chosen": -69.74007415771484, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -275.98089599609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6510583758354187, "rewards_train/margins": 14.50152188539505, "rewards_train/rejected": -15.152580261230469, "step": 2483 }, { "epoch": 1.22, "learning_rate": 6.897107822361756e-07, "loss": 0.0001, "step": 2484 }, { "epoch": 1.22, "logps_train/chosen": -70.97845458984375, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -288.0097961425781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5909363031387329, "rewards_train/margins": 15.432358145713806, "rewards_train/rejected": -16.02329444885254, "step": 2484 }, { "epoch": 1.22, "learning_rate": 6.894596592251735e-07, "loss": 0.0002, "step": 2485 }, { "epoch": 1.22, "logps_train/chosen": -77.53688049316406, "logps_train/ref_chosen": -68.1875, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -296.29852294921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9329845905303955, "rewards_train/margins": 15.757220029830933, "rewards_train/rejected": -16.690204620361328, "step": 2485 }, { "epoch": 1.22, "learning_rate": 6.892084803980772e-07, "loss": 0.0001, "step": 2486 }, { "epoch": 1.22, "logps_train/chosen": -69.33290100097656, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -283.5332946777344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.39271366596221924, "rewards_train/margins": 15.405440211296082, "rewards_train/rejected": -15.7981538772583, "step": 2486 }, { "epoch": 1.22, "learning_rate": 6.889572458288858e-07, "loss": 0.0001, "step": 2487 }, { "epoch": 1.22, "logps_train/chosen": -67.83055114746094, "logps_train/ref_chosen": -63.15625, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -285.1203308105469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.46747827529907227, "rewards_train/margins": 15.500072002410889, "rewards_train/rejected": -15.967550277709961, "step": 2487 }, { "epoch": 1.23, "learning_rate": 6.887059555916148e-07, "loss": 0.0002, "step": 2488 }, { "epoch": 1.23, "logps_train/chosen": -72.51742553710938, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -283.1812744140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7727385759353638, "rewards_train/margins": 15.058573603630066, "rewards_train/rejected": -15.83131217956543, "step": 2488 }, { "epoch": 1.23, "learning_rate": 6.884546097602959e-07, "loss": 0.0001, "step": 2489 }, { "epoch": 1.23, "logps_train/chosen": -72.8028564453125, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -297.08648681640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8910278677940369, "rewards_train/margins": 15.987981379032135, "rewards_train/rejected": -16.879009246826172, "step": 2489 }, { "epoch": 1.23, "learning_rate": 6.882032084089774e-07, "loss": 0.0, "step": 2490 }, { "epoch": 1.23, "logps_train/chosen": -69.80773162841797, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -274.9261779785156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6838492155075073, "rewards_train/margins": 14.583571553230286, "rewards_train/rejected": -15.267420768737793, "step": 2490 }, { "epoch": 1.23, "learning_rate": 6.879517516117237e-07, "loss": 0.0002, "step": 2491 }, { "epoch": 1.23, "logps_train/chosen": -73.04193878173828, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -294.62933349609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9160594940185547, "rewards_train/margins": 15.591989517211914, "rewards_train/rejected": -16.50804901123047, "step": 2491 }, { "epoch": 1.23, "learning_rate": 6.877002394426157e-07, "loss": 0.0, "step": 2492 }, { "epoch": 1.23, "logps_train/chosen": -76.98136901855469, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -285.9787902832031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1233811378479004, "rewards_train/margins": 14.654185771942139, "rewards_train/rejected": -15.777566909790039, "step": 2492 }, { "epoch": 1.23, "learning_rate": 6.874486719757506e-07, "loss": 0.0, "step": 2493 }, { "epoch": 1.23, "logps_train/chosen": -72.78174591064453, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -283.79547119140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.809033989906311, "rewards_train/margins": 14.921783804893494, "rewards_train/rejected": -15.730817794799805, "step": 2493 }, { "epoch": 1.23, "learning_rate": 6.871970492852419e-07, "loss": 0.0001, "step": 2494 }, { "epoch": 1.23, "logps_train/chosen": -71.64248657226562, "logps_train/ref_chosen": -62.34375, "logps_train/ref_rejected": -120.8125, "logps_train/rejected": -275.7930603027344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9290430545806885, "rewards_train/margins": 14.571309804916382, "rewards_train/rejected": -15.50035285949707, "step": 2494 }, { "epoch": 1.23, "learning_rate": 6.869453714452193e-07, "loss": 0.0002, "step": 2495 }, { "epoch": 1.23, "logps_train/chosen": -71.8046875, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -297.01385498046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7316403985023499, "rewards_train/margins": 15.922919690608978, "rewards_train/rejected": -16.654560089111328, "step": 2495 }, { "epoch": 1.23, "learning_rate": 6.86693638529829e-07, "loss": 0.0002, "step": 2496 }, { "epoch": 1.23, "logps_train/chosen": -73.33094787597656, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -282.7483825683594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8862690925598145, "rewards_train/margins": 15.008294582366943, "rewards_train/rejected": -15.894563674926758, "step": 2496 }, { "epoch": 1.23, "learning_rate": 6.864418506132328e-07, "loss": 0.0, "step": 2497 }, { "epoch": 1.23, "logps_train/chosen": -75.57003021240234, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -132.625, "logps_train/rejected": -297.1904296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.843575119972229, "rewards_train/margins": 15.60925805568695, "rewards_train/rejected": -16.45283317565918, "step": 2497 }, { "epoch": 1.23, "learning_rate": 6.861900077696094e-07, "loss": 0.0005, "step": 2498 }, { "epoch": 1.23, "logps_train/chosen": -67.1965103149414, "logps_train/ref_chosen": -62.84375, "logps_train/ref_rejected": -120.9375, "logps_train/rejected": -276.9019775390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.43566665053367615, "rewards_train/margins": 15.158878713846207, "rewards_train/rejected": -15.594545364379883, "step": 2498 }, { "epoch": 1.23, "learning_rate": 6.859381100731533e-07, "loss": 0.0, "step": 2499 }, { "epoch": 1.23, "logps_train/chosen": -73.76884460449219, "logps_train/ref_chosen": -67.25, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -278.7121887207031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6543256044387817, "rewards_train/margins": 14.50175654888153, "rewards_train/rejected": -15.156082153320312, "step": 2499 }, { "epoch": 1.23, "learning_rate": 6.856861575980755e-07, "loss": 0.0004, "step": 2500 }, { "epoch": 1.23, "logps_train/chosen": -67.16793823242188, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -281.7557373046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3236292600631714, "rewards_train/margins": 14.932022213935852, "rewards_train/rejected": -15.255651473999023, "step": 2500 }, { "epoch": 1.23, "learning_rate": 6.854341504186024e-07, "loss": 0.0, "step": 2501 }, { "epoch": 1.23, "logps_train/chosen": -75.38792419433594, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -288.26123046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1384503841400146, "rewards_train/margins": 14.878786325454712, "rewards_train/rejected": -16.017236709594727, "step": 2501 }, { "epoch": 1.23, "learning_rate": 6.851820886089777e-07, "loss": 0.0001, "step": 2502 }, { "epoch": 1.23, "logps_train/chosen": -70.21949768066406, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -290.509765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6418225765228271, "rewards_train/margins": 15.621654748916626, "rewards_train/rejected": -16.263477325439453, "step": 2502 }, { "epoch": 1.23, "learning_rate": 6.849299722434598e-07, "loss": 0.0, "step": 2503 }, { "epoch": 1.23, "logps_train/chosen": -72.89617919921875, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -287.23846435546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8150573968887329, "rewards_train/margins": 15.570165991783142, "rewards_train/rejected": -16.385223388671875, "step": 2503 }, { "epoch": 1.23, "learning_rate": 6.846778013963242e-07, "loss": 0.0, "step": 2504 }, { "epoch": 1.23, "logps_train/chosen": -71.64137268066406, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -290.65362548828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6765888333320618, "rewards_train/margins": 15.652592837810516, "rewards_train/rejected": -16.329181671142578, "step": 2504 }, { "epoch": 1.23, "learning_rate": 6.844255761418623e-07, "loss": 0.0002, "step": 2505 }, { "epoch": 1.23, "logps_train/chosen": -72.33538818359375, "logps_train/ref_chosen": -67.0625, "logps_train/ref_rejected": -135.0, "logps_train/rejected": -303.5096435546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5287052392959595, "rewards_train/margins": 16.32021176815033, "rewards_train/rejected": -16.84891700744629, "step": 2505 }, { "epoch": 1.23, "learning_rate": 6.841732965543813e-07, "loss": 0.0, "step": 2506 }, { "epoch": 1.23, "logps_train/chosen": -79.28053283691406, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -297.81939697265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4086689949035645, "rewards_train/margins": 15.39934492111206, "rewards_train/rejected": -16.808013916015625, "step": 2506 }, { "epoch": 1.23, "learning_rate": 6.839209627082042e-07, "loss": 0.0, "step": 2507 }, { "epoch": 1.23, "logps_train/chosen": -70.00140380859375, "logps_train/ref_chosen": -61.1875, "logps_train/ref_rejected": -119.9375, "logps_train/rejected": -268.48944091796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8801202774047852, "rewards_train/margins": 13.974003791809082, "rewards_train/rejected": -14.854124069213867, "step": 2507 }, { "epoch": 1.23, "learning_rate": 6.836685746776706e-07, "loss": 0.0001, "step": 2508 }, { "epoch": 1.23, "logps_train/chosen": -74.04237365722656, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -283.20208740234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0372936725616455, "rewards_train/margins": 14.90420413017273, "rewards_train/rejected": -15.941497802734375, "step": 2508 }, { "epoch": 1.24, "learning_rate": 6.834161325371354e-07, "loss": 0.0003, "step": 2509 }, { "epoch": 1.24, "logps_train/chosen": -74.00527954101562, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -285.65191650390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.830654501914978, "rewards_train/margins": 15.000309586524963, "rewards_train/rejected": -15.830964088439941, "step": 2509 }, { "epoch": 1.24, "learning_rate": 6.831636363609699e-07, "loss": 0.0001, "step": 2510 }, { "epoch": 1.24, "logps_train/chosen": -69.24675750732422, "logps_train/ref_chosen": -63.15625, "logps_train/ref_rejected": -122.0625, "logps_train/rejected": -282.5035705566406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6101980209350586, "rewards_train/margins": 15.434496879577637, "rewards_train/rejected": -16.044694900512695, "step": 2510 }, { "epoch": 1.24, "learning_rate": 6.829110862235613e-07, "loss": 0.0002, "step": 2511 }, { "epoch": 1.24, "logps_train/chosen": -75.11541748046875, "logps_train/ref_chosen": -62.46875, "logps_train/ref_rejected": -121.3125, "logps_train/rejected": -286.67193603515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2646667957305908, "rewards_train/margins": 15.269274473190308, "rewards_train/rejected": -16.5339412689209, "step": 2511 }, { "epoch": 1.24, "learning_rate": 6.826584821993125e-07, "loss": 0.0001, "step": 2512 }, { "epoch": 1.24, "logps_train/chosen": -74.9061279296875, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -292.7624816894531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9125363230705261, "rewards_train/margins": 15.512247383594513, "rewards_train/rejected": -16.42478370666504, "step": 2512 }, { "epoch": 1.24, "learning_rate": 6.824058243626421e-07, "loss": 0.0, "step": 2513 }, { "epoch": 1.24, "logps_train/chosen": -79.52784729003906, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -292.28668212890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.402979850769043, "rewards_train/margins": 15.236725807189941, "rewards_train/rejected": -16.639705657958984, "step": 2513 }, { "epoch": 1.24, "learning_rate": 6.821531127879852e-07, "loss": 0.0003, "step": 2514 }, { "epoch": 1.24, "logps_train/chosen": -72.3094253540039, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -121.0625, "logps_train/rejected": -279.2005615234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8940772414207458, "rewards_train/margins": 14.921974241733551, "rewards_train/rejected": -15.816051483154297, "step": 2514 }, { "epoch": 1.24, "learning_rate": 6.81900347549792e-07, "loss": 0.0003, "step": 2515 }, { "epoch": 1.24, "logps_train/chosen": -72.03551483154297, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -280.08367919921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6801630258560181, "rewards_train/margins": 15.043343901634216, "rewards_train/rejected": -15.723506927490234, "step": 2515 }, { "epoch": 1.24, "learning_rate": 6.81647528722529e-07, "loss": 0.0, "step": 2516 }, { "epoch": 1.24, "logps_train/chosen": -67.06393432617188, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -274.2528076171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2238733023405075, "rewards_train/margins": 14.878848865628242, "rewards_train/rejected": -15.10272216796875, "step": 2516 }, { "epoch": 1.24, "learning_rate": 6.813946563806784e-07, "loss": 0.0001, "step": 2517 }, { "epoch": 1.24, "logps_train/chosen": -73.62733459472656, "logps_train/ref_chosen": -67.1875, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -289.5262451171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6407850384712219, "rewards_train/margins": 15.304369151592255, "rewards_train/rejected": -15.945154190063477, "step": 2517 }, { "epoch": 1.24, "learning_rate": 6.811417305987379e-07, "loss": 0.0001, "step": 2518 }, { "epoch": 1.24, "logps_train/chosen": -76.54501342773438, "logps_train/ref_chosen": -67.625, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -293.5352783203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8948827385902405, "rewards_train/margins": 15.61665290594101, "rewards_train/rejected": -16.51153564453125, "step": 2518 }, { "epoch": 1.24, "learning_rate": 6.808887514512214e-07, "loss": 0.0, "step": 2519 }, { "epoch": 1.24, "logps_train/chosen": -65.02073669433594, "logps_train/ref_chosen": -61.0, "logps_train/ref_rejected": -115.3125, "logps_train/rejected": -268.40447998046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.40292757749557495, "rewards_train/margins": 14.90627270936966, "rewards_train/rejected": -15.309200286865234, "step": 2519 }, { "epoch": 1.24, "learning_rate": 6.806357190126581e-07, "loss": 0.0, "step": 2520 }, { "epoch": 1.24, "logps_train/chosen": -67.07566833496094, "logps_train/ref_chosen": -61.71875, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -277.8912048339844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5361795425415039, "rewards_train/margins": 14.90040111541748, "rewards_train/rejected": -15.436580657958984, "step": 2520 }, { "epoch": 1.24, "learning_rate": 6.80382633357593e-07, "loss": 0.0, "step": 2521 }, { "epoch": 1.24, "logps_train/chosen": -72.77701568603516, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -288.92840576171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6211097240447998, "rewards_train/margins": 15.599318742752075, "rewards_train/rejected": -16.220428466796875, "step": 2521 }, { "epoch": 1.24, "learning_rate": 6.801294945605871e-07, "loss": 0.0, "step": 2522 }, { "epoch": 1.24, "logps_train/chosen": -73.58626556396484, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -291.97027587890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9162436127662659, "rewards_train/margins": 15.604367196559906, "rewards_train/rejected": -16.520610809326172, "step": 2522 }, { "epoch": 1.24, "learning_rate": 6.798763026962167e-07, "loss": 0.0, "step": 2523 }, { "epoch": 1.24, "logps_train/chosen": -65.35314178466797, "logps_train/ref_chosen": -60.78125, "logps_train/ref_rejected": -118.125, "logps_train/rejected": -274.3438415527344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4570430815219879, "rewards_train/margins": 15.16884383559227, "rewards_train/rejected": -15.625886917114258, "step": 2523 }, { "epoch": 1.24, "learning_rate": 6.796230578390735e-07, "loss": 0.0, "step": 2524 }, { "epoch": 1.24, "logps_train/chosen": -72.56863403320312, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -292.1298828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6854524612426758, "rewards_train/margins": 15.75507640838623, "rewards_train/rejected": -16.440528869628906, "step": 2524 }, { "epoch": 1.24, "learning_rate": 6.793697600637655e-07, "loss": 0.0, "step": 2525 }, { "epoch": 1.24, "logps_train/chosen": -70.87255859375, "logps_train/ref_chosen": -63.59375, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -283.0849609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7280759811401367, "rewards_train/margins": 15.09784984588623, "rewards_train/rejected": -15.825925827026367, "step": 2525 }, { "epoch": 1.24, "learning_rate": 6.791164094449158e-07, "loss": 0.0, "step": 2526 }, { "epoch": 1.24, "logps_train/chosen": -72.75537109375, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -279.39422607421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.736035168170929, "rewards_train/margins": 14.6504585146904, "rewards_train/rejected": -15.386493682861328, "step": 2526 }, { "epoch": 1.24, "learning_rate": 6.788630060571633e-07, "loss": 0.0, "step": 2527 }, { "epoch": 1.24, "logps_train/chosen": -78.18206024169922, "logps_train/ref_chosen": -68.5625, "logps_train/ref_rejected": -132.0, "logps_train/rejected": -299.7407531738281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.963762640953064, "rewards_train/margins": 15.814904808998108, "rewards_train/rejected": -16.778667449951172, "step": 2527 }, { "epoch": 1.24, "learning_rate": 6.786095499751623e-07, "loss": 0.0, "step": 2528 }, { "epoch": 1.24, "logps_train/chosen": -70.1905746459961, "logps_train/ref_chosen": -61.8125, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -282.10662841796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8364890813827515, "rewards_train/margins": 15.133841156959534, "rewards_train/rejected": -15.970330238342285, "step": 2528 }, { "epoch": 1.25, "learning_rate": 6.783560412735827e-07, "loss": 0.0, "step": 2529 }, { "epoch": 1.25, "logps_train/chosen": -77.03370666503906, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -290.5172424316406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.30840003490448, "rewards_train/margins": 15.282288670539856, "rewards_train/rejected": -16.590688705444336, "step": 2529 }, { "epoch": 1.25, "learning_rate": 6.781024800271098e-07, "loss": 0.0, "step": 2530 }, { "epoch": 1.25, "logps_train/chosen": -67.08946228027344, "logps_train/ref_chosen": -61.9375, "logps_train/ref_rejected": -121.375, "logps_train/rejected": -284.2303466796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5167584419250488, "rewards_train/margins": 15.765161037445068, "rewards_train/rejected": -16.281919479370117, "step": 2530 }, { "epoch": 1.25, "learning_rate": 6.778488663104445e-07, "loss": 0.0, "step": 2531 }, { "epoch": 1.25, "logps_train/chosen": -74.8984375, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -291.5477294921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9158202409744263, "rewards_train/margins": 15.492955088615417, "rewards_train/rejected": -16.408775329589844, "step": 2531 }, { "epoch": 1.25, "learning_rate": 6.77595200198303e-07, "loss": 0.0, "step": 2532 }, { "epoch": 1.25, "logps_train/chosen": -71.89019012451172, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -284.109130859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7852591276168823, "rewards_train/margins": 15.272140622138977, "rewards_train/rejected": -16.05739974975586, "step": 2532 }, { "epoch": 1.25, "learning_rate": 6.773414817654173e-07, "loss": 0.0, "step": 2533 }, { "epoch": 1.25, "logps_train/chosen": -76.5805892944336, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -293.5423889160156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1860861778259277, "rewards_train/margins": 15.735147953033447, "rewards_train/rejected": -16.921234130859375, "step": 2533 }, { "epoch": 1.25, "learning_rate": 6.770877110865344e-07, "loss": 0.0, "step": 2534 }, { "epoch": 1.25, "logps_train/chosen": -71.51414489746094, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -282.91192626953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7766580581665039, "rewards_train/margins": 15.088459968566895, "rewards_train/rejected": -15.865118026733398, "step": 2534 }, { "epoch": 1.25, "learning_rate": 6.768338882364168e-07, "loss": 0.0, "step": 2535 }, { "epoch": 1.25, "logps_train/chosen": -73.92527770996094, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -288.2817687988281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8632794618606567, "rewards_train/margins": 15.399564623832703, "rewards_train/rejected": -16.26284408569336, "step": 2535 }, { "epoch": 1.25, "learning_rate": 6.765800132898424e-07, "loss": 0.0001, "step": 2536 }, { "epoch": 1.25, "logps_train/chosen": -73.01238250732422, "logps_train/ref_chosen": -62.53125, "logps_train/ref_rejected": -121.375, "logps_train/rejected": -282.819580078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0490901470184326, "rewards_train/margins": 15.096198320388794, "rewards_train/rejected": -16.145288467407227, "step": 2536 }, { "epoch": 1.25, "learning_rate": 6.763260863216047e-07, "loss": 0.0001, "step": 2537 }, { "epoch": 1.25, "logps_train/chosen": -74.52957153320312, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -130.625, "logps_train/rejected": -292.19366455078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7946563363075256, "rewards_train/margins": 15.36386913061142, "rewards_train/rejected": -16.158525466918945, "step": 2537 }, { "epoch": 1.25, "learning_rate": 6.760721074065122e-07, "loss": 0.0001, "step": 2538 }, { "epoch": 1.25, "logps_train/chosen": -72.42692565917969, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -287.79052734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.975115180015564, "rewards_train/margins": 15.292218804359436, "rewards_train/rejected": -16.267333984375, "step": 2538 }, { "epoch": 1.25, "learning_rate": 6.758180766193886e-07, "loss": 0.0, "step": 2539 }, { "epoch": 1.25, "logps_train/chosen": -72.9083251953125, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -120.0, "logps_train/rejected": -273.84326171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9211543798446655, "rewards_train/margins": 14.461217522621155, "rewards_train/rejected": -15.38237190246582, "step": 2539 }, { "epoch": 1.25, "learning_rate": 6.755639940350735e-07, "loss": 0.0001, "step": 2540 }, { "epoch": 1.25, "logps_train/chosen": -72.11346435546875, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -296.5915222167969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5752624273300171, "rewards_train/margins": 16.266507744789124, "rewards_train/rejected": -16.84177017211914, "step": 2540 }, { "epoch": 1.25, "learning_rate": 6.753098597284209e-07, "loss": 0.0, "step": 2541 }, { "epoch": 1.25, "logps_train/chosen": -70.08567810058594, "logps_train/ref_chosen": -61.65625, "logps_train/ref_rejected": -120.375, "logps_train/rejected": -281.4646911621094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8433821201324463, "rewards_train/margins": 15.264318227767944, "rewards_train/rejected": -16.10770034790039, "step": 2541 }, { "epoch": 1.25, "learning_rate": 6.750556737743006e-07, "loss": 0.0001, "step": 2542 }, { "epoch": 1.25, "logps_train/chosen": -76.60995483398438, "logps_train/ref_chosen": -68.5625, "logps_train/ref_rejected": -132.375, "logps_train/rejected": -295.7306213378906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8064545392990112, "rewards_train/margins": 15.523345112800598, "rewards_train/rejected": -16.32979965209961, "step": 2542 }, { "epoch": 1.25, "learning_rate": 6.748014362475978e-07, "loss": 0.0, "step": 2543 }, { "epoch": 1.25, "logps_train/chosen": -74.57168579101562, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -132.0, "logps_train/rejected": -296.3045349121094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8757727146148682, "rewards_train/margins": 15.562982320785522, "rewards_train/rejected": -16.43875503540039, "step": 2543 }, { "epoch": 1.25, "learning_rate": 6.745471472232122e-07, "loss": 0.0001, "step": 2544 }, { "epoch": 1.25, "logps_train/chosen": -72.28082275390625, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -283.7795715332031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9314510822296143, "rewards_train/margins": 15.073458433151245, "rewards_train/rejected": -16.00490951538086, "step": 2544 }, { "epoch": 1.25, "learning_rate": 6.742928067760594e-07, "loss": 0.0, "step": 2545 }, { "epoch": 1.25, "logps_train/chosen": -68.22491455078125, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -296.0178527832031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5611634254455566, "rewards_train/margins": 16.203317165374756, "rewards_train/rejected": -16.764480590820312, "step": 2545 }, { "epoch": 1.25, "learning_rate": 6.740384149810694e-07, "loss": 0.0, "step": 2546 }, { "epoch": 1.25, "logps_train/chosen": -73.03844451904297, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -292.7746887207031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7498406767845154, "rewards_train/margins": 15.667471945285797, "rewards_train/rejected": -16.417312622070312, "step": 2546 }, { "epoch": 1.25, "learning_rate": 6.737839719131881e-07, "loss": 0.0, "step": 2547 }, { "epoch": 1.25, "logps_train/chosen": -70.40740966796875, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -285.88580322265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.538641631603241, "rewards_train/margins": 15.485389053821564, "rewards_train/rejected": -16.024030685424805, "step": 2547 }, { "epoch": 1.25, "learning_rate": 6.735294776473761e-07, "loss": 0.0, "step": 2548 }, { "epoch": 1.25, "logps_train/chosen": -71.84764099121094, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -278.039306640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7849107980728149, "rewards_train/margins": 14.747388243675232, "rewards_train/rejected": -15.532299041748047, "step": 2548 }, { "epoch": 1.26, "learning_rate": 6.73274932258609e-07, "loss": 0.0, "step": 2549 }, { "epoch": 1.26, "logps_train/chosen": -67.44575500488281, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -289.2850341796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.38061052560806274, "rewards_train/margins": 15.960051476955414, "rewards_train/rejected": -16.340662002563477, "step": 2549 }, { "epoch": 1.26, "learning_rate": 6.730203358218776e-07, "loss": 0.0, "step": 2550 }, { "epoch": 1.26, "logps_train/chosen": -71.49464416503906, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -284.410400390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8371597528457642, "rewards_train/margins": 15.262182593345642, "rewards_train/rejected": -16.099342346191406, "step": 2550 }, { "epoch": 1.26, "learning_rate": 6.727656884121877e-07, "loss": 0.0001, "step": 2551 }, { "epoch": 1.26, "logps_train/chosen": -75.93449401855469, "logps_train/ref_chosen": -68.4375, "logps_train/ref_rejected": -131.75, "logps_train/rejected": -302.23052978515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7506759166717529, "rewards_train/margins": 16.30021023750305, "rewards_train/rejected": -17.050886154174805, "step": 2551 }, { "epoch": 1.26, "learning_rate": 6.725109901045603e-07, "loss": 0.0, "step": 2552 }, { "epoch": 1.26, "logps_train/chosen": -73.29076385498047, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -284.16162109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8757563829421997, "rewards_train/margins": 15.000022768974304, "rewards_train/rejected": -15.875779151916504, "step": 2552 }, { "epoch": 1.26, "learning_rate": 6.722562409740311e-07, "loss": 0.0, "step": 2553 }, { "epoch": 1.26, "logps_train/chosen": -71.8927230834961, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -289.11346435546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5397604703903198, "rewards_train/margins": 15.356545567512512, "rewards_train/rejected": -15.896306037902832, "step": 2553 }, { "epoch": 1.26, "learning_rate": 6.72001441095651e-07, "loss": 0.0001, "step": 2554 }, { "epoch": 1.26, "logps_train/chosen": -75.17444610595703, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -122.125, "logps_train/rejected": -280.39556884765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.165881872177124, "rewards_train/margins": 14.660785913467407, "rewards_train/rejected": -15.826667785644531, "step": 2554 }, { "epoch": 1.26, "learning_rate": 6.717465905444859e-07, "loss": 0.0001, "step": 2555 }, { "epoch": 1.26, "logps_train/chosen": -72.9552230834961, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -297.1749267578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6757473945617676, "rewards_train/margins": 15.990569591522217, "rewards_train/rejected": -16.666316986083984, "step": 2555 }, { "epoch": 1.26, "learning_rate": 6.71491689395616e-07, "loss": 0.0, "step": 2556 }, { "epoch": 1.26, "logps_train/chosen": -72.11175537109375, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -285.3809814453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8207463026046753, "rewards_train/margins": 15.122920155525208, "rewards_train/rejected": -15.943666458129883, "step": 2556 }, { "epoch": 1.26, "learning_rate": 6.712367377241373e-07, "loss": 0.0, "step": 2557 }, { "epoch": 1.26, "logps_train/chosen": -74.42550659179688, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -292.228515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8330290913581848, "rewards_train/margins": 15.574586570262909, "rewards_train/rejected": -16.407615661621094, "step": 2557 }, { "epoch": 1.26, "learning_rate": 6.709817356051604e-07, "loss": 0.0, "step": 2558 }, { "epoch": 1.26, "logps_train/chosen": -74.08634948730469, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -291.8572998046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9601969718933105, "rewards_train/margins": 15.71044397354126, "rewards_train/rejected": -16.67064094543457, "step": 2558 }, { "epoch": 1.26, "learning_rate": 6.707266831138103e-07, "loss": 0.0, "step": 2559 }, { "epoch": 1.26, "logps_train/chosen": -71.03132629394531, "logps_train/ref_chosen": -62.78125, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -282.615478515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8235422372817993, "rewards_train/margins": 15.263591170310974, "rewards_train/rejected": -16.087133407592773, "step": 2559 }, { "epoch": 1.26, "learning_rate": 6.704715803252274e-07, "loss": 0.0003, "step": 2560 }, { "epoch": 1.26, "logps_train/chosen": -72.45340728759766, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -300.10791015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8216099739074707, "rewards_train/margins": 16.138789653778076, "rewards_train/rejected": -16.960399627685547, "step": 2560 }, { "epoch": 1.26, "learning_rate": 6.702164273145666e-07, "loss": 0.0, "step": 2561 }, { "epoch": 1.26, "logps_train/chosen": -70.64427185058594, "logps_train/ref_chosen": -62.40625, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -297.1676025390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8245832920074463, "rewards_train/margins": 16.13817000389099, "rewards_train/rejected": -16.962753295898438, "step": 2561 }, { "epoch": 1.26, "learning_rate": 6.699612241569977e-07, "loss": 0.0001, "step": 2562 }, { "epoch": 1.26, "logps_train/chosen": -73.37001037597656, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -277.23797607421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8439103364944458, "rewards_train/margins": 14.553569674491882, "rewards_train/rejected": -15.397480010986328, "step": 2562 }, { "epoch": 1.26, "learning_rate": 6.697059709277053e-07, "loss": 0.0004, "step": 2563 }, { "epoch": 1.26, "logps_train/chosen": -73.39777374267578, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -284.2829895019531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8483227491378784, "rewards_train/margins": 15.240914463996887, "rewards_train/rejected": -16.089237213134766, "step": 2563 }, { "epoch": 1.26, "learning_rate": 6.694506677018888e-07, "loss": 0.0, "step": 2564 }, { "epoch": 1.26, "logps_train/chosen": -73.3260498046875, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -293.06982421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7334344983100891, "rewards_train/margins": 15.650110423564911, "rewards_train/rejected": -16.383544921875, "step": 2564 }, { "epoch": 1.26, "learning_rate": 6.69195314554762e-07, "loss": 0.0, "step": 2565 }, { "epoch": 1.26, "logps_train/chosen": -72.93865966796875, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -300.307373046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6881533861160278, "rewards_train/margins": 16.26777732372284, "rewards_train/rejected": -16.955930709838867, "step": 2565 }, { "epoch": 1.26, "learning_rate": 6.689399115615542e-07, "loss": 0.0, "step": 2566 }, { "epoch": 1.26, "logps_train/chosen": -70.19294738769531, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -292.2528076171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5659991502761841, "rewards_train/margins": 15.763628840446472, "rewards_train/rejected": -16.329627990722656, "step": 2566 }, { "epoch": 1.26, "learning_rate": 6.686844587975081e-07, "loss": 0.0002, "step": 2567 }, { "epoch": 1.26, "logps_train/chosen": -70.56306457519531, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -290.2554931640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6343827843666077, "rewards_train/margins": 15.717725217342377, "rewards_train/rejected": -16.352108001708984, "step": 2567 }, { "epoch": 1.26, "learning_rate": 6.684289563378823e-07, "loss": 0.0, "step": 2568 }, { "epoch": 1.26, "logps_train/chosen": -72.97675323486328, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -289.16009521484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8723329901695251, "rewards_train/margins": 15.321018755435944, "rewards_train/rejected": -16.19335174560547, "step": 2568 }, { "epoch": 1.26, "learning_rate": 6.681734042579496e-07, "loss": 0.0, "step": 2569 }, { "epoch": 1.26, "logps_train/chosen": -76.38804626464844, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -292.56610107421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1783552169799805, "rewards_train/margins": 15.708867073059082, "rewards_train/rejected": -16.887222290039062, "step": 2569 }, { "epoch": 1.27, "learning_rate": 6.67917802632997e-07, "loss": 0.0, "step": 2570 }, { "epoch": 1.27, "logps_train/chosen": -71.36665344238281, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -287.20721435546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6076123714447021, "rewards_train/margins": 15.484935998916626, "rewards_train/rejected": -16.092548370361328, "step": 2570 }, { "epoch": 1.27, "learning_rate": 6.676621515383269e-07, "loss": 0.0, "step": 2571 }, { "epoch": 1.27, "logps_train/chosen": -77.83602142333984, "logps_train/ref_chosen": -68.375, "logps_train/ref_rejected": -132.0, "logps_train/rejected": -300.03790283203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9448810815811157, "rewards_train/margins": 15.863891959190369, "rewards_train/rejected": -16.808773040771484, "step": 2571 }, { "epoch": 1.27, "learning_rate": 6.674064510492557e-07, "loss": 0.0001, "step": 2572 }, { "epoch": 1.27, "logps_train/chosen": -71.60868835449219, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -285.4808349609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7918263673782349, "rewards_train/margins": 15.431939959526062, "rewards_train/rejected": -16.223766326904297, "step": 2572 }, { "epoch": 1.27, "learning_rate": 6.67150701241114e-07, "loss": 0.0, "step": 2573 }, { "epoch": 1.27, "logps_train/chosen": -75.98684692382812, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -291.5365295410156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9998563528060913, "rewards_train/margins": 15.669325470924377, "rewards_train/rejected": -16.66918182373047, "step": 2573 }, { "epoch": 1.27, "learning_rate": 6.668949021892481e-07, "loss": 0.0, "step": 2574 }, { "epoch": 1.27, "logps_train/chosen": -70.99296569824219, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -289.52362060546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8015918731689453, "rewards_train/margins": 15.511465072631836, "rewards_train/rejected": -16.31305694580078, "step": 2574 }, { "epoch": 1.27, "learning_rate": 6.666390539690178e-07, "loss": 0.0, "step": 2575 }, { "epoch": 1.27, "logps_train/chosen": -73.71238708496094, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -286.04888916015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0581525564193726, "rewards_train/margins": 15.254744172096252, "rewards_train/rejected": -16.312896728515625, "step": 2575 }, { "epoch": 1.27, "learning_rate": 6.663831566557976e-07, "loss": 0.0001, "step": 2576 }, { "epoch": 1.27, "logps_train/chosen": -72.46089172363281, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -287.71209716796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9763624668121338, "rewards_train/margins": 15.294458150863647, "rewards_train/rejected": -16.27082061767578, "step": 2576 }, { "epoch": 1.27, "learning_rate": 6.66127210324977e-07, "loss": 0.0, "step": 2577 }, { "epoch": 1.27, "logps_train/chosen": -78.27955627441406, "logps_train/ref_chosen": -67.1875, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -288.6068115234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.108668327331543, "rewards_train/margins": 15.13350772857666, "rewards_train/rejected": -16.242176055908203, "step": 2577 }, { "epoch": 1.27, "learning_rate": 6.658712150519589e-07, "loss": 0.0001, "step": 2578 }, { "epoch": 1.27, "logps_train/chosen": -70.7642822265625, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -285.54901123046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5995233058929443, "rewards_train/margins": 15.160112619400024, "rewards_train/rejected": -15.759635925292969, "step": 2578 }, { "epoch": 1.27, "learning_rate": 6.65615170912162e-07, "loss": 0.0006, "step": 2579 }, { "epoch": 1.27, "logps_train/chosen": -71.86614227294922, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -289.84332275390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8525806665420532, "rewards_train/margins": 15.716814398765564, "rewards_train/rejected": -16.569395065307617, "step": 2579 }, { "epoch": 1.27, "learning_rate": 6.65359077981018e-07, "loss": 0.0, "step": 2580 }, { "epoch": 1.27, "logps_train/chosen": -79.98726654052734, "logps_train/ref_chosen": -69.1875, "logps_train/ref_rejected": -132.75, "logps_train/rejected": -303.05609130859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0810024738311768, "rewards_train/margins": 15.949900388717651, "rewards_train/rejected": -17.030902862548828, "step": 2580 }, { "epoch": 1.27, "learning_rate": 6.651029363339738e-07, "loss": 0.0001, "step": 2581 }, { "epoch": 1.27, "logps_train/chosen": -73.70980834960938, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -287.108154296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8631686568260193, "rewards_train/margins": 15.283975660800934, "rewards_train/rejected": -16.147144317626953, "step": 2581 }, { "epoch": 1.27, "learning_rate": 6.648467460464906e-07, "loss": 0.0, "step": 2582 }, { "epoch": 1.27, "logps_train/chosen": -66.04243469238281, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -119.5, "logps_train/rejected": -271.2607727050781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2593221366405487, "rewards_train/margins": 14.915926963090897, "rewards_train/rejected": -15.175249099731445, "step": 2582 }, { "epoch": 1.27, "learning_rate": 6.645905071940435e-07, "loss": 0.0001, "step": 2583 }, { "epoch": 1.27, "logps_train/chosen": -72.339111328125, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -289.27593994140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7661375999450684, "rewards_train/margins": 15.514971256256104, "rewards_train/rejected": -16.281108856201172, "step": 2583 }, { "epoch": 1.27, "learning_rate": 6.643342198521225e-07, "loss": 0.0, "step": 2584 }, { "epoch": 1.27, "logps_train/chosen": -69.40174865722656, "logps_train/ref_chosen": -62.09375, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -285.3093566894531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7324109077453613, "rewards_train/margins": 15.655261516571045, "rewards_train/rejected": -16.387672424316406, "step": 2584 }, { "epoch": 1.27, "learning_rate": 6.640778840962314e-07, "loss": 0.0, "step": 2585 }, { "epoch": 1.27, "logps_train/chosen": -73.17048645019531, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -292.758056640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8504475355148315, "rewards_train/margins": 15.737176060676575, "rewards_train/rejected": -16.587623596191406, "step": 2585 }, { "epoch": 1.27, "learning_rate": 6.638215000018885e-07, "loss": 0.0, "step": 2586 }, { "epoch": 1.27, "logps_train/chosen": -73.67892456054688, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -293.53369140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0029512643814087, "rewards_train/margins": 15.856862425804138, "rewards_train/rejected": -16.859813690185547, "step": 2586 }, { "epoch": 1.27, "learning_rate": 6.635650676446264e-07, "loss": 0.0, "step": 2587 }, { "epoch": 1.27, "logps_train/chosen": -71.26823425292969, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -289.078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7377127408981323, "rewards_train/margins": 15.479768872261047, "rewards_train/rejected": -16.21748161315918, "step": 2587 }, { "epoch": 1.27, "learning_rate": 6.633085870999914e-07, "loss": 0.0, "step": 2588 }, { "epoch": 1.27, "logps_train/chosen": -76.2083740234375, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -290.9395446777344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9458866119384766, "rewards_train/margins": 15.446210861206055, "rewards_train/rejected": -16.39209747314453, "step": 2588 }, { "epoch": 1.27, "learning_rate": 6.630520584435449e-07, "loss": 0.0, "step": 2589 }, { "epoch": 1.27, "logps_train/chosen": -72.31867218017578, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -298.2414245605469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8226391077041626, "rewards_train/margins": 16.1349014043808, "rewards_train/rejected": -16.95754051208496, "step": 2589 }, { "epoch": 1.28, "learning_rate": 6.627954817508616e-07, "loss": 0.0, "step": 2590 }, { "epoch": 1.28, "logps_train/chosen": -70.91081237792969, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -280.0167236328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8176435232162476, "rewards_train/margins": 14.877581238746643, "rewards_train/rejected": -15.69522476196289, "step": 2590 }, { "epoch": 1.28, "learning_rate": 6.625388570975309e-07, "loss": 0.0, "step": 2591 }, { "epoch": 1.28, "logps_train/chosen": -73.88531494140625, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -289.79681396484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9573304653167725, "rewards_train/margins": 15.467857599258423, "rewards_train/rejected": -16.425188064575195, "step": 2591 }, { "epoch": 1.28, "learning_rate": 6.622821845591562e-07, "loss": 0.0001, "step": 2592 }, { "epoch": 1.28, "logps_train/chosen": -72.79690551757812, "logps_train/ref_chosen": -61.4375, "logps_train/ref_rejected": -120.5, "logps_train/rejected": -281.0829162597656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1349152326583862, "rewards_train/margins": 14.922057271003723, "rewards_train/rejected": -16.05697250366211, "step": 2592 }, { "epoch": 1.28, "learning_rate": 6.620254642113549e-07, "loss": 0.0, "step": 2593 }, { "epoch": 1.28, "logps_train/chosen": -75.44821166992188, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -283.71832275390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9410613775253296, "rewards_train/margins": 14.903281807899475, "rewards_train/rejected": -15.844343185424805, "step": 2593 }, { "epoch": 1.28, "learning_rate": 6.617686961297584e-07, "loss": 0.0, "step": 2594 }, { "epoch": 1.28, "logps_train/chosen": -71.31824493408203, "logps_train/ref_chosen": -62.40625, "logps_train/ref_rejected": -122.6875, "logps_train/rejected": -281.9403076171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8902720212936401, "rewards_train/margins": 15.035789608955383, "rewards_train/rejected": -15.926061630249023, "step": 2594 }, { "epoch": 1.28, "learning_rate": 6.615118803900124e-07, "loss": 0.0, "step": 2595 }, { "epoch": 1.28, "logps_train/chosen": -77.36994934082031, "logps_train/ref_chosen": -68.625, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -295.62860107421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8719074130058289, "rewards_train/margins": 15.87820702791214, "rewards_train/rejected": -16.75011444091797, "step": 2595 }, { "epoch": 1.28, "learning_rate": 6.612550170677767e-07, "loss": 0.0001, "step": 2596 }, { "epoch": 1.28, "logps_train/chosen": -71.29046630859375, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -121.8125, "logps_train/rejected": -283.90106201171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.879388153553009, "rewards_train/margins": 15.33317631483078, "rewards_train/rejected": -16.21256446838379, "step": 2596 }, { "epoch": 1.28, "learning_rate": 6.60998106238725e-07, "loss": 0.0001, "step": 2597 }, { "epoch": 1.28, "logps_train/chosen": -77.26873016357422, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -131.25, "logps_train/rejected": -304.8350830078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0908868312835693, "rewards_train/margins": 16.269575357437134, "rewards_train/rejected": -17.360462188720703, "step": 2597 }, { "epoch": 1.28, "learning_rate": 6.607411479785447e-07, "loss": 0.0, "step": 2598 }, { "epoch": 1.28, "logps_train/chosen": -74.08010864257812, "logps_train/ref_chosen": -61.9375, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -285.0208435058594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2123074531555176, "rewards_train/margins": 14.933623790740967, "rewards_train/rejected": -16.145931243896484, "step": 2598 }, { "epoch": 1.28, "learning_rate": 6.604841423629376e-07, "loss": 0.0007, "step": 2599 }, { "epoch": 1.28, "logps_train/chosen": -74.97066497802734, "logps_train/ref_chosen": -66.9375, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -292.03692626953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8049278259277344, "rewards_train/margins": 15.673080444335938, "rewards_train/rejected": -16.478008270263672, "step": 2599 }, { "epoch": 1.28, "learning_rate": 6.602270894676192e-07, "loss": 0.0001, "step": 2600 }, { "epoch": 1.28, "logps_train/chosen": -73.37680053710938, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -286.3915710449219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9884610772132874, "rewards_train/margins": 15.287318646907806, "rewards_train/rejected": -16.275779724121094, "step": 2600 }, { "epoch": 1.28, "learning_rate": 6.599699893683191e-07, "loss": 0.0, "step": 2601 }, { "epoch": 1.28, "logps_train/chosen": -74.43370056152344, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -295.25909423828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8706644773483276, "rewards_train/margins": 15.644651532173157, "rewards_train/rejected": -16.515316009521484, "step": 2601 }, { "epoch": 1.28, "learning_rate": 6.597128421407806e-07, "loss": 0.0, "step": 2602 }, { "epoch": 1.28, "logps_train/chosen": -70.08690643310547, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -285.2760009765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7405025959014893, "rewards_train/margins": 15.280848264694214, "rewards_train/rejected": -16.021350860595703, "step": 2602 }, { "epoch": 1.28, "learning_rate": 6.594556478607613e-07, "loss": 0.0001, "step": 2603 }, { "epoch": 1.28, "logps_train/chosen": -74.64879608154297, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -292.7612609863281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9131218194961548, "rewards_train/margins": 15.42501676082611, "rewards_train/rejected": -16.338138580322266, "step": 2603 }, { "epoch": 1.28, "learning_rate": 6.591984066040319e-07, "loss": 0.0001, "step": 2604 }, { "epoch": 1.28, "logps_train/chosen": -67.95124816894531, "logps_train/ref_chosen": -61.1875, "logps_train/ref_rejected": -119.375, "logps_train/rejected": -277.9072265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6765211820602417, "rewards_train/margins": 15.178458333015442, "rewards_train/rejected": -15.854979515075684, "step": 2604 }, { "epoch": 1.28, "learning_rate": 6.589411184463778e-07, "loss": 0.0004, "step": 2605 }, { "epoch": 1.28, "logps_train/chosen": -72.68991088867188, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -119.1875, "logps_train/rejected": -276.1611022949219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7340549230575562, "rewards_train/margins": 14.962378144264221, "rewards_train/rejected": -15.696433067321777, "step": 2605 }, { "epoch": 1.28, "learning_rate": 6.586837834635978e-07, "loss": 0.0, "step": 2606 }, { "epoch": 1.28, "logps_train/chosen": -75.29754638671875, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -131.5, "logps_train/rejected": -298.6758117675781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1170600652694702, "rewards_train/margins": 15.601401925086975, "rewards_train/rejected": -16.718461990356445, "step": 2606 }, { "epoch": 1.28, "learning_rate": 6.584264017315044e-07, "loss": 0.0, "step": 2607 }, { "epoch": 1.28, "logps_train/chosen": -68.59545135498047, "logps_train/ref_chosen": -61.90625, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -281.1875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6703846454620361, "rewards_train/margins": 15.282936811447144, "rewards_train/rejected": -15.95332145690918, "step": 2607 }, { "epoch": 1.28, "learning_rate": 6.581689733259239e-07, "loss": 0.0, "step": 2608 }, { "epoch": 1.28, "logps_train/chosen": -71.43101501464844, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -297.6210632324219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5657089948654175, "rewards_train/margins": 16.41504967212677, "rewards_train/rejected": -16.980758666992188, "step": 2608 }, { "epoch": 1.28, "learning_rate": 6.579114983226968e-07, "loss": 0.0, "step": 2609 }, { "epoch": 1.28, "logps_train/chosen": -75.25369262695312, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -288.2998962402344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0716087818145752, "rewards_train/margins": 15.097248792648315, "rewards_train/rejected": -16.16885757446289, "step": 2609 }, { "epoch": 1.29, "learning_rate": 6.576539767976765e-07, "loss": 0.0, "step": 2610 }, { "epoch": 1.29, "logps_train/chosen": -75.45985412597656, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -130.625, "logps_train/rejected": -294.5494384765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8931782245635986, "rewards_train/margins": 15.503758192062378, "rewards_train/rejected": -16.396936416625977, "step": 2610 }, { "epoch": 1.29, "learning_rate": 6.57396408826731e-07, "loss": 0.0, "step": 2611 }, { "epoch": 1.29, "logps_train/chosen": -77.10308837890625, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -284.5233459472656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1465888023376465, "rewards_train/margins": 14.939634799957275, "rewards_train/rejected": -16.086223602294922, "step": 2611 }, { "epoch": 1.29, "learning_rate": 6.571387944857414e-07, "loss": 0.0003, "step": 2612 }, { "epoch": 1.29, "logps_train/chosen": -69.38689422607422, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -118.4375, "logps_train/rejected": -272.2900390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6630793213844299, "rewards_train/margins": 14.722714364528656, "rewards_train/rejected": -15.385793685913086, "step": 2612 }, { "epoch": 1.29, "learning_rate": 6.568811338506025e-07, "loss": 0.0, "step": 2613 }, { "epoch": 1.29, "logps_train/chosen": -71.45870208740234, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -130.625, "logps_train/rejected": -294.1640319824219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6115442514419556, "rewards_train/margins": 15.742261290550232, "rewards_train/rejected": -16.353805541992188, "step": 2613 }, { "epoch": 1.29, "learning_rate": 6.56623426997223e-07, "loss": 0.0, "step": 2614 }, { "epoch": 1.29, "logps_train/chosen": -72.7734375, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -295.744873046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7227056622505188, "rewards_train/margins": 15.951440989971161, "rewards_train/rejected": -16.67414665222168, "step": 2614 }, { "epoch": 1.29, "learning_rate": 6.56365674001525e-07, "loss": 0.0001, "step": 2615 }, { "epoch": 1.29, "logps_train/chosen": -72.37702941894531, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -290.3076477050781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6823311448097229, "rewards_train/margins": 15.776168763637543, "rewards_train/rejected": -16.458499908447266, "step": 2615 }, { "epoch": 1.29, "learning_rate": 6.561078749394444e-07, "loss": 0.0, "step": 2616 }, { "epoch": 1.29, "logps_train/chosen": -76.96183013916016, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -131.375, "logps_train/rejected": -296.84698486328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.330460548400879, "rewards_train/margins": 15.214293479919434, "rewards_train/rejected": -16.544754028320312, "step": 2616 }, { "epoch": 1.29, "learning_rate": 6.558500298869304e-07, "loss": 0.0, "step": 2617 }, { "epoch": 1.29, "logps_train/chosen": -76.35026550292969, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -298.56732177734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0000660419464111, "rewards_train/margins": 16.148707151412964, "rewards_train/rejected": -17.148773193359375, "step": 2617 }, { "epoch": 1.29, "learning_rate": 6.555921389199461e-07, "loss": 0.0, "step": 2618 }, { "epoch": 1.29, "logps_train/chosen": -74.46083068847656, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -297.8575439453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.75682532787323, "rewards_train/margins": 16.03073227405548, "rewards_train/rejected": -16.78755760192871, "step": 2618 }, { "epoch": 1.29, "learning_rate": 6.553342021144676e-07, "loss": 0.0005, "step": 2619 }, { "epoch": 1.29, "logps_train/chosen": -76.88841247558594, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -299.24285888671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0176496505737305, "rewards_train/margins": 15.919234275817871, "rewards_train/rejected": -16.9368839263916, "step": 2619 }, { "epoch": 1.29, "learning_rate": 6.550762195464849e-07, "loss": 0.0, "step": 2620 }, { "epoch": 1.29, "logps_train/chosen": -70.4369125366211, "logps_train/ref_chosen": -61.625, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -285.4964904785156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8807029128074646, "rewards_train/margins": 15.237012922763824, "rewards_train/rejected": -16.11771583557129, "step": 2620 }, { "epoch": 1.29, "learning_rate": 6.548181912920018e-07, "loss": 0.0, "step": 2621 }, { "epoch": 1.29, "logps_train/chosen": -70.87289428710938, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -295.34356689453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7254238128662109, "rewards_train/margins": 16.161911010742188, "rewards_train/rejected": -16.8873348236084, "step": 2621 }, { "epoch": 1.29, "learning_rate": 6.545601174270346e-07, "loss": 0.0, "step": 2622 }, { "epoch": 1.29, "logps_train/chosen": -73.65493774414062, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -278.58233642578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0246243476867676, "rewards_train/margins": 14.490739345550537, "rewards_train/rejected": -15.515363693237305, "step": 2622 }, { "epoch": 1.29, "learning_rate": 6.543019980276141e-07, "loss": 0.0, "step": 2623 }, { "epoch": 1.29, "logps_train/chosen": -73.11166381835938, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -282.8697204589844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9748378992080688, "rewards_train/margins": 14.938207983970642, "rewards_train/rejected": -15.913045883178711, "step": 2623 }, { "epoch": 1.29, "learning_rate": 6.540438331697838e-07, "loss": 0.0001, "step": 2624 }, { "epoch": 1.29, "logps_train/chosen": -78.2778091430664, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -298.2892150878906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4935039281845093, "rewards_train/margins": 15.710321068763733, "rewards_train/rejected": -17.203824996948242, "step": 2624 }, { "epoch": 1.29, "learning_rate": 6.537856229296006e-07, "loss": 0.0017, "step": 2625 }, { "epoch": 1.29, "logps_train/chosen": -70.11909484863281, "logps_train/ref_chosen": -61.15625, "logps_train/ref_rejected": -123.3125, "logps_train/rejected": -284.95416259765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8966512084007263, "rewards_train/margins": 15.266490042209625, "rewards_train/rejected": -16.16314125061035, "step": 2625 }, { "epoch": 1.29, "learning_rate": 6.535273673831353e-07, "loss": 0.0001, "step": 2626 }, { "epoch": 1.29, "logps_train/chosen": -74.21172332763672, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -286.77679443359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.036358118057251, "rewards_train/margins": 15.357337713241577, "rewards_train/rejected": -16.393695831298828, "step": 2626 }, { "epoch": 1.29, "learning_rate": 6.532690666064716e-07, "loss": 0.0, "step": 2627 }, { "epoch": 1.29, "logps_train/chosen": -76.01774597167969, "logps_train/ref_chosen": -67.625, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -288.25921630859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.838395357131958, "rewards_train/margins": 15.175028562545776, "rewards_train/rejected": -16.013423919677734, "step": 2627 }, { "epoch": 1.29, "learning_rate": 6.530107206757066e-07, "loss": 0.0003, "step": 2628 }, { "epoch": 1.29, "logps_train/chosen": -71.33016204833984, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -279.4527893066406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7334558963775635, "rewards_train/margins": 15.047272443771362, "rewards_train/rejected": -15.780728340148926, "step": 2628 }, { "epoch": 1.29, "learning_rate": 6.527523296669509e-07, "loss": 0.0002, "step": 2629 }, { "epoch": 1.29, "logps_train/chosen": -69.78408813476562, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -284.4998474121094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7455471754074097, "rewards_train/margins": 15.259514927864075, "rewards_train/rejected": -16.005062103271484, "step": 2629 }, { "epoch": 1.29, "learning_rate": 6.524938936563281e-07, "loss": 0.0005, "step": 2630 }, { "epoch": 1.29, "logps_train/chosen": -73.31069946289062, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -287.2845458984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9990383386611938, "rewards_train/margins": 15.07101857662201, "rewards_train/rejected": -16.070056915283203, "step": 2630 }, { "epoch": 1.3, "learning_rate": 6.52235412719975e-07, "loss": 0.0001, "step": 2631 }, { "epoch": 1.3, "logps_train/chosen": -79.87144470214844, "logps_train/ref_chosen": -68.0, "logps_train/ref_rejected": -133.0, "logps_train/rejected": -299.69830322265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1877796649932861, "rewards_train/margins": 15.484003782272339, "rewards_train/rejected": -16.671783447265625, "step": 2631 }, { "epoch": 1.3, "learning_rate": 6.519768869340424e-07, "loss": 0.0, "step": 2632 }, { "epoch": 1.3, "logps_train/chosen": -74.90937805175781, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -289.7078857421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.897822380065918, "rewards_train/margins": 15.393082618713379, "rewards_train/rejected": -16.290904998779297, "step": 2632 }, { "epoch": 1.3, "learning_rate": 6.517183163746933e-07, "loss": 0.0, "step": 2633 }, { "epoch": 1.3, "logps_train/chosen": -72.70965576171875, "logps_train/ref_chosen": -61.9375, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -283.39776611328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0763366222381592, "rewards_train/margins": 14.847377061843872, "rewards_train/rejected": -15.923713684082031, "step": 2633 }, { "epoch": 1.3, "learning_rate": 6.514597011181044e-07, "loss": 0.0001, "step": 2634 }, { "epoch": 1.3, "logps_train/chosen": -74.6063461303711, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -287.9105529785156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0420799255371094, "rewards_train/margins": 15.24819564819336, "rewards_train/rejected": -16.29027557373047, "step": 2634 }, { "epoch": 1.3, "learning_rate": 6.512010412404657e-07, "loss": 0.0001, "step": 2635 }, { "epoch": 1.3, "logps_train/chosen": -69.24520874023438, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -293.35595703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6144617795944214, "rewards_train/margins": 15.91005527973175, "rewards_train/rejected": -16.524517059326172, "step": 2635 }, { "epoch": 1.3, "learning_rate": 6.509423368179799e-07, "loss": 0.0, "step": 2636 }, { "epoch": 1.3, "logps_train/chosen": -72.9948501586914, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -300.88836669921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8753151893615723, "rewards_train/margins": 16.11254644393921, "rewards_train/rejected": -16.98786163330078, "step": 2636 }, { "epoch": 1.3, "learning_rate": 6.50683587926863e-07, "loss": 0.0001, "step": 2637 }, { "epoch": 1.3, "logps_train/chosen": -69.80906677246094, "logps_train/ref_chosen": -62.40625, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -285.68701171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.740330159664154, "rewards_train/margins": 15.024319231510162, "rewards_train/rejected": -15.764649391174316, "step": 2637 }, { "epoch": 1.3, "learning_rate": 6.504247946433444e-07, "loss": 0.0003, "step": 2638 }, { "epoch": 1.3, "logps_train/chosen": -71.2347640991211, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -289.0745849609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7462788820266724, "rewards_train/margins": 15.562351107597351, "rewards_train/rejected": -16.308629989624023, "step": 2638 }, { "epoch": 1.3, "learning_rate": 6.501659570436664e-07, "loss": 0.0, "step": 2639 }, { "epoch": 1.3, "logps_train/chosen": -74.64912414550781, "logps_train/ref_chosen": -68.25, "logps_train/ref_rejected": -130.875, "logps_train/rejected": -297.28466796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.641425609588623, "rewards_train/margins": 16.003349781036377, "rewards_train/rejected": -16.644775390625, "step": 2639 }, { "epoch": 1.3, "learning_rate": 6.49907075204084e-07, "loss": 0.0, "step": 2640 }, { "epoch": 1.3, "logps_train/chosen": -68.81510162353516, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -290.86212158203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3569982647895813, "rewards_train/margins": 15.943767726421356, "rewards_train/rejected": -16.300765991210938, "step": 2640 }, { "epoch": 1.3, "learning_rate": 6.496481492008657e-07, "loss": 0.0, "step": 2641 }, { "epoch": 1.3, "logps_train/chosen": -77.035400390625, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -283.9292907714844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.160228967666626, "rewards_train/margins": 14.668441534042358, "rewards_train/rejected": -15.828670501708984, "step": 2641 }, { "epoch": 1.3, "learning_rate": 6.493891791102929e-07, "loss": 0.0001, "step": 2642 }, { "epoch": 1.3, "logps_train/chosen": -78.29193115234375, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -296.1680908203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2008737325668335, "rewards_train/margins": 15.597382187843323, "rewards_train/rejected": -16.798255920410156, "step": 2642 }, { "epoch": 1.3, "learning_rate": 6.4913016500866e-07, "loss": 0.0, "step": 2643 }, { "epoch": 1.3, "logps_train/chosen": -64.55300903320312, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -122.0625, "logps_train/rejected": -277.6593933105469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.18352366983890533, "rewards_train/margins": 15.376166805624962, "rewards_train/rejected": -15.559690475463867, "step": 2643 }, { "epoch": 1.3, "learning_rate": 6.488711069722741e-07, "loss": 0.0002, "step": 2644 }, { "epoch": 1.3, "logps_train/chosen": -72.26795196533203, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -289.11834716796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6255255937576294, "rewards_train/margins": 15.792755961418152, "rewards_train/rejected": -16.41828155517578, "step": 2644 }, { "epoch": 1.3, "learning_rate": 6.486120050774555e-07, "loss": 0.0, "step": 2645 }, { "epoch": 1.3, "logps_train/chosen": -73.07096862792969, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -294.44744873046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7730396389961243, "rewards_train/margins": 15.874974429607391, "rewards_train/rejected": -16.648014068603516, "step": 2645 }, { "epoch": 1.3, "learning_rate": 6.483528594005373e-07, "loss": 0.0, "step": 2646 }, { "epoch": 1.3, "logps_train/chosen": -72.96463012695312, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -280.54742431640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8269320726394653, "rewards_train/margins": 14.621559977531433, "rewards_train/rejected": -15.448492050170898, "step": 2646 }, { "epoch": 1.3, "learning_rate": 6.480936700178658e-07, "loss": 0.0001, "step": 2647 }, { "epoch": 1.3, "logps_train/chosen": -69.17864227294922, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -122.3125, "logps_train/rejected": -277.771240234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5953543186187744, "rewards_train/margins": 14.947442770004272, "rewards_train/rejected": -15.542797088623047, "step": 2647 }, { "epoch": 1.3, "learning_rate": 6.478344370057999e-07, "loss": 0.0, "step": 2648 }, { "epoch": 1.3, "logps_train/chosen": -75.29531860351562, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -286.2020568847656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0473544597625732, "rewards_train/margins": 14.9358389377594, "rewards_train/rejected": -15.983193397521973, "step": 2648 }, { "epoch": 1.3, "learning_rate": 6.475751604407113e-07, "loss": 0.0001, "step": 2649 }, { "epoch": 1.3, "logps_train/chosen": -71.91935729980469, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -121.0625, "logps_train/rejected": -272.4703063964844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8255301117897034, "rewards_train/margins": 14.315153062343597, "rewards_train/rejected": -15.1406831741333, "step": 2649 }, { "epoch": 1.3, "learning_rate": 6.473158403989846e-07, "loss": 0.0002, "step": 2650 }, { "epoch": 1.3, "logps_train/chosen": -67.69190979003906, "logps_train/ref_chosen": -61.6875, "logps_train/ref_rejected": -120.9375, "logps_train/rejected": -281.671142578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6002943515777588, "rewards_train/margins": 15.471851110458374, "rewards_train/rejected": -16.072145462036133, "step": 2650 }, { "epoch": 1.31, "learning_rate": 6.470564769570172e-07, "loss": 0.0001, "step": 2651 }, { "epoch": 1.31, "logps_train/chosen": -73.94947052001953, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -291.23687744140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.98781818151474, "rewards_train/margins": 15.472444117069244, "rewards_train/rejected": -16.460262298583984, "step": 2651 }, { "epoch": 1.31, "learning_rate": 6.467970701912193e-07, "loss": 0.0001, "step": 2652 }, { "epoch": 1.31, "logps_train/chosen": -67.285888671875, "logps_train/ref_chosen": -60.25, "logps_train/ref_rejected": -118.6875, "logps_train/rejected": -271.50341796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7024655342102051, "rewards_train/margins": 14.578933238983154, "rewards_train/rejected": -15.28139877319336, "step": 2652 }, { "epoch": 1.31, "learning_rate": 6.465376201780142e-07, "loss": 0.0001, "step": 2653 }, { "epoch": 1.31, "logps_train/chosen": -72.78758239746094, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -285.50531005859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8815411925315857, "rewards_train/margins": 15.114207327365875, "rewards_train/rejected": -15.995748519897461, "step": 2653 }, { "epoch": 1.31, "learning_rate": 6.462781269938372e-07, "loss": 0.0001, "step": 2654 }, { "epoch": 1.31, "logps_train/chosen": -69.58905029296875, "logps_train/ref_chosen": -62.59375, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -279.9018859863281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7011420726776123, "rewards_train/margins": 14.810190439224243, "rewards_train/rejected": -15.511332511901855, "step": 2654 }, { "epoch": 1.31, "learning_rate": 6.460185907151371e-07, "loss": 0.0002, "step": 2655 }, { "epoch": 1.31, "logps_train/chosen": -74.54084777832031, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -288.446044921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.791975736618042, "rewards_train/margins": 15.374793291091919, "rewards_train/rejected": -16.16676902770996, "step": 2655 }, { "epoch": 1.31, "learning_rate": 6.457590114183749e-07, "loss": 0.0, "step": 2656 }, { "epoch": 1.31, "logps_train/chosen": -69.89696502685547, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -286.5850830078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5057367086410522, "rewards_train/margins": 15.661659836769104, "rewards_train/rejected": -16.167396545410156, "step": 2656 }, { "epoch": 1.31, "learning_rate": 6.454993891800242e-07, "loss": 0.0, "step": 2657 }, { "epoch": 1.31, "logps_train/chosen": -72.92601776123047, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -290.2977600097656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.767699122428894, "rewards_train/margins": 15.656413197517395, "rewards_train/rejected": -16.42411231994629, "step": 2657 }, { "epoch": 1.31, "learning_rate": 6.452397240765717e-07, "loss": 0.0, "step": 2658 }, { "epoch": 1.31, "logps_train/chosen": -72.2950439453125, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -300.33184814453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6925904750823975, "rewards_train/margins": 16.52184510231018, "rewards_train/rejected": -17.214435577392578, "step": 2658 }, { "epoch": 1.31, "learning_rate": 6.449800161845166e-07, "loss": 0.0, "step": 2659 }, { "epoch": 1.31, "logps_train/chosen": -69.79963684082031, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -289.71435546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5638015866279602, "rewards_train/margins": 15.693667590618134, "rewards_train/rejected": -16.257469177246094, "step": 2659 }, { "epoch": 1.31, "learning_rate": 6.447202655803703e-07, "loss": 0.0, "step": 2660 }, { "epoch": 1.31, "logps_train/chosen": -72.94951629638672, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -285.15704345703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6554009914398193, "rewards_train/margins": 15.093067407608032, "rewards_train/rejected": -15.748468399047852, "step": 2660 }, { "epoch": 1.31, "learning_rate": 6.444604723406573e-07, "loss": 0.0001, "step": 2661 }, { "epoch": 1.31, "logps_train/chosen": -76.41893005371094, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -294.4347839355469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9313950538635254, "rewards_train/margins": 15.463353633880615, "rewards_train/rejected": -16.39474868774414, "step": 2661 }, { "epoch": 1.31, "learning_rate": 6.442006365419141e-07, "loss": 0.0, "step": 2662 }, { "epoch": 1.31, "logps_train/chosen": -72.58944702148438, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -284.46197509765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.788681149482727, "rewards_train/margins": 14.842670321464539, "rewards_train/rejected": -15.631351470947266, "step": 2662 }, { "epoch": 1.31, "learning_rate": 6.439407582606906e-07, "loss": 0.0, "step": 2663 }, { "epoch": 1.31, "logps_train/chosen": -74.10906982421875, "logps_train/ref_chosen": -62.40625, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -289.8973083496094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1690120697021484, "rewards_train/margins": 15.279504776000977, "rewards_train/rejected": -16.448516845703125, "step": 2663 }, { "epoch": 1.31, "learning_rate": 6.436808375735485e-07, "loss": 0.0004, "step": 2664 }, { "epoch": 1.31, "logps_train/chosen": -70.83467864990234, "logps_train/ref_chosen": -62.5625, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -292.2894287109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8267779350280762, "rewards_train/margins": 15.866812229156494, "rewards_train/rejected": -16.69359016418457, "step": 2664 }, { "epoch": 1.31, "learning_rate": 6.434208745570621e-07, "loss": 0.0, "step": 2665 }, { "epoch": 1.31, "logps_train/chosen": -73.55013275146484, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -286.84893798828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7181482315063477, "rewards_train/margins": 15.417040824890137, "rewards_train/rejected": -16.135189056396484, "step": 2665 }, { "epoch": 1.31, "learning_rate": 6.431608692878181e-07, "loss": 0.0, "step": 2666 }, { "epoch": 1.31, "logps_train/chosen": -73.08729553222656, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -292.71759033203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7958875894546509, "rewards_train/margins": 15.753508925437927, "rewards_train/rejected": -16.549396514892578, "step": 2666 }, { "epoch": 1.31, "learning_rate": 6.42900821842416e-07, "loss": 0.0, "step": 2667 }, { "epoch": 1.31, "logps_train/chosen": -72.09927368164062, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -295.5325927734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7564114928245544, "rewards_train/margins": 15.995236456394196, "rewards_train/rejected": -16.75164794921875, "step": 2667 }, { "epoch": 1.31, "learning_rate": 6.426407322974678e-07, "loss": 0.0, "step": 2668 }, { "epoch": 1.31, "logps_train/chosen": -69.6583023071289, "logps_train/ref_chosen": -60.65625, "logps_train/ref_rejected": -120.4375, "logps_train/rejected": -277.5179748535156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8989357948303223, "rewards_train/margins": 14.810187816619873, "rewards_train/rejected": -15.709123611450195, "step": 2668 }, { "epoch": 1.31, "learning_rate": 6.423806007295971e-07, "loss": 0.0001, "step": 2669 }, { "epoch": 1.31, "logps_train/chosen": -69.76046752929688, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -127.5625, "logps_train/rejected": -288.7783203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6059291362762451, "rewards_train/margins": 15.519168138504028, "rewards_train/rejected": -16.125097274780273, "step": 2669 }, { "epoch": 1.31, "learning_rate": 6.421204272154408e-07, "loss": 0.0, "step": 2670 }, { "epoch": 1.31, "logps_train/chosen": -69.47222900390625, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -281.4397277832031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4943910837173462, "rewards_train/margins": 15.399775862693787, "rewards_train/rejected": -15.894166946411133, "step": 2670 }, { "epoch": 1.32, "learning_rate": 6.418602118316475e-07, "loss": 0.0, "step": 2671 }, { "epoch": 1.32, "logps_train/chosen": -73.33419036865234, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -120.0625, "logps_train/rejected": -275.7098388671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8492391109466553, "rewards_train/margins": 14.716861963272095, "rewards_train/rejected": -15.56610107421875, "step": 2671 }, { "epoch": 1.32, "learning_rate": 6.415999546548787e-07, "loss": 0.0001, "step": 2672 }, { "epoch": 1.32, "logps_train/chosen": -77.37969207763672, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -132.5, "logps_train/rejected": -298.0857849121094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.057060718536377, "rewards_train/margins": 15.501712322235107, "rewards_train/rejected": -16.558773040771484, "step": 2672 }, { "epoch": 1.32, "learning_rate": 6.413396557618077e-07, "loss": 0.0, "step": 2673 }, { "epoch": 1.32, "logps_train/chosen": -71.28825378417969, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -283.6991882324219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7240647673606873, "rewards_train/margins": 15.11401754617691, "rewards_train/rejected": -15.838082313537598, "step": 2673 }, { "epoch": 1.32, "learning_rate": 6.410793152291204e-07, "loss": 0.0, "step": 2674 }, { "epoch": 1.32, "logps_train/chosen": -73.49098205566406, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -285.6538391113281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8968521356582642, "rewards_train/margins": 15.259447455406189, "rewards_train/rejected": -16.156299591064453, "step": 2674 }, { "epoch": 1.32, "learning_rate": 6.40818933133515e-07, "loss": 0.0001, "step": 2675 }, { "epoch": 1.32, "logps_train/chosen": -75.98612976074219, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -293.45562744140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0396039485931396, "rewards_train/margins": 15.380615472793579, "rewards_train/rejected": -16.42021942138672, "step": 2675 }, { "epoch": 1.32, "learning_rate": 6.405585095517017e-07, "loss": 0.0001, "step": 2676 }, { "epoch": 1.32, "logps_train/chosen": -73.45919036865234, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -282.26092529296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8512901067733765, "rewards_train/margins": 14.879390358924866, "rewards_train/rejected": -15.730680465698242, "step": 2676 }, { "epoch": 1.32, "learning_rate": 6.402980445604027e-07, "loss": 0.0001, "step": 2677 }, { "epoch": 1.32, "logps_train/chosen": -72.66571044921875, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -286.2437744140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8054870367050171, "rewards_train/margins": 15.211175560951233, "rewards_train/rejected": -16.01666259765625, "step": 2677 }, { "epoch": 1.32, "learning_rate": 6.400375382363532e-07, "loss": 0.0001, "step": 2678 }, { "epoch": 1.32, "logps_train/chosen": -72.23023223876953, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -285.4709777832031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7925057411193848, "rewards_train/margins": 15.207570552825928, "rewards_train/rejected": -16.000076293945312, "step": 2678 }, { "epoch": 1.32, "learning_rate": 6.397769906563001e-07, "loss": 0.0, "step": 2679 }, { "epoch": 1.32, "logps_train/chosen": -72.3606948852539, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -282.0457458496094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8033545613288879, "rewards_train/margins": 15.007081687450409, "rewards_train/rejected": -15.810436248779297, "step": 2679 }, { "epoch": 1.32, "learning_rate": 6.395164018970024e-07, "loss": 0.0001, "step": 2680 }, { "epoch": 1.32, "logps_train/chosen": -64.28787231445312, "logps_train/ref_chosen": -61.46875, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -281.46514892578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2811311185359955, "rewards_train/margins": 15.334131866693497, "rewards_train/rejected": -15.615262985229492, "step": 2680 }, { "epoch": 1.32, "learning_rate": 6.392557720352313e-07, "loss": 0.0, "step": 2681 }, { "epoch": 1.32, "logps_train/chosen": -70.74493408203125, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -281.2356262207031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7091615200042725, "rewards_train/margins": 14.994089365005493, "rewards_train/rejected": -15.703250885009766, "step": 2681 }, { "epoch": 1.32, "learning_rate": 6.389951011477701e-07, "loss": 0.0, "step": 2682 }, { "epoch": 1.32, "logps_train/chosen": -75.7760009765625, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -287.4719543457031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.119250774383545, "rewards_train/margins": 14.992250919342041, "rewards_train/rejected": -16.111501693725586, "step": 2682 }, { "epoch": 1.32, "learning_rate": 6.387343893114139e-07, "loss": 0.0001, "step": 2683 }, { "epoch": 1.32, "logps_train/chosen": -72.06721496582031, "logps_train/ref_chosen": -62.4375, "logps_train/ref_rejected": -120.8125, "logps_train/rejected": -284.4598388671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9644848704338074, "rewards_train/margins": 15.39990645647049, "rewards_train/rejected": -16.364391326904297, "step": 2683 }, { "epoch": 1.32, "learning_rate": 6.384736366029708e-07, "loss": 0.0, "step": 2684 }, { "epoch": 1.32, "logps_train/chosen": -69.88302612304688, "logps_train/ref_chosen": -61.625, "logps_train/ref_rejected": -121.875, "logps_train/rejected": -283.0760803222656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.823360800743103, "rewards_train/margins": 15.295282006263733, "rewards_train/rejected": -16.118642807006836, "step": 2684 }, { "epoch": 1.32, "learning_rate": 6.382128430992599e-07, "loss": 0.0, "step": 2685 }, { "epoch": 1.32, "logps_train/chosen": -70.56787109375, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -120.0, "logps_train/rejected": -268.09930419921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8329099416732788, "rewards_train/margins": 13.98043692111969, "rewards_train/rejected": -14.813346862792969, "step": 2685 }, { "epoch": 1.32, "learning_rate": 6.379520088771126e-07, "loss": 0.0002, "step": 2686 }, { "epoch": 1.32, "logps_train/chosen": -75.31996154785156, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -297.34942626953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9944958090782166, "rewards_train/margins": 15.855727732181549, "rewards_train/rejected": -16.850223541259766, "step": 2686 }, { "epoch": 1.32, "learning_rate": 6.376911340133728e-07, "loss": 0.0, "step": 2687 }, { "epoch": 1.32, "logps_train/chosen": -67.86186218261719, "logps_train/ref_chosen": -61.5, "logps_train/ref_rejected": -117.5, "logps_train/rejected": -271.0965576171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6357467174530029, "rewards_train/margins": 14.72527527809143, "rewards_train/rejected": -15.361021995544434, "step": 2687 }, { "epoch": 1.32, "learning_rate": 6.374302185848958e-07, "loss": 0.0001, "step": 2688 }, { "epoch": 1.32, "logps_train/chosen": -71.16610717773438, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -286.39849853515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.79073166847229, "rewards_train/margins": 15.246774435043335, "rewards_train/rejected": -16.037506103515625, "step": 2688 }, { "epoch": 1.32, "learning_rate": 6.37169262668549e-07, "loss": 0.0001, "step": 2689 }, { "epoch": 1.32, "logps_train/chosen": -66.34791564941406, "logps_train/ref_chosen": -61.25, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -292.6023254394531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.509962797164917, "rewards_train/margins": 16.27102017402649, "rewards_train/rejected": -16.780982971191406, "step": 2689 }, { "epoch": 1.32, "learning_rate": 6.369082663412119e-07, "loss": 0.0, "step": 2690 }, { "epoch": 1.32, "logps_train/chosen": -71.38421630859375, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -300.9215087890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6322206258773804, "rewards_train/margins": 16.458222031593323, "rewards_train/rejected": -17.090442657470703, "step": 2690 }, { "epoch": 1.32, "learning_rate": 6.366472296797757e-07, "loss": 0.0, "step": 2691 }, { "epoch": 1.32, "logps_train/chosen": -70.37091064453125, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -284.75592041015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4718077778816223, "rewards_train/margins": 15.518775641918182, "rewards_train/rejected": -15.990583419799805, "step": 2691 }, { "epoch": 1.33, "learning_rate": 6.363861527611435e-07, "loss": 0.0001, "step": 2692 }, { "epoch": 1.33, "logps_train/chosen": -72.28042602539062, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -301.949462890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8040191531181335, "rewards_train/margins": 16.51211816072464, "rewards_train/rejected": -17.316137313842773, "step": 2692 }, { "epoch": 1.33, "learning_rate": 6.361250356622305e-07, "loss": 0.0, "step": 2693 }, { "epoch": 1.33, "logps_train/chosen": -74.17401123046875, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -294.97930908203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8252620100975037, "rewards_train/margins": 15.671789228916168, "rewards_train/rejected": -16.497051239013672, "step": 2693 }, { "epoch": 1.33, "learning_rate": 6.358638784599634e-07, "loss": 0.0, "step": 2694 }, { "epoch": 1.33, "logps_train/chosen": -76.1418685913086, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -295.47503662109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9542746543884277, "rewards_train/margins": 15.81286096572876, "rewards_train/rejected": -16.767135620117188, "step": 2694 }, { "epoch": 1.33, "learning_rate": 6.356026812312808e-07, "loss": 0.0, "step": 2695 }, { "epoch": 1.33, "logps_train/chosen": -72.98043060302734, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -287.3854064941406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6602009534835815, "rewards_train/margins": 15.449044346809387, "rewards_train/rejected": -16.10924530029297, "step": 2695 }, { "epoch": 1.33, "learning_rate": 6.353414440531335e-07, "loss": 0.0002, "step": 2696 }, { "epoch": 1.33, "logps_train/chosen": -77.332275390625, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -132.375, "logps_train/rejected": -295.893798828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0354745388031006, "rewards_train/margins": 15.312597513198853, "rewards_train/rejected": -16.348072052001953, "step": 2696 }, { "epoch": 1.33, "learning_rate": 6.350801670024834e-07, "loss": 0.0, "step": 2697 }, { "epoch": 1.33, "logps_train/chosen": -73.43290710449219, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -285.1260070800781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9006640911102295, "rewards_train/margins": 15.302855730056763, "rewards_train/rejected": -16.203519821166992, "step": 2697 }, { "epoch": 1.33, "learning_rate": 6.348188501563048e-07, "loss": 0.0, "step": 2698 }, { "epoch": 1.33, "logps_train/chosen": -73.91393280029297, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -294.33563232421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9937617778778076, "rewards_train/margins": 16.026719331741333, "rewards_train/rejected": -17.02048110961914, "step": 2698 }, { "epoch": 1.33, "learning_rate": 6.34557493591583e-07, "loss": 0.0, "step": 2699 }, { "epoch": 1.33, "logps_train/chosen": -73.57282257080078, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -291.4705505371094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.016999363899231, "rewards_train/margins": 15.551003336906433, "rewards_train/rejected": -16.568002700805664, "step": 2699 }, { "epoch": 1.33, "learning_rate": 6.342960973853157e-07, "loss": 0.0, "step": 2700 }, { "epoch": 1.33, "logps_train/chosen": -71.01985931396484, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -282.2978515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6263022422790527, "rewards_train/margins": 15.283613681793213, "rewards_train/rejected": -15.909915924072266, "step": 2700 }, { "epoch": 1.33, "learning_rate": 6.34034661614512e-07, "loss": 0.0001, "step": 2701 }, { "epoch": 1.33, "logps_train/chosen": -72.20297241210938, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -122.0625, "logps_train/rejected": -287.453857421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7949061393737793, "rewards_train/margins": 15.745888233184814, "rewards_train/rejected": -16.540794372558594, "step": 2701 }, { "epoch": 1.33, "learning_rate": 6.337731863561924e-07, "loss": 0.0, "step": 2702 }, { "epoch": 1.33, "logps_train/chosen": -76.57597351074219, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -299.483154296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0412399768829346, "rewards_train/margins": 16.04242777824402, "rewards_train/rejected": -17.083667755126953, "step": 2702 }, { "epoch": 1.33, "learning_rate": 6.335116716873894e-07, "loss": 0.0011, "step": 2703 }, { "epoch": 1.33, "logps_train/chosen": -77.43821716308594, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -293.2651062011719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0928938388824463, "rewards_train/margins": 15.542699575424194, "rewards_train/rejected": -16.63559341430664, "step": 2703 }, { "epoch": 1.33, "learning_rate": 6.332501176851471e-07, "loss": 0.0001, "step": 2704 }, { "epoch": 1.33, "logps_train/chosen": -69.71553039550781, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -281.1242370605469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7001179456710815, "rewards_train/margins": 15.104689717292786, "rewards_train/rejected": -15.804807662963867, "step": 2704 }, { "epoch": 1.33, "learning_rate": 6.329885244265211e-07, "loss": 0.0001, "step": 2705 }, { "epoch": 1.33, "logps_train/chosen": -68.88526153564453, "logps_train/ref_chosen": -61.25, "logps_train/ref_rejected": -119.25, "logps_train/rejected": -275.5413818359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7637702226638794, "rewards_train/margins": 14.86307418346405, "rewards_train/rejected": -15.62684440612793, "step": 2705 }, { "epoch": 1.33, "learning_rate": 6.327268919885783e-07, "loss": 0.0001, "step": 2706 }, { "epoch": 1.33, "logps_train/chosen": -74.10319519042969, "logps_train/ref_chosen": -68.125, "logps_train/ref_rejected": -132.125, "logps_train/rejected": -295.1615905761719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5940593481063843, "rewards_train/margins": 15.713163018226624, "rewards_train/rejected": -16.307222366333008, "step": 2706 }, { "epoch": 1.33, "learning_rate": 6.324652204483976e-07, "loss": 0.0004, "step": 2707 }, { "epoch": 1.33, "logps_train/chosen": -71.22225952148438, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -284.39666748046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7088468074798584, "rewards_train/margins": 15.347614526748657, "rewards_train/rejected": -16.056461334228516, "step": 2707 }, { "epoch": 1.33, "learning_rate": 6.322035098830691e-07, "loss": 0.0, "step": 2708 }, { "epoch": 1.33, "logps_train/chosen": -69.95376586914062, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -284.7841796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5874667763710022, "rewards_train/margins": 15.335241734981537, "rewards_train/rejected": -15.922708511352539, "step": 2708 }, { "epoch": 1.33, "learning_rate": 6.319417603696944e-07, "loss": 0.0002, "step": 2709 }, { "epoch": 1.33, "logps_train/chosen": -71.1989517211914, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -287.47900390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6943584084510803, "rewards_train/margins": 15.578981816768646, "rewards_train/rejected": -16.273340225219727, "step": 2709 }, { "epoch": 1.33, "learning_rate": 6.316799719853868e-07, "loss": 0.0001, "step": 2710 }, { "epoch": 1.33, "logps_train/chosen": -75.2992172241211, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -121.0625, "logps_train/rejected": -281.4379577636719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.050697684288025, "rewards_train/margins": 14.984405159950256, "rewards_train/rejected": -16.03510284423828, "step": 2710 }, { "epoch": 1.33, "learning_rate": 6.314181448072711e-07, "loss": 0.0, "step": 2711 }, { "epoch": 1.33, "logps_train/chosen": -77.853515625, "logps_train/ref_chosen": -68.25, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -294.74774169921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9594238996505737, "rewards_train/margins": 15.649771809577942, "rewards_train/rejected": -16.609195709228516, "step": 2711 }, { "epoch": 1.34, "learning_rate": 6.311562789124829e-07, "loss": 0.0001, "step": 2712 }, { "epoch": 1.34, "logps_train/chosen": -73.24589538574219, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -286.4983215332031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6421190500259399, "rewards_train/margins": 15.245506644248962, "rewards_train/rejected": -15.887625694274902, "step": 2712 }, { "epoch": 1.34, "learning_rate": 6.308943743781701e-07, "loss": 0.0, "step": 2713 }, { "epoch": 1.34, "logps_train/chosen": -75.00102996826172, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -290.456298828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8569875359535217, "rewards_train/margins": 15.383124768733978, "rewards_train/rejected": -16.2401123046875, "step": 2713 }, { "epoch": 1.34, "learning_rate": 6.306324312814914e-07, "loss": 0.0008, "step": 2714 }, { "epoch": 1.34, "logps_train/chosen": -69.95181274414062, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -287.7176513671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6989163160324097, "rewards_train/margins": 15.370946049690247, "rewards_train/rejected": -16.069862365722656, "step": 2714 }, { "epoch": 1.34, "learning_rate": 6.303704496996167e-07, "loss": 0.0001, "step": 2715 }, { "epoch": 1.34, "logps_train/chosen": -73.68909454345703, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -294.43194580078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6683239936828613, "rewards_train/margins": 15.883857250213623, "rewards_train/rejected": -16.552181243896484, "step": 2715 }, { "epoch": 1.34, "learning_rate": 6.301084297097282e-07, "loss": 0.0, "step": 2716 }, { "epoch": 1.34, "logps_train/chosen": -70.79866027832031, "logps_train/ref_chosen": -62.28125, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -282.35137939453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8503001928329468, "rewards_train/margins": 14.953147530555725, "rewards_train/rejected": -15.803447723388672, "step": 2716 }, { "epoch": 1.34, "learning_rate": 6.29846371389018e-07, "loss": 0.0006, "step": 2717 }, { "epoch": 1.34, "logps_train/chosen": -72.54887390136719, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -279.40570068359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9819375872612, "rewards_train/margins": 14.782414257526398, "rewards_train/rejected": -15.764351844787598, "step": 2717 }, { "epoch": 1.34, "learning_rate": 6.295842748146908e-07, "loss": 0.0001, "step": 2718 }, { "epoch": 1.34, "logps_train/chosen": -70.82911682128906, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -283.3575134277344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7778828144073486, "rewards_train/margins": 15.179840803146362, "rewards_train/rejected": -15.957723617553711, "step": 2718 }, { "epoch": 1.34, "learning_rate": 6.293221400639619e-07, "loss": 0.0003, "step": 2719 }, { "epoch": 1.34, "logps_train/chosen": -70.15823364257812, "logps_train/ref_chosen": -62.65625, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -288.2156982421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7503693103790283, "rewards_train/margins": 15.401814222335815, "rewards_train/rejected": -16.152183532714844, "step": 2719 }, { "epoch": 1.34, "learning_rate": 6.290599672140577e-07, "loss": 0.0, "step": 2720 }, { "epoch": 1.34, "logps_train/chosen": -71.01420593261719, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -280.62713623046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7386767864227295, "rewards_train/margins": 14.751481294631958, "rewards_train/rejected": -15.490158081054688, "step": 2720 }, { "epoch": 1.34, "learning_rate": 6.287977563422165e-07, "loss": 0.0004, "step": 2721 }, { "epoch": 1.34, "logps_train/chosen": -72.25958251953125, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -290.8366394042969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9511538743972778, "rewards_train/margins": 15.761563181877136, "rewards_train/rejected": -16.712717056274414, "step": 2721 }, { "epoch": 1.34, "learning_rate": 6.285355075256871e-07, "loss": 0.0001, "step": 2722 }, { "epoch": 1.34, "logps_train/chosen": -76.30079650878906, "logps_train/ref_chosen": -67.1875, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -295.9468688964844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9101577401161194, "rewards_train/margins": 15.836286962032318, "rewards_train/rejected": -16.746444702148438, "step": 2722 }, { "epoch": 1.34, "learning_rate": 6.282732208417297e-07, "loss": 0.0001, "step": 2723 }, { "epoch": 1.34, "logps_train/chosen": -70.39279174804688, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -297.6097717285156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7072477340698242, "rewards_train/margins": 16.085078239440918, "rewards_train/rejected": -16.792325973510742, "step": 2723 }, { "epoch": 1.34, "learning_rate": 6.280108963676161e-07, "loss": 0.0, "step": 2724 }, { "epoch": 1.34, "logps_train/chosen": -73.52983093261719, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -302.1073913574219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9091354608535767, "rewards_train/margins": 16.33968961238861, "rewards_train/rejected": -17.248825073242188, "step": 2724 }, { "epoch": 1.34, "learning_rate": 6.277485341806285e-07, "loss": 0.0, "step": 2725 }, { "epoch": 1.34, "logps_train/chosen": -74.74819946289062, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -123.3125, "logps_train/rejected": -283.72882080078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9864891767501831, "rewards_train/margins": 15.053580403327942, "rewards_train/rejected": -16.040069580078125, "step": 2725 }, { "epoch": 1.34, "learning_rate": 6.274861343580609e-07, "loss": 0.0, "step": 2726 }, { "epoch": 1.34, "logps_train/chosen": -73.4987564086914, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -286.3231506347656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8822240829467773, "rewards_train/margins": 15.423481941223145, "rewards_train/rejected": -16.305706024169922, "step": 2726 }, { "epoch": 1.34, "learning_rate": 6.272236969772177e-07, "loss": 0.0003, "step": 2727 }, { "epoch": 1.34, "logps_train/chosen": -69.02438354492188, "logps_train/ref_chosen": -61.25, "logps_train/ref_rejected": -120.375, "logps_train/rejected": -276.3716735839844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7776339054107666, "rewards_train/margins": 14.820470571517944, "rewards_train/rejected": -15.598104476928711, "step": 2727 }, { "epoch": 1.34, "learning_rate": 6.26961222115415e-07, "loss": 0.0, "step": 2728 }, { "epoch": 1.34, "logps_train/chosen": -71.81407928466797, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -290.7943115234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5909539461135864, "rewards_train/margins": 15.530765414237976, "rewards_train/rejected": -16.121719360351562, "step": 2728 }, { "epoch": 1.34, "learning_rate": 6.266987098499794e-07, "loss": 0.0, "step": 2729 }, { "epoch": 1.34, "logps_train/chosen": -69.65269470214844, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -298.2074279785156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5542582273483276, "rewards_train/margins": 16.29392445087433, "rewards_train/rejected": -16.848182678222656, "step": 2729 }, { "epoch": 1.34, "learning_rate": 6.26436160258249e-07, "loss": 0.0001, "step": 2730 }, { "epoch": 1.34, "logps_train/chosen": -80.81352233886719, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -127.5625, "logps_train/rejected": -295.3243408203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.455864429473877, "rewards_train/margins": 15.317878246307373, "rewards_train/rejected": -16.77374267578125, "step": 2730 }, { "epoch": 1.34, "learning_rate": 6.261735734175728e-07, "loss": 0.0001, "step": 2731 }, { "epoch": 1.34, "logps_train/chosen": -73.89744567871094, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -285.5980224609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7520978450775146, "rewards_train/margins": 15.466884851455688, "rewards_train/rejected": -16.218982696533203, "step": 2731 }, { "epoch": 1.35, "learning_rate": 6.259109494053103e-07, "loss": 0.0001, "step": 2732 }, { "epoch": 1.35, "logps_train/chosen": -70.74273681640625, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -121.9375, "logps_train/rejected": -286.3982238769531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7478579878807068, "rewards_train/margins": 15.700803816318512, "rewards_train/rejected": -16.44866180419922, "step": 2732 }, { "epoch": 1.35, "learning_rate": 6.256482882988325e-07, "loss": 0.0007, "step": 2733 }, { "epoch": 1.35, "logps_train/chosen": -75.88737487792969, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -299.2070617675781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0096852779388428, "rewards_train/margins": 16.047250986099243, "rewards_train/rejected": -17.056936264038086, "step": 2733 }, { "epoch": 1.35, "learning_rate": 6.253855901755214e-07, "loss": 0.0, "step": 2734 }, { "epoch": 1.35, "logps_train/chosen": -70.99735260009766, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -291.535400390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6098427772521973, "rewards_train/margins": 15.684178829193115, "rewards_train/rejected": -16.294021606445312, "step": 2734 }, { "epoch": 1.35, "learning_rate": 6.251228551127689e-07, "loss": 0.0001, "step": 2735 }, { "epoch": 1.35, "logps_train/chosen": -69.4113998413086, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -289.91546630859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6186306476593018, "rewards_train/margins": 15.848307371139526, "rewards_train/rejected": -16.466938018798828, "step": 2735 }, { "epoch": 1.35, "learning_rate": 6.248600831879794e-07, "loss": 0.0001, "step": 2736 }, { "epoch": 1.35, "logps_train/chosen": -71.1102066040039, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -293.4127197265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6125587821006775, "rewards_train/margins": 16.32959133386612, "rewards_train/rejected": -16.942150115966797, "step": 2736 }, { "epoch": 1.35, "learning_rate": 6.245972744785666e-07, "loss": 0.0, "step": 2737 }, { "epoch": 1.35, "logps_train/chosen": -76.90678405761719, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -298.362060546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.065434217453003, "rewards_train/margins": 15.893821001052856, "rewards_train/rejected": -16.95925521850586, "step": 2737 }, { "epoch": 1.35, "learning_rate": 6.243344290619561e-07, "loss": 0.0001, "step": 2738 }, { "epoch": 1.35, "logps_train/chosen": -74.98150634765625, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -304.0271301269531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.150542974472046, "rewards_train/margins": 16.343576669692993, "rewards_train/rejected": -17.49411964416504, "step": 2738 }, { "epoch": 1.35, "learning_rate": 6.240715470155838e-07, "loss": 0.0001, "step": 2739 }, { "epoch": 1.35, "logps_train/chosen": -75.06610870361328, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -292.7354736328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0520210266113281, "rewards_train/margins": 15.652044296264648, "rewards_train/rejected": -16.704065322875977, "step": 2739 }, { "epoch": 1.35, "learning_rate": 6.238086284168966e-07, "loss": 0.0, "step": 2740 }, { "epoch": 1.35, "logps_train/chosen": -70.01492309570312, "logps_train/ref_chosen": -62.34375, "logps_train/ref_rejected": -119.75, "logps_train/rejected": -273.4157409667969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7687527537345886, "rewards_train/margins": 14.594600975513458, "rewards_train/rejected": -15.363353729248047, "step": 2740 }, { "epoch": 1.35, "learning_rate": 6.235456733433518e-07, "loss": 0.0003, "step": 2741 }, { "epoch": 1.35, "logps_train/chosen": -74.19960021972656, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -292.4176025390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.039076328277588, "rewards_train/margins": 15.750291347503662, "rewards_train/rejected": -16.78936767578125, "step": 2741 }, { "epoch": 1.35, "learning_rate": 6.232826818724183e-07, "loss": 0.0002, "step": 2742 }, { "epoch": 1.35, "logps_train/chosen": -72.13410949707031, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -121.875, "logps_train/rejected": -289.41168212890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9415364861488342, "rewards_train/margins": 15.80998557806015, "rewards_train/rejected": -16.751522064208984, "step": 2742 }, { "epoch": 1.35, "learning_rate": 6.230196540815746e-07, "loss": 0.0, "step": 2743 }, { "epoch": 1.35, "logps_train/chosen": -71.18165588378906, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -121.1875, "logps_train/rejected": -284.4868469238281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9066908955574036, "rewards_train/margins": 15.424123466014862, "rewards_train/rejected": -16.330814361572266, "step": 2743 }, { "epoch": 1.35, "learning_rate": 6.227565900483108e-07, "loss": 0.0001, "step": 2744 }, { "epoch": 1.35, "logps_train/chosen": -73.17173767089844, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -300.15130615234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6576042175292969, "rewards_train/margins": 16.519344329833984, "rewards_train/rejected": -17.17694854736328, "step": 2744 }, { "epoch": 1.35, "learning_rate": 6.224934898501273e-07, "loss": 0.0, "step": 2745 }, { "epoch": 1.35, "logps_train/chosen": -74.55584716796875, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -121.125, "logps_train/rejected": -283.19097900390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.093230962753296, "rewards_train/margins": 15.11204743385315, "rewards_train/rejected": -16.205278396606445, "step": 2745 }, { "epoch": 1.35, "learning_rate": 6.22230353564535e-07, "loss": 0.0, "step": 2746 }, { "epoch": 1.35, "logps_train/chosen": -74.43045043945312, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -289.92205810546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9630647301673889, "rewards_train/margins": 15.236369550228119, "rewards_train/rejected": -16.199434280395508, "step": 2746 }, { "epoch": 1.35, "learning_rate": 6.219671812690558e-07, "loss": 0.0001, "step": 2747 }, { "epoch": 1.35, "logps_train/chosen": -73.86006164550781, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -289.83624267578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0945996046066284, "rewards_train/margins": 15.540003895759583, "rewards_train/rejected": -16.63460350036621, "step": 2747 }, { "epoch": 1.35, "learning_rate": 6.217039730412222e-07, "loss": 0.0, "step": 2748 }, { "epoch": 1.35, "logps_train/chosen": -70.50216674804688, "logps_train/ref_chosen": -61.9375, "logps_train/ref_rejected": -119.8125, "logps_train/rejected": -280.4327392578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8570044040679932, "rewards_train/margins": 15.203552007675171, "rewards_train/rejected": -16.060556411743164, "step": 2748 }, { "epoch": 1.35, "learning_rate": 6.214407289585765e-07, "loss": 0.0001, "step": 2749 }, { "epoch": 1.35, "logps_train/chosen": -73.1430892944336, "logps_train/ref_chosen": -67.6875, "logps_train/ref_rejected": -133.375, "logps_train/rejected": -303.8778076171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5457050800323486, "rewards_train/margins": 16.507601499557495, "rewards_train/rejected": -17.053306579589844, "step": 2749 }, { "epoch": 1.35, "learning_rate": 6.21177449098673e-07, "loss": 0.0, "step": 2750 }, { "epoch": 1.35, "logps_train/chosen": -74.62257385253906, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -294.24249267578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1068862676620483, "rewards_train/margins": 15.65164339542389, "rewards_train/rejected": -16.758529663085938, "step": 2750 }, { "epoch": 1.35, "learning_rate": 6.20914133539075e-07, "loss": 0.0004, "step": 2751 }, { "epoch": 1.35, "logps_train/chosen": -75.57890319824219, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -291.85986328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9867963194847107, "rewards_train/margins": 15.551922857761383, "rewards_train/rejected": -16.538719177246094, "step": 2751 }, { "epoch": 1.35, "learning_rate": 6.206507823573576e-07, "loss": 0.0, "step": 2752 }, { "epoch": 1.35, "logps_train/chosen": -78.43498229980469, "logps_train/ref_chosen": -68.375, "logps_train/ref_rejected": -131.625, "logps_train/rejected": -303.66668701171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0051686763763428, "rewards_train/margins": 16.1945059299469, "rewards_train/rejected": -17.199674606323242, "step": 2752 }, { "epoch": 1.36, "learning_rate": 6.203873956311054e-07, "loss": 0.0001, "step": 2753 }, { "epoch": 1.36, "logps_train/chosen": -72.17376708984375, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -282.8703308105469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8424249887466431, "rewards_train/margins": 15.11975610256195, "rewards_train/rejected": -15.962181091308594, "step": 2753 }, { "epoch": 1.36, "learning_rate": 6.201239734379143e-07, "loss": 0.0002, "step": 2754 }, { "epoch": 1.36, "logps_train/chosen": -67.81605529785156, "logps_train/ref_chosen": -61.21875, "logps_train/ref_rejected": -119.5625, "logps_train/rejected": -274.1903076171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6594376564025879, "rewards_train/margins": 14.803637027740479, "rewards_train/rejected": -15.463074684143066, "step": 2754 }, { "epoch": 1.36, "learning_rate": 6.198605158553899e-07, "loss": 0.0003, "step": 2755 }, { "epoch": 1.36, "logps_train/chosen": -71.308349609375, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -287.0303955078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7460203766822815, "rewards_train/margins": 15.516053140163422, "rewards_train/rejected": -16.262073516845703, "step": 2755 }, { "epoch": 1.36, "learning_rate": 6.195970229611488e-07, "loss": 0.0, "step": 2756 }, { "epoch": 1.36, "logps_train/chosen": -75.18596649169922, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -295.3568115234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0128834247589111, "rewards_train/margins": 16.051018476486206, "rewards_train/rejected": -17.063901901245117, "step": 2756 }, { "epoch": 1.36, "learning_rate": 6.193334948328177e-07, "loss": 0.0, "step": 2757 }, { "epoch": 1.36, "logps_train/chosen": -70.41492462158203, "logps_train/ref_chosen": -63.03125, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -296.18988037109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.738465428352356, "rewards_train/margins": 16.11645781993866, "rewards_train/rejected": -16.854923248291016, "step": 2757 }, { "epoch": 1.36, "learning_rate": 6.190699315480338e-07, "loss": 0.0001, "step": 2758 }, { "epoch": 1.36, "logps_train/chosen": -68.56422424316406, "logps_train/ref_chosen": -62.21875, "logps_train/ref_rejected": -122.8125, "logps_train/rejected": -285.6533203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6348894834518433, "rewards_train/margins": 15.653148770332336, "rewards_train/rejected": -16.28803825378418, "step": 2758 }, { "epoch": 1.36, "learning_rate": 6.188063331844446e-07, "loss": 0.0, "step": 2759 }, { "epoch": 1.36, "logps_train/chosen": -73.57221221923828, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -294.3752136230469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7357856631278992, "rewards_train/margins": 15.925026714801788, "rewards_train/rejected": -16.660812377929688, "step": 2759 }, { "epoch": 1.36, "learning_rate": 6.185426998197081e-07, "loss": 0.0, "step": 2760 }, { "epoch": 1.36, "logps_train/chosen": -70.99490356445312, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -297.7637939453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7126733064651489, "rewards_train/margins": 16.662635684013367, "rewards_train/rejected": -17.375308990478516, "step": 2760 }, { "epoch": 1.36, "learning_rate": 6.18279031531492e-07, "loss": 0.0004, "step": 2761 }, { "epoch": 1.36, "logps_train/chosen": -73.94464111328125, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -290.93121337890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0301082134246826, "rewards_train/margins": 15.65847134590149, "rewards_train/rejected": -16.688579559326172, "step": 2761 }, { "epoch": 1.36, "learning_rate": 6.180153283974754e-07, "loss": 0.0001, "step": 2762 }, { "epoch": 1.36, "logps_train/chosen": -74.0971908569336, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -295.5472106933594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9246117472648621, "rewards_train/margins": 15.663263618946075, "rewards_train/rejected": -16.587875366210938, "step": 2762 }, { "epoch": 1.36, "learning_rate": 6.177515904953466e-07, "loss": 0.0001, "step": 2763 }, { "epoch": 1.36, "logps_train/chosen": -72.66387939453125, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -290.48406982421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.003204345703125, "rewards_train/margins": 15.668739318847656, "rewards_train/rejected": -16.67194366455078, "step": 2763 }, { "epoch": 1.36, "learning_rate": 6.174878179028046e-07, "loss": 0.0, "step": 2764 }, { "epoch": 1.36, "logps_train/chosen": -78.01790618896484, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -292.8828430175781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1465171575546265, "rewards_train/margins": 15.530877709388733, "rewards_train/rejected": -16.67739486694336, "step": 2764 }, { "epoch": 1.36, "learning_rate": 6.17224010697559e-07, "loss": 0.0001, "step": 2765 }, { "epoch": 1.36, "logps_train/chosen": -74.16183471679688, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -293.27655029296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.979391872882843, "rewards_train/margins": 15.759347140789032, "rewards_train/rejected": -16.738739013671875, "step": 2765 }, { "epoch": 1.36, "learning_rate": 6.169601689573286e-07, "loss": 0.0, "step": 2766 }, { "epoch": 1.36, "logps_train/chosen": -75.80228424072266, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -291.4013671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0846714973449707, "rewards_train/margins": 15.799705028533936, "rewards_train/rejected": -16.884376525878906, "step": 2766 }, { "epoch": 1.36, "learning_rate": 6.166962927598431e-07, "loss": 0.0002, "step": 2767 }, { "epoch": 1.36, "logps_train/chosen": -78.29753112792969, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -294.1561279296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2462576627731323, "rewards_train/margins": 15.668869137763977, "rewards_train/rejected": -16.91512680053711, "step": 2767 }, { "epoch": 1.36, "learning_rate": 6.164323821828425e-07, "loss": 0.0001, "step": 2768 }, { "epoch": 1.36, "logps_train/chosen": -77.95667266845703, "logps_train/ref_chosen": -68.0, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -297.5091247558594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9950815439224243, "rewards_train/margins": 15.80978524684906, "rewards_train/rejected": -16.804866790771484, "step": 2768 }, { "epoch": 1.36, "learning_rate": 6.161684373040764e-07, "loss": 0.0, "step": 2769 }, { "epoch": 1.36, "logps_train/chosen": -74.60929870605469, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -295.80487060546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.015421748161316, "rewards_train/margins": 16.173951268196106, "rewards_train/rejected": -17.189373016357422, "step": 2769 }, { "epoch": 1.36, "learning_rate": 6.159044582013048e-07, "loss": 0.0001, "step": 2770 }, { "epoch": 1.36, "logps_train/chosen": -69.07969665527344, "logps_train/ref_chosen": -60.9375, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -290.02337646484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8122909665107727, "rewards_train/margins": 15.90112990140915, "rewards_train/rejected": -16.713420867919922, "step": 2770 }, { "epoch": 1.36, "learning_rate": 6.156404449522977e-07, "loss": 0.0, "step": 2771 }, { "epoch": 1.36, "logps_train/chosen": -74.79704284667969, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -299.61895751953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9585611820220947, "rewards_train/margins": 16.37755560874939, "rewards_train/rejected": -17.336116790771484, "step": 2771 }, { "epoch": 1.36, "learning_rate": 6.153763976348352e-07, "loss": 0.0, "step": 2772 }, { "epoch": 1.36, "logps_train/chosen": -71.1989517211914, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -292.1458740234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8694072365760803, "rewards_train/margins": 15.803190648555756, "rewards_train/rejected": -16.672597885131836, "step": 2772 }, { "epoch": 1.37, "learning_rate": 6.151123163267073e-07, "loss": 0.0, "step": 2773 }, { "epoch": 1.37, "logps_train/chosen": -76.7664794921875, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -298.90576171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.206677794456482, "rewards_train/margins": 16.033506989479065, "rewards_train/rejected": -17.240184783935547, "step": 2773 }, { "epoch": 1.37, "learning_rate": 6.148482011057145e-07, "loss": 0.0, "step": 2774 }, { "epoch": 1.37, "logps_train/chosen": -77.73919677734375, "logps_train/ref_chosen": -66.9375, "logps_train/ref_rejected": -133.375, "logps_train/rejected": -311.6368408203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0831971168518066, "rewards_train/margins": 16.747724056243896, "rewards_train/rejected": -17.830921173095703, "step": 2774 }, { "epoch": 1.37, "learning_rate": 6.145840520496666e-07, "loss": 0.0, "step": 2775 }, { "epoch": 1.37, "logps_train/chosen": -75.54133605957031, "logps_train/ref_chosen": -67.875, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -299.26165771484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7675119042396545, "rewards_train/margins": 16.2825807929039, "rewards_train/rejected": -17.050092697143555, "step": 2775 }, { "epoch": 1.37, "learning_rate": 6.143198692363838e-07, "loss": 0.0001, "step": 2776 }, { "epoch": 1.37, "logps_train/chosen": -71.36680603027344, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -283.946044921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6946399807929993, "rewards_train/margins": 15.555529773235321, "rewards_train/rejected": -16.25016975402832, "step": 2776 }, { "epoch": 1.37, "learning_rate": 6.140556527436961e-07, "loss": 0.0, "step": 2777 }, { "epoch": 1.37, "logps_train/chosen": -67.96830749511719, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -289.06475830078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5040080547332764, "rewards_train/margins": 16.104520082473755, "rewards_train/rejected": -16.60852813720703, "step": 2777 }, { "epoch": 1.37, "learning_rate": 6.137914026494435e-07, "loss": 0.0, "step": 2778 }, { "epoch": 1.37, "logps_train/chosen": -75.33766174316406, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -300.45294189453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9813734292984009, "rewards_train/margins": 15.984916090965271, "rewards_train/rejected": -16.966289520263672, "step": 2778 }, { "epoch": 1.37, "learning_rate": 6.135271190314757e-07, "loss": 0.0001, "step": 2779 }, { "epoch": 1.37, "logps_train/chosen": -71.51040649414062, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -290.8473815917969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.80753493309021, "rewards_train/margins": 15.766313791275024, "rewards_train/rejected": -16.573848724365234, "step": 2779 }, { "epoch": 1.37, "learning_rate": 6.132628019676526e-07, "loss": 0.0002, "step": 2780 }, { "epoch": 1.37, "logps_train/chosen": -72.18657684326172, "logps_train/ref_chosen": -61.875, "logps_train/ref_rejected": -120.0, "logps_train/rejected": -286.23883056640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0304739475250244, "rewards_train/margins": 15.593751668930054, "rewards_train/rejected": -16.624225616455078, "step": 2780 }, { "epoch": 1.37, "learning_rate": 6.129984515358439e-07, "loss": 0.0, "step": 2781 }, { "epoch": 1.37, "logps_train/chosen": -75.47273254394531, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -290.6724853515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.201911449432373, "rewards_train/margins": 15.295902729034424, "rewards_train/rejected": -16.497814178466797, "step": 2781 }, { "epoch": 1.37, "learning_rate": 6.127340678139289e-07, "loss": 0.0, "step": 2782 }, { "epoch": 1.37, "logps_train/chosen": -70.89232635498047, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -298.2385559082031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7934803366661072, "rewards_train/margins": 16.408696711063385, "rewards_train/rejected": -17.202177047729492, "step": 2782 }, { "epoch": 1.37, "learning_rate": 6.124696508797967e-07, "loss": 0.0006, "step": 2783 }, { "epoch": 1.37, "logps_train/chosen": -70.60505676269531, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -122.6875, "logps_train/rejected": -290.6740417480469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.752595067024231, "rewards_train/margins": 16.044691920280457, "rewards_train/rejected": -16.797286987304688, "step": 2783 }, { "epoch": 1.37, "learning_rate": 6.122052008113464e-07, "loss": 0.0, "step": 2784 }, { "epoch": 1.37, "logps_train/chosen": -69.51402282714844, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -289.2287292480469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.617417573928833, "rewards_train/margins": 15.610827207565308, "rewards_train/rejected": -16.22824478149414, "step": 2784 }, { "epoch": 1.37, "learning_rate": 6.11940717686487e-07, "loss": 0.0, "step": 2785 }, { "epoch": 1.37, "logps_train/chosen": -76.74296569824219, "logps_train/ref_chosen": -62.5625, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -291.6432189941406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4181935787200928, "rewards_train/margins": 15.388803720474243, "rewards_train/rejected": -16.806997299194336, "step": 2785 }, { "epoch": 1.37, "learning_rate": 6.116762015831368e-07, "loss": 0.0, "step": 2786 }, { "epoch": 1.37, "logps_train/chosen": -77.2750473022461, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -298.904296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.203529953956604, "rewards_train/margins": 15.937289595603943, "rewards_train/rejected": -17.140819549560547, "step": 2786 }, { "epoch": 1.37, "learning_rate": 6.114116525792239e-07, "loss": 0.0, "step": 2787 }, { "epoch": 1.37, "logps_train/chosen": -69.66355895996094, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -121.6875, "logps_train/rejected": -282.2646484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6159166097640991, "rewards_train/margins": 15.443264603614807, "rewards_train/rejected": -16.059181213378906, "step": 2787 }, { "epoch": 1.37, "learning_rate": 6.111470707526863e-07, "loss": 0.0, "step": 2788 }, { "epoch": 1.37, "logps_train/chosen": -72.6637954711914, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -123.1875, "logps_train/rejected": -289.3970031738281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8700416088104248, "rewards_train/margins": 15.752322435379028, "rewards_train/rejected": -16.622364044189453, "step": 2788 }, { "epoch": 1.37, "learning_rate": 6.10882456181472e-07, "loss": 0.0001, "step": 2789 }, { "epoch": 1.37, "logps_train/chosen": -70.10208129882812, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -285.5657958984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7134796977043152, "rewards_train/margins": 15.224644958972931, "rewards_train/rejected": -15.938124656677246, "step": 2789 }, { "epoch": 1.37, "learning_rate": 6.106178089435377e-07, "loss": 0.0, "step": 2790 }, { "epoch": 1.37, "logps_train/chosen": -80.35645294189453, "logps_train/ref_chosen": -68.0625, "logps_train/ref_rejected": -132.25, "logps_train/rejected": -308.8214111328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.227881908416748, "rewards_train/margins": 16.43004274368286, "rewards_train/rejected": -17.65792465209961, "step": 2790 }, { "epoch": 1.37, "learning_rate": 6.103531291168504e-07, "loss": 0.0007, "step": 2791 }, { "epoch": 1.37, "logps_train/chosen": -71.2513656616211, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -293.9081115722656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7913476824760437, "rewards_train/margins": 16.14335995912552, "rewards_train/rejected": -16.934707641601562, "step": 2791 }, { "epoch": 1.37, "learning_rate": 6.100884167793867e-07, "loss": 0.0, "step": 2792 }, { "epoch": 1.37, "logps_train/chosen": -78.2203369140625, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -289.3722229003906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.176379919052124, "rewards_train/margins": 15.410553216934204, "rewards_train/rejected": -16.586933135986328, "step": 2792 }, { "epoch": 1.38, "learning_rate": 6.098236720091326e-07, "loss": 0.0, "step": 2793 }, { "epoch": 1.38, "logps_train/chosen": -67.86962890625, "logps_train/ref_chosen": -60.875, "logps_train/ref_rejected": -121.875, "logps_train/rejected": -287.77783203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6985840797424316, "rewards_train/margins": 15.889405727386475, "rewards_train/rejected": -16.587989807128906, "step": 2793 }, { "epoch": 1.38, "learning_rate": 6.095588948840836e-07, "loss": 0.0002, "step": 2794 }, { "epoch": 1.38, "logps_train/chosen": -74.60487365722656, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -294.09051513671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0308973789215088, "rewards_train/margins": 15.724445104598999, "rewards_train/rejected": -16.755342483520508, "step": 2794 }, { "epoch": 1.38, "learning_rate": 6.092940854822449e-07, "loss": 0.0, "step": 2795 }, { "epoch": 1.38, "logps_train/chosen": -75.62281799316406, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -288.7410888671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2162861824035645, "rewards_train/margins": 15.243857860565186, "rewards_train/rejected": -16.46014404296875, "step": 2795 }, { "epoch": 1.38, "learning_rate": 6.090292438816312e-07, "loss": 0.0, "step": 2796 }, { "epoch": 1.38, "logps_train/chosen": -77.88016510009766, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -295.1177062988281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2664834260940552, "rewards_train/margins": 15.813060641288757, "rewards_train/rejected": -17.079544067382812, "step": 2796 }, { "epoch": 1.38, "learning_rate": 6.087643701602665e-07, "loss": 0.0, "step": 2797 }, { "epoch": 1.38, "logps_train/chosen": -67.8749771118164, "logps_train/ref_chosen": -59.5625, "logps_train/ref_rejected": -122.3125, "logps_train/rejected": -292.4307861328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.83036869764328, "rewards_train/margins": 16.18424540758133, "rewards_train/rejected": -17.01461410522461, "step": 2797 }, { "epoch": 1.38, "learning_rate": 6.084994643961842e-07, "loss": 0.0001, "step": 2798 }, { "epoch": 1.38, "logps_train/chosen": -72.37077331542969, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -288.59088134765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8718430995941162, "rewards_train/margins": 15.635925531387329, "rewards_train/rejected": -16.507768630981445, "step": 2798 }, { "epoch": 1.38, "learning_rate": 6.082345266674278e-07, "loss": 0.0002, "step": 2799 }, { "epoch": 1.38, "logps_train/chosen": -69.59342956542969, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -287.5234069824219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7455239295959473, "rewards_train/margins": 15.682401180267334, "rewards_train/rejected": -16.42792510986328, "step": 2799 }, { "epoch": 1.38, "learning_rate": 6.079695570520496e-07, "loss": 0.0001, "step": 2800 }, { "epoch": 1.38, "logps_train/chosen": -74.28582763671875, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -292.89019775390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9008479118347168, "rewards_train/margins": 16.032898426055908, "rewards_train/rejected": -16.933746337890625, "step": 2800 }, { "epoch": 1.38, "learning_rate": 6.077045556281112e-07, "loss": 0.0, "step": 2801 }, { "epoch": 1.38, "logps_train/chosen": -70.13784790039062, "logps_train/ref_chosen": -61.90625, "logps_train/ref_rejected": -117.9375, "logps_train/rejected": -280.21002197265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8228664398193359, "rewards_train/margins": 15.405851364135742, "rewards_train/rejected": -16.228717803955078, "step": 2801 }, { "epoch": 1.38, "learning_rate": 6.07439522473684e-07, "loss": 0.0, "step": 2802 }, { "epoch": 1.38, "logps_train/chosen": -71.59700012207031, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -120.25, "logps_train/rejected": -280.6752624511719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8089185953140259, "rewards_train/margins": 15.232631087303162, "rewards_train/rejected": -16.041549682617188, "step": 2802 }, { "epoch": 1.38, "learning_rate": 6.071744576668485e-07, "loss": 0.0003, "step": 2803 }, { "epoch": 1.38, "logps_train/chosen": -73.8967056274414, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -291.7933349609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9060766696929932, "rewards_train/margins": 15.502702474594116, "rewards_train/rejected": -16.40877914428711, "step": 2803 }, { "epoch": 1.38, "learning_rate": 6.069093612856945e-07, "loss": 0.0001, "step": 2804 }, { "epoch": 1.38, "logps_train/chosen": -70.98961639404297, "logps_train/ref_chosen": -62.84375, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -292.69110107421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8160513639450073, "rewards_train/margins": 16.007845044136047, "rewards_train/rejected": -16.823896408081055, "step": 2804 }, { "epoch": 1.38, "learning_rate": 6.066442334083212e-07, "loss": 0.0001, "step": 2805 }, { "epoch": 1.38, "logps_train/chosen": -76.32296752929688, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -297.06378173828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0204310417175293, "rewards_train/margins": 16.319151401519775, "rewards_train/rejected": -17.339582443237305, "step": 2805 }, { "epoch": 1.38, "learning_rate": 6.063790741128374e-07, "loss": 0.0, "step": 2806 }, { "epoch": 1.38, "logps_train/chosen": -78.31522369384766, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -300.5684509277344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4266884326934814, "rewards_train/margins": 15.878300428390503, "rewards_train/rejected": -17.304988861083984, "step": 2806 }, { "epoch": 1.38, "learning_rate": 6.061138834773603e-07, "loss": 0.0, "step": 2807 }, { "epoch": 1.38, "logps_train/chosen": -75.30191040039062, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -121.5, "logps_train/rejected": -291.025390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1583161354064941, "rewards_train/margins": 15.797200679779053, "rewards_train/rejected": -16.955516815185547, "step": 2807 }, { "epoch": 1.38, "learning_rate": 6.058486615800172e-07, "loss": 0.0, "step": 2808 }, { "epoch": 1.38, "logps_train/chosen": -68.12556457519531, "logps_train/ref_chosen": -61.9375, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -285.83905029296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.620319664478302, "rewards_train/margins": 15.48028153181076, "rewards_train/rejected": -16.100601196289062, "step": 2808 }, { "epoch": 1.38, "learning_rate": 6.055834084989442e-07, "loss": 0.0002, "step": 2809 }, { "epoch": 1.38, "logps_train/chosen": -69.47161865234375, "logps_train/ref_chosen": -62.40625, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -291.0379333496094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7085386514663696, "rewards_train/margins": 16.16830003261566, "rewards_train/rejected": -16.87683868408203, "step": 2809 }, { "epoch": 1.38, "learning_rate": 6.053181243122865e-07, "loss": 0.0, "step": 2810 }, { "epoch": 1.38, "logps_train/chosen": -76.82710266113281, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -120.375, "logps_train/rejected": -282.686279296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.157588243484497, "rewards_train/margins": 15.076468229293823, "rewards_train/rejected": -16.23405647277832, "step": 2810 }, { "epoch": 1.38, "learning_rate": 6.050528090981988e-07, "loss": 0.0, "step": 2811 }, { "epoch": 1.38, "logps_train/chosen": -74.58381652832031, "logps_train/ref_chosen": -62.8125, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -297.1933288574219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1758135557174683, "rewards_train/margins": 16.03736698627472, "rewards_train/rejected": -17.213180541992188, "step": 2811 }, { "epoch": 1.38, "learning_rate": 6.047874629348447e-07, "loss": 0.0004, "step": 2812 }, { "epoch": 1.38, "logps_train/chosen": -74.08349609375, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -288.27008056640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0301753282546997, "rewards_train/margins": 15.310604929924011, "rewards_train/rejected": -16.34078025817871, "step": 2812 }, { "epoch": 1.39, "learning_rate": 6.045220859003967e-07, "loss": 0.0001, "step": 2813 }, { "epoch": 1.39, "logps_train/chosen": -73.64083099365234, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -293.4024658203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9930869340896606, "rewards_train/margins": 15.97450339794159, "rewards_train/rejected": -16.96759033203125, "step": 2813 }, { "epoch": 1.39, "learning_rate": 6.042566780730373e-07, "loss": 0.0, "step": 2814 }, { "epoch": 1.39, "logps_train/chosen": -71.72794342041016, "logps_train/ref_chosen": -62.53125, "logps_train/ref_rejected": -122.3125, "logps_train/rejected": -294.39044189453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9182777404785156, "rewards_train/margins": 16.2879581451416, "rewards_train/rejected": -17.206235885620117, "step": 2814 }, { "epoch": 1.39, "learning_rate": 6.039912395309567e-07, "loss": 0.0, "step": 2815 }, { "epoch": 1.39, "logps_train/chosen": -78.00215911865234, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -297.8199462890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1835167407989502, "rewards_train/margins": 15.77533221244812, "rewards_train/rejected": -16.95884895324707, "step": 2815 }, { "epoch": 1.39, "learning_rate": 6.037257703523553e-07, "loss": 0.0005, "step": 2816 }, { "epoch": 1.39, "logps_train/chosen": -77.23687744140625, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -302.1885070800781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2222716808319092, "rewards_train/margins": 16.166796922683716, "rewards_train/rejected": -17.389068603515625, "step": 2816 }, { "epoch": 1.39, "learning_rate": 6.034602706154422e-07, "loss": 0.0, "step": 2817 }, { "epoch": 1.39, "logps_train/chosen": -72.81814575195312, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -121.9375, "logps_train/rejected": -285.3461608886719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8166295886039734, "rewards_train/margins": 15.524137318134308, "rewards_train/rejected": -16.34076690673828, "step": 2817 }, { "epoch": 1.39, "learning_rate": 6.031947403984352e-07, "loss": 0.0, "step": 2818 }, { "epoch": 1.39, "logps_train/chosen": -75.41941833496094, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -299.96502685546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0034657716751099, "rewards_train/margins": 16.18922984600067, "rewards_train/rejected": -17.19269561767578, "step": 2818 }, { "epoch": 1.39, "learning_rate": 6.029291797795612e-07, "loss": 0.0, "step": 2819 }, { "epoch": 1.39, "logps_train/chosen": -69.85679626464844, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -289.9671630859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6193221807479858, "rewards_train/margins": 16.08530580997467, "rewards_train/rejected": -16.704627990722656, "step": 2819 }, { "epoch": 1.39, "learning_rate": 6.026635888370564e-07, "loss": 0.0, "step": 2820 }, { "epoch": 1.39, "logps_train/chosen": -70.77308654785156, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -290.9161376953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.631117582321167, "rewards_train/margins": 15.80170750617981, "rewards_train/rejected": -16.432825088500977, "step": 2820 }, { "epoch": 1.39, "learning_rate": 6.023979676491655e-07, "loss": 0.0, "step": 2821 }, { "epoch": 1.39, "logps_train/chosen": -76.4009780883789, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -293.5801696777344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2711033821105957, "rewards_train/margins": 15.815040111541748, "rewards_train/rejected": -17.086143493652344, "step": 2821 }, { "epoch": 1.39, "learning_rate": 6.021323162941424e-07, "loss": 0.0, "step": 2822 }, { "epoch": 1.39, "logps_train/chosen": -74.51659393310547, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -297.1903381347656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0005366802215576, "rewards_train/margins": 16.24750256538391, "rewards_train/rejected": -17.24803924560547, "step": 2822 }, { "epoch": 1.39, "learning_rate": 6.018666348502499e-07, "loss": 0.0, "step": 2823 }, { "epoch": 1.39, "logps_train/chosen": -71.33656311035156, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -288.3446960449219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8371231555938721, "rewards_train/margins": 15.688118696212769, "rewards_train/rejected": -16.52524185180664, "step": 2823 }, { "epoch": 1.39, "learning_rate": 6.016009233957592e-07, "loss": 0.0, "step": 2824 }, { "epoch": 1.39, "logps_train/chosen": -68.84043884277344, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -286.07037353515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5728130340576172, "rewards_train/margins": 15.486272811889648, "rewards_train/rejected": -16.059085845947266, "step": 2824 }, { "epoch": 1.39, "learning_rate": 6.013351820089509e-07, "loss": 0.0, "step": 2825 }, { "epoch": 1.39, "logps_train/chosen": -69.61386108398438, "logps_train/ref_chosen": -61.65625, "logps_train/ref_rejected": -121.3125, "logps_train/rejected": -282.59466552734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7962490320205688, "rewards_train/margins": 15.33245313167572, "rewards_train/rejected": -16.12870216369629, "step": 2825 }, { "epoch": 1.39, "learning_rate": 6.010694107681143e-07, "loss": 0.0, "step": 2826 }, { "epoch": 1.39, "logps_train/chosen": -72.99874877929688, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -291.1993408203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0621315240859985, "rewards_train/margins": 15.660585761070251, "rewards_train/rejected": -16.72271728515625, "step": 2826 }, { "epoch": 1.39, "learning_rate": 6.008036097515474e-07, "loss": 0.0, "step": 2827 }, { "epoch": 1.39, "logps_train/chosen": -74.99207305908203, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -297.5185546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.005677342414856, "rewards_train/margins": 16.170395731925964, "rewards_train/rejected": -17.17607307434082, "step": 2827 }, { "epoch": 1.39, "learning_rate": 6.005377790375566e-07, "loss": 0.0001, "step": 2828 }, { "epoch": 1.39, "logps_train/chosen": -70.36062622070312, "logps_train/ref_chosen": -59.875, "logps_train/ref_rejected": -119.125, "logps_train/rejected": -281.9273376464844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0481715202331543, "rewards_train/margins": 15.22942590713501, "rewards_train/rejected": -16.277597427368164, "step": 2828 }, { "epoch": 1.39, "learning_rate": 6.002719187044579e-07, "loss": 0.0, "step": 2829 }, { "epoch": 1.39, "logps_train/chosen": -71.10789489746094, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -295.6502990722656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6477519273757935, "rewards_train/margins": 16.405901789665222, "rewards_train/rejected": -17.053653717041016, "step": 2829 }, { "epoch": 1.39, "learning_rate": 6.000060288305752e-07, "loss": 0.0001, "step": 2830 }, { "epoch": 1.39, "logps_train/chosen": -76.72733306884766, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -303.55560302734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0315955877304077, "rewards_train/margins": 16.424354195594788, "rewards_train/rejected": -17.455949783325195, "step": 2830 }, { "epoch": 1.39, "learning_rate": 5.997401094942416e-07, "loss": 0.0, "step": 2831 }, { "epoch": 1.39, "logps_train/chosen": -77.60751342773438, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -286.0794677734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.325252890586853, "rewards_train/margins": 14.928399682044983, "rewards_train/rejected": -16.253652572631836, "step": 2831 }, { "epoch": 1.39, "learning_rate": 5.994741607737989e-07, "loss": 0.0, "step": 2832 }, { "epoch": 1.39, "logps_train/chosen": -70.2304916381836, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -122.8125, "logps_train/rejected": -290.7599792480469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6780540943145752, "rewards_train/margins": 16.11566996574402, "rewards_train/rejected": -16.793724060058594, "step": 2832 }, { "epoch": 1.39, "learning_rate": 5.992081827475969e-07, "loss": 0.0002, "step": 2833 }, { "epoch": 1.39, "logps_train/chosen": -76.78605651855469, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -289.60723876953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0382732152938843, "rewards_train/margins": 15.388761162757874, "rewards_train/rejected": -16.427034378051758, "step": 2833 }, { "epoch": 1.4, "learning_rate": 5.989421754939952e-07, "loss": 0.0001, "step": 2834 }, { "epoch": 1.4, "logps_train/chosen": -72.49468231201172, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -120.6875, "logps_train/rejected": -282.87445068359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9452201128005981, "rewards_train/margins": 15.27152144908905, "rewards_train/rejected": -16.21674156188965, "step": 2834 }, { "epoch": 1.4, "learning_rate": 5.986761390913608e-07, "loss": 0.0002, "step": 2835 }, { "epoch": 1.4, "logps_train/chosen": -73.72505187988281, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -291.2543640136719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8084427118301392, "rewards_train/margins": 15.934181571006775, "rewards_train/rejected": -16.742624282836914, "step": 2835 }, { "epoch": 1.4, "learning_rate": 5.984100736180699e-07, "loss": 0.0001, "step": 2836 }, { "epoch": 1.4, "logps_train/chosen": -76.62876892089844, "logps_train/ref_chosen": -67.5625, "logps_train/ref_rejected": -131.75, "logps_train/rejected": -302.0110778808594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9047711491584778, "rewards_train/margins": 16.12182492017746, "rewards_train/rejected": -17.026596069335938, "step": 2836 }, { "epoch": 1.4, "learning_rate": 5.981439791525072e-07, "loss": 0.0, "step": 2837 }, { "epoch": 1.4, "logps_train/chosen": -67.80303955078125, "logps_train/ref_chosen": -61.65625, "logps_train/ref_rejected": -118.25, "logps_train/rejected": -279.0567932128906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6139950156211853, "rewards_train/margins": 15.465514719486237, "rewards_train/rejected": -16.079509735107422, "step": 2837 }, { "epoch": 1.4, "learning_rate": 5.978778557730663e-07, "loss": 0.0001, "step": 2838 }, { "epoch": 1.4, "logps_train/chosen": -75.4000244140625, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -294.8782043457031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0507935285568237, "rewards_train/margins": 15.545132756233215, "rewards_train/rejected": -16.59592628479004, "step": 2838 }, { "epoch": 1.4, "learning_rate": 5.976117035581482e-07, "loss": 0.0002, "step": 2839 }, { "epoch": 1.4, "logps_train/chosen": -70.890380859375, "logps_train/ref_chosen": -61.6875, "logps_train/ref_rejected": -120.875, "logps_train/rejected": -279.56854248046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9204837083816528, "rewards_train/margins": 14.945987582206726, "rewards_train/rejected": -15.866471290588379, "step": 2839 }, { "epoch": 1.4, "learning_rate": 5.97345522586164e-07, "loss": 0.0001, "step": 2840 }, { "epoch": 1.4, "logps_train/chosen": -77.23114013671875, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -297.56353759765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4224796295166016, "rewards_train/margins": 15.727720260620117, "rewards_train/rejected": -17.15019989013672, "step": 2840 }, { "epoch": 1.4, "learning_rate": 5.970793129355317e-07, "loss": 0.0, "step": 2841 }, { "epoch": 1.4, "logps_train/chosen": -77.62625885009766, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -294.4461669921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.223954200744629, "rewards_train/margins": 15.428963661193848, "rewards_train/rejected": -16.652917861938477, "step": 2841 }, { "epoch": 1.4, "learning_rate": 5.968130746846788e-07, "loss": 0.0, "step": 2842 }, { "epoch": 1.4, "logps_train/chosen": -74.66943359375, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -300.4143371582031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9368160963058472, "rewards_train/margins": 16.35901176929474, "rewards_train/rejected": -17.295827865600586, "step": 2842 }, { "epoch": 1.4, "learning_rate": 5.965468079120409e-07, "loss": 0.0002, "step": 2843 }, { "epoch": 1.4, "logps_train/chosen": -77.70848083496094, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -296.0247802734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2470687627792358, "rewards_train/margins": 15.714637398719788, "rewards_train/rejected": -16.961706161499023, "step": 2843 }, { "epoch": 1.4, "learning_rate": 5.96280512696062e-07, "loss": 0.0, "step": 2844 }, { "epoch": 1.4, "logps_train/chosen": -70.53416442871094, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -288.39862060546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7241677045822144, "rewards_train/margins": 15.518231511116028, "rewards_train/rejected": -16.242399215698242, "step": 2844 }, { "epoch": 1.4, "learning_rate": 5.960141891151942e-07, "loss": 0.0, "step": 2845 }, { "epoch": 1.4, "logps_train/chosen": -70.08460998535156, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -291.2668151855469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7167621850967407, "rewards_train/margins": 16.02828061580658, "rewards_train/rejected": -16.74504280090332, "step": 2845 }, { "epoch": 1.4, "learning_rate": 5.957478372478983e-07, "loss": 0.0, "step": 2846 }, { "epoch": 1.4, "logps_train/chosen": -75.45100402832031, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -299.0482482910156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.040949821472168, "rewards_train/margins": 15.953230857849121, "rewards_train/rejected": -16.99418067932129, "step": 2846 }, { "epoch": 1.4, "learning_rate": 5.954814571726438e-07, "loss": 0.0, "step": 2847 }, { "epoch": 1.4, "logps_train/chosen": -71.12763977050781, "logps_train/ref_chosen": -61.03125, "logps_train/ref_rejected": -121.875, "logps_train/rejected": -288.99957275390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0089073181152344, "rewards_train/margins": 15.70218276977539, "rewards_train/rejected": -16.711090087890625, "step": 2847 }, { "epoch": 1.4, "learning_rate": 5.952150489679074e-07, "loss": 0.0001, "step": 2848 }, { "epoch": 1.4, "logps_train/chosen": -73.48631286621094, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -288.9228210449219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8376939296722412, "rewards_train/margins": 15.43202805519104, "rewards_train/rejected": -16.26972198486328, "step": 2848 }, { "epoch": 1.4, "learning_rate": 5.949486127121753e-07, "loss": 0.0001, "step": 2849 }, { "epoch": 1.4, "logps_train/chosen": -73.01895141601562, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -295.258056640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9051663875579834, "rewards_train/margins": 16.201698541641235, "rewards_train/rejected": -17.10686492919922, "step": 2849 }, { "epoch": 1.4, "learning_rate": 5.946821484839412e-07, "loss": 0.0, "step": 2850 }, { "epoch": 1.4, "logps_train/chosen": -71.20897674560547, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -295.7235107421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7597649097442627, "rewards_train/margins": 16.350136518478394, "rewards_train/rejected": -17.109901428222656, "step": 2850 }, { "epoch": 1.4, "learning_rate": 5.944156563617072e-07, "loss": 0.0, "step": 2851 }, { "epoch": 1.4, "logps_train/chosen": -73.43309020996094, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -121.3125, "logps_train/rejected": -284.78997802734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9042462110519409, "rewards_train/margins": 15.442866444587708, "rewards_train/rejected": -16.34711265563965, "step": 2851 }, { "epoch": 1.4, "learning_rate": 5.941491364239837e-07, "loss": 0.0002, "step": 2852 }, { "epoch": 1.4, "logps_train/chosen": -76.2105941772461, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -298.13690185546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9372701644897461, "rewards_train/margins": 15.821246147155762, "rewards_train/rejected": -16.758516311645508, "step": 2852 }, { "epoch": 1.4, "learning_rate": 5.938825887492895e-07, "loss": 0.0001, "step": 2853 }, { "epoch": 1.4, "logps_train/chosen": -72.09989166259766, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -289.47540283203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7372353076934814, "rewards_train/margins": 15.91157603263855, "rewards_train/rejected": -16.64881134033203, "step": 2853 }, { "epoch": 1.41, "learning_rate": 5.936160134161509e-07, "loss": 0.0, "step": 2854 }, { "epoch": 1.41, "logps_train/chosen": -72.72620391845703, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -300.2182922363281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8734502792358398, "rewards_train/margins": 16.538809776306152, "rewards_train/rejected": -17.412260055541992, "step": 2854 }, { "epoch": 1.41, "learning_rate": 5.933494105031031e-07, "loss": 0.0, "step": 2855 }, { "epoch": 1.41, "logps_train/chosen": -66.32603454589844, "logps_train/ref_chosen": -61.71875, "logps_train/ref_rejected": -119.0, "logps_train/rejected": -278.2467041015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4604357182979584, "rewards_train/margins": 15.467799335718155, "rewards_train/rejected": -15.928235054016113, "step": 2855 }, { "epoch": 1.41, "learning_rate": 5.930827800886892e-07, "loss": 0.0, "step": 2856 }, { "epoch": 1.41, "logps_train/chosen": -73.99861907958984, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -304.9879150390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.821688175201416, "rewards_train/margins": 16.571244716644287, "rewards_train/rejected": -17.392932891845703, "step": 2856 }, { "epoch": 1.41, "learning_rate": 5.9281612225146e-07, "loss": 0.0, "step": 2857 }, { "epoch": 1.41, "logps_train/chosen": -78.79670715332031, "logps_train/ref_chosen": -68.8125, "logps_train/ref_rejected": -130.875, "logps_train/rejected": -311.7294006347656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.000520944595337, "rewards_train/margins": 17.08116126060486, "rewards_train/rejected": -18.081682205200195, "step": 2857 }, { "epoch": 1.41, "learning_rate": 5.925494370699752e-07, "loss": 0.0, "step": 2858 }, { "epoch": 1.41, "logps_train/chosen": -80.15415954589844, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -298.193603515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3439807891845703, "rewards_train/margins": 15.912637710571289, "rewards_train/rejected": -17.25661849975586, "step": 2858 }, { "epoch": 1.41, "learning_rate": 5.922827246228015e-07, "loss": 0.0001, "step": 2859 }, { "epoch": 1.41, "logps_train/chosen": -76.1581802368164, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -297.51727294921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9565895795822144, "rewards_train/margins": 15.740450978279114, "rewards_train/rejected": -16.697040557861328, "step": 2859 }, { "epoch": 1.41, "learning_rate": 5.920159849885144e-07, "loss": 0.0002, "step": 2860 }, { "epoch": 1.41, "logps_train/chosen": -73.405029296875, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -295.8596496582031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1046149730682373, "rewards_train/margins": 16.17441487312317, "rewards_train/rejected": -17.279029846191406, "step": 2860 }, { "epoch": 1.41, "learning_rate": 5.917492182456974e-07, "loss": 0.0, "step": 2861 }, { "epoch": 1.41, "logps_train/chosen": -73.83462524414062, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -298.76739501953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8194736838340759, "rewards_train/margins": 16.028699457645416, "rewards_train/rejected": -16.848173141479492, "step": 2861 }, { "epoch": 1.41, "learning_rate": 5.914824244729413e-07, "loss": 0.0, "step": 2862 }, { "epoch": 1.41, "logps_train/chosen": -76.9816665649414, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -131.625, "logps_train/rejected": -304.77178955078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1073460578918457, "rewards_train/margins": 16.205771923065186, "rewards_train/rejected": -17.31311798095703, "step": 2862 }, { "epoch": 1.41, "learning_rate": 5.91215603748846e-07, "loss": 0.0, "step": 2863 }, { "epoch": 1.41, "logps_train/chosen": -77.72941589355469, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -291.906982421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1006273031234741, "rewards_train/margins": 15.714239716529846, "rewards_train/rejected": -16.81486701965332, "step": 2863 }, { "epoch": 1.41, "learning_rate": 5.909487561520181e-07, "loss": 0.0, "step": 2864 }, { "epoch": 1.41, "logps_train/chosen": -72.46575164794922, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -296.0386047363281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7324637174606323, "rewards_train/margins": 15.993175625801086, "rewards_train/rejected": -16.72563934326172, "step": 2864 }, { "epoch": 1.41, "learning_rate": 5.90681881761073e-07, "loss": 0.0, "step": 2865 }, { "epoch": 1.41, "logps_train/chosen": -72.77458190917969, "logps_train/ref_chosen": -62.40625, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -282.2442321777344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.036637783050537, "rewards_train/margins": 14.989447116851807, "rewards_train/rejected": -16.026084899902344, "step": 2865 }, { "epoch": 1.41, "learning_rate": 5.904149806546337e-07, "loss": 0.0003, "step": 2866 }, { "epoch": 1.41, "logps_train/chosen": -74.30728912353516, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -290.76263427734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8596839904785156, "rewards_train/margins": 15.663991928100586, "rewards_train/rejected": -16.5236759185791, "step": 2866 }, { "epoch": 1.41, "learning_rate": 5.90148052911331e-07, "loss": 0.0001, "step": 2867 }, { "epoch": 1.41, "logps_train/chosen": -74.84024047851562, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -288.9435729980469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9119293689727783, "rewards_train/margins": 15.717339277267456, "rewards_train/rejected": -16.629268646240234, "step": 2867 }, { "epoch": 1.41, "learning_rate": 5.898810986098034e-07, "loss": 0.0, "step": 2868 }, { "epoch": 1.41, "logps_train/chosen": -77.26251220703125, "logps_train/ref_chosen": -66.9375, "logps_train/ref_rejected": -134.25, "logps_train/rejected": -312.75836181640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0328917503356934, "rewards_train/margins": 16.814133167266846, "rewards_train/rejected": -17.84702491760254, "step": 2868 }, { "epoch": 1.41, "learning_rate": 5.896141178286979e-07, "loss": 0.0, "step": 2869 }, { "epoch": 1.41, "logps_train/chosen": -76.47667694091797, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -299.8733215332031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2285999059677124, "rewards_train/margins": 16.143598198890686, "rewards_train/rejected": -17.3721981048584, "step": 2869 }, { "epoch": 1.41, "learning_rate": 5.893471106466685e-07, "loss": 0.0003, "step": 2870 }, { "epoch": 1.41, "logps_train/chosen": -72.87037658691406, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -121.375, "logps_train/rejected": -283.37677001953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8978287577629089, "rewards_train/margins": 15.304838478565216, "rewards_train/rejected": -16.202667236328125, "step": 2870 }, { "epoch": 1.41, "learning_rate": 5.890800771423774e-07, "loss": 0.0005, "step": 2871 }, { "epoch": 1.41, "logps_train/chosen": -75.41647338867188, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -296.7560729980469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0002408027648926, "rewards_train/margins": 16.054370403289795, "rewards_train/rejected": -17.054611206054688, "step": 2871 }, { "epoch": 1.41, "learning_rate": 5.888130173944947e-07, "loss": 0.0002, "step": 2872 }, { "epoch": 1.41, "logps_train/chosen": -72.23143005371094, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -122.125, "logps_train/rejected": -287.4599914550781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8836405873298645, "rewards_train/margins": 15.649272620677948, "rewards_train/rejected": -16.532913208007812, "step": 2872 }, { "epoch": 1.41, "learning_rate": 5.885459314816974e-07, "loss": 0.0, "step": 2873 }, { "epoch": 1.41, "logps_train/chosen": -76.2448959350586, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -290.2340087890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0543229579925537, "rewards_train/margins": 15.397888422012329, "rewards_train/rejected": -16.452211380004883, "step": 2873 }, { "epoch": 1.42, "learning_rate": 5.882788194826713e-07, "loss": 0.0001, "step": 2874 }, { "epoch": 1.42, "logps_train/chosen": -73.40557861328125, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -293.36444091796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7512022852897644, "rewards_train/margins": 16.062438309192657, "rewards_train/rejected": -16.813640594482422, "step": 2874 }, { "epoch": 1.42, "learning_rate": 5.880116814761094e-07, "loss": 0.0, "step": 2875 }, { "epoch": 1.42, "logps_train/chosen": -70.57622528076172, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -292.0292663574219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4332085847854614, "rewards_train/margins": 15.926551699638367, "rewards_train/rejected": -16.359760284423828, "step": 2875 }, { "epoch": 1.42, "learning_rate": 5.877445175407123e-07, "loss": 0.0, "step": 2876 }, { "epoch": 1.42, "logps_train/chosen": -79.53883361816406, "logps_train/ref_chosen": -67.625, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -302.9947204589844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1915782690048218, "rewards_train/margins": 16.316386818885803, "rewards_train/rejected": -17.507965087890625, "step": 2876 }, { "epoch": 1.42, "learning_rate": 5.874773277551882e-07, "loss": 0.0, "step": 2877 }, { "epoch": 1.42, "logps_train/chosen": -69.621337890625, "logps_train/ref_chosen": -62.09375, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -281.16845703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7524175047874451, "rewards_train/margins": 15.266770422458649, "rewards_train/rejected": -16.019187927246094, "step": 2877 }, { "epoch": 1.42, "learning_rate": 5.87210112198253e-07, "loss": 0.0001, "step": 2878 }, { "epoch": 1.42, "logps_train/chosen": -74.75962829589844, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -298.1656799316406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9494977593421936, "rewards_train/margins": 16.32917732000351, "rewards_train/rejected": -17.278675079345703, "step": 2878 }, { "epoch": 1.42, "learning_rate": 5.869428709486303e-07, "loss": 0.0, "step": 2879 }, { "epoch": 1.42, "logps_train/chosen": -72.79342651367188, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -297.4817810058594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9331024289131165, "rewards_train/margins": 16.387049853801727, "rewards_train/rejected": -17.320152282714844, "step": 2879 }, { "epoch": 1.42, "learning_rate": 5.866756040850513e-07, "loss": 0.0, "step": 2880 }, { "epoch": 1.42, "logps_train/chosen": -70.88472747802734, "logps_train/ref_chosen": -62.09375, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -290.46282958984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8792439699172974, "rewards_train/margins": 15.80175769329071, "rewards_train/rejected": -16.681001663208008, "step": 2880 }, { "epoch": 1.42, "learning_rate": 5.864083116862543e-07, "loss": 0.0, "step": 2881 }, { "epoch": 1.42, "logps_train/chosen": -72.16404724121094, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -300.15228271484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.759959876537323, "rewards_train/margins": 16.4336856007576, "rewards_train/rejected": -17.193645477294922, "step": 2881 }, { "epoch": 1.42, "learning_rate": 5.861409938309857e-07, "loss": 0.0, "step": 2882 }, { "epoch": 1.42, "logps_train/chosen": -70.85807800292969, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -296.4054870605469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6549485921859741, "rewards_train/margins": 16.076714158058167, "rewards_train/rejected": -16.73166275024414, "step": 2882 }, { "epoch": 1.42, "learning_rate": 5.858736505979989e-07, "loss": 0.0, "step": 2883 }, { "epoch": 1.42, "logps_train/chosen": -73.881591796875, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -288.6929931640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1170166730880737, "rewards_train/margins": 15.497252583503723, "rewards_train/rejected": -16.614269256591797, "step": 2883 }, { "epoch": 1.42, "learning_rate": 5.856062820660554e-07, "loss": 0.0, "step": 2884 }, { "epoch": 1.42, "logps_train/chosen": -77.28306579589844, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -131.5, "logps_train/rejected": -309.6546630859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0505237579345703, "rewards_train/margins": 16.766555786132812, "rewards_train/rejected": -17.817079544067383, "step": 2884 }, { "epoch": 1.42, "learning_rate": 5.853388883139234e-07, "loss": 0.0, "step": 2885 }, { "epoch": 1.42, "logps_train/chosen": -75.51112365722656, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -121.5, "logps_train/rejected": -287.7391357421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.22435462474823, "rewards_train/margins": 15.39975655078888, "rewards_train/rejected": -16.62411117553711, "step": 2885 }, { "epoch": 1.42, "learning_rate": 5.850714694203791e-07, "loss": 0.0001, "step": 2886 }, { "epoch": 1.42, "logps_train/chosen": -72.99223327636719, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -127.5625, "logps_train/rejected": -295.04937744140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6931685209274292, "rewards_train/margins": 16.056787610054016, "rewards_train/rejected": -16.749956130981445, "step": 2886 }, { "epoch": 1.42, "learning_rate": 5.848040254642059e-07, "loss": 0.0, "step": 2887 }, { "epoch": 1.42, "logps_train/chosen": -74.0888442993164, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -296.6271667480469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0712380409240723, "rewards_train/margins": 16.30671453475952, "rewards_train/rejected": -17.377952575683594, "step": 2887 }, { "epoch": 1.42, "learning_rate": 5.845365565241944e-07, "loss": 0.0, "step": 2888 }, { "epoch": 1.42, "logps_train/chosen": -77.53413391113281, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -293.64178466796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2794394493103027, "rewards_train/margins": 15.619600772857666, "rewards_train/rejected": -16.89904022216797, "step": 2888 }, { "epoch": 1.42, "learning_rate": 5.842690626791433e-07, "loss": 0.0001, "step": 2889 }, { "epoch": 1.42, "logps_train/chosen": -68.236572265625, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -289.4908447265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5276119709014893, "rewards_train/margins": 15.81073260307312, "rewards_train/rejected": -16.33834457397461, "step": 2889 }, { "epoch": 1.42, "learning_rate": 5.840015440078574e-07, "loss": 0.0, "step": 2890 }, { "epoch": 1.42, "logps_train/chosen": -73.60861206054688, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -303.67938232421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8911828398704529, "rewards_train/margins": 16.80478197336197, "rewards_train/rejected": -17.695964813232422, "step": 2890 }, { "epoch": 1.42, "learning_rate": 5.837340005891497e-07, "loss": 0.0, "step": 2891 }, { "epoch": 1.42, "logps_train/chosen": -71.59349060058594, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -121.5, "logps_train/rejected": -283.47625732421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.860130250453949, "rewards_train/margins": 15.338129103183746, "rewards_train/rejected": -16.198259353637695, "step": 2891 }, { "epoch": 1.42, "learning_rate": 5.834664325018408e-07, "loss": 0.0, "step": 2892 }, { "epoch": 1.42, "logps_train/chosen": -74.23111724853516, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -295.02984619140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0481845140457153, "rewards_train/margins": 16.00528609752655, "rewards_train/rejected": -17.053470611572266, "step": 2892 }, { "epoch": 1.42, "learning_rate": 5.831988398247575e-07, "loss": 0.0001, "step": 2893 }, { "epoch": 1.42, "logps_train/chosen": -65.49122619628906, "logps_train/ref_chosen": -59.4375, "logps_train/ref_rejected": -117.0625, "logps_train/rejected": -274.43994140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6040292978286743, "rewards_train/margins": 15.134937644004822, "rewards_train/rejected": -15.738966941833496, "step": 2893 }, { "epoch": 1.42, "learning_rate": 5.829312226367343e-07, "loss": 0.0, "step": 2894 }, { "epoch": 1.42, "logps_train/chosen": -76.83230590820312, "logps_train/ref_chosen": -63.03125, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -309.34991455078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3788607120513916, "rewards_train/margins": 16.632056951522827, "rewards_train/rejected": -18.01091766357422, "step": 2894 }, { "epoch": 1.43, "learning_rate": 5.826635810166136e-07, "loss": 0.0, "step": 2895 }, { "epoch": 1.43, "logps_train/chosen": -69.54395294189453, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -305.07452392578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4543953239917755, "rewards_train/margins": 17.248561829328537, "rewards_train/rejected": -17.702957153320312, "step": 2895 }, { "epoch": 1.43, "learning_rate": 5.823959150432441e-07, "loss": 0.0, "step": 2896 }, { "epoch": 1.43, "logps_train/chosen": -67.10779571533203, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -120.9375, "logps_train/rejected": -282.41229248046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.478626549243927, "rewards_train/margins": 15.672317206859589, "rewards_train/rejected": -16.150943756103516, "step": 2896 }, { "epoch": 1.43, "learning_rate": 5.821282247954821e-07, "loss": 0.0001, "step": 2897 }, { "epoch": 1.43, "logps_train/chosen": -75.12483215332031, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -132.875, "logps_train/rejected": -311.16754150390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9753736257553101, "rewards_train/margins": 16.857006669044495, "rewards_train/rejected": -17.832380294799805, "step": 2897 }, { "epoch": 1.43, "learning_rate": 5.818605103521909e-07, "loss": 0.0, "step": 2898 }, { "epoch": 1.43, "logps_train/chosen": -78.86449432373047, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -305.5201721191406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3214348554611206, "rewards_train/margins": 16.328824162483215, "rewards_train/rejected": -17.650259017944336, "step": 2898 }, { "epoch": 1.43, "learning_rate": 5.815927717922407e-07, "loss": 0.0, "step": 2899 }, { "epoch": 1.43, "logps_train/chosen": -72.88914489746094, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -287.4879455566406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9233385324478149, "rewards_train/margins": 15.415494322776794, "rewards_train/rejected": -16.33883285522461, "step": 2899 }, { "epoch": 1.43, "learning_rate": 5.813250091945095e-07, "loss": 0.0001, "step": 2900 }, { "epoch": 1.43, "logps_train/chosen": -72.08932495117188, "logps_train/ref_chosen": -62.0, "logps_train/ref_rejected": -120.4375, "logps_train/rejected": -284.22015380859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0085422992706299, "rewards_train/margins": 15.370163679122925, "rewards_train/rejected": -16.378705978393555, "step": 2900 }, { "epoch": 1.43, "learning_rate": 5.810572226378821e-07, "loss": 0.0, "step": 2901 }, { "epoch": 1.43, "logps_train/chosen": -76.28341674804688, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -298.5644226074219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1395485401153564, "rewards_train/margins": 15.930367231369019, "rewards_train/rejected": -17.069915771484375, "step": 2901 }, { "epoch": 1.43, "learning_rate": 5.807894122012497e-07, "loss": 0.0, "step": 2902 }, { "epoch": 1.43, "logps_train/chosen": -71.20284271240234, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -298.5839538574219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7520710229873657, "rewards_train/margins": 16.42663609981537, "rewards_train/rejected": -17.178707122802734, "step": 2902 }, { "epoch": 1.43, "learning_rate": 5.805215779635113e-07, "loss": 0.0, "step": 2903 }, { "epoch": 1.43, "logps_train/chosen": -77.6084213256836, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -301.4896545410156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3232932090759277, "rewards_train/margins": 16.03055429458618, "rewards_train/rejected": -17.35384750366211, "step": 2903 }, { "epoch": 1.43, "learning_rate": 5.802537200035728e-07, "loss": 0.0, "step": 2904 }, { "epoch": 1.43, "logps_train/chosen": -75.91413879394531, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -296.51025390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2191970348358154, "rewards_train/margins": 16.207221269607544, "rewards_train/rejected": -17.42641830444336, "step": 2904 }, { "epoch": 1.43, "learning_rate": 5.799858384003469e-07, "loss": 0.0, "step": 2905 }, { "epoch": 1.43, "logps_train/chosen": -72.03900146484375, "logps_train/ref_chosen": -62.0, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -299.4559326171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0043388605117798, "rewards_train/margins": 16.651800513267517, "rewards_train/rejected": -17.656139373779297, "step": 2905 }, { "epoch": 1.43, "learning_rate": 5.797179332327531e-07, "loss": 0.0, "step": 2906 }, { "epoch": 1.43, "logps_train/chosen": -72.81645202636719, "logps_train/ref_chosen": -61.875, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -288.8427429199219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0942919254302979, "rewards_train/margins": 15.55541205406189, "rewards_train/rejected": -16.649703979492188, "step": 2906 }, { "epoch": 1.43, "learning_rate": 5.794500045797184e-07, "loss": 0.0, "step": 2907 }, { "epoch": 1.43, "logps_train/chosen": -78.38694763183594, "logps_train/ref_chosen": -66.9375, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -298.1593017578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.146067500114441, "rewards_train/margins": 15.921132206916809, "rewards_train/rejected": -17.06719970703125, "step": 2907 }, { "epoch": 1.43, "learning_rate": 5.791820525201761e-07, "loss": 0.0, "step": 2908 }, { "epoch": 1.43, "logps_train/chosen": -73.680908203125, "logps_train/ref_chosen": -62.78125, "logps_train/ref_rejected": -121.375, "logps_train/rejected": -284.751220703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0890381336212158, "rewards_train/margins": 15.249367475509644, "rewards_train/rejected": -16.33840560913086, "step": 2908 }, { "epoch": 1.43, "learning_rate": 5.789140771330669e-07, "loss": 0.0, "step": 2909 }, { "epoch": 1.43, "logps_train/chosen": -75.44847106933594, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -292.12384033203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9402079582214355, "rewards_train/margins": 15.541901111602783, "rewards_train/rejected": -16.48210906982422, "step": 2909 }, { "epoch": 1.43, "learning_rate": 5.786460784973381e-07, "loss": 0.0004, "step": 2910 }, { "epoch": 1.43, "logps_train/chosen": -74.91542053222656, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -295.50323486328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.95555579662323, "rewards_train/margins": 15.866103291511536, "rewards_train/rejected": -16.821659088134766, "step": 2910 }, { "epoch": 1.43, "learning_rate": 5.783780566919438e-07, "loss": 0.0, "step": 2911 }, { "epoch": 1.43, "logps_train/chosen": -77.10577392578125, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -302.5072021484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1262506246566772, "rewards_train/margins": 16.2661691904068, "rewards_train/rejected": -17.392419815063477, "step": 2911 }, { "epoch": 1.43, "learning_rate": 5.781100117958453e-07, "loss": 0.0, "step": 2912 }, { "epoch": 1.43, "logps_train/chosen": -73.52105712890625, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -291.5880126953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9826720952987671, "rewards_train/margins": 15.77603304386139, "rewards_train/rejected": -16.758705139160156, "step": 2912 }, { "epoch": 1.43, "learning_rate": 5.778419438880102e-07, "loss": 0.0001, "step": 2913 }, { "epoch": 1.43, "logps_train/chosen": -76.010009765625, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -300.11968994140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0894775390625, "rewards_train/margins": 16.155208587646484, "rewards_train/rejected": -17.244686126708984, "step": 2913 }, { "epoch": 1.43, "learning_rate": 5.77573853047413e-07, "loss": 0.0, "step": 2914 }, { "epoch": 1.43, "logps_train/chosen": -73.20381164550781, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -132.125, "logps_train/rejected": -303.91021728515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7885936498641968, "rewards_train/margins": 16.387046456336975, "rewards_train/rejected": -17.175640106201172, "step": 2914 }, { "epoch": 1.44, "learning_rate": 5.773057393530354e-07, "loss": 0.0, "step": 2915 }, { "epoch": 1.44, "logps_train/chosen": -78.29811096191406, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -134.5, "logps_train/rejected": -313.810546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.141847014427185, "rewards_train/margins": 16.790724396705627, "rewards_train/rejected": -17.932571411132812, "step": 2915 }, { "epoch": 1.44, "learning_rate": 5.770376028838654e-07, "loss": 0.0, "step": 2916 }, { "epoch": 1.44, "logps_train/chosen": -69.32192993164062, "logps_train/ref_chosen": -62.34375, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -297.3494873046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6957672238349915, "rewards_train/margins": 16.3714057803154, "rewards_train/rejected": -17.06717300415039, "step": 2916 }, { "epoch": 1.44, "learning_rate": 5.767694437188975e-07, "loss": 0.0001, "step": 2917 }, { "epoch": 1.44, "logps_train/chosen": -76.00819396972656, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -290.0014953613281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.111659288406372, "rewards_train/margins": 15.380287885665894, "rewards_train/rejected": -16.491947174072266, "step": 2917 }, { "epoch": 1.44, "learning_rate": 5.765012619371338e-07, "loss": 0.0, "step": 2918 }, { "epoch": 1.44, "logps_train/chosen": -76.52472686767578, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -299.33026123046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2165844440460205, "rewards_train/margins": 16.111315488815308, "rewards_train/rejected": -17.327899932861328, "step": 2918 }, { "epoch": 1.44, "learning_rate": 5.76233057617582e-07, "loss": 0.0001, "step": 2919 }, { "epoch": 1.44, "logps_train/chosen": -78.30117797851562, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -300.3825378417969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3897367715835571, "rewards_train/margins": 15.77737557888031, "rewards_train/rejected": -17.167112350463867, "step": 2919 }, { "epoch": 1.44, "learning_rate": 5.759648308392568e-07, "loss": 0.0, "step": 2920 }, { "epoch": 1.44, "logps_train/chosen": -76.25547790527344, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -297.4716796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2713487148284912, "rewards_train/margins": 15.913368463516235, "rewards_train/rejected": -17.184717178344727, "step": 2920 }, { "epoch": 1.44, "learning_rate": 5.756965816811801e-07, "loss": 0.0001, "step": 2921 }, { "epoch": 1.44, "logps_train/chosen": -76.20460510253906, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -296.94903564453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2635759115219116, "rewards_train/margins": 16.129374146461487, "rewards_train/rejected": -17.3929500579834, "step": 2921 }, { "epoch": 1.44, "learning_rate": 5.754283102223796e-07, "loss": 0.0, "step": 2922 }, { "epoch": 1.44, "logps_train/chosen": -78.2158203125, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -302.83203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.390869379043579, "rewards_train/margins": 16.277930974960327, "rewards_train/rejected": -17.668800354003906, "step": 2922 }, { "epoch": 1.44, "learning_rate": 5.7516001654189e-07, "loss": 0.0, "step": 2923 }, { "epoch": 1.44, "logps_train/chosen": -70.06147766113281, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -123.3125, "logps_train/rejected": -290.7735900878906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5983354449272156, "rewards_train/margins": 16.144063770771027, "rewards_train/rejected": -16.742399215698242, "step": 2923 }, { "epoch": 1.44, "learning_rate": 5.748917007187523e-07, "loss": 0.0007, "step": 2924 }, { "epoch": 1.44, "logps_train/chosen": -70.37632751464844, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -297.73748779296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4462258815765381, "rewards_train/margins": 16.78113579750061, "rewards_train/rejected": -17.22736167907715, "step": 2924 }, { "epoch": 1.44, "learning_rate": 5.746233628320141e-07, "loss": 0.0, "step": 2925 }, { "epoch": 1.44, "logps_train/chosen": -74.77024841308594, "logps_train/ref_chosen": -67.1875, "logps_train/ref_rejected": -135.375, "logps_train/rejected": -318.86004638671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7588117122650146, "rewards_train/margins": 17.59438157081604, "rewards_train/rejected": -18.353193283081055, "step": 2925 }, { "epoch": 1.44, "learning_rate": 5.743550029607297e-07, "loss": 0.0, "step": 2926 }, { "epoch": 1.44, "logps_train/chosen": -74.92665100097656, "logps_train/ref_chosen": -63.59375, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -300.10516357421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1328022480010986, "rewards_train/margins": 16.149445295333862, "rewards_train/rejected": -17.28224754333496, "step": 2926 }, { "epoch": 1.44, "learning_rate": 5.740866211839598e-07, "loss": 0.0, "step": 2927 }, { "epoch": 1.44, "logps_train/chosen": -76.24271392822266, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -301.6737060546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0214394330978394, "rewards_train/margins": 16.302960515022278, "rewards_train/rejected": -17.324399948120117, "step": 2927 }, { "epoch": 1.44, "learning_rate": 5.738182175807713e-07, "loss": 0.0, "step": 2928 }, { "epoch": 1.44, "logps_train/chosen": -75.5929946899414, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -300.517822265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0199438333511353, "rewards_train/margins": 16.434863448143005, "rewards_train/rejected": -17.45480728149414, "step": 2928 }, { "epoch": 1.44, "learning_rate": 5.735497922302379e-07, "loss": 0.0, "step": 2929 }, { "epoch": 1.44, "logps_train/chosen": -77.95109558105469, "logps_train/ref_chosen": -67.1875, "logps_train/ref_rejected": -131.75, "logps_train/rejected": -306.33953857421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0748944282531738, "rewards_train/margins": 16.385230541229248, "rewards_train/rejected": -17.460124969482422, "step": 2929 }, { "epoch": 1.44, "learning_rate": 5.732813452114393e-07, "loss": 0.0001, "step": 2930 }, { "epoch": 1.44, "logps_train/chosen": -78.6234359741211, "logps_train/ref_chosen": -67.6875, "logps_train/ref_rejected": -130.875, "logps_train/rejected": -301.9594421386719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0951564311981201, "rewards_train/margins": 16.015633821487427, "rewards_train/rejected": -17.110790252685547, "step": 2930 }, { "epoch": 1.44, "learning_rate": 5.73012876603462e-07, "loss": 0.0001, "step": 2931 }, { "epoch": 1.44, "logps_train/chosen": -73.82591247558594, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -303.2991638183594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7566144466400146, "rewards_train/margins": 16.673988580703735, "rewards_train/rejected": -17.43060302734375, "step": 2931 }, { "epoch": 1.44, "learning_rate": 5.727443864853985e-07, "loss": 0.0, "step": 2932 }, { "epoch": 1.44, "logps_train/chosen": -67.74401092529297, "logps_train/ref_chosen": -62.1875, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -292.928466796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5538442134857178, "rewards_train/margins": 16.44422936439514, "rewards_train/rejected": -16.99807357788086, "step": 2932 }, { "epoch": 1.44, "learning_rate": 5.72475874936348e-07, "loss": 0.0001, "step": 2933 }, { "epoch": 1.44, "logps_train/chosen": -71.58199310302734, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -296.7598876953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.771041214466095, "rewards_train/margins": 16.401383101940155, "rewards_train/rejected": -17.17242431640625, "step": 2933 }, { "epoch": 1.44, "learning_rate": 5.722073420354156e-07, "loss": 0.0001, "step": 2934 }, { "epoch": 1.44, "logps_train/chosen": -73.37767791748047, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -121.3125, "logps_train/rejected": -283.6967468261719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.006126880645752, "rewards_train/margins": 15.231906414031982, "rewards_train/rejected": -16.238033294677734, "step": 2934 }, { "epoch": 1.45, "learning_rate": 5.71938787861713e-07, "loss": 0.0001, "step": 2935 }, { "epoch": 1.45, "logps_train/chosen": -75.14750671386719, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -293.242919921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1648974418640137, "rewards_train/margins": 15.63322114944458, "rewards_train/rejected": -16.798118591308594, "step": 2935 }, { "epoch": 1.45, "learning_rate": 5.71670212494358e-07, "loss": 0.0001, "step": 2936 }, { "epoch": 1.45, "logps_train/chosen": -74.54569244384766, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -306.958740234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.987479567527771, "rewards_train/margins": 17.008835434913635, "rewards_train/rejected": -17.996315002441406, "step": 2936 }, { "epoch": 1.45, "learning_rate": 5.714016160124749e-07, "loss": 0.0, "step": 2937 }, { "epoch": 1.45, "logps_train/chosen": -75.37513732910156, "logps_train/ref_chosen": -67.4375, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -304.4021301269531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7953751087188721, "rewards_train/margins": 16.769640684127808, "rewards_train/rejected": -17.56501579284668, "step": 2937 }, { "epoch": 1.45, "learning_rate": 5.71132998495194e-07, "loss": 0.0, "step": 2938 }, { "epoch": 1.45, "logps_train/chosen": -75.69385528564453, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -285.278076171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0757819414138794, "rewards_train/margins": 15.192795634269714, "rewards_train/rejected": -16.268577575683594, "step": 2938 }, { "epoch": 1.45, "learning_rate": 5.708643600216516e-07, "loss": 0.0004, "step": 2939 }, { "epoch": 1.45, "logps_train/chosen": -76.41486358642578, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -299.0766906738281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2931464910507202, "rewards_train/margins": 15.974189400672913, "rewards_train/rejected": -17.267335891723633, "step": 2939 }, { "epoch": 1.45, "learning_rate": 5.705957006709907e-07, "loss": 0.0, "step": 2940 }, { "epoch": 1.45, "logps_train/chosen": -69.80908966064453, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -120.0, "logps_train/rejected": -285.21240234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.743604302406311, "rewards_train/margins": 15.778515219688416, "rewards_train/rejected": -16.522119522094727, "step": 2940 }, { "epoch": 1.45, "learning_rate": 5.703270205223598e-07, "loss": 0.0, "step": 2941 }, { "epoch": 1.45, "logps_train/chosen": -75.39640045166016, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -305.4532470703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0300695896148682, "rewards_train/margins": 16.44181799888611, "rewards_train/rejected": -17.471887588500977, "step": 2941 }, { "epoch": 1.45, "learning_rate": 5.700583196549143e-07, "loss": 0.0, "step": 2942 }, { "epoch": 1.45, "logps_train/chosen": -74.00466918945312, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -306.381591796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9095004796981812, "rewards_train/margins": 16.889598488807678, "rewards_train/rejected": -17.79909896850586, "step": 2942 }, { "epoch": 1.45, "learning_rate": 5.697895981478149e-07, "loss": 0.0, "step": 2943 }, { "epoch": 1.45, "logps_train/chosen": -75.95045471191406, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -131.5, "logps_train/rejected": -306.6598815917969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0195574760437012, "rewards_train/margins": 16.500338077545166, "rewards_train/rejected": -17.519895553588867, "step": 2943 }, { "epoch": 1.45, "learning_rate": 5.695208560802291e-07, "loss": 0.0003, "step": 2944 }, { "epoch": 1.45, "logps_train/chosen": -73.15929412841797, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -292.47119140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8920522332191467, "rewards_train/margins": 15.877332150936127, "rewards_train/rejected": -16.769384384155273, "step": 2944 }, { "epoch": 1.45, "learning_rate": 5.692520935313301e-07, "loss": 0.0, "step": 2945 }, { "epoch": 1.45, "logps_train/chosen": -71.59854125976562, "logps_train/ref_chosen": -62.46875, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -289.1732177734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9134191274642944, "rewards_train/margins": 15.619722962379456, "rewards_train/rejected": -16.53314208984375, "step": 2945 }, { "epoch": 1.45, "learning_rate": 5.689833105802968e-07, "loss": 0.0002, "step": 2946 }, { "epoch": 1.45, "logps_train/chosen": -78.32167053222656, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -309.360107421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3603403568267822, "rewards_train/margins": 16.78177237510681, "rewards_train/rejected": -18.142112731933594, "step": 2946 }, { "epoch": 1.45, "learning_rate": 5.687145073063149e-07, "loss": 0.0001, "step": 2947 }, { "epoch": 1.45, "logps_train/chosen": -82.22319030761719, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -301.933349609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5431194305419922, "rewards_train/margins": 15.831073760986328, "rewards_train/rejected": -17.37419319152832, "step": 2947 }, { "epoch": 1.45, "learning_rate": 5.684456837885753e-07, "loss": 0.0002, "step": 2948 }, { "epoch": 1.45, "logps_train/chosen": -73.60786437988281, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -297.86236572265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9743850827217102, "rewards_train/margins": 16.181677997112274, "rewards_train/rejected": -17.156063079833984, "step": 2948 }, { "epoch": 1.45, "learning_rate": 5.681768401062756e-07, "loss": 0.0, "step": 2949 }, { "epoch": 1.45, "logps_train/chosen": -76.01188659667969, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -121.375, "logps_train/rejected": -288.77679443359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.223893642425537, "rewards_train/margins": 15.517845630645752, "rewards_train/rejected": -16.74173927307129, "step": 2949 }, { "epoch": 1.45, "learning_rate": 5.679079763386187e-07, "loss": 0.0, "step": 2950 }, { "epoch": 1.45, "logps_train/chosen": -75.83452606201172, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -289.8267517089844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2565972805023193, "rewards_train/margins": 15.285352945327759, "rewards_train/rejected": -16.541950225830078, "step": 2950 }, { "epoch": 1.45, "learning_rate": 5.676390925648138e-07, "loss": 0.0001, "step": 2951 }, { "epoch": 1.45, "logps_train/chosen": -75.116455078125, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -298.255126953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1099374294281006, "rewards_train/margins": 15.879152536392212, "rewards_train/rejected": -16.989089965820312, "step": 2951 }, { "epoch": 1.45, "learning_rate": 5.673701888640756e-07, "loss": 0.0, "step": 2952 }, { "epoch": 1.45, "logps_train/chosen": -77.13003540039062, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -298.13653564453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.236685037612915, "rewards_train/margins": 16.171842336654663, "rewards_train/rejected": -17.408527374267578, "step": 2952 }, { "epoch": 1.45, "learning_rate": 5.671012653156254e-07, "loss": 0.0003, "step": 2953 }, { "epoch": 1.45, "logps_train/chosen": -70.68838500976562, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -293.7299499511719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7408843040466309, "rewards_train/margins": 16.18679666519165, "rewards_train/rejected": -16.92768096923828, "step": 2953 }, { "epoch": 1.45, "learning_rate": 5.668323219986896e-07, "loss": 0.0001, "step": 2954 }, { "epoch": 1.45, "logps_train/chosen": -75.73530578613281, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -296.01092529296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9722613096237183, "rewards_train/margins": 15.994943737983704, "rewards_train/rejected": -16.967205047607422, "step": 2954 }, { "epoch": 1.45, "learning_rate": 5.665633589925008e-07, "loss": 0.0001, "step": 2955 }, { "epoch": 1.45, "logps_train/chosen": -78.85245513916016, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -304.4086608886719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4371989965438843, "rewards_train/margins": 16.205520272254944, "rewards_train/rejected": -17.642719268798828, "step": 2955 }, { "epoch": 1.46, "learning_rate": 5.662943763762972e-07, "loss": 0.0, "step": 2956 }, { "epoch": 1.46, "logps_train/chosen": -71.13301849365234, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -292.48211669921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7470909953117371, "rewards_train/margins": 16.123583137989044, "rewards_train/rejected": -16.87067413330078, "step": 2956 }, { "epoch": 1.46, "learning_rate": 5.660253742293229e-07, "loss": 0.0001, "step": 2957 }, { "epoch": 1.46, "logps_train/chosen": -73.96369934082031, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -300.68310546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7497391104698181, "rewards_train/margins": 16.518426477909088, "rewards_train/rejected": -17.268165588378906, "step": 2957 }, { "epoch": 1.46, "learning_rate": 5.657563526308279e-07, "loss": 0.0, "step": 2958 }, { "epoch": 1.46, "logps_train/chosen": -74.15020751953125, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -298.687255859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9405578374862671, "rewards_train/margins": 16.27328646183014, "rewards_train/rejected": -17.213844299316406, "step": 2958 }, { "epoch": 1.46, "learning_rate": 5.654873116600678e-07, "loss": 0.0, "step": 2959 }, { "epoch": 1.46, "logps_train/chosen": -73.88133239746094, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -294.8537292480469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8889875411987305, "rewards_train/margins": 16.233691215515137, "rewards_train/rejected": -17.122678756713867, "step": 2959 }, { "epoch": 1.46, "learning_rate": 5.652182513963038e-07, "loss": 0.0001, "step": 2960 }, { "epoch": 1.46, "logps_train/chosen": -78.79098510742188, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -297.0841369628906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2354702949523926, "rewards_train/margins": 15.872064113616943, "rewards_train/rejected": -17.107534408569336, "step": 2960 }, { "epoch": 1.46, "learning_rate": 5.649491719188027e-07, "loss": 0.0, "step": 2961 }, { "epoch": 1.46, "logps_train/chosen": -73.29867553710938, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -292.8897705078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9392423629760742, "rewards_train/margins": 15.892800331115723, "rewards_train/rejected": -16.832042694091797, "step": 2961 }, { "epoch": 1.46, "learning_rate": 5.646800733068375e-07, "loss": 0.0, "step": 2962 }, { "epoch": 1.46, "logps_train/chosen": -73.72425842285156, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -288.0777587890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0604631900787354, "rewards_train/margins": 15.423871755599976, "rewards_train/rejected": -16.48433494567871, "step": 2962 }, { "epoch": 1.46, "learning_rate": 5.64410955639686e-07, "loss": 0.0001, "step": 2963 }, { "epoch": 1.46, "logps_train/chosen": -77.0970687866211, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -295.7297058105469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1770896911621094, "rewards_train/margins": 15.953887939453125, "rewards_train/rejected": -17.130977630615234, "step": 2963 }, { "epoch": 1.46, "learning_rate": 5.641418189966325e-07, "loss": 0.0, "step": 2964 }, { "epoch": 1.46, "logps_train/chosen": -74.6469497680664, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -297.9503173828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0932106971740723, "rewards_train/margins": 16.163445949554443, "rewards_train/rejected": -17.256656646728516, "step": 2964 }, { "epoch": 1.46, "learning_rate": 5.638726634569664e-07, "loss": 0.0001, "step": 2965 }, { "epoch": 1.46, "logps_train/chosen": -72.51861572265625, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -302.36273193359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.872711181640625, "rewards_train/margins": 17.008487701416016, "rewards_train/rejected": -17.88119888305664, "step": 2965 }, { "epoch": 1.46, "learning_rate": 5.636034890999824e-07, "loss": 0.0, "step": 2966 }, { "epoch": 1.46, "logps_train/chosen": -72.25972747802734, "logps_train/ref_chosen": -62.4375, "logps_train/ref_rejected": -122.1875, "logps_train/rejected": -290.4306335449219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9840292930603027, "rewards_train/margins": 15.841501712799072, "rewards_train/rejected": -16.825531005859375, "step": 2966 }, { "epoch": 1.46, "learning_rate": 5.633342960049815e-07, "loss": 0.0004, "step": 2967 }, { "epoch": 1.46, "logps_train/chosen": -74.89842224121094, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -121.5625, "logps_train/rejected": -292.9670104980469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1115214824676514, "rewards_train/margins": 16.03127408027649, "rewards_train/rejected": -17.14279556274414, "step": 2967 }, { "epoch": 1.46, "learning_rate": 5.630650842512696e-07, "loss": 0.0001, "step": 2968 }, { "epoch": 1.46, "logps_train/chosen": -70.8672866821289, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -290.8521728515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.750840425491333, "rewards_train/margins": 15.951224088668823, "rewards_train/rejected": -16.702064514160156, "step": 2968 }, { "epoch": 1.46, "learning_rate": 5.627958539181583e-07, "loss": 0.0001, "step": 2969 }, { "epoch": 1.46, "logps_train/chosen": -72.24736022949219, "logps_train/ref_chosen": -67.6875, "logps_train/ref_rejected": -130.375, "logps_train/rejected": -306.8542785644531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4547160267829895, "rewards_train/margins": 17.19233101606369, "rewards_train/rejected": -17.64704704284668, "step": 2969 }, { "epoch": 1.46, "learning_rate": 5.625266050849649e-07, "loss": 0.0001, "step": 2970 }, { "epoch": 1.46, "logps_train/chosen": -68.91314697265625, "logps_train/ref_chosen": -60.4375, "logps_train/ref_rejected": -116.8125, "logps_train/rejected": -278.88165283203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8494447469711304, "rewards_train/margins": 15.3574720621109, "rewards_train/rejected": -16.20691680908203, "step": 2970 }, { "epoch": 1.46, "learning_rate": 5.622573378310116e-07, "loss": 0.0002, "step": 2971 }, { "epoch": 1.46, "logps_train/chosen": -75.36564636230469, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -306.66522216796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9419851899147034, "rewards_train/margins": 16.905007302761078, "rewards_train/rejected": -17.84699249267578, "step": 2971 }, { "epoch": 1.46, "learning_rate": 5.619880522356263e-07, "loss": 0.0, "step": 2972 }, { "epoch": 1.46, "logps_train/chosen": -74.59666442871094, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -307.0655212402344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9910138845443726, "rewards_train/margins": 16.928133606910706, "rewards_train/rejected": -17.919147491455078, "step": 2972 }, { "epoch": 1.46, "learning_rate": 5.617187483781427e-07, "loss": 0.0, "step": 2973 }, { "epoch": 1.46, "logps_train/chosen": -74.30113220214844, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -298.27392578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9177830815315247, "rewards_train/margins": 16.346867263317108, "rewards_train/rejected": -17.264650344848633, "step": 2973 }, { "epoch": 1.46, "learning_rate": 5.614494263378994e-07, "loss": 0.0, "step": 2974 }, { "epoch": 1.46, "logps_train/chosen": -76.47364807128906, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -307.65472412109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.264746904373169, "rewards_train/margins": 16.691155195236206, "rewards_train/rejected": -17.955902099609375, "step": 2974 }, { "epoch": 1.46, "learning_rate": 5.611800861942403e-07, "loss": 0.0, "step": 2975 }, { "epoch": 1.46, "logps_train/chosen": -80.55827331542969, "logps_train/ref_chosen": -68.6875, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -301.047607421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1854169368743896, "rewards_train/margins": 16.239464044570923, "rewards_train/rejected": -17.424880981445312, "step": 2975 }, { "epoch": 1.47, "learning_rate": 5.60910728026515e-07, "loss": 0.0, "step": 2976 }, { "epoch": 1.47, "logps_train/chosen": -76.27410888671875, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -288.76019287109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1339542865753174, "rewards_train/margins": 15.263058423995972, "rewards_train/rejected": -16.39701271057129, "step": 2976 }, { "epoch": 1.47, "learning_rate": 5.606413519140783e-07, "loss": 0.0001, "step": 2977 }, { "epoch": 1.47, "logps_train/chosen": -77.59507751464844, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -302.8525695800781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3108255863189697, "rewards_train/margins": 16.398650884628296, "rewards_train/rejected": -17.709476470947266, "step": 2977 }, { "epoch": 1.47, "learning_rate": 5.603719579362901e-07, "loss": 0.0007, "step": 2978 }, { "epoch": 1.47, "logps_train/chosen": -80.95504760742188, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -298.21832275390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5276824235916138, "rewards_train/margins": 15.638779520988464, "rewards_train/rejected": -17.166461944580078, "step": 2978 }, { "epoch": 1.47, "learning_rate": 5.601025461725158e-07, "loss": 0.0001, "step": 2979 }, { "epoch": 1.47, "logps_train/chosen": -78.64631652832031, "logps_train/ref_chosen": -67.1875, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -302.219970703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1463217735290527, "rewards_train/margins": 16.393351078033447, "rewards_train/rejected": -17.5396728515625, "step": 2979 }, { "epoch": 1.47, "learning_rate": 5.598331167021258e-07, "loss": 0.0001, "step": 2980 }, { "epoch": 1.47, "logps_train/chosen": -76.06163024902344, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -307.3857421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9191030263900757, "rewards_train/margins": 16.83670675754547, "rewards_train/rejected": -17.755809783935547, "step": 2980 }, { "epoch": 1.47, "learning_rate": 5.595636696044959e-07, "loss": 0.0001, "step": 2981 }, { "epoch": 1.47, "logps_train/chosen": -78.24333190917969, "logps_train/ref_chosen": -67.5625, "logps_train/ref_rejected": -136.375, "logps_train/rejected": -316.80035400390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0674481391906738, "rewards_train/margins": 16.970693111419678, "rewards_train/rejected": -18.03814125061035, "step": 2981 }, { "epoch": 1.47, "learning_rate": 5.592942049590069e-07, "loss": 0.0, "step": 2982 }, { "epoch": 1.47, "logps_train/chosen": -75.13641357421875, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -305.3979187011719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9234561920166016, "rewards_train/margins": 16.66745948791504, "rewards_train/rejected": -17.59091567993164, "step": 2982 }, { "epoch": 1.47, "learning_rate": 5.590247228450451e-07, "loss": 0.0, "step": 2983 }, { "epoch": 1.47, "logps_train/chosen": -72.95628356933594, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -294.747802734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.79914391040802, "rewards_train/margins": 16.190380930900574, "rewards_train/rejected": -16.989524841308594, "step": 2983 }, { "epoch": 1.47, "learning_rate": 5.587552233420016e-07, "loss": 0.0, "step": 2984 }, { "epoch": 1.47, "logps_train/chosen": -75.9063949584961, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -302.0711975097656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.188979148864746, "rewards_train/margins": 16.40231990814209, "rewards_train/rejected": -17.591299057006836, "step": 2984 }, { "epoch": 1.47, "learning_rate": 5.584857065292729e-07, "loss": 0.0, "step": 2985 }, { "epoch": 1.47, "logps_train/chosen": -71.54792022705078, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -118.25, "logps_train/rejected": -278.4694519042969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8919746279716492, "rewards_train/margins": 15.129288494586945, "rewards_train/rejected": -16.021263122558594, "step": 2985 }, { "epoch": 1.47, "learning_rate": 5.582161724862605e-07, "loss": 0.0001, "step": 2986 }, { "epoch": 1.47, "logps_train/chosen": -73.34908294677734, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -292.52374267578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7855921983718872, "rewards_train/margins": 16.210528254508972, "rewards_train/rejected": -16.99612045288086, "step": 2986 }, { "epoch": 1.47, "learning_rate": 5.579466212923707e-07, "loss": 0.0, "step": 2987 }, { "epoch": 1.47, "logps_train/chosen": -71.1693344116211, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -292.2058410644531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7263575792312622, "rewards_train/margins": 16.08221709728241, "rewards_train/rejected": -16.808574676513672, "step": 2987 }, { "epoch": 1.47, "learning_rate": 5.576770530270152e-07, "loss": 0.0, "step": 2988 }, { "epoch": 1.47, "logps_train/chosen": -76.84152221679688, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -312.06561279296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1543184518814087, "rewards_train/margins": 17.165915846824646, "rewards_train/rejected": -18.320234298706055, "step": 2988 }, { "epoch": 1.47, "learning_rate": 5.574074677696108e-07, "loss": 0.0, "step": 2989 }, { "epoch": 1.47, "logps_train/chosen": -72.64022064208984, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -301.662109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9519615769386292, "rewards_train/margins": 16.51756662130356, "rewards_train/rejected": -17.469528198242188, "step": 2989 }, { "epoch": 1.47, "learning_rate": 5.571378655995791e-07, "loss": 0.0, "step": 2990 }, { "epoch": 1.47, "logps_train/chosen": -75.73641967773438, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -131.25, "logps_train/rejected": -310.77325439453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9388272762298584, "rewards_train/margins": 17.013012170791626, "rewards_train/rejected": -17.951839447021484, "step": 2990 }, { "epoch": 1.47, "learning_rate": 5.568682465963466e-07, "loss": 0.0, "step": 2991 }, { "epoch": 1.47, "logps_train/chosen": -74.22956848144531, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -286.58367919921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1001543998718262, "rewards_train/margins": 15.262024402618408, "rewards_train/rejected": -16.362178802490234, "step": 2991 }, { "epoch": 1.47, "learning_rate": 5.565986108393448e-07, "loss": 0.0, "step": 2992 }, { "epoch": 1.47, "logps_train/chosen": -74.70698547363281, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -292.5552978515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9490185379981995, "rewards_train/margins": 15.742837846279144, "rewards_train/rejected": -16.691856384277344, "step": 2992 }, { "epoch": 1.47, "learning_rate": 5.563289584080105e-07, "loss": 0.0, "step": 2993 }, { "epoch": 1.47, "logps_train/chosen": -76.51309967041016, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -302.02154541015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9951335191726685, "rewards_train/margins": 16.35800063610077, "rewards_train/rejected": -17.353134155273438, "step": 2993 }, { "epoch": 1.47, "learning_rate": 5.560592893817848e-07, "loss": 0.0001, "step": 2994 }, { "epoch": 1.47, "logps_train/chosen": -74.71177673339844, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -300.697021484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1037461757659912, "rewards_train/margins": 16.45248246192932, "rewards_train/rejected": -17.556228637695312, "step": 2994 }, { "epoch": 1.47, "learning_rate": 5.557896038401142e-07, "loss": 0.0004, "step": 2995 }, { "epoch": 1.47, "logps_train/chosen": -81.80516815185547, "logps_train/ref_chosen": -68.5625, "logps_train/ref_rejected": -132.875, "logps_train/rejected": -313.50146484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.324022889137268, "rewards_train/margins": 16.737988829612732, "rewards_train/rejected": -18.06201171875, "step": 2995 }, { "epoch": 1.48, "learning_rate": 5.555199018624499e-07, "loss": 0.0, "step": 2996 }, { "epoch": 1.48, "logps_train/chosen": -79.96589660644531, "logps_train/ref_chosen": -67.5, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -303.74383544921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2448318004608154, "rewards_train/margins": 16.315390825271606, "rewards_train/rejected": -17.560222625732422, "step": 2996 }, { "epoch": 1.48, "learning_rate": 5.552501835282479e-07, "loss": 0.0, "step": 2997 }, { "epoch": 1.48, "logps_train/chosen": -75.84443664550781, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -293.8582763671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.27570378780365, "rewards_train/margins": 15.701823830604553, "rewards_train/rejected": -16.977527618408203, "step": 2997 }, { "epoch": 1.48, "learning_rate": 5.54980448916969e-07, "loss": 0.001, "step": 2998 }, { "epoch": 1.48, "logps_train/chosen": -74.55243682861328, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -121.5, "logps_train/rejected": -283.9672546386719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0761423110961914, "rewards_train/margins": 15.168726921081543, "rewards_train/rejected": -16.244869232177734, "step": 2998 }, { "epoch": 1.48, "learning_rate": 5.547106981080788e-07, "loss": 0.0, "step": 2999 }, { "epoch": 1.48, "logps_train/chosen": -76.23806762695312, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -303.9088439941406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1427028179168701, "rewards_train/margins": 16.652480363845825, "rewards_train/rejected": -17.795183181762695, "step": 2999 }, { "epoch": 1.48, "learning_rate": 5.544409311810479e-07, "loss": 0.0, "step": 3000 }, { "epoch": 1.48, "logps_train/chosen": -74.8760757446289, "logps_train/ref_chosen": -63.59375, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -303.72052001953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.128330111503601, "rewards_train/margins": 16.682979702949524, "rewards_train/rejected": -17.811309814453125, "step": 3000 }, { "epoch": 1.48, "learning_rate": 5.541711482153511e-07, "loss": 0.0, "step": 3001 }, { "epoch": 1.48, "logps_train/chosen": -80.62979125976562, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -299.7648620605469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6713780164718628, "rewards_train/margins": 16.20393669605255, "rewards_train/rejected": -17.875314712524414, "step": 3001 }, { "epoch": 1.48, "learning_rate": 5.539013492904687e-07, "loss": 0.0, "step": 3002 }, { "epoch": 1.48, "logps_train/chosen": -81.00912475585938, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -308.583740234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4259612560272217, "rewards_train/margins": 16.69598698616028, "rewards_train/rejected": -18.1219482421875, "step": 3002 }, { "epoch": 1.48, "learning_rate": 5.536315344858849e-07, "loss": 0.0, "step": 3003 }, { "epoch": 1.48, "logps_train/chosen": -81.01741027832031, "logps_train/ref_chosen": -68.75, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -309.2563781738281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2298166751861572, "rewards_train/margins": 16.665645360946655, "rewards_train/rejected": -17.895462036132812, "step": 3003 }, { "epoch": 1.48, "learning_rate": 5.533617038810892e-07, "loss": 0.0, "step": 3004 }, { "epoch": 1.48, "logps_train/chosen": -81.36286926269531, "logps_train/ref_chosen": -68.1875, "logps_train/ref_rejected": -132.25, "logps_train/rejected": -316.2684020996094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.317561388015747, "rewards_train/margins": 17.08955454826355, "rewards_train/rejected": -18.407115936279297, "step": 3004 }, { "epoch": 1.48, "learning_rate": 5.530918575555756e-07, "loss": 0.0001, "step": 3005 }, { "epoch": 1.48, "logps_train/chosen": -76.93453216552734, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -302.7272644042969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2052208185195923, "rewards_train/margins": 16.561014533042908, "rewards_train/rejected": -17.7662353515625, "step": 3005 }, { "epoch": 1.48, "learning_rate": 5.528219955888426e-07, "loss": 0.0, "step": 3006 }, { "epoch": 1.48, "logps_train/chosen": -72.23668670654297, "logps_train/ref_chosen": -62.84375, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -303.3812255859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.940710186958313, "rewards_train/margins": 16.95600688457489, "rewards_train/rejected": -17.896717071533203, "step": 3006 }, { "epoch": 1.48, "learning_rate": 5.52552118060393e-07, "loss": 0.0, "step": 3007 }, { "epoch": 1.48, "logps_train/chosen": -67.0151596069336, "logps_train/ref_chosen": -62.875, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -293.88116455078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.41460171341896057, "rewards_train/margins": 16.50412806868553, "rewards_train/rejected": -16.918729782104492, "step": 3007 }, { "epoch": 1.48, "learning_rate": 5.52282225049735e-07, "loss": 0.0, "step": 3008 }, { "epoch": 1.48, "logps_train/chosen": -73.23955535888672, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -303.4081726074219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.806255578994751, "rewards_train/margins": 16.43236517906189, "rewards_train/rejected": -17.23862075805664, "step": 3008 }, { "epoch": 1.48, "learning_rate": 5.520123166363806e-07, "loss": 0.0, "step": 3009 }, { "epoch": 1.48, "logps_train/chosen": -73.74119567871094, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -131.75, "logps_train/rejected": -309.6785583496094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7467759847640991, "rewards_train/margins": 17.043591141700745, "rewards_train/rejected": -17.790367126464844, "step": 3009 }, { "epoch": 1.48, "learning_rate": 5.517423928998467e-07, "loss": 0.0, "step": 3010 }, { "epoch": 1.48, "logps_train/chosen": -76.28648376464844, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -309.3116760253906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.234605312347412, "rewards_train/margins": 16.917269229888916, "rewards_train/rejected": -18.151874542236328, "step": 3010 }, { "epoch": 1.48, "learning_rate": 5.514724539196548e-07, "loss": 0.0, "step": 3011 }, { "epoch": 1.48, "logps_train/chosen": -69.37020874023438, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -287.4771423339844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6080654859542847, "rewards_train/margins": 15.694039463996887, "rewards_train/rejected": -16.302104949951172, "step": 3011 }, { "epoch": 1.48, "learning_rate": 5.512024997753306e-07, "loss": 0.0, "step": 3012 }, { "epoch": 1.48, "logps_train/chosen": -71.00794982910156, "logps_train/ref_chosen": -62.0625, "logps_train/ref_rejected": -118.0625, "logps_train/rejected": -277.18841552734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8934213519096375, "rewards_train/margins": 15.019948780536652, "rewards_train/rejected": -15.913370132446289, "step": 3012 }, { "epoch": 1.48, "learning_rate": 5.509325305464047e-07, "loss": 0.0001, "step": 3013 }, { "epoch": 1.48, "logps_train/chosen": -76.6315689086914, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -309.313232421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1203830242156982, "rewards_train/margins": 17.03057026863098, "rewards_train/rejected": -18.15095329284668, "step": 3013 }, { "epoch": 1.48, "learning_rate": 5.506625463124113e-07, "loss": 0.0001, "step": 3014 }, { "epoch": 1.48, "logps_train/chosen": -72.85064697265625, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -130.875, "logps_train/rejected": -306.98040771484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8119692802429199, "rewards_train/margins": 16.793055057525635, "rewards_train/rejected": -17.605024337768555, "step": 3014 }, { "epoch": 1.48, "learning_rate": 5.5039254715289e-07, "loss": 0.0, "step": 3015 }, { "epoch": 1.48, "logps_train/chosen": -71.36474609375, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -292.5015869140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7617923617362976, "rewards_train/margins": 15.993838131427765, "rewards_train/rejected": -16.755630493164062, "step": 3015 }, { "epoch": 1.48, "learning_rate": 5.501225331473843e-07, "loss": 0.0, "step": 3016 }, { "epoch": 1.48, "logps_train/chosen": -76.10005950927734, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -297.2369384765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0059044361114502, "rewards_train/margins": 16.11095118522644, "rewards_train/rejected": -17.11685562133789, "step": 3016 }, { "epoch": 1.49, "learning_rate": 5.498525043754422e-07, "loss": 0.0, "step": 3017 }, { "epoch": 1.49, "logps_train/chosen": -77.44454956054688, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -127.5625, "logps_train/rejected": -298.0039367675781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2269744873046875, "rewards_train/margins": 15.816680908203125, "rewards_train/rejected": -17.043655395507812, "step": 3017 }, { "epoch": 1.49, "learning_rate": 5.495824609166158e-07, "loss": 0.0, "step": 3018 }, { "epoch": 1.49, "logps_train/chosen": -72.74288940429688, "logps_train/ref_chosen": -61.84375, "logps_train/ref_rejected": -119.5, "logps_train/rejected": -283.82220458984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.090743899345398, "rewards_train/margins": 15.339035153388977, "rewards_train/rejected": -16.429779052734375, "step": 3018 }, { "epoch": 1.49, "learning_rate": 5.493124028504619e-07, "loss": 0.0001, "step": 3019 }, { "epoch": 1.49, "logps_train/chosen": -67.51615905761719, "logps_train/ref_chosen": -60.8125, "logps_train/ref_rejected": -117.5625, "logps_train/rejected": -282.929443359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6692425012588501, "rewards_train/margins": 15.865110754966736, "rewards_train/rejected": -16.534353256225586, "step": 3019 }, { "epoch": 1.49, "learning_rate": 5.490423302565413e-07, "loss": 0.0, "step": 3020 }, { "epoch": 1.49, "logps_train/chosen": -73.9119873046875, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -295.3203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9322138428688049, "rewards_train/margins": 16.008364617824554, "rewards_train/rejected": -16.94057846069336, "step": 3020 }, { "epoch": 1.49, "learning_rate": 5.487722432144192e-07, "loss": 0.0, "step": 3021 }, { "epoch": 1.49, "logps_train/chosen": -74.44184875488281, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -300.2120361328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9698683619499207, "rewards_train/margins": 16.429507553577423, "rewards_train/rejected": -17.399375915527344, "step": 3021 }, { "epoch": 1.49, "learning_rate": 5.485021418036653e-07, "loss": 0.0, "step": 3022 }, { "epoch": 1.49, "logps_train/chosen": -70.54607391357422, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -294.87677001953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7156667709350586, "rewards_train/margins": 16.320496559143066, "rewards_train/rejected": -17.036163330078125, "step": 3022 }, { "epoch": 1.49, "learning_rate": 5.482320261038533e-07, "loss": 0.0001, "step": 3023 }, { "epoch": 1.49, "logps_train/chosen": -72.27191162109375, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -298.9812927246094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7859808206558228, "rewards_train/margins": 16.545695662498474, "rewards_train/rejected": -17.331676483154297, "step": 3023 }, { "epoch": 1.49, "learning_rate": 5.479618961945606e-07, "loss": 0.0002, "step": 3024 }, { "epoch": 1.49, "logps_train/chosen": -74.55874633789062, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -290.75390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1005524396896362, "rewards_train/margins": 15.47532570362091, "rewards_train/rejected": -16.575878143310547, "step": 3024 }, { "epoch": 1.49, "learning_rate": 5.476917521553699e-07, "loss": 0.0, "step": 3025 }, { "epoch": 1.49, "logps_train/chosen": -71.48621368408203, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -295.5633850097656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7199346423149109, "rewards_train/margins": 16.27815419435501, "rewards_train/rejected": -16.998088836669922, "step": 3025 }, { "epoch": 1.49, "learning_rate": 5.474215940658671e-07, "loss": 0.0001, "step": 3026 }, { "epoch": 1.49, "logps_train/chosen": -73.08903503417969, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -292.7137451171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9714034795761108, "rewards_train/margins": 16.07316744327545, "rewards_train/rejected": -17.044570922851562, "step": 3026 }, { "epoch": 1.49, "learning_rate": 5.471514220056427e-07, "loss": 0.0, "step": 3027 }, { "epoch": 1.49, "logps_train/chosen": -75.42628479003906, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -300.336669921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9643086791038513, "rewards_train/margins": 16.157200872898102, "rewards_train/rejected": -17.121509552001953, "step": 3027 }, { "epoch": 1.49, "learning_rate": 5.468812360542912e-07, "loss": 0.0, "step": 3028 }, { "epoch": 1.49, "logps_train/chosen": -74.64940643310547, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -292.247314453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9591789245605469, "rewards_train/margins": 15.672731399536133, "rewards_train/rejected": -16.63191032409668, "step": 3028 }, { "epoch": 1.49, "learning_rate": 5.466110362914113e-07, "loss": 0.0, "step": 3029 }, { "epoch": 1.49, "logps_train/chosen": -73.67591857910156, "logps_train/ref_chosen": -62.84375, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -301.9637451171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0829731225967407, "rewards_train/margins": 16.53049123287201, "rewards_train/rejected": -17.61346435546875, "step": 3029 }, { "epoch": 1.49, "learning_rate": 5.463408227966054e-07, "loss": 0.0, "step": 3030 }, { "epoch": 1.49, "logps_train/chosen": -73.87831115722656, "logps_train/ref_chosen": -63.03125, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -296.5740051269531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.084559440612793, "rewards_train/margins": 16.134804725646973, "rewards_train/rejected": -17.219364166259766, "step": 3030 }, { "epoch": 1.49, "learning_rate": 5.460705956494806e-07, "loss": 0.0, "step": 3031 }, { "epoch": 1.49, "logps_train/chosen": -78.20744323730469, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -294.19769287109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2441813945770264, "rewards_train/margins": 15.540723085403442, "rewards_train/rejected": -16.78490447998047, "step": 3031 }, { "epoch": 1.49, "learning_rate": 5.458003549296475e-07, "loss": 0.0, "step": 3032 }, { "epoch": 1.49, "logps_train/chosen": -71.88531494140625, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -299.9135437011719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.851470947265625, "rewards_train/margins": 16.44310760498047, "rewards_train/rejected": -17.294578552246094, "step": 3032 }, { "epoch": 1.49, "learning_rate": 5.455301007167206e-07, "loss": 0.0, "step": 3033 }, { "epoch": 1.49, "logps_train/chosen": -74.18769836425781, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -119.875, "logps_train/rejected": -281.69439697265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0285600423812866, "rewards_train/margins": 15.153378129005432, "rewards_train/rejected": -16.18193817138672, "step": 3033 }, { "epoch": 1.49, "learning_rate": 5.45259833090319e-07, "loss": 0.0002, "step": 3034 }, { "epoch": 1.49, "logps_train/chosen": -71.04507446289062, "logps_train/ref_chosen": -62.0, "logps_train/ref_rejected": -120.5625, "logps_train/rejected": -284.9173583984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9061188697814941, "rewards_train/margins": 15.529658794403076, "rewards_train/rejected": -16.43577766418457, "step": 3034 }, { "epoch": 1.49, "learning_rate": 5.44989552130065e-07, "loss": 0.0, "step": 3035 }, { "epoch": 1.49, "logps_train/chosen": -74.22924041748047, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -304.8701477050781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7459705471992493, "rewards_train/margins": 16.806082904338837, "rewards_train/rejected": -17.552053451538086, "step": 3035 }, { "epoch": 1.49, "learning_rate": 5.447192579155853e-07, "loss": 0.0, "step": 3036 }, { "epoch": 1.49, "logps_train/chosen": -75.37483215332031, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -294.8138427734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0580400228500366, "rewards_train/margins": 15.801174759864807, "rewards_train/rejected": -16.859214782714844, "step": 3036 }, { "epoch": 1.5, "learning_rate": 5.444489505265106e-07, "loss": 0.0, "step": 3037 }, { "epoch": 1.5, "logps_train/chosen": -74.21952056884766, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -301.48193359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.069169044494629, "rewards_train/margins": 16.384100914001465, "rewards_train/rejected": -17.453269958496094, "step": 3037 }, { "epoch": 1.5, "learning_rate": 5.441786300424751e-07, "loss": 0.0002, "step": 3038 }, { "epoch": 1.5, "logps_train/chosen": -68.96857452392578, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -300.26751708984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5245431661605835, "rewards_train/margins": 16.91231596469879, "rewards_train/rejected": -17.436859130859375, "step": 3038 }, { "epoch": 1.5, "learning_rate": 5.439082965431171e-07, "loss": 0.0, "step": 3039 }, { "epoch": 1.5, "logps_train/chosen": -72.2557373046875, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -303.2959899902344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7780147790908813, "rewards_train/margins": 16.567453742027283, "rewards_train/rejected": -17.345468521118164, "step": 3039 }, { "epoch": 1.5, "learning_rate": 5.436379501080787e-07, "loss": 0.0, "step": 3040 }, { "epoch": 1.5, "logps_train/chosen": -74.38545227050781, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -290.28570556640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1015337705612183, "rewards_train/margins": 15.72996723651886, "rewards_train/rejected": -16.831501007080078, "step": 3040 }, { "epoch": 1.5, "learning_rate": 5.433675908170055e-07, "loss": 0.0, "step": 3041 }, { "epoch": 1.5, "logps_train/chosen": -68.87985229492188, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -123.3125, "logps_train/rejected": -283.3476257324219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5651342868804932, "rewards_train/margins": 15.437255620956421, "rewards_train/rejected": -16.002389907836914, "step": 3041 }, { "epoch": 1.5, "learning_rate": 5.430972187495477e-07, "loss": 0.0001, "step": 3042 }, { "epoch": 1.5, "logps_train/chosen": -74.4808349609375, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -296.4639892578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.12713623046875, "rewards_train/margins": 15.951343536376953, "rewards_train/rejected": -17.078479766845703, "step": 3042 }, { "epoch": 1.5, "learning_rate": 5.428268339853585e-07, "loss": 0.0001, "step": 3043 }, { "epoch": 1.5, "logps_train/chosen": -71.52796936035156, "logps_train/ref_chosen": -63.15625, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -295.7260437011719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8352676630020142, "rewards_train/margins": 16.180452704429626, "rewards_train/rejected": -17.01572036743164, "step": 3043 }, { "epoch": 1.5, "learning_rate": 5.425564366040947e-07, "loss": 0.0001, "step": 3044 }, { "epoch": 1.5, "logps_train/chosen": -74.45515441894531, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -297.54217529296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1147538423538208, "rewards_train/margins": 16.150989413261414, "rewards_train/rejected": -17.265743255615234, "step": 3044 }, { "epoch": 1.5, "learning_rate": 5.422860266854178e-07, "loss": 0.0, "step": 3045 }, { "epoch": 1.5, "logps_train/chosen": -80.47396850585938, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -131.75, "logps_train/rejected": -311.7347717285156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.378793478012085, "rewards_train/margins": 16.62114930152893, "rewards_train/rejected": -17.999942779541016, "step": 3045 }, { "epoch": 1.5, "learning_rate": 5.420156043089919e-07, "loss": 0.0, "step": 3046 }, { "epoch": 1.5, "logps_train/chosen": -76.26663970947266, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -310.8177795410156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9973664283752441, "rewards_train/margins": 17.130311489105225, "rewards_train/rejected": -18.12767791748047, "step": 3046 }, { "epoch": 1.5, "learning_rate": 5.417451695544855e-07, "loss": 0.0, "step": 3047 }, { "epoch": 1.5, "logps_train/chosen": -79.42799377441406, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -132.375, "logps_train/rejected": -309.5654602050781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.283326268196106, "rewards_train/margins": 16.438746333122253, "rewards_train/rejected": -17.72207260131836, "step": 3047 }, { "epoch": 1.5, "learning_rate": 5.414747225015707e-07, "loss": 0.0, "step": 3048 }, { "epoch": 1.5, "logps_train/chosen": -78.20292663574219, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -297.15203857421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3368451595306396, "rewards_train/margins": 15.601894617080688, "rewards_train/rejected": -16.938739776611328, "step": 3048 }, { "epoch": 1.5, "learning_rate": 5.412042632299227e-07, "loss": 0.0, "step": 3049 }, { "epoch": 1.5, "logps_train/chosen": -75.09754943847656, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -290.236572265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0114636421203613, "rewards_train/margins": 15.525325298309326, "rewards_train/rejected": -16.536788940429688, "step": 3049 }, { "epoch": 1.5, "learning_rate": 5.409337918192205e-07, "loss": 0.0001, "step": 3050 }, { "epoch": 1.5, "logps_train/chosen": -72.30329895019531, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -295.49188232421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8065999746322632, "rewards_train/margins": 16.264269471168518, "rewards_train/rejected": -17.07086944580078, "step": 3050 }, { "epoch": 1.5, "learning_rate": 5.406633083491471e-07, "loss": 0.0, "step": 3051 }, { "epoch": 1.5, "logps_train/chosen": -72.10537719726562, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -294.5986328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8566802740097046, "rewards_train/margins": 16.050450921058655, "rewards_train/rejected": -16.90713119506836, "step": 3051 }, { "epoch": 1.5, "learning_rate": 5.403928128993887e-07, "loss": 0.0001, "step": 3052 }, { "epoch": 1.5, "logps_train/chosen": -75.75326538085938, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -295.20806884765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0923678874969482, "rewards_train/margins": 16.12687611579895, "rewards_train/rejected": -17.2192440032959, "step": 3052 }, { "epoch": 1.5, "learning_rate": 5.40122305549635e-07, "loss": 0.0, "step": 3053 }, { "epoch": 1.5, "logps_train/chosen": -75.80624389648438, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -298.0381774902344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.36861252784729, "rewards_train/margins": 15.941159963607788, "rewards_train/rejected": -17.309772491455078, "step": 3053 }, { "epoch": 1.5, "learning_rate": 5.398517863795795e-07, "loss": 0.0, "step": 3054 }, { "epoch": 1.5, "logps_train/chosen": -73.12611389160156, "logps_train/ref_chosen": -62.8125, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -295.21337890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.032191514968872, "rewards_train/margins": 15.984459638595581, "rewards_train/rejected": -17.016651153564453, "step": 3054 }, { "epoch": 1.5, "learning_rate": 5.395812554689188e-07, "loss": 0.0, "step": 3055 }, { "epoch": 1.5, "logps_train/chosen": -67.46430206298828, "logps_train/ref_chosen": -62.4375, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -286.683837890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5030221343040466, "rewards_train/margins": 15.889338552951813, "rewards_train/rejected": -16.39236068725586, "step": 3055 }, { "epoch": 1.5, "learning_rate": 5.393107128973531e-07, "loss": 0.0, "step": 3056 }, { "epoch": 1.5, "logps_train/chosen": -74.70835876464844, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -302.37548828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9598011374473572, "rewards_train/margins": 16.508800089359283, "rewards_train/rejected": -17.46860122680664, "step": 3056 }, { "epoch": 1.51, "learning_rate": 5.39040158744586e-07, "loss": 0.0, "step": 3057 }, { "epoch": 1.51, "logps_train/chosen": -71.59992218017578, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -295.3903503417969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6417309045791626, "rewards_train/margins": 16.40155255794525, "rewards_train/rejected": -17.043283462524414, "step": 3057 }, { "epoch": 1.51, "learning_rate": 5.387695930903249e-07, "loss": 0.0002, "step": 3058 }, { "epoch": 1.51, "logps_train/chosen": -73.60265350341797, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -118.25, "logps_train/rejected": -285.5079650878906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0975216627120972, "rewards_train/margins": 15.630375981330872, "rewards_train/rejected": -16.72789764404297, "step": 3058 }, { "epoch": 1.51, "learning_rate": 5.3849901601428e-07, "loss": 0.0001, "step": 3059 }, { "epoch": 1.51, "logps_train/chosen": -73.84497833251953, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -307.68963623046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0571540594100952, "rewards_train/margins": 17.00136148929596, "rewards_train/rejected": -18.058515548706055, "step": 3059 }, { "epoch": 1.51, "learning_rate": 5.382284275961652e-07, "loss": 0.0, "step": 3060 }, { "epoch": 1.51, "logps_train/chosen": -74.09309387207031, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -308.24835205078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0305736064910889, "rewards_train/margins": 16.829270601272583, "rewards_train/rejected": -17.859844207763672, "step": 3060 }, { "epoch": 1.51, "learning_rate": 5.379578279156976e-07, "loss": 0.0001, "step": 3061 }, { "epoch": 1.51, "logps_train/chosen": -70.4588623046875, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -296.721923828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7411497235298157, "rewards_train/margins": 16.543689906597137, "rewards_train/rejected": -17.284839630126953, "step": 3061 }, { "epoch": 1.51, "learning_rate": 5.376872170525977e-07, "loss": 0.0, "step": 3062 }, { "epoch": 1.51, "logps_train/chosen": -75.88682556152344, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -310.89227294921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1042101383209229, "rewards_train/margins": 16.916606664657593, "rewards_train/rejected": -18.020816802978516, "step": 3062 }, { "epoch": 1.51, "learning_rate": 5.374165950865896e-07, "loss": 0.0001, "step": 3063 }, { "epoch": 1.51, "logps_train/chosen": -75.61492919921875, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -309.07470703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9224799275398254, "rewards_train/margins": 16.954425513744354, "rewards_train/rejected": -17.87690544128418, "step": 3063 }, { "epoch": 1.51, "learning_rate": 5.371459620974e-07, "loss": 0.0, "step": 3064 }, { "epoch": 1.51, "logps_train/chosen": -72.23789978027344, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -122.6875, "logps_train/rejected": -291.8834228515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7840441465377808, "rewards_train/margins": 16.133304715156555, "rewards_train/rejected": -16.917348861694336, "step": 3064 }, { "epoch": 1.51, "learning_rate": 5.368753181647593e-07, "loss": 0.0, "step": 3065 }, { "epoch": 1.51, "logps_train/chosen": -67.84239196777344, "logps_train/ref_chosen": -61.0625, "logps_train/ref_rejected": -122.1875, "logps_train/rejected": -282.18499755859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.678820013999939, "rewards_train/margins": 15.31887972354889, "rewards_train/rejected": -15.997699737548828, "step": 3065 }, { "epoch": 1.51, "learning_rate": 5.366046633684012e-07, "loss": 0.0, "step": 3066 }, { "epoch": 1.51, "logps_train/chosen": -71.43512725830078, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -296.407470703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8093814849853516, "rewards_train/margins": 16.20177459716797, "rewards_train/rejected": -17.01115608215332, "step": 3066 }, { "epoch": 1.51, "learning_rate": 5.363339977880619e-07, "loss": 0.0002, "step": 3067 }, { "epoch": 1.51, "logps_train/chosen": -74.33905029296875, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -293.35357666015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9246770739555359, "rewards_train/margins": 15.920739948749542, "rewards_train/rejected": -16.845417022705078, "step": 3067 }, { "epoch": 1.51, "learning_rate": 5.360633215034819e-07, "loss": 0.0001, "step": 3068 }, { "epoch": 1.51, "logps_train/chosen": -73.49502563476562, "logps_train/ref_chosen": -60.0, "logps_train/ref_rejected": -119.4375, "logps_train/rejected": -283.37506103515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3506746292114258, "rewards_train/margins": 15.04430103302002, "rewards_train/rejected": -16.394975662231445, "step": 3068 }, { "epoch": 1.51, "learning_rate": 5.357926345944041e-07, "loss": 0.0, "step": 3069 }, { "epoch": 1.51, "logps_train/chosen": -76.3420639038086, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -295.7842102050781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1355737447738647, "rewards_train/margins": 15.976881623268127, "rewards_train/rejected": -17.112455368041992, "step": 3069 }, { "epoch": 1.51, "learning_rate": 5.355219371405745e-07, "loss": 0.0001, "step": 3070 }, { "epoch": 1.51, "logps_train/chosen": -75.394287109375, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -301.6773376464844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0894770622253418, "rewards_train/margins": 16.27811098098755, "rewards_train/rejected": -17.36758804321289, "step": 3070 }, { "epoch": 1.51, "learning_rate": 5.352512292217426e-07, "loss": 0.0, "step": 3071 }, { "epoch": 1.51, "logps_train/chosen": -70.75175476074219, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -287.9137268066406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6976847648620605, "rewards_train/margins": 15.799889087677002, "rewards_train/rejected": -16.497573852539062, "step": 3071 }, { "epoch": 1.51, "learning_rate": 5.349805109176609e-07, "loss": 0.0001, "step": 3072 }, { "epoch": 1.51, "logps_train/chosen": -63.82147979736328, "logps_train/ref_chosen": -61.96875, "logps_train/ref_rejected": -120.125, "logps_train/rejected": -283.9897766113281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.18478485941886902, "rewards_train/margins": 16.20227912068367, "rewards_train/rejected": -16.38706398010254, "step": 3072 }, { "epoch": 1.51, "learning_rate": 5.347097823080842e-07, "loss": 0.0, "step": 3073 }, { "epoch": 1.51, "logps_train/chosen": -69.19862365722656, "logps_train/ref_chosen": -62.8125, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -298.6640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6369515061378479, "rewards_train/margins": 16.692150056362152, "rewards_train/rejected": -17.3291015625, "step": 3073 }, { "epoch": 1.51, "learning_rate": 5.344390434727717e-07, "loss": 0.0, "step": 3074 }, { "epoch": 1.51, "logps_train/chosen": -68.99765014648438, "logps_train/ref_chosen": -60.9375, "logps_train/ref_rejected": -118.0, "logps_train/rejected": -289.96112060546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8077245950698853, "rewards_train/margins": 16.384873747825623, "rewards_train/rejected": -17.192598342895508, "step": 3074 }, { "epoch": 1.51, "learning_rate": 5.341682944914846e-07, "loss": 0.0001, "step": 3075 }, { "epoch": 1.51, "logps_train/chosen": -71.52500915527344, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -122.125, "logps_train/rejected": -289.60205078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9199811816215515, "rewards_train/margins": 15.827136814594269, "rewards_train/rejected": -16.74711799621582, "step": 3075 }, { "epoch": 1.51, "learning_rate": 5.338975354439873e-07, "loss": 0.0, "step": 3076 }, { "epoch": 1.51, "logps_train/chosen": -73.5258560180664, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -301.3034973144531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8580545783042908, "rewards_train/margins": 16.847199022769928, "rewards_train/rejected": -17.70525360107422, "step": 3076 }, { "epoch": 1.52, "learning_rate": 5.336267664100474e-07, "loss": 0.0, "step": 3077 }, { "epoch": 1.52, "logps_train/chosen": -75.91650390625, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -300.75421142578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.067090392112732, "rewards_train/margins": 16.217465043067932, "rewards_train/rejected": -17.284555435180664, "step": 3077 }, { "epoch": 1.52, "learning_rate": 5.333559874694351e-07, "loss": 0.0, "step": 3078 }, { "epoch": 1.52, "logps_train/chosen": -73.20075225830078, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -288.55322265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9587468504905701, "rewards_train/margins": 15.572114050388336, "rewards_train/rejected": -16.530860900878906, "step": 3078 }, { "epoch": 1.52, "learning_rate": 5.330851987019239e-07, "loss": 0.0001, "step": 3079 }, { "epoch": 1.52, "logps_train/chosen": -71.9898681640625, "logps_train/ref_chosen": -63.03125, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -292.55078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8965939879417419, "rewards_train/margins": 16.05169779062271, "rewards_train/rejected": -16.948291778564453, "step": 3079 }, { "epoch": 1.52, "learning_rate": 5.328144001872899e-07, "loss": 0.0, "step": 3080 }, { "epoch": 1.52, "logps_train/chosen": -74.35261535644531, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -292.7928161621094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0361649990081787, "rewards_train/margins": 15.798338174819946, "rewards_train/rejected": -16.834503173828125, "step": 3080 }, { "epoch": 1.52, "learning_rate": 5.325435920053123e-07, "loss": 0.0, "step": 3081 }, { "epoch": 1.52, "logps_train/chosen": -73.57662200927734, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -120.4375, "logps_train/rejected": -289.6429748535156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1200647354125977, "rewards_train/margins": 15.801020622253418, "rewards_train/rejected": -16.921085357666016, "step": 3081 }, { "epoch": 1.52, "learning_rate": 5.322727742357727e-07, "loss": 0.0001, "step": 3082 }, { "epoch": 1.52, "logps_train/chosen": -71.64064025878906, "logps_train/ref_chosen": -62.59375, "logps_train/ref_rejected": -122.1875, "logps_train/rejected": -291.95361328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9060080051422119, "rewards_train/margins": 16.070459127426147, "rewards_train/rejected": -16.97646713256836, "step": 3082 }, { "epoch": 1.52, "learning_rate": 5.320019469584561e-07, "loss": 0.0001, "step": 3083 }, { "epoch": 1.52, "logps_train/chosen": -79.34774017333984, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -301.966552734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.322323203086853, "rewards_train/margins": 16.134685158729553, "rewards_train/rejected": -17.457008361816406, "step": 3083 }, { "epoch": 1.52, "learning_rate": 5.317311102531501e-07, "loss": 0.0, "step": 3084 }, { "epoch": 1.52, "logps_train/chosen": -76.56241607666016, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -292.300537109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1507728099822998, "rewards_train/margins": 15.65931248664856, "rewards_train/rejected": -16.81008529663086, "step": 3084 }, { "epoch": 1.52, "learning_rate": 5.314602641996447e-07, "loss": 0.0, "step": 3085 }, { "epoch": 1.52, "logps_train/chosen": -75.44711303710938, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -292.61932373046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0653165578842163, "rewards_train/margins": 15.762339234352112, "rewards_train/rejected": -16.827655792236328, "step": 3085 }, { "epoch": 1.52, "learning_rate": 5.311894088777333e-07, "loss": 0.0002, "step": 3086 }, { "epoch": 1.52, "logps_train/chosen": -75.42405700683594, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -305.7237548828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1306631565093994, "rewards_train/margins": 16.581945180892944, "rewards_train/rejected": -17.712608337402344, "step": 3086 }, { "epoch": 1.52, "learning_rate": 5.309185443672117e-07, "loss": 0.0, "step": 3087 }, { "epoch": 1.52, "logps_train/chosen": -74.41583251953125, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -295.54156494140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1204901933670044, "rewards_train/margins": 15.84621512889862, "rewards_train/rejected": -16.966705322265625, "step": 3087 }, { "epoch": 1.52, "learning_rate": 5.30647670747878e-07, "loss": 0.0004, "step": 3088 }, { "epoch": 1.52, "logps_train/chosen": -74.66287994384766, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -295.44580078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0991004705429077, "rewards_train/margins": 15.968137383460999, "rewards_train/rejected": -17.067237854003906, "step": 3088 }, { "epoch": 1.52, "learning_rate": 5.303767880995339e-07, "loss": 0.0, "step": 3089 }, { "epoch": 1.52, "logps_train/chosen": -76.78559875488281, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -308.83062744140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1660106182098389, "rewards_train/margins": 16.82467007637024, "rewards_train/rejected": -17.990680694580078, "step": 3089 }, { "epoch": 1.52, "learning_rate": 5.30105896501983e-07, "loss": 0.0001, "step": 3090 }, { "epoch": 1.52, "logps_train/chosen": -78.23255920410156, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -301.32635498046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.388783574104309, "rewards_train/margins": 16.262015223503113, "rewards_train/rejected": -17.650798797607422, "step": 3090 }, { "epoch": 1.52, "learning_rate": 5.298349960350319e-07, "loss": 0.0, "step": 3091 }, { "epoch": 1.52, "logps_train/chosen": -75.4026870727539, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -304.60809326171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1641943454742432, "rewards_train/margins": 16.788705587387085, "rewards_train/rejected": -17.952899932861328, "step": 3091 }, { "epoch": 1.52, "learning_rate": 5.295640867784897e-07, "loss": 0.0, "step": 3092 }, { "epoch": 1.52, "logps_train/chosen": -70.37527465820312, "logps_train/ref_chosen": -62.15625, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -290.7520751953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8213168978691101, "rewards_train/margins": 16.01736718416214, "rewards_train/rejected": -16.83868408203125, "step": 3092 }, { "epoch": 1.52, "learning_rate": 5.292931688121679e-07, "loss": 0.0001, "step": 3093 }, { "epoch": 1.52, "logps_train/chosen": -78.5833740234375, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -301.76019287109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4170293807983398, "rewards_train/margins": 16.093613624572754, "rewards_train/rejected": -17.510643005371094, "step": 3093 }, { "epoch": 1.52, "learning_rate": 5.29022242215881e-07, "loss": 0.0004, "step": 3094 }, { "epoch": 1.52, "logps_train/chosen": -71.258056640625, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -290.2945556640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.737963855266571, "rewards_train/margins": 16.03719598054886, "rewards_train/rejected": -16.77515983581543, "step": 3094 }, { "epoch": 1.52, "learning_rate": 5.287513070694457e-07, "loss": 0.0001, "step": 3095 }, { "epoch": 1.52, "logps_train/chosen": -78.4071044921875, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -311.1475830078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1647334098815918, "rewards_train/margins": 17.056665897369385, "rewards_train/rejected": -18.221399307250977, "step": 3095 }, { "epoch": 1.52, "learning_rate": 5.284803634526812e-07, "loss": 0.0, "step": 3096 }, { "epoch": 1.52, "logps_train/chosen": -78.26624298095703, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -310.64349365234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2049448490142822, "rewards_train/margins": 16.844075918197632, "rewards_train/rejected": -18.049020767211914, "step": 3096 }, { "epoch": 1.52, "learning_rate": 5.282094114454097e-07, "loss": 0.0, "step": 3097 }, { "epoch": 1.52, "logps_train/chosen": -79.46599578857422, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -306.48297119140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4132009744644165, "rewards_train/margins": 16.33871018886566, "rewards_train/rejected": -17.751911163330078, "step": 3097 }, { "epoch": 1.53, "learning_rate": 5.279384511274551e-07, "loss": 0.0, "step": 3098 }, { "epoch": 1.53, "logps_train/chosen": -72.49763488769531, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -298.3333740234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7855542302131653, "rewards_train/margins": 16.588749587535858, "rewards_train/rejected": -17.374303817749023, "step": 3098 }, { "epoch": 1.53, "learning_rate": 5.279384511274551e-07, "loss": 0.0015, "step": 3099 }, { "epoch": 1.53, "logps_train/chosen": -74.21502685546875, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -305.15423583984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9423031806945801, "rewards_train/margins": 16.904563426971436, "rewards_train/rejected": -17.846866607666016, "step": 3099 }, { "epoch": 1.53, "learning_rate": 5.27667482578644e-07, "loss": 0.0, "step": 3100 }, { "epoch": 1.53, "logps_train/chosen": -72.37098693847656, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -300.3395080566406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7664934992790222, "rewards_train/margins": 16.540017426013947, "rewards_train/rejected": -17.30651092529297, "step": 3100 }, { "epoch": 1.53, "learning_rate": 5.273965058788063e-07, "loss": 0.0, "step": 3101 }, { "epoch": 1.53, "logps_train/chosen": -76.93672180175781, "logps_train/ref_chosen": -62.65625, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -293.3022766113281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4292681217193604, "rewards_train/margins": 15.565757513046265, "rewards_train/rejected": -16.995025634765625, "step": 3101 }, { "epoch": 1.53, "learning_rate": 5.271255211077729e-07, "loss": 0.0, "step": 3102 }, { "epoch": 1.53, "logps_train/chosen": -76.29000854492188, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -300.30535888671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9397183656692505, "rewards_train/margins": 16.28007686138153, "rewards_train/rejected": -17.21979522705078, "step": 3102 }, { "epoch": 1.53, "learning_rate": 5.268545283453779e-07, "loss": 0.0, "step": 3103 }, { "epoch": 1.53, "logps_train/chosen": -71.5550308227539, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -295.049072265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7222995162010193, "rewards_train/margins": 16.41322523355484, "rewards_train/rejected": -17.13552474975586, "step": 3103 }, { "epoch": 1.53, "learning_rate": 5.265835276714578e-07, "loss": 0.0001, "step": 3104 }, { "epoch": 1.53, "logps_train/chosen": -74.57176208496094, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -289.8418273925781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.24482262134552, "rewards_train/margins": 15.59394919872284, "rewards_train/rejected": -16.83877182006836, "step": 3104 }, { "epoch": 1.53, "learning_rate": 5.263125191658509e-07, "loss": 0.0001, "step": 3105 }, { "epoch": 1.53, "logps_train/chosen": -72.64054870605469, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -293.27752685546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8451336622238159, "rewards_train/margins": 16.11630928516388, "rewards_train/rejected": -16.961442947387695, "step": 3105 }, { "epoch": 1.53, "learning_rate": 5.260415029083982e-07, "loss": 0.0001, "step": 3106 }, { "epoch": 1.53, "logps_train/chosen": -72.60618591308594, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -296.25494384765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.913499653339386, "rewards_train/margins": 16.358383357524872, "rewards_train/rejected": -17.271883010864258, "step": 3106 }, { "epoch": 1.53, "learning_rate": 5.257704789789434e-07, "loss": 0.0005, "step": 3107 }, { "epoch": 1.53, "logps_train/chosen": -74.15087890625, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -130.625, "logps_train/rejected": -302.0791015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8170900344848633, "rewards_train/margins": 16.324515342712402, "rewards_train/rejected": -17.141605377197266, "step": 3107 }, { "epoch": 1.53, "learning_rate": 5.254994474573313e-07, "loss": 0.0, "step": 3108 }, { "epoch": 1.53, "logps_train/chosen": -67.33040618896484, "logps_train/ref_chosen": -61.34375, "logps_train/ref_rejected": -122.8125, "logps_train/rejected": -291.50189208984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.597542405128479, "rewards_train/margins": 16.27413308620453, "rewards_train/rejected": -16.871675491333008, "step": 3108 }, { "epoch": 1.53, "learning_rate": 5.2522840842341e-07, "loss": 0.0, "step": 3109 }, { "epoch": 1.53, "logps_train/chosen": -79.6935806274414, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -305.59454345703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3400124311447144, "rewards_train/margins": 16.434577107429504, "rewards_train/rejected": -17.77458953857422, "step": 3109 }, { "epoch": 1.53, "learning_rate": 5.249573619570294e-07, "loss": 0.0001, "step": 3110 }, { "epoch": 1.53, "logps_train/chosen": -71.23760223388672, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -305.42388916015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6909477710723877, "rewards_train/margins": 16.85964274406433, "rewards_train/rejected": -17.55059051513672, "step": 3110 }, { "epoch": 1.53, "learning_rate": 5.246863081380415e-07, "loss": 0.0, "step": 3111 }, { "epoch": 1.53, "logps_train/chosen": -78.16558837890625, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -300.9420471191406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2279363870620728, "rewards_train/margins": 16.48741090297699, "rewards_train/rejected": -17.715347290039062, "step": 3111 }, { "epoch": 1.53, "learning_rate": 5.244152470463006e-07, "loss": 0.0002, "step": 3112 }, { "epoch": 1.53, "logps_train/chosen": -78.6888427734375, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -301.4097900390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4426629543304443, "rewards_train/margins": 16.4371817111969, "rewards_train/rejected": -17.879844665527344, "step": 3112 }, { "epoch": 1.53, "learning_rate": 5.241441787616632e-07, "loss": 0.0, "step": 3113 }, { "epoch": 1.53, "logps_train/chosen": -71.17835998535156, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -120.125, "logps_train/rejected": -289.026123046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8206676244735718, "rewards_train/margins": 16.070224404335022, "rewards_train/rejected": -16.890892028808594, "step": 3113 }, { "epoch": 1.53, "learning_rate": 5.238731033639879e-07, "loss": 0.0, "step": 3114 }, { "epoch": 1.53, "logps_train/chosen": -73.90206909179688, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -306.9971618652344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7465550899505615, "rewards_train/margins": 17.25364899635315, "rewards_train/rejected": -18.00020408630371, "step": 3114 }, { "epoch": 1.53, "learning_rate": 5.236020209331352e-07, "loss": 0.0001, "step": 3115 }, { "epoch": 1.53, "logps_train/chosen": -75.65577697753906, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -118.5, "logps_train/rejected": -292.4498291015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3536152839660645, "rewards_train/margins": 16.044785976409912, "rewards_train/rejected": -17.398401260375977, "step": 3115 }, { "epoch": 1.53, "learning_rate": 5.233309315489678e-07, "loss": 0.0, "step": 3116 }, { "epoch": 1.53, "logps_train/chosen": -80.51898956298828, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -312.29901123046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4029730558395386, "rewards_train/margins": 16.968433499336243, "rewards_train/rejected": -18.37140655517578, "step": 3116 }, { "epoch": 1.53, "learning_rate": 5.230598352913505e-07, "loss": 0.0, "step": 3117 }, { "epoch": 1.53, "logps_train/chosen": -69.66383361816406, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -119.875, "logps_train/rejected": -282.0736999511719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6306415796279907, "rewards_train/margins": 15.589521765708923, "rewards_train/rejected": -16.220163345336914, "step": 3117 }, { "epoch": 1.54, "learning_rate": 5.227887322401503e-07, "loss": 0.0, "step": 3118 }, { "epoch": 1.54, "logps_train/chosen": -72.9344482421875, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -293.27667236328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8421266078948975, "rewards_train/margins": 15.905069589614868, "rewards_train/rejected": -16.747196197509766, "step": 3118 }, { "epoch": 1.54, "learning_rate": 5.225176224752358e-07, "loss": 0.0001, "step": 3119 }, { "epoch": 1.54, "logps_train/chosen": -78.37238311767578, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -293.4942321777344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3574528694152832, "rewards_train/margins": 15.892703533172607, "rewards_train/rejected": -17.25015640258789, "step": 3119 }, { "epoch": 1.54, "learning_rate": 5.222465060764776e-07, "loss": 0.0002, "step": 3120 }, { "epoch": 1.54, "logps_train/chosen": -74.71453857421875, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -119.25, "logps_train/rejected": -284.53955078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1107358932495117, "rewards_train/margins": 15.419537544250488, "rewards_train/rejected": -16.5302734375, "step": 3120 }, { "epoch": 1.54, "learning_rate": 5.219753831237487e-07, "loss": 0.0, "step": 3121 }, { "epoch": 1.54, "logps_train/chosen": -77.032958984375, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -305.1519470214844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3056881427764893, "rewards_train/margins": 16.44271159172058, "rewards_train/rejected": -17.74839973449707, "step": 3121 }, { "epoch": 1.54, "learning_rate": 5.217042536969238e-07, "loss": 0.0, "step": 3122 }, { "epoch": 1.54, "logps_train/chosen": -72.35386657714844, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -121.5625, "logps_train/rejected": -298.3011169433594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8046733140945435, "rewards_train/margins": 16.868307948112488, "rewards_train/rejected": -17.67298126220703, "step": 3122 }, { "epoch": 1.54, "learning_rate": 5.21433117875879e-07, "loss": 0.0, "step": 3123 }, { "epoch": 1.54, "logps_train/chosen": -76.00130462646484, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -309.6854248046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.039681315422058, "rewards_train/margins": 16.91313660144806, "rewards_train/rejected": -17.952817916870117, "step": 3123 }, { "epoch": 1.54, "learning_rate": 5.211619757404933e-07, "loss": 0.0, "step": 3124 }, { "epoch": 1.54, "logps_train/chosen": -74.79449462890625, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -308.2337646484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9292054176330566, "rewards_train/margins": 16.850030422210693, "rewards_train/rejected": -17.77923583984375, "step": 3124 }, { "epoch": 1.54, "learning_rate": 5.208908273706467e-07, "loss": 0.0, "step": 3125 }, { "epoch": 1.54, "logps_train/chosen": -83.0631103515625, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -302.2210998535156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6153936386108398, "rewards_train/margins": 16.11668109893799, "rewards_train/rejected": -17.732074737548828, "step": 3125 }, { "epoch": 1.54, "learning_rate": 5.206196728462212e-07, "loss": 0.0, "step": 3126 }, { "epoch": 1.54, "logps_train/chosen": -71.49913787841797, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -304.7198486328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7792347073554993, "rewards_train/margins": 17.10954588651657, "rewards_train/rejected": -17.88878059387207, "step": 3126 }, { "epoch": 1.54, "learning_rate": 5.203485122471012e-07, "loss": 0.0, "step": 3127 }, { "epoch": 1.54, "logps_train/chosen": -78.85379028320312, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -301.29833984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.377859354019165, "rewards_train/margins": 16.239476919174194, "rewards_train/rejected": -17.61733627319336, "step": 3127 }, { "epoch": 1.54, "learning_rate": 5.200773456531721e-07, "loss": 0.0, "step": 3128 }, { "epoch": 1.54, "logps_train/chosen": -71.14312744140625, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -294.05828857421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7144101858139038, "rewards_train/margins": 16.10997450351715, "rewards_train/rejected": -16.824384689331055, "step": 3128 }, { "epoch": 1.54, "learning_rate": 5.198061731443214e-07, "loss": 0.0001, "step": 3129 }, { "epoch": 1.54, "logps_train/chosen": -67.90116882324219, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -299.06158447265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4236123263835907, "rewards_train/margins": 17.03782108426094, "rewards_train/rejected": -17.46143341064453, "step": 3129 }, { "epoch": 1.54, "learning_rate": 5.195349948004386e-07, "loss": 0.0, "step": 3130 }, { "epoch": 1.54, "logps_train/chosen": -77.94573974609375, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -306.7179260253906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3750178813934326, "rewards_train/margins": 16.39281964302063, "rewards_train/rejected": -17.767837524414062, "step": 3130 }, { "epoch": 1.54, "learning_rate": 5.192638107014144e-07, "loss": 0.0, "step": 3131 }, { "epoch": 1.54, "logps_train/chosen": -74.09974670410156, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -122.6875, "logps_train/rejected": -291.974365234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9822162389755249, "rewards_train/margins": 15.945104241371155, "rewards_train/rejected": -16.92732048034668, "step": 3131 }, { "epoch": 1.54, "learning_rate": 5.189926209271415e-07, "loss": 0.0, "step": 3132 }, { "epoch": 1.54, "logps_train/chosen": -72.2325439453125, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -297.82073974609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7088012099266052, "rewards_train/margins": 16.60515981912613, "rewards_train/rejected": -17.313961029052734, "step": 3132 }, { "epoch": 1.54, "learning_rate": 5.187214255575146e-07, "loss": 0.0001, "step": 3133 }, { "epoch": 1.54, "logps_train/chosen": -73.3211669921875, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -305.0773010253906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0141475200653076, "rewards_train/margins": 16.826884508132935, "rewards_train/rejected": -17.841032028198242, "step": 3133 }, { "epoch": 1.54, "learning_rate": 5.184502246724291e-07, "loss": 0.0001, "step": 3134 }, { "epoch": 1.54, "logps_train/chosen": -74.2900390625, "logps_train/ref_chosen": -67.875, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -303.9140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6428223848342896, "rewards_train/margins": 16.857471346855164, "rewards_train/rejected": -17.500293731689453, "step": 3134 }, { "epoch": 1.54, "learning_rate": 5.18179018351783e-07, "loss": 0.0, "step": 3135 }, { "epoch": 1.54, "logps_train/chosen": -78.95890045166016, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -306.6099853515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2454016208648682, "rewards_train/margins": 16.498117208480835, "rewards_train/rejected": -17.743518829345703, "step": 3135 }, { "epoch": 1.54, "learning_rate": 5.179078066754756e-07, "loss": 0.0, "step": 3136 }, { "epoch": 1.54, "logps_train/chosen": -74.14356231689453, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -301.36138916015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.013477087020874, "rewards_train/margins": 16.213825464248657, "rewards_train/rejected": -17.22730255126953, "step": 3136 }, { "epoch": 1.54, "learning_rate": 5.176365897234073e-07, "loss": 0.0, "step": 3137 }, { "epoch": 1.54, "logps_train/chosen": -77.80284118652344, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -296.7799072265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3489367961883545, "rewards_train/margins": 15.783204317092896, "rewards_train/rejected": -17.13214111328125, "step": 3137 }, { "epoch": 1.55, "learning_rate": 5.173653675754807e-07, "loss": 0.0, "step": 3138 }, { "epoch": 1.55, "logps_train/chosen": -73.7066650390625, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -298.3566589355469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9311158657073975, "rewards_train/margins": 16.367733240127563, "rewards_train/rejected": -17.29884910583496, "step": 3138 }, { "epoch": 1.55, "learning_rate": 5.170941403115997e-07, "loss": 0.0, "step": 3139 }, { "epoch": 1.55, "logps_train/chosen": -73.56575012207031, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -304.42791748046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0509599447250366, "rewards_train/margins": 16.887001633644104, "rewards_train/rejected": -17.93796157836914, "step": 3139 }, { "epoch": 1.55, "learning_rate": 5.168229080116696e-07, "loss": 0.0, "step": 3140 }, { "epoch": 1.55, "logps_train/chosen": -72.04141998291016, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -127.5625, "logps_train/rejected": -311.0226745605469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8979412317276001, "rewards_train/margins": 17.446903586387634, "rewards_train/rejected": -18.344844818115234, "step": 3140 }, { "epoch": 1.55, "learning_rate": 5.165516707555974e-07, "loss": 0.0, "step": 3141 }, { "epoch": 1.55, "logps_train/chosen": -74.23736572265625, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -300.11981201171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8778864741325378, "rewards_train/margins": 16.43155413866043, "rewards_train/rejected": -17.30944061279297, "step": 3141 }, { "epoch": 1.55, "learning_rate": 5.16280428623291e-07, "loss": 0.0, "step": 3142 }, { "epoch": 1.55, "logps_train/chosen": -73.87427520751953, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -304.1092529296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9939212799072266, "rewards_train/margins": 16.850210189819336, "rewards_train/rejected": -17.844131469726562, "step": 3142 }, { "epoch": 1.55, "learning_rate": 5.160091816946607e-07, "loss": 0.0, "step": 3143 }, { "epoch": 1.55, "logps_train/chosen": -74.5519027709961, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -299.70123291015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2051897048950195, "rewards_train/margins": 16.44203472137451, "rewards_train/rejected": -17.64722442626953, "step": 3143 }, { "epoch": 1.55, "learning_rate": 5.157379300496174e-07, "loss": 0.0, "step": 3144 }, { "epoch": 1.55, "logps_train/chosen": -78.76676177978516, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -131.625, "logps_train/rejected": -309.41949462890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.227384328842163, "rewards_train/margins": 16.557728052139282, "rewards_train/rejected": -17.785112380981445, "step": 3144 }, { "epoch": 1.55, "learning_rate": 5.154666737680737e-07, "loss": 0.0, "step": 3145 }, { "epoch": 1.55, "logps_train/chosen": -69.51344299316406, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -290.3603515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5297138094902039, "rewards_train/margins": 16.044993221759796, "rewards_train/rejected": -16.57470703125, "step": 3145 }, { "epoch": 1.55, "learning_rate": 5.151954129299436e-07, "loss": 0.0001, "step": 3146 }, { "epoch": 1.55, "logps_train/chosen": -72.95228576660156, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -309.0725402832031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9209609627723694, "rewards_train/margins": 17.178089559078217, "rewards_train/rejected": -18.099050521850586, "step": 3146 }, { "epoch": 1.55, "learning_rate": 5.149241476151423e-07, "loss": 0.0, "step": 3147 }, { "epoch": 1.55, "logps_train/chosen": -75.70857238769531, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -302.60296630859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1007896661758423, "rewards_train/margins": 16.451305747032166, "rewards_train/rejected": -17.552095413208008, "step": 3147 }, { "epoch": 1.55, "learning_rate": 5.146528779035863e-07, "loss": 0.0, "step": 3148 }, { "epoch": 1.55, "logps_train/chosen": -70.72528076171875, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -296.89495849609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.461443692445755, "rewards_train/margins": 16.59025689959526, "rewards_train/rejected": -17.051700592041016, "step": 3148 }, { "epoch": 1.55, "learning_rate": 5.143816038751937e-07, "loss": 0.0, "step": 3149 }, { "epoch": 1.55, "logps_train/chosen": -74.20352172851562, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -305.7796630859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7793366312980652, "rewards_train/margins": 16.869238197803497, "rewards_train/rejected": -17.648574829101562, "step": 3149 }, { "epoch": 1.55, "learning_rate": 5.141103256098836e-07, "loss": 0.0, "step": 3150 }, { "epoch": 1.55, "logps_train/chosen": -74.48318481445312, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -119.625, "logps_train/rejected": -297.2689514160156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0371849536895752, "rewards_train/margins": 16.726135969161987, "rewards_train/rejected": -17.763320922851562, "step": 3150 }, { "epoch": 1.55, "learning_rate": 5.138390431875763e-07, "loss": 0.0, "step": 3151 }, { "epoch": 1.55, "logps_train/chosen": -74.7149429321289, "logps_train/ref_chosen": -63.59375, "logps_train/ref_rejected": -121.3125, "logps_train/rejected": -294.4209899902344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1104594469070435, "rewards_train/margins": 16.202536463737488, "rewards_train/rejected": -17.31299591064453, "step": 3151 }, { "epoch": 1.55, "learning_rate": 5.135677566881934e-07, "loss": 0.0, "step": 3152 }, { "epoch": 1.55, "logps_train/chosen": -81.1619873046875, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -299.23779296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5884648561477661, "rewards_train/margins": 15.865636706352234, "rewards_train/rejected": -17.4541015625, "step": 3152 }, { "epoch": 1.55, "learning_rate": 5.132964661916578e-07, "loss": 0.0001, "step": 3153 }, { "epoch": 1.55, "logps_train/chosen": -74.89863586425781, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -292.446533203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.101240873336792, "rewards_train/margins": 15.939799547195435, "rewards_train/rejected": -17.041040420532227, "step": 3153 }, { "epoch": 1.55, "learning_rate": 5.130251717778938e-07, "loss": 0.0, "step": 3154 }, { "epoch": 1.55, "logps_train/chosen": -81.1635971069336, "logps_train/ref_chosen": -69.0, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -307.58453369140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2158225774765015, "rewards_train/margins": 16.552053093910217, "rewards_train/rejected": -17.76787567138672, "step": 3154 }, { "epoch": 1.55, "learning_rate": 5.127538735268262e-07, "loss": 0.0, "step": 3155 }, { "epoch": 1.55, "logps_train/chosen": -68.93806457519531, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -121.8125, "logps_train/rejected": -293.13067626953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.598909318447113, "rewards_train/margins": 16.53212743997574, "rewards_train/rejected": -17.13103675842285, "step": 3155 }, { "epoch": 1.55, "learning_rate": 5.124825715183814e-07, "loss": 0.0, "step": 3156 }, { "epoch": 1.55, "logps_train/chosen": -77.02590942382812, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -309.8753356933594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3007349967956543, "rewards_train/margins": 16.938557147979736, "rewards_train/rejected": -18.23929214477539, "step": 3156 }, { "epoch": 1.55, "learning_rate": 5.122112658324867e-07, "loss": 0.0, "step": 3157 }, { "epoch": 1.55, "logps_train/chosen": -76.94353485107422, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -122.0625, "logps_train/rejected": -298.3848876953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.324383020401001, "rewards_train/margins": 16.31039786338806, "rewards_train/rejected": -17.634780883789062, "step": 3157 }, { "epoch": 1.55, "learning_rate": 5.119399565490706e-07, "loss": 0.0, "step": 3158 }, { "epoch": 1.55, "logps_train/chosen": -79.95044708251953, "logps_train/ref_chosen": -67.8125, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -311.94171142578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2148692607879639, "rewards_train/margins": 17.11367917060852, "rewards_train/rejected": -18.328548431396484, "step": 3158 }, { "epoch": 1.56, "learning_rate": 5.116686437480629e-07, "loss": 0.0, "step": 3159 }, { "epoch": 1.56, "logps_train/chosen": -78.37104797363281, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -300.69451904296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1509721279144287, "rewards_train/margins": 16.312673807144165, "rewards_train/rejected": -17.463645935058594, "step": 3159 }, { "epoch": 1.56, "learning_rate": 5.113973275093941e-07, "loss": 0.0, "step": 3160 }, { "epoch": 1.56, "logps_train/chosen": -73.49526977539062, "logps_train/ref_chosen": -62.59375, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -306.795654296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0886133909225464, "rewards_train/margins": 17.03392231464386, "rewards_train/rejected": -18.122535705566406, "step": 3160 }, { "epoch": 1.56, "learning_rate": 5.111260079129955e-07, "loss": 0.0001, "step": 3161 }, { "epoch": 1.56, "logps_train/chosen": -76.35204315185547, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -305.7625732421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1079583168029785, "rewards_train/margins": 16.66459035873413, "rewards_train/rejected": -17.77254867553711, "step": 3161 }, { "epoch": 1.56, "learning_rate": 5.108546850388001e-07, "loss": 0.0, "step": 3162 }, { "epoch": 1.56, "logps_train/chosen": -75.30523681640625, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -298.8446044921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0858951807022095, "rewards_train/margins": 16.3419269323349, "rewards_train/rejected": -17.42782211303711, "step": 3162 }, { "epoch": 1.56, "learning_rate": 5.105833589667411e-07, "loss": 0.0, "step": 3163 }, { "epoch": 1.56, "logps_train/chosen": -72.37734985351562, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -293.4683837890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8661046028137207, "rewards_train/margins": 16.12775754928589, "rewards_train/rejected": -16.99386215209961, "step": 3163 }, { "epoch": 1.56, "learning_rate": 5.103120297767532e-07, "loss": 0.0, "step": 3164 }, { "epoch": 1.56, "logps_train/chosen": -80.50574493408203, "logps_train/ref_chosen": -66.9375, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -309.7080383300781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3559457063674927, "rewards_train/margins": 16.652650713920593, "rewards_train/rejected": -18.008596420288086, "step": 3164 }, { "epoch": 1.56, "learning_rate": 5.100406975487718e-07, "loss": 0.0003, "step": 3165 }, { "epoch": 1.56, "logps_train/chosen": -76.78654479980469, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -297.4705810546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3014323711395264, "rewards_train/margins": 15.88249135017395, "rewards_train/rejected": -17.183923721313477, "step": 3165 }, { "epoch": 1.56, "learning_rate": 5.09769362362733e-07, "loss": 0.0001, "step": 3166 }, { "epoch": 1.56, "logps_train/chosen": -71.770263671875, "logps_train/ref_chosen": -62.40625, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -297.904541015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.935668408870697, "rewards_train/margins": 16.596775591373444, "rewards_train/rejected": -17.53244400024414, "step": 3166 }, { "epoch": 1.56, "learning_rate": 5.094980242985742e-07, "loss": 0.0, "step": 3167 }, { "epoch": 1.56, "logps_train/chosen": -76.96563720703125, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -310.06585693359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2713682651519775, "rewards_train/margins": 16.825258016586304, "rewards_train/rejected": -18.09662628173828, "step": 3167 }, { "epoch": 1.56, "learning_rate": 5.092266834362333e-07, "loss": 0.0, "step": 3168 }, { "epoch": 1.56, "logps_train/chosen": -75.8788833618164, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -314.2178649902344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1157207489013672, "rewards_train/margins": 17.51402473449707, "rewards_train/rejected": -18.629745483398438, "step": 3168 }, { "epoch": 1.56, "learning_rate": 5.089553398556492e-07, "loss": 0.0, "step": 3169 }, { "epoch": 1.56, "logps_train/chosen": -78.0792007446289, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -305.88726806640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.223447561264038, "rewards_train/margins": 16.572019815444946, "rewards_train/rejected": -17.795467376708984, "step": 3169 }, { "epoch": 1.56, "learning_rate": 5.086839936367616e-07, "loss": 0.0, "step": 3170 }, { "epoch": 1.56, "logps_train/chosen": -72.4722900390625, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -298.4755554199219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6922975778579712, "rewards_train/margins": 16.72781503200531, "rewards_train/rejected": -17.42011260986328, "step": 3170 }, { "epoch": 1.56, "learning_rate": 5.08412644859511e-07, "loss": 0.0, "step": 3171 }, { "epoch": 1.56, "logps_train/chosen": -71.05451965332031, "logps_train/ref_chosen": -62.28125, "logps_train/ref_rejected": -119.75, "logps_train/rejected": -284.3983154296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8772779703140259, "rewards_train/margins": 15.585112929344177, "rewards_train/rejected": -16.462390899658203, "step": 3171 }, { "epoch": 1.56, "learning_rate": 5.081412936038383e-07, "loss": 0.0, "step": 3172 }, { "epoch": 1.56, "logps_train/chosen": -74.10942077636719, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -302.1976318359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0992233753204346, "rewards_train/margins": 16.591339826583862, "rewards_train/rejected": -17.690563201904297, "step": 3172 }, { "epoch": 1.56, "learning_rate": 5.078699399496857e-07, "loss": 0.0, "step": 3173 }, { "epoch": 1.56, "logps_train/chosen": -69.23501586914062, "logps_train/ref_chosen": -61.4375, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -302.08294677734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7793358564376831, "rewards_train/margins": 16.720269322395325, "rewards_train/rejected": -17.499605178833008, "step": 3173 }, { "epoch": 1.56, "learning_rate": 5.075985839769954e-07, "loss": 0.0, "step": 3174 }, { "epoch": 1.56, "logps_train/chosen": -74.55692291259766, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -299.0819396972656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8891155123710632, "rewards_train/margins": 16.436510860919952, "rewards_train/rejected": -17.325626373291016, "step": 3174 }, { "epoch": 1.56, "learning_rate": 5.073272257657112e-07, "loss": 0.0008, "step": 3175 }, { "epoch": 1.56, "logps_train/chosen": -73.20086669921875, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -121.1875, "logps_train/rejected": -284.135009765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.775604784488678, "rewards_train/margins": 15.521150052547455, "rewards_train/rejected": -16.296754837036133, "step": 3175 }, { "epoch": 1.56, "learning_rate": 5.070558653957769e-07, "loss": 0.0001, "step": 3176 }, { "epoch": 1.56, "logps_train/chosen": -75.98167419433594, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -122.3125, "logps_train/rejected": -294.4191589355469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1977770328521729, "rewards_train/margins": 16.0114266872406, "rewards_train/rejected": -17.209203720092773, "step": 3176 }, { "epoch": 1.56, "learning_rate": 5.06784502947137e-07, "loss": 0.0, "step": 3177 }, { "epoch": 1.56, "logps_train/chosen": -69.61395263671875, "logps_train/ref_chosen": -62.46875, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -295.95501708984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7139346599578857, "rewards_train/margins": 16.430737733840942, "rewards_train/rejected": -17.144672393798828, "step": 3177 }, { "epoch": 1.56, "learning_rate": 5.065131384997367e-07, "loss": 0.0, "step": 3178 }, { "epoch": 1.56, "logps_train/chosen": -73.75281524658203, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -301.50335693359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0692758560180664, "rewards_train/margins": 16.51895236968994, "rewards_train/rejected": -17.588228225708008, "step": 3178 }, { "epoch": 1.57, "learning_rate": 5.062417721335218e-07, "loss": 0.0, "step": 3179 }, { "epoch": 1.57, "logps_train/chosen": -76.45726013183594, "logps_train/ref_chosen": -67.0625, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -305.384521484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9382063150405884, "rewards_train/margins": 16.96958100795746, "rewards_train/rejected": -17.907787322998047, "step": 3179 }, { "epoch": 1.57, "learning_rate": 5.059704039284387e-07, "loss": 0.0, "step": 3180 }, { "epoch": 1.57, "logps_train/chosen": -78.84024047851562, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -300.2625427246094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3018461465835571, "rewards_train/margins": 16.163859724998474, "rewards_train/rejected": -17.46570587158203, "step": 3180 }, { "epoch": 1.57, "learning_rate": 5.056990339644343e-07, "loss": 0.0, "step": 3181 }, { "epoch": 1.57, "logps_train/chosen": -72.00064086914062, "logps_train/ref_chosen": -61.625, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -294.84686279296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0382720232009888, "rewards_train/margins": 16.055840373039246, "rewards_train/rejected": -17.094112396240234, "step": 3181 }, { "epoch": 1.57, "learning_rate": 5.054276623214562e-07, "loss": 0.0, "step": 3182 }, { "epoch": 1.57, "logps_train/chosen": -75.95796203613281, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -303.8739318847656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0382276773452759, "rewards_train/margins": 16.782416701316833, "rewards_train/rejected": -17.82064437866211, "step": 3182 }, { "epoch": 1.57, "learning_rate": 5.051562890794521e-07, "loss": 0.0, "step": 3183 }, { "epoch": 1.57, "logps_train/chosen": -74.94134521484375, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -301.3495178222656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1231136322021484, "rewards_train/margins": 16.450510025024414, "rewards_train/rejected": -17.573623657226562, "step": 3183 }, { "epoch": 1.57, "learning_rate": 5.048849143183704e-07, "loss": 0.0, "step": 3184 }, { "epoch": 1.57, "logps_train/chosen": -70.64651489257812, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -298.900634765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6708030700683594, "rewards_train/margins": 16.5347900390625, "rewards_train/rejected": -17.20559310913086, "step": 3184 }, { "epoch": 1.57, "learning_rate": 5.046135381181601e-07, "loss": 0.0, "step": 3185 }, { "epoch": 1.57, "logps_train/chosen": -77.6527099609375, "logps_train/ref_chosen": -68.25, "logps_train/ref_rejected": -130.375, "logps_train/rejected": -310.773193359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9419798851013184, "rewards_train/margins": 17.10159921646118, "rewards_train/rejected": -18.0435791015625, "step": 3185 }, { "epoch": 1.57, "learning_rate": 5.043421605587702e-07, "loss": 0.0, "step": 3186 }, { "epoch": 1.57, "logps_train/chosen": -71.32229614257812, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -304.6409912109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8025422096252441, "rewards_train/margins": 16.990708827972412, "rewards_train/rejected": -17.793251037597656, "step": 3186 }, { "epoch": 1.57, "learning_rate": 5.040707817201504e-07, "loss": 0.0001, "step": 3187 }, { "epoch": 1.57, "logps_train/chosen": -77.62550354003906, "logps_train/ref_chosen": -68.125, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -305.25213623046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9457041025161743, "rewards_train/margins": 16.579312682151794, "rewards_train/rejected": -17.52501678466797, "step": 3187 }, { "epoch": 1.57, "learning_rate": 5.037994016822513e-07, "loss": 0.0, "step": 3188 }, { "epoch": 1.57, "logps_train/chosen": -70.55378723144531, "logps_train/ref_chosen": -62.15625, "logps_train/ref_rejected": -120.375, "logps_train/rejected": -290.76873779296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8403884172439575, "rewards_train/margins": 16.197617411613464, "rewards_train/rejected": -17.038005828857422, "step": 3188 }, { "epoch": 1.57, "learning_rate": 5.035280205250223e-07, "loss": 0.0, "step": 3189 }, { "epoch": 1.57, "logps_train/chosen": -77.53115844726562, "logps_train/ref_chosen": -68.5625, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -304.40960693359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8939844369888306, "rewards_train/margins": 16.467286467552185, "rewards_train/rejected": -17.361270904541016, "step": 3189 }, { "epoch": 1.57, "learning_rate": 5.032566383284148e-07, "loss": 0.0, "step": 3190 }, { "epoch": 1.57, "logps_train/chosen": -71.29751586914062, "logps_train/ref_chosen": -61.75, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -296.0858154296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9542140364646912, "rewards_train/margins": 16.027219831943512, "rewards_train/rejected": -16.981433868408203, "step": 3190 }, { "epoch": 1.57, "learning_rate": 5.029852551723799e-07, "loss": 0.0001, "step": 3191 }, { "epoch": 1.57, "logps_train/chosen": -73.65133666992188, "logps_train/ref_chosen": -63.03125, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -291.4896240234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0615932941436768, "rewards_train/margins": 15.81339716911316, "rewards_train/rejected": -16.874990463256836, "step": 3191 }, { "epoch": 1.57, "learning_rate": 5.027138711368683e-07, "loss": 0.0, "step": 3192 }, { "epoch": 1.57, "logps_train/chosen": -72.57933044433594, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -304.442138671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.819212019443512, "rewards_train/margins": 16.839553773403168, "rewards_train/rejected": -17.65876579284668, "step": 3192 }, { "epoch": 1.57, "learning_rate": 5.024424863018323e-07, "loss": 0.0, "step": 3193 }, { "epoch": 1.57, "logps_train/chosen": -76.43632507324219, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -130.875, "logps_train/rejected": -307.318359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0782766342163086, "rewards_train/margins": 16.56147289276123, "rewards_train/rejected": -17.63974952697754, "step": 3193 }, { "epoch": 1.57, "learning_rate": 5.021711007472233e-07, "loss": 0.0, "step": 3194 }, { "epoch": 1.57, "logps_train/chosen": -74.08087921142578, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -295.32373046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8900702595710754, "rewards_train/margins": 15.989373862743378, "rewards_train/rejected": -16.879444122314453, "step": 3194 }, { "epoch": 1.57, "learning_rate": 5.018997145529931e-07, "loss": 0.0, "step": 3195 }, { "epoch": 1.57, "logps_train/chosen": -81.09782409667969, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -297.695068359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5461103916168213, "rewards_train/margins": 15.712507009506226, "rewards_train/rejected": -17.258617401123047, "step": 3195 }, { "epoch": 1.57, "learning_rate": 5.016283277990945e-07, "loss": 0.0, "step": 3196 }, { "epoch": 1.57, "logps_train/chosen": -74.63748168945312, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -306.5416259765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0268340110778809, "rewards_train/margins": 16.852619647979736, "rewards_train/rejected": -17.879453659057617, "step": 3196 }, { "epoch": 1.57, "learning_rate": 5.013569405654794e-07, "loss": 0.0, "step": 3197 }, { "epoch": 1.57, "logps_train/chosen": -74.43637084960938, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -304.45355224609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.052743673324585, "rewards_train/margins": 16.59261155128479, "rewards_train/rejected": -17.645355224609375, "step": 3197 }, { "epoch": 1.57, "learning_rate": 5.010855529321004e-07, "loss": 0.0007, "step": 3198 }, { "epoch": 1.57, "logps_train/chosen": -71.96235656738281, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -122.3125, "logps_train/rejected": -287.005859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8524366021156311, "rewards_train/margins": 15.618121564388275, "rewards_train/rejected": -16.470558166503906, "step": 3198 }, { "epoch": 1.58, "learning_rate": 5.008141649789102e-07, "loss": 0.0, "step": 3199 }, { "epoch": 1.58, "logps_train/chosen": -72.4034652709961, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -293.4765319824219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8248193860054016, "rewards_train/margins": 16.107158839702606, "rewards_train/rejected": -16.931978225708008, "step": 3199 }, { "epoch": 1.58, "learning_rate": 5.005427767858616e-07, "loss": 0.0, "step": 3200 }, { "epoch": 1.58, "logps_train/chosen": -73.57992553710938, "logps_train/ref_chosen": -62.59375, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -301.3006591796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0973484516143799, "rewards_train/margins": 16.333496809005737, "rewards_train/rejected": -17.430845260620117, "step": 3200 }, { "epoch": 1.58, "learning_rate": 5.002713884329072e-07, "loss": 0.0002, "step": 3201 }, { "epoch": 1.58, "logps_train/chosen": -71.00715637207031, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -296.15399169921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7813308238983154, "rewards_train/margins": 16.29691243171692, "rewards_train/rejected": -17.078243255615234, "step": 3201 }, { "epoch": 1.58, "learning_rate": 5e-07, "loss": 0.0, "step": 3202 }, { "epoch": 1.58, "logps_train/chosen": -67.19761657714844, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -301.88458251953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4957383871078491, "rewards_train/margins": 17.136520981788635, "rewards_train/rejected": -17.632259368896484, "step": 3202 }, { "epoch": 1.58, "learning_rate": 4.997286115670928e-07, "loss": 0.0, "step": 3203 }, { "epoch": 1.58, "logps_train/chosen": -68.98036193847656, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -298.23284912109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6844135522842407, "rewards_train/margins": 16.39932096004486, "rewards_train/rejected": -17.0837345123291, "step": 3203 }, { "epoch": 1.58, "learning_rate": 4.994572232141384e-07, "loss": 0.0, "step": 3204 }, { "epoch": 1.58, "logps_train/chosen": -68.33281707763672, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -289.77734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.48977604508399963, "rewards_train/margins": 15.917008012533188, "rewards_train/rejected": -16.406784057617188, "step": 3204 }, { "epoch": 1.58, "learning_rate": 4.991858350210897e-07, "loss": 0.0, "step": 3205 }, { "epoch": 1.58, "logps_train/chosen": -74.03856658935547, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -304.18701171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9384760856628418, "rewards_train/margins": 16.751417636871338, "rewards_train/rejected": -17.68989372253418, "step": 3205 }, { "epoch": 1.58, "learning_rate": 4.989144470678996e-07, "loss": 0.0, "step": 3206 }, { "epoch": 1.58, "logps_train/chosen": -79.07087707519531, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -309.82269287109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3019604682922363, "rewards_train/margins": 16.80775022506714, "rewards_train/rejected": -18.109710693359375, "step": 3206 }, { "epoch": 1.58, "learning_rate": 4.986430594345206e-07, "loss": 0.0, "step": 3207 }, { "epoch": 1.58, "logps_train/chosen": -77.70085144042969, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -302.12689208984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2549476623535156, "rewards_train/margins": 16.25930404663086, "rewards_train/rejected": -17.514251708984375, "step": 3207 }, { "epoch": 1.58, "learning_rate": 4.983716722009054e-07, "loss": 0.0, "step": 3208 }, { "epoch": 1.58, "logps_train/chosen": -73.20317840576172, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -293.114501953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0005426406860352, "rewards_train/margins": 16.14718723297119, "rewards_train/rejected": -17.147729873657227, "step": 3208 }, { "epoch": 1.58, "learning_rate": 4.981002854470068e-07, "loss": 0.0001, "step": 3209 }, { "epoch": 1.58, "logps_train/chosen": -73.98524475097656, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -299.0867919921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9894185066223145, "rewards_train/margins": 16.49689817428589, "rewards_train/rejected": -17.486316680908203, "step": 3209 }, { "epoch": 1.58, "learning_rate": 4.978288992527767e-07, "loss": 0.0, "step": 3210 }, { "epoch": 1.58, "logps_train/chosen": -75.1568374633789, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -299.4934387207031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.161996841430664, "rewards_train/margins": 16.485445022583008, "rewards_train/rejected": -17.647441864013672, "step": 3210 }, { "epoch": 1.58, "learning_rate": 4.975575136981678e-07, "loss": 0.0001, "step": 3211 }, { "epoch": 1.58, "logps_train/chosen": -79.59745788574219, "logps_train/ref_chosen": -67.3125, "logps_train/ref_rejected": -131.75, "logps_train/rejected": -308.3641662597656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2283986806869507, "rewards_train/margins": 16.43233358860016, "rewards_train/rejected": -17.66073226928711, "step": 3211 }, { "epoch": 1.58, "learning_rate": 4.972861288631316e-07, "loss": 0.0, "step": 3212 }, { "epoch": 1.58, "logps_train/chosen": -76.14270782470703, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -296.5726013183594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0705212354660034, "rewards_train/margins": 16.326584935188293, "rewards_train/rejected": -17.397106170654297, "step": 3212 }, { "epoch": 1.58, "learning_rate": 4.970147448276201e-07, "loss": 0.0, "step": 3213 }, { "epoch": 1.58, "logps_train/chosen": -75.38810729980469, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -305.1162109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0647624731063843, "rewards_train/margins": 16.571855187416077, "rewards_train/rejected": -17.63661766052246, "step": 3213 }, { "epoch": 1.58, "learning_rate": 4.967433616715851e-07, "loss": 0.0, "step": 3214 }, { "epoch": 1.58, "logps_train/chosen": -74.99909973144531, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -307.17059326171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8926833271980286, "rewards_train/margins": 16.8966423869133, "rewards_train/rejected": -17.789325714111328, "step": 3214 }, { "epoch": 1.58, "learning_rate": 4.964719794749777e-07, "loss": 0.0, "step": 3215 }, { "epoch": 1.58, "logps_train/chosen": -70.68140411376953, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -294.711181640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7701422572135925, "rewards_train/margins": 16.270602524280548, "rewards_train/rejected": -17.04074478149414, "step": 3215 }, { "epoch": 1.58, "learning_rate": 4.96200598317749e-07, "loss": 0.0, "step": 3216 }, { "epoch": 1.58, "logps_train/chosen": -76.86911010742188, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -299.5228576660156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2807096242904663, "rewards_train/margins": 16.21610701084137, "rewards_train/rejected": -17.496816635131836, "step": 3216 }, { "epoch": 1.58, "learning_rate": 4.959292182798495e-07, "loss": 0.0, "step": 3217 }, { "epoch": 1.58, "logps_train/chosen": -76.41600799560547, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -304.0096435546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1206046342849731, "rewards_train/margins": 16.722744822502136, "rewards_train/rejected": -17.84334945678711, "step": 3217 }, { "epoch": 1.58, "learning_rate": 4.956578394412298e-07, "loss": 0.0, "step": 3218 }, { "epoch": 1.58, "logps_train/chosen": -70.611083984375, "logps_train/ref_chosen": -60.6875, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -293.94561767578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9925538301467896, "rewards_train/margins": 16.22339427471161, "rewards_train/rejected": -17.2159481048584, "step": 3218 }, { "epoch": 1.58, "learning_rate": 4.953864618818399e-07, "loss": 0.0002, "step": 3219 }, { "epoch": 1.58, "logps_train/chosen": -80.74032592773438, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -307.042724609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5632905960083008, "rewards_train/margins": 16.294398307800293, "rewards_train/rejected": -17.857688903808594, "step": 3219 }, { "epoch": 1.59, "learning_rate": 4.951150856816296e-07, "loss": 0.0002, "step": 3220 }, { "epoch": 1.59, "logps_train/chosen": -79.04844665527344, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -132.25, "logps_train/rejected": -309.0533752441406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2247179746627808, "rewards_train/margins": 16.455324292182922, "rewards_train/rejected": -17.680042266845703, "step": 3220 }, { "epoch": 1.59, "learning_rate": 4.948437109205479e-07, "loss": 0.0, "step": 3221 }, { "epoch": 1.59, "logps_train/chosen": -71.58063507080078, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -304.47979736328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8118230700492859, "rewards_train/margins": 16.915159046649933, "rewards_train/rejected": -17.72698211669922, "step": 3221 }, { "epoch": 1.59, "learning_rate": 4.945723376785437e-07, "loss": 0.0, "step": 3222 }, { "epoch": 1.59, "logps_train/chosen": -79.61184692382812, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -313.56011962890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3918975591659546, "rewards_train/margins": 16.895070672035217, "rewards_train/rejected": -18.286968231201172, "step": 3222 }, { "epoch": 1.59, "learning_rate": 4.943009660355657e-07, "loss": 0.0, "step": 3223 }, { "epoch": 1.59, "logps_train/chosen": -76.69790649414062, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -304.99200439453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2273590564727783, "rewards_train/margins": 16.652260541915894, "rewards_train/rejected": -17.879619598388672, "step": 3223 }, { "epoch": 1.59, "learning_rate": 4.940295960715612e-07, "loss": 0.0, "step": 3224 }, { "epoch": 1.59, "logps_train/chosen": -74.00581359863281, "logps_train/ref_chosen": -62.34375, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -311.3442687988281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1659622192382812, "rewards_train/margins": 17.47046661376953, "rewards_train/rejected": -18.636428833007812, "step": 3224 }, { "epoch": 1.59, "learning_rate": 4.937582278664782e-07, "loss": 0.0, "step": 3225 }, { "epoch": 1.59, "logps_train/chosen": -71.60311889648438, "logps_train/ref_chosen": -62.0625, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -292.88775634765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9528900384902954, "rewards_train/margins": 15.888227820396423, "rewards_train/rejected": -16.84111785888672, "step": 3225 }, { "epoch": 1.59, "learning_rate": 4.934868615002635e-07, "loss": 0.0, "step": 3226 }, { "epoch": 1.59, "logps_train/chosen": -73.60242462158203, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -280.61248779296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8367076516151428, "rewards_train/margins": 15.124347150325775, "rewards_train/rejected": -15.961054801940918, "step": 3226 }, { "epoch": 1.59, "learning_rate": 4.932154970528631e-07, "loss": 0.0, "step": 3227 }, { "epoch": 1.59, "logps_train/chosen": -76.75469970703125, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -286.8477783203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1140680313110352, "rewards_train/margins": 15.137703895568848, "rewards_train/rejected": -16.251771926879883, "step": 3227 }, { "epoch": 1.59, "learning_rate": 4.929441346042233e-07, "loss": 0.0002, "step": 3228 }, { "epoch": 1.59, "logps_train/chosen": -72.25946044921875, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -121.3125, "logps_train/rejected": -288.90386962890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0112972259521484, "rewards_train/margins": 15.749597549438477, "rewards_train/rejected": -16.760894775390625, "step": 3228 }, { "epoch": 1.59, "learning_rate": 4.926727742342889e-07, "loss": 0.0001, "step": 3229 }, { "epoch": 1.59, "logps_train/chosen": -69.4557113647461, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -288.9351501464844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6541162729263306, "rewards_train/margins": 15.988030791282654, "rewards_train/rejected": -16.642147064208984, "step": 3229 }, { "epoch": 1.59, "learning_rate": 4.924014160230045e-07, "loss": 0.0, "step": 3230 }, { "epoch": 1.59, "logps_train/chosen": -72.389892578125, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -304.22955322265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7364016175270081, "rewards_train/margins": 16.97942441701889, "rewards_train/rejected": -17.7158260345459, "step": 3230 }, { "epoch": 1.59, "learning_rate": 4.921300600503145e-07, "loss": 0.0, "step": 3231 }, { "epoch": 1.59, "logps_train/chosen": -74.38287353515625, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -297.5909423828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.03828763961792, "rewards_train/margins": 16.364655017852783, "rewards_train/rejected": -17.402942657470703, "step": 3231 }, { "epoch": 1.59, "learning_rate": 4.918587063961618e-07, "loss": 0.0, "step": 3232 }, { "epoch": 1.59, "logps_train/chosen": -72.78968811035156, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -299.5006103515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8718397617340088, "rewards_train/margins": 16.376561880111694, "rewards_train/rejected": -17.248401641845703, "step": 3232 }, { "epoch": 1.59, "learning_rate": 4.915873551404892e-07, "loss": 0.0, "step": 3233 }, { "epoch": 1.59, "logps_train/chosen": -75.27458190917969, "logps_train/ref_chosen": -67.5625, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -307.50958251953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7715011835098267, "rewards_train/margins": 17.008022665977478, "rewards_train/rejected": -17.779523849487305, "step": 3233 }, { "epoch": 1.59, "learning_rate": 4.913160063632384e-07, "loss": 0.0, "step": 3234 }, { "epoch": 1.59, "logps_train/chosen": -76.3849105834961, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -309.64691162109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2161767482757568, "rewards_train/margins": 16.949690103530884, "rewards_train/rejected": -18.16586685180664, "step": 3234 }, { "epoch": 1.59, "learning_rate": 4.910446601443508e-07, "loss": 0.0, "step": 3235 }, { "epoch": 1.59, "logps_train/chosen": -74.46540832519531, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -122.0625, "logps_train/rejected": -292.886474609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1487623453140259, "rewards_train/margins": 15.934565901756287, "rewards_train/rejected": -17.083328247070312, "step": 3235 }, { "epoch": 1.59, "learning_rate": 4.907733165637667e-07, "loss": 0.0, "step": 3236 }, { "epoch": 1.59, "logps_train/chosen": -75.59618377685547, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -303.44207763671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9326654672622681, "rewards_train/margins": 16.4971364736557, "rewards_train/rejected": -17.42980194091797, "step": 3236 }, { "epoch": 1.59, "learning_rate": 4.905019757014258e-07, "loss": 0.0, "step": 3237 }, { "epoch": 1.59, "logps_train/chosen": -73.82723236083984, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -300.55523681640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.041073203086853, "rewards_train/margins": 16.273826241493225, "rewards_train/rejected": -17.314899444580078, "step": 3237 }, { "epoch": 1.59, "learning_rate": 4.902306376372671e-07, "loss": 0.0, "step": 3238 }, { "epoch": 1.59, "logps_train/chosen": -76.52140808105469, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -294.0560302734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.273038625717163, "rewards_train/margins": 15.907123804092407, "rewards_train/rejected": -17.18016242980957, "step": 3238 }, { "epoch": 1.59, "learning_rate": 4.899593024512283e-07, "loss": 0.0001, "step": 3239 }, { "epoch": 1.59, "logps_train/chosen": -70.47109985351562, "logps_train/ref_chosen": -62.15625, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -295.5910949707031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8322657346725464, "rewards_train/margins": 16.41795551776886, "rewards_train/rejected": -17.250221252441406, "step": 3239 }, { "epoch": 1.6, "learning_rate": 4.896879702232468e-07, "loss": 0.0, "step": 3240 }, { "epoch": 1.6, "logps_train/chosen": -69.55319213867188, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -296.137939453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6458464860916138, "rewards_train/margins": 16.677616953849792, "rewards_train/rejected": -17.323463439941406, "step": 3240 }, { "epoch": 1.6, "learning_rate": 4.894166410332589e-07, "loss": 0.0, "step": 3241 }, { "epoch": 1.6, "logps_train/chosen": -74.26004028320312, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -295.699951171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0255166292190552, "rewards_train/margins": 16.13075816631317, "rewards_train/rejected": -17.156274795532227, "step": 3241 }, { "epoch": 1.6, "learning_rate": 4.891453149611999e-07, "loss": 0.0001, "step": 3242 }, { "epoch": 1.6, "logps_train/chosen": -73.61697387695312, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -306.2367858886719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0238066911697388, "rewards_train/margins": 17.06266486644745, "rewards_train/rejected": -18.086471557617188, "step": 3242 }, { "epoch": 1.6, "learning_rate": 4.888739920870046e-07, "loss": 0.0, "step": 3243 }, { "epoch": 1.6, "logps_train/chosen": -73.0733871459961, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -295.77166748046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8308736085891724, "rewards_train/margins": 16.402639269828796, "rewards_train/rejected": -17.23351287841797, "step": 3243 }, { "epoch": 1.6, "learning_rate": 4.886026724906058e-07, "loss": 0.0, "step": 3244 }, { "epoch": 1.6, "logps_train/chosen": -78.8122329711914, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -312.9080810546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2967504262924194, "rewards_train/margins": 16.91637098789215, "rewards_train/rejected": -18.21312141418457, "step": 3244 }, { "epoch": 1.6, "learning_rate": 4.883313562519369e-07, "loss": 0.0002, "step": 3245 }, { "epoch": 1.6, "logps_train/chosen": -78.08354949951172, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -309.3690490722656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3260308504104614, "rewards_train/margins": 16.77366816997528, "rewards_train/rejected": -18.099699020385742, "step": 3245 }, { "epoch": 1.6, "learning_rate": 4.880600434509294e-07, "loss": 0.0, "step": 3246 }, { "epoch": 1.6, "logps_train/chosen": -75.35044860839844, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -310.0343933105469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9602886438369751, "rewards_train/margins": 17.241297125816345, "rewards_train/rejected": -18.20158576965332, "step": 3246 }, { "epoch": 1.6, "learning_rate": 4.877887341675134e-07, "loss": 0.0, "step": 3247 }, { "epoch": 1.6, "logps_train/chosen": -79.08305358886719, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -305.8039855957031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4756884574890137, "rewards_train/margins": 16.250412464141846, "rewards_train/rejected": -17.72610092163086, "step": 3247 }, { "epoch": 1.6, "learning_rate": 4.875174284816188e-07, "loss": 0.0, "step": 3248 }, { "epoch": 1.6, "logps_train/chosen": -79.3433837890625, "logps_train/ref_chosen": -67.75, "logps_train/ref_rejected": -132.125, "logps_train/rejected": -312.5564270019531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.162414789199829, "rewards_train/margins": 16.878677129745483, "rewards_train/rejected": -18.041091918945312, "step": 3248 }, { "epoch": 1.6, "learning_rate": 4.872461264731739e-07, "loss": 0.0001, "step": 3249 }, { "epoch": 1.6, "logps_train/chosen": -74.88249969482422, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -298.8830261230469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0158381462097168, "rewards_train/margins": 16.247612476348877, "rewards_train/rejected": -17.263450622558594, "step": 3249 }, { "epoch": 1.6, "learning_rate": 4.869748282221061e-07, "loss": 0.0, "step": 3250 }, { "epoch": 1.6, "logps_train/chosen": -76.6288833618164, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -309.8623046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.240671157836914, "rewards_train/margins": 16.726909637451172, "rewards_train/rejected": -17.967580795288086, "step": 3250 }, { "epoch": 1.6, "learning_rate": 4.867035338083421e-07, "loss": 0.0, "step": 3251 }, { "epoch": 1.6, "logps_train/chosen": -75.79051208496094, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -311.1575622558594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9995103478431702, "rewards_train/margins": 17.20755523443222, "rewards_train/rejected": -18.20706558227539, "step": 3251 }, { "epoch": 1.6, "learning_rate": 4.864322433118066e-07, "loss": 0.0, "step": 3252 }, { "epoch": 1.6, "logps_train/chosen": -79.8017578125, "logps_train/ref_chosen": -67.6875, "logps_train/ref_rejected": -130.625, "logps_train/rejected": -312.76995849609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2129395008087158, "rewards_train/margins": 17.00175166130066, "rewards_train/rejected": -18.214691162109375, "step": 3252 }, { "epoch": 1.6, "learning_rate": 4.861609568124239e-07, "loss": 0.0, "step": 3253 }, { "epoch": 1.6, "logps_train/chosen": -74.89569091796875, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -298.81719970703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1615909337997437, "rewards_train/margins": 16.32735788822174, "rewards_train/rejected": -17.488948822021484, "step": 3253 }, { "epoch": 1.6, "learning_rate": 4.858896743901165e-07, "loss": 0.0, "step": 3254 }, { "epoch": 1.6, "logps_train/chosen": -68.35733795166016, "logps_train/ref_chosen": -61.15625, "logps_train/ref_rejected": -121.8125, "logps_train/rejected": -287.48419189453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7191811800003052, "rewards_train/margins": 15.848865389823914, "rewards_train/rejected": -16.56804656982422, "step": 3254 }, { "epoch": 1.6, "learning_rate": 4.856183961248062e-07, "loss": 0.0, "step": 3255 }, { "epoch": 1.6, "logps_train/chosen": -74.64200592041016, "logps_train/ref_chosen": -62.875, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -295.39178466796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1776037216186523, "rewards_train/margins": 16.263916969299316, "rewards_train/rejected": -17.44152069091797, "step": 3255 }, { "epoch": 1.6, "learning_rate": 4.853471220964136e-07, "loss": 0.0, "step": 3256 }, { "epoch": 1.6, "logps_train/chosen": -73.15292358398438, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -302.85296630859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8121428489685059, "rewards_train/margins": 16.49156427383423, "rewards_train/rejected": -17.303707122802734, "step": 3256 }, { "epoch": 1.6, "learning_rate": 4.850758523848577e-07, "loss": 0.0, "step": 3257 }, { "epoch": 1.6, "logps_train/chosen": -72.95173645019531, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -304.31353759765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7790115475654602, "rewards_train/margins": 16.80775946378708, "rewards_train/rejected": -17.58677101135254, "step": 3257 }, { "epoch": 1.6, "learning_rate": 4.848045870700564e-07, "loss": 0.0004, "step": 3258 }, { "epoch": 1.6, "logps_train/chosen": -79.89852905273438, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -305.0736389160156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4618741273880005, "rewards_train/margins": 16.315996050834656, "rewards_train/rejected": -17.777870178222656, "step": 3258 }, { "epoch": 1.6, "learning_rate": 4.845333262319263e-07, "loss": 0.0004, "step": 3259 }, { "epoch": 1.6, "logps_train/chosen": -78.13768005371094, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -299.9347229003906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1041979789733887, "rewards_train/margins": 16.252848148345947, "rewards_train/rejected": -17.357046127319336, "step": 3259 }, { "epoch": 1.61, "learning_rate": 4.842620699503825e-07, "loss": 0.0, "step": 3260 }, { "epoch": 1.61, "logps_train/chosen": -71.60755920410156, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -304.5881042480469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7313616871833801, "rewards_train/margins": 17.028623282909393, "rewards_train/rejected": -17.759984970092773, "step": 3260 }, { "epoch": 1.61, "learning_rate": 4.839908183053394e-07, "loss": 0.0, "step": 3261 }, { "epoch": 1.61, "logps_train/chosen": -74.12982177734375, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -307.57830810546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8487733602523804, "rewards_train/margins": 17.144068360328674, "rewards_train/rejected": -17.992841720581055, "step": 3261 }, { "epoch": 1.61, "learning_rate": 4.837195713767089e-07, "loss": 0.0, "step": 3262 }, { "epoch": 1.61, "logps_train/chosen": -73.69911193847656, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -297.20758056640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9119036793708801, "rewards_train/margins": 16.24586457014084, "rewards_train/rejected": -17.15776824951172, "step": 3262 }, { "epoch": 1.61, "learning_rate": 4.834483292444029e-07, "loss": 0.0013, "step": 3263 }, { "epoch": 1.61, "logps_train/chosen": -77.62448120117188, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -131.375, "logps_train/rejected": -315.82684326171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0788542032241821, "rewards_train/margins": 17.361934065818787, "rewards_train/rejected": -18.44078826904297, "step": 3263 }, { "epoch": 1.61, "learning_rate": 4.831770919883304e-07, "loss": 0.0, "step": 3264 }, { "epoch": 1.61, "logps_train/chosen": -76.31716918945312, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -304.2021789550781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2295200824737549, "rewards_train/margins": 16.54572558403015, "rewards_train/rejected": -17.775245666503906, "step": 3264 }, { "epoch": 1.61, "learning_rate": 4.829058596884003e-07, "loss": 0.0, "step": 3265 }, { "epoch": 1.61, "logps_train/chosen": -79.02848815917969, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -310.4839172363281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.256657361984253, "rewards_train/margins": 16.779040575027466, "rewards_train/rejected": -18.03569793701172, "step": 3265 }, { "epoch": 1.61, "learning_rate": 4.826346324245193e-07, "loss": 0.0001, "step": 3266 }, { "epoch": 1.61, "logps_train/chosen": -81.78678894042969, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -311.0009460449219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5964525938034058, "rewards_train/margins": 17.00564396381378, "rewards_train/rejected": -18.602096557617188, "step": 3266 }, { "epoch": 1.61, "learning_rate": 4.823634102765928e-07, "loss": 0.0, "step": 3267 }, { "epoch": 1.61, "logps_train/chosen": -71.0177001953125, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -308.5386657714844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6305544972419739, "rewards_train/margins": 17.326583564281464, "rewards_train/rejected": -17.957138061523438, "step": 3267 }, { "epoch": 1.61, "learning_rate": 4.820921933245245e-07, "loss": 0.0, "step": 3268 }, { "epoch": 1.61, "logps_train/chosen": -70.28919982910156, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -120.3125, "logps_train/rejected": -288.08526611328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7973767518997192, "rewards_train/margins": 15.977754473686218, "rewards_train/rejected": -16.775131225585938, "step": 3268 }, { "epoch": 1.61, "learning_rate": 4.818209816482169e-07, "loss": 0.0004, "step": 3269 }, { "epoch": 1.61, "logps_train/chosen": -74.37616729736328, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -120.1875, "logps_train/rejected": -287.35284423828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.206366777420044, "rewards_train/margins": 15.508213758468628, "rewards_train/rejected": -16.714580535888672, "step": 3269 }, { "epoch": 1.61, "learning_rate": 4.815497753275709e-07, "loss": 0.0002, "step": 3270 }, { "epoch": 1.61, "logps_train/chosen": -73.08660125732422, "logps_train/ref_chosen": -62.5625, "logps_train/ref_rejected": -121.375, "logps_train/rejected": -292.40167236328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.051237940788269, "rewards_train/margins": 16.050454258918762, "rewards_train/rejected": -17.10169219970703, "step": 3270 }, { "epoch": 1.61, "learning_rate": 4.812785744424855e-07, "loss": 0.0, "step": 3271 }, { "epoch": 1.61, "logps_train/chosen": -75.59072875976562, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -123.1875, "logps_train/rejected": -299.3477783203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9406644105911255, "rewards_train/margins": 16.673118472099304, "rewards_train/rejected": -17.61378288269043, "step": 3271 }, { "epoch": 1.61, "learning_rate": 4.810073790728584e-07, "loss": 0.0, "step": 3272 }, { "epoch": 1.61, "logps_train/chosen": -74.75959777832031, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -304.557861328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9624348878860474, "rewards_train/margins": 16.619134783744812, "rewards_train/rejected": -17.58156967163086, "step": 3272 }, { "epoch": 1.61, "learning_rate": 4.807361892985857e-07, "loss": 0.0, "step": 3273 }, { "epoch": 1.61, "logps_train/chosen": -77.85469055175781, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -302.65142822265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.374678134918213, "rewards_train/margins": 16.31438970565796, "rewards_train/rejected": -17.689067840576172, "step": 3273 }, { "epoch": 1.61, "learning_rate": 4.804650051995615e-07, "loss": 0.0, "step": 3274 }, { "epoch": 1.61, "logps_train/chosen": -81.073974609375, "logps_train/ref_chosen": -67.8125, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -319.5834045410156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3277587890625, "rewards_train/margins": 17.65763282775879, "rewards_train/rejected": -18.98539161682129, "step": 3274 }, { "epoch": 1.61, "learning_rate": 4.801938268556787e-07, "loss": 0.0, "step": 3275 }, { "epoch": 1.61, "logps_train/chosen": -74.34783935546875, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -295.99041748046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1417171955108643, "rewards_train/margins": 16.216892957687378, "rewards_train/rejected": -17.358610153198242, "step": 3275 }, { "epoch": 1.61, "learning_rate": 4.79922654346828e-07, "loss": 0.0, "step": 3276 }, { "epoch": 1.61, "logps_train/chosen": -70.97616577148438, "logps_train/ref_chosen": -62.53125, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -296.6700439453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8435152769088745, "rewards_train/margins": 16.39019215106964, "rewards_train/rejected": -17.233707427978516, "step": 3276 }, { "epoch": 1.61, "learning_rate": 4.796514877528987e-07, "loss": 0.0001, "step": 3277 }, { "epoch": 1.61, "logps_train/chosen": -80.46397399902344, "logps_train/ref_chosen": -68.0625, "logps_train/ref_rejected": -131.125, "logps_train/rejected": -309.8096923828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2423452138900757, "rewards_train/margins": 16.630716919898987, "rewards_train/rejected": -17.873062133789062, "step": 3277 }, { "epoch": 1.61, "learning_rate": 4.793803271537788e-07, "loss": 0.0, "step": 3278 }, { "epoch": 1.61, "logps_train/chosen": -71.91914367675781, "logps_train/ref_chosen": -58.75, "logps_train/ref_rejected": -118.3125, "logps_train/rejected": -290.6661376953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.317305088043213, "rewards_train/margins": 15.916205883026123, "rewards_train/rejected": -17.233510971069336, "step": 3278 }, { "epoch": 1.61, "learning_rate": 4.791091726293533e-07, "loss": 0.0, "step": 3279 }, { "epoch": 1.61, "logps_train/chosen": -72.48348236083984, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -122.125, "logps_train/rejected": -291.86944580078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9755452871322632, "rewards_train/margins": 16.00046408176422, "rewards_train/rejected": -16.976009368896484, "step": 3279 }, { "epoch": 1.61, "learning_rate": 4.788380242595068e-07, "loss": 0.0004, "step": 3280 }, { "epoch": 1.61, "logps_train/chosen": -77.51587677001953, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -132.25, "logps_train/rejected": -320.51806640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1131601333618164, "rewards_train/margins": 17.71540355682373, "rewards_train/rejected": -18.828563690185547, "step": 3280 }, { "epoch": 1.62, "learning_rate": 4.78566882124121e-07, "loss": 0.0001, "step": 3281 }, { "epoch": 1.62, "logps_train/chosen": -79.88719177246094, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -120.875, "logps_train/rejected": -301.5959167480469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.714354157447815, "rewards_train/margins": 16.357102751731873, "rewards_train/rejected": -18.071456909179688, "step": 3281 }, { "epoch": 1.62, "learning_rate": 4.782957463030762e-07, "loss": 0.0001, "step": 3282 }, { "epoch": 1.62, "logps_train/chosen": -74.39627075195312, "logps_train/ref_chosen": -63.03125, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -297.5091552734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1367459297180176, "rewards_train/margins": 16.315730571746826, "rewards_train/rejected": -17.452476501464844, "step": 3282 }, { "epoch": 1.62, "learning_rate": 4.780246168762513e-07, "loss": 0.0001, "step": 3283 }, { "epoch": 1.62, "logps_train/chosen": -75.37005615234375, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -299.7177429199219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2299745082855225, "rewards_train/margins": 16.320410013198853, "rewards_train/rejected": -17.550384521484375, "step": 3283 }, { "epoch": 1.62, "learning_rate": 4.777534939235224e-07, "loss": 0.0, "step": 3284 }, { "epoch": 1.62, "logps_train/chosen": -80.6851806640625, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -302.78582763671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4545042514801025, "rewards_train/margins": 15.995664358139038, "rewards_train/rejected": -17.45016860961914, "step": 3284 }, { "epoch": 1.62, "learning_rate": 4.774823775247643e-07, "loss": 0.0, "step": 3285 }, { "epoch": 1.62, "logps_train/chosen": -79.77397155761719, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -310.2124328613281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5860884189605713, "rewards_train/margins": 16.803855657577515, "rewards_train/rejected": -18.389944076538086, "step": 3285 }, { "epoch": 1.62, "learning_rate": 4.772112677598497e-07, "loss": 0.0001, "step": 3286 }, { "epoch": 1.62, "logps_train/chosen": -74.82731628417969, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -305.2432861328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0668621063232422, "rewards_train/margins": 16.838571548461914, "rewards_train/rejected": -17.905433654785156, "step": 3286 }, { "epoch": 1.62, "learning_rate": 4.769401647086494e-07, "loss": 0.0002, "step": 3287 }, { "epoch": 1.62, "logps_train/chosen": -73.45878601074219, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -122.125, "logps_train/rejected": -295.6815185546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.924736499786377, "rewards_train/margins": 16.429643154144287, "rewards_train/rejected": -17.354379653930664, "step": 3287 }, { "epoch": 1.62, "learning_rate": 4.7666906845103223e-07, "loss": 0.0001, "step": 3288 }, { "epoch": 1.62, "logps_train/chosen": -74.22645568847656, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -297.7400207519531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0297260284423828, "rewards_train/margins": 16.34652328491211, "rewards_train/rejected": -17.376249313354492, "step": 3288 }, { "epoch": 1.62, "learning_rate": 4.7639797906686487e-07, "loss": 0.0, "step": 3289 }, { "epoch": 1.62, "logps_train/chosen": -71.56001281738281, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -301.70086669921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8552936911582947, "rewards_train/margins": 16.446042597293854, "rewards_train/rejected": -17.30133628845215, "step": 3289 }, { "epoch": 1.62, "learning_rate": 4.761268966360122e-07, "loss": 0.0003, "step": 3290 }, { "epoch": 1.62, "logps_train/chosen": -71.65763854980469, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -299.6770324707031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7425699234008789, "rewards_train/margins": 16.551108360290527, "rewards_train/rejected": -17.293678283691406, "step": 3290 }, { "epoch": 1.62, "learning_rate": 4.7585582123833676e-07, "loss": 0.0, "step": 3291 }, { "epoch": 1.62, "logps_train/chosen": -73.11775207519531, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -306.21826171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9585530161857605, "rewards_train/margins": 17.20550948381424, "rewards_train/rejected": -18.1640625, "step": 3291 }, { "epoch": 1.62, "learning_rate": 4.755847529536994e-07, "loss": 0.0, "step": 3292 }, { "epoch": 1.62, "logps_train/chosen": -76.51341247558594, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -121.5625, "logps_train/rejected": -296.7185974121094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1962873935699463, "rewards_train/margins": 16.318788290023804, "rewards_train/rejected": -17.51507568359375, "step": 3292 }, { "epoch": 1.62, "learning_rate": 4.753136918619586e-07, "loss": 0.0001, "step": 3293 }, { "epoch": 1.62, "logps_train/chosen": -77.11434936523438, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -302.80926513671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2988855838775635, "rewards_train/margins": 16.469001531600952, "rewards_train/rejected": -17.767887115478516, "step": 3293 }, { "epoch": 1.62, "learning_rate": 4.750426380429706e-07, "loss": 0.0, "step": 3294 }, { "epoch": 1.62, "logps_train/chosen": -68.94974517822266, "logps_train/ref_chosen": -61.90625, "logps_train/ref_rejected": -121.1875, "logps_train/rejected": -293.3358459472656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7036171555519104, "rewards_train/margins": 16.51414865255356, "rewards_train/rejected": -17.21776580810547, "step": 3294 }, { "epoch": 1.62, "learning_rate": 4.7477159157659016e-07, "loss": 0.0001, "step": 3295 }, { "epoch": 1.62, "logps_train/chosen": -74.47267150878906, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -130.375, "logps_train/rejected": -306.32794189453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.008497714996338, "rewards_train/margins": 16.583085536956787, "rewards_train/rejected": -17.591583251953125, "step": 3295 }, { "epoch": 1.62, "learning_rate": 4.7450055254266866e-07, "loss": 0.0, "step": 3296 }, { "epoch": 1.62, "logps_train/chosen": -67.77745819091797, "logps_train/ref_chosen": -60.59375, "logps_train/ref_rejected": -120.3125, "logps_train/rejected": -290.15191650390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7173943519592285, "rewards_train/margins": 16.26913595199585, "rewards_train/rejected": -16.986530303955078, "step": 3296 }, { "epoch": 1.62, "learning_rate": 4.7422952102105665e-07, "loss": 0.0, "step": 3297 }, { "epoch": 1.62, "logps_train/chosen": -79.42549133300781, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -304.7833251953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4967243671417236, "rewards_train/margins": 16.346014738082886, "rewards_train/rejected": -17.84273910522461, "step": 3297 }, { "epoch": 1.62, "learning_rate": 4.7395849709160173e-07, "loss": 0.0, "step": 3298 }, { "epoch": 1.62, "logps_train/chosen": -71.1344223022461, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -307.4454040527344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8232811689376831, "rewards_train/margins": 17.289815068244934, "rewards_train/rejected": -18.113096237182617, "step": 3298 }, { "epoch": 1.62, "learning_rate": 4.7368748083414917e-07, "loss": 0.0, "step": 3299 }, { "epoch": 1.62, "logps_train/chosen": -73.22315979003906, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -122.8125, "logps_train/rejected": -296.84869384765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8894551396369934, "rewards_train/margins": 16.515288054943085, "rewards_train/rejected": -17.404743194580078, "step": 3299 }, { "epoch": 1.62, "learning_rate": 4.7341647232854235e-07, "loss": 0.0, "step": 3300 }, { "epoch": 1.62, "logps_train/chosen": -73.90374755859375, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -131.5, "logps_train/rejected": -314.3959655761719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8996279239654541, "rewards_train/margins": 17.39626669883728, "rewards_train/rejected": -18.295894622802734, "step": 3300 }, { "epoch": 1.63, "learning_rate": 4.7314547165462217e-07, "loss": 0.0, "step": 3301 }, { "epoch": 1.63, "logps_train/chosen": -73.05853271484375, "logps_train/ref_chosen": -62.4375, "logps_train/ref_rejected": -120.875, "logps_train/rejected": -294.71380615234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0620787143707275, "rewards_train/margins": 16.32487654685974, "rewards_train/rejected": -17.38695526123047, "step": 3301 }, { "epoch": 1.63, "learning_rate": 4.7287447889222715e-07, "loss": 0.0, "step": 3302 }, { "epoch": 1.63, "logps_train/chosen": -77.65681457519531, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -304.47943115234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.087752103805542, "rewards_train/margins": 16.41243863105774, "rewards_train/rejected": -17.50019073486328, "step": 3302 }, { "epoch": 1.63, "learning_rate": 4.7260349412119374e-07, "loss": 0.0, "step": 3303 }, { "epoch": 1.63, "logps_train/chosen": -73.36705017089844, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -118.875, "logps_train/rejected": -284.85394287109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.025913953781128, "rewards_train/margins": 15.573590517044067, "rewards_train/rejected": -16.599504470825195, "step": 3303 }, { "epoch": 1.63, "learning_rate": 4.723325174213558e-07, "loss": 0.0, "step": 3304 }, { "epoch": 1.63, "logps_train/chosen": -72.30748748779297, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -298.42486572265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8639519214630127, "rewards_train/margins": 16.637375593185425, "rewards_train/rejected": -17.501327514648438, "step": 3304 }, { "epoch": 1.63, "learning_rate": 4.7206154887254506e-07, "loss": 0.0, "step": 3305 }, { "epoch": 1.63, "logps_train/chosen": -75.83427429199219, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -306.2415466308594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1690720319747925, "rewards_train/margins": 16.547759175300598, "rewards_train/rejected": -17.71683120727539, "step": 3305 }, { "epoch": 1.63, "learning_rate": 4.717905885545904e-07, "loss": 0.0001, "step": 3306 }, { "epoch": 1.63, "logps_train/chosen": -77.48236083984375, "logps_train/ref_chosen": -67.3125, "logps_train/ref_rejected": -131.125, "logps_train/rejected": -307.3854675292969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.015619158744812, "rewards_train/margins": 16.61374866962433, "rewards_train/rejected": -17.62936782836914, "step": 3306 }, { "epoch": 1.63, "learning_rate": 4.715196365473188e-07, "loss": 0.0, "step": 3307 }, { "epoch": 1.63, "logps_train/chosen": -80.08230590820312, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -304.3133544921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6544711589813232, "rewards_train/margins": 16.159968614578247, "rewards_train/rejected": -17.81443977355957, "step": 3307 }, { "epoch": 1.63, "learning_rate": 4.712486929305544e-07, "loss": 0.0, "step": 3308 }, { "epoch": 1.63, "logps_train/chosen": -76.00292205810547, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -123.1875, "logps_train/rejected": -302.1685791015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1685538291931152, "rewards_train/margins": 16.7270188331604, "rewards_train/rejected": -17.895572662353516, "step": 3308 }, { "epoch": 1.63, "learning_rate": 4.7097775778411896e-07, "loss": 0.0001, "step": 3309 }, { "epoch": 1.63, "logps_train/chosen": -70.02560424804688, "logps_train/ref_chosen": -61.96875, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -297.0904235839844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8042206764221191, "rewards_train/margins": 16.73060369491577, "rewards_train/rejected": -17.53482437133789, "step": 3309 }, { "epoch": 1.63, "learning_rate": 4.707068311878321e-07, "loss": 0.0, "step": 3310 }, { "epoch": 1.63, "logps_train/chosen": -67.94218444824219, "logps_train/ref_chosen": -60.34375, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -295.14544677734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7583296298980713, "rewards_train/margins": 16.17447829246521, "rewards_train/rejected": -16.93280792236328, "step": 3310 }, { "epoch": 1.63, "learning_rate": 4.704359132215103e-07, "loss": 0.0001, "step": 3311 }, { "epoch": 1.63, "logps_train/chosen": -77.72886657714844, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -292.62664794921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4988880157470703, "rewards_train/margins": 15.356695175170898, "rewards_train/rejected": -16.85558319091797, "step": 3311 }, { "epoch": 1.63, "learning_rate": 4.7016500396496816e-07, "loss": 0.0001, "step": 3312 }, { "epoch": 1.63, "logps_train/chosen": -78.53441619873047, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -310.8017883300781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3579821586608887, "rewards_train/margins": 16.99319314956665, "rewards_train/rejected": -18.35117530822754, "step": 3312 }, { "epoch": 1.63, "learning_rate": 4.69894103498017e-07, "loss": 0.0001, "step": 3313 }, { "epoch": 1.63, "logps_train/chosen": -77.09297180175781, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -315.76275634765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2397173643112183, "rewards_train/margins": 17.45013439655304, "rewards_train/rejected": -18.689851760864258, "step": 3313 }, { "epoch": 1.63, "learning_rate": 4.6962321190046595e-07, "loss": 0.0, "step": 3314 }, { "epoch": 1.63, "logps_train/chosen": -78.48348999023438, "logps_train/ref_chosen": -62.84375, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -300.7416076660156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5636074542999268, "rewards_train/margins": 16.215777158737183, "rewards_train/rejected": -17.77938461303711, "step": 3314 }, { "epoch": 1.63, "learning_rate": 4.69352329252122e-07, "loss": 0.0, "step": 3315 }, { "epoch": 1.63, "logps_train/chosen": -82.12506866455078, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -314.99688720703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8111882209777832, "rewards_train/margins": 16.742797374725342, "rewards_train/rejected": -18.553985595703125, "step": 3315 }, { "epoch": 1.63, "learning_rate": 4.6908145563278846e-07, "loss": 0.0, "step": 3316 }, { "epoch": 1.63, "logps_train/chosen": -77.90815734863281, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -301.65228271484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.269869089126587, "rewards_train/margins": 16.413768529891968, "rewards_train/rejected": -17.683637619018555, "step": 3316 }, { "epoch": 1.63, "learning_rate": 4.6881059112226677e-07, "loss": 0.0001, "step": 3317 }, { "epoch": 1.63, "logps_train/chosen": -78.00480651855469, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -118.1875, "logps_train/rejected": -291.59100341796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4267010688781738, "rewards_train/margins": 15.915311336517334, "rewards_train/rejected": -17.342012405395508, "step": 3317 }, { "epoch": 1.63, "learning_rate": 4.685397358003553e-07, "loss": 0.0003, "step": 3318 }, { "epoch": 1.63, "logps_train/chosen": -74.44279479980469, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -298.6082763671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8981366157531738, "rewards_train/margins": 16.60722303390503, "rewards_train/rejected": -17.505359649658203, "step": 3318 }, { "epoch": 1.63, "learning_rate": 4.6826888974684996e-07, "loss": 0.0, "step": 3319 }, { "epoch": 1.63, "logps_train/chosen": -75.16920471191406, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -302.4625549316406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9914810061454773, "rewards_train/margins": 16.838368237018585, "rewards_train/rejected": -17.829849243164062, "step": 3319 }, { "epoch": 1.63, "learning_rate": 4.6799805304154394e-07, "loss": 0.0, "step": 3320 }, { "epoch": 1.63, "logps_train/chosen": -73.5898666381836, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -306.9427185058594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.895802915096283, "rewards_train/margins": 16.9097962975502, "rewards_train/rejected": -17.805599212646484, "step": 3320 }, { "epoch": 1.64, "learning_rate": 4.677272257642273e-07, "loss": 0.0, "step": 3321 }, { "epoch": 1.64, "logps_train/chosen": -74.06626892089844, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -300.4494934082031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.132408618927002, "rewards_train/margins": 16.603557109832764, "rewards_train/rejected": -17.735965728759766, "step": 3321 }, { "epoch": 1.64, "learning_rate": 4.674564079946878e-07, "loss": 0.0, "step": 3322 }, { "epoch": 1.64, "logps_train/chosen": -73.8594970703125, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -306.2591552734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0872437953948975, "rewards_train/margins": 16.675926446914673, "rewards_train/rejected": -17.76317024230957, "step": 3322 }, { "epoch": 1.64, "learning_rate": 4.6718559981271013e-07, "loss": 0.0, "step": 3323 }, { "epoch": 1.64, "logps_train/chosen": -78.01332092285156, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -306.6447448730469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3320446014404297, "rewards_train/margins": 16.819929122924805, "rewards_train/rejected": -18.151973724365234, "step": 3323 }, { "epoch": 1.64, "learning_rate": 4.66914801298076e-07, "loss": 0.0001, "step": 3324 }, { "epoch": 1.64, "logps_train/chosen": -79.41229248046875, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -310.21484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.276287317276001, "rewards_train/margins": 16.89725089073181, "rewards_train/rejected": -18.173538208007812, "step": 3324 }, { "epoch": 1.64, "learning_rate": 4.666440125305649e-07, "loss": 0.0, "step": 3325 }, { "epoch": 1.64, "logps_train/chosen": -74.36746215820312, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -308.5196228027344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2119899988174438, "rewards_train/margins": 17.030598044395447, "rewards_train/rejected": -18.24258804321289, "step": 3325 }, { "epoch": 1.64, "learning_rate": 4.663732335899526e-07, "loss": 0.0, "step": 3326 }, { "epoch": 1.64, "logps_train/chosen": -77.22023010253906, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -318.34490966796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1908708810806274, "rewards_train/margins": 17.61569344997406, "rewards_train/rejected": -18.806564331054688, "step": 3326 }, { "epoch": 1.64, "learning_rate": 4.661024645560128e-07, "loss": 0.0, "step": 3327 }, { "epoch": 1.64, "logps_train/chosen": -79.29475402832031, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -314.1792907714844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3042802810668945, "rewards_train/margins": 17.205546379089355, "rewards_train/rejected": -18.50982666015625, "step": 3327 }, { "epoch": 1.64, "learning_rate": 4.658317055085154e-07, "loss": 0.0, "step": 3328 }, { "epoch": 1.64, "logps_train/chosen": -75.81790924072266, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -297.6283874511719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.141312837600708, "rewards_train/margins": 16.18685793876648, "rewards_train/rejected": -17.328170776367188, "step": 3328 }, { "epoch": 1.64, "learning_rate": 4.655609565272282e-07, "loss": 0.0, "step": 3329 }, { "epoch": 1.64, "logps_train/chosen": -74.33465576171875, "logps_train/ref_chosen": -62.78125, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -308.148681640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1556825637817383, "rewards_train/margins": 16.734137535095215, "rewards_train/rejected": -17.889820098876953, "step": 3329 }, { "epoch": 1.64, "learning_rate": 4.6529021769191586e-07, "loss": 0.0003, "step": 3330 }, { "epoch": 1.64, "logps_train/chosen": -74.12638854980469, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -303.1011962890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0437428951263428, "rewards_train/margins": 16.795233011245728, "rewards_train/rejected": -17.83897590637207, "step": 3330 }, { "epoch": 1.64, "learning_rate": 4.6501948908233924e-07, "loss": 0.0001, "step": 3331 }, { "epoch": 1.64, "logps_train/chosen": -75.6945571899414, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -309.6827392578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1528542041778564, "rewards_train/margins": 16.898281812667847, "rewards_train/rejected": -18.051136016845703, "step": 3331 }, { "epoch": 1.64, "learning_rate": 4.647487707782575e-07, "loss": 0.0, "step": 3332 }, { "epoch": 1.64, "logps_train/chosen": -72.12329864501953, "logps_train/ref_chosen": -62.875, "logps_train/ref_rejected": -120.5625, "logps_train/rejected": -290.5831298828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.924048662185669, "rewards_train/margins": 16.07742953300476, "rewards_train/rejected": -17.00147819519043, "step": 3332 }, { "epoch": 1.64, "learning_rate": 4.6447806285942546e-07, "loss": 0.0002, "step": 3333 }, { "epoch": 1.64, "logps_train/chosen": -75.7938232421875, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -120.0, "logps_train/rejected": -288.7317810058594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.205554485321045, "rewards_train/margins": 15.669434070587158, "rewards_train/rejected": -16.874988555908203, "step": 3333 }, { "epoch": 1.64, "learning_rate": 4.642073654055958e-07, "loss": 0.0, "step": 3334 }, { "epoch": 1.64, "logps_train/chosen": -72.74172973632812, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -301.77008056640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9965360164642334, "rewards_train/margins": 16.78828454017639, "rewards_train/rejected": -17.784820556640625, "step": 3334 }, { "epoch": 1.64, "learning_rate": 4.6393667849651814e-07, "loss": 0.0001, "step": 3335 }, { "epoch": 1.64, "logps_train/chosen": -76.89411163330078, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -310.26605224609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.379889965057373, "rewards_train/margins": 16.870930194854736, "rewards_train/rejected": -18.25082015991211, "step": 3335 }, { "epoch": 1.64, "learning_rate": 4.636660022119381e-07, "loss": 0.0, "step": 3336 }, { "epoch": 1.64, "logps_train/chosen": -78.99884033203125, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -316.592041015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.397783637046814, "rewards_train/margins": 17.248483300209045, "rewards_train/rejected": -18.64626693725586, "step": 3336 }, { "epoch": 1.64, "learning_rate": 4.63395336631599e-07, "loss": 0.0, "step": 3337 }, { "epoch": 1.64, "logps_train/chosen": -74.57540893554688, "logps_train/ref_chosen": -61.75, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -295.1629943847656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2829316854476929, "rewards_train/margins": 15.93346655368805, "rewards_train/rejected": -17.216398239135742, "step": 3337 }, { "epoch": 1.64, "learning_rate": 4.6312468183524073e-07, "loss": 0.0, "step": 3338 }, { "epoch": 1.64, "logps_train/chosen": -73.37535095214844, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -307.98419189453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8090689182281494, "rewards_train/margins": 17.03105330467224, "rewards_train/rejected": -17.84012222290039, "step": 3338 }, { "epoch": 1.64, "learning_rate": 4.628540379026e-07, "loss": 0.0, "step": 3339 }, { "epoch": 1.64, "logps_train/chosen": -78.89828491210938, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -302.29351806640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3102378845214844, "rewards_train/margins": 16.24733543395996, "rewards_train/rejected": -17.557573318481445, "step": 3339 }, { "epoch": 1.64, "learning_rate": 4.6258340491341044e-07, "loss": 0.0, "step": 3340 }, { "epoch": 1.64, "logps_train/chosen": -75.22833251953125, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -297.18389892578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1111148595809937, "rewards_train/margins": 16.14677584171295, "rewards_train/rejected": -17.257890701293945, "step": 3340 }, { "epoch": 1.65, "learning_rate": 4.623127829474021e-07, "loss": 0.0, "step": 3341 }, { "epoch": 1.65, "logps_train/chosen": -82.4371337890625, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -303.1835632324219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.587805151939392, "rewards_train/margins": 16.214147210121155, "rewards_train/rejected": -17.801952362060547, "step": 3341 }, { "epoch": 1.65, "learning_rate": 4.620421720843024e-07, "loss": 0.0, "step": 3342 }, { "epoch": 1.65, "logps_train/chosen": -75.4932861328125, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -318.0545654296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9249633550643921, "rewards_train/margins": 17.78205645084381, "rewards_train/rejected": -18.707019805908203, "step": 3342 }, { "epoch": 1.65, "learning_rate": 4.617715724038348e-07, "loss": 0.0, "step": 3343 }, { "epoch": 1.65, "logps_train/chosen": -71.12535858154297, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -299.18914794921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.666149377822876, "rewards_train/margins": 16.672004461288452, "rewards_train/rejected": -17.338153839111328, "step": 3343 }, { "epoch": 1.65, "learning_rate": 4.615009839857202e-07, "loss": 0.0, "step": 3344 }, { "epoch": 1.65, "logps_train/chosen": -81.0925064086914, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -312.5198669433594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6270241737365723, "rewards_train/margins": 16.886338710784912, "rewards_train/rejected": -18.513362884521484, "step": 3344 }, { "epoch": 1.65, "learning_rate": 4.6123040690967506e-07, "loss": 0.0, "step": 3345 }, { "epoch": 1.65, "logps_train/chosen": -75.19270324707031, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -120.625, "logps_train/rejected": -291.2178955078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.170198678970337, "rewards_train/margins": 15.88782286643982, "rewards_train/rejected": -17.058021545410156, "step": 3345 }, { "epoch": 1.65, "learning_rate": 4.6095984125541386e-07, "loss": 0.0, "step": 3346 }, { "epoch": 1.65, "logps_train/chosen": -77.57752990722656, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -303.72100830078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4032115936279297, "rewards_train/margins": 16.25726890563965, "rewards_train/rejected": -17.660480499267578, "step": 3346 }, { "epoch": 1.65, "learning_rate": 4.6068928710264704e-07, "loss": 0.0, "step": 3347 }, { "epoch": 1.65, "logps_train/chosen": -71.43354797363281, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -288.6290283203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8680136203765869, "rewards_train/margins": 15.680925130844116, "rewards_train/rejected": -16.548938751220703, "step": 3347 }, { "epoch": 1.65, "learning_rate": 4.6041874453108133e-07, "loss": 0.0, "step": 3348 }, { "epoch": 1.65, "logps_train/chosen": -80.9270248413086, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -131.625, "logps_train/rejected": -314.42266845703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.495534896850586, "rewards_train/margins": 16.782669067382812, "rewards_train/rejected": -18.2782039642334, "step": 3348 }, { "epoch": 1.65, "learning_rate": 4.601482136204206e-07, "loss": 0.0, "step": 3349 }, { "epoch": 1.65, "logps_train/chosen": -81.70320892333984, "logps_train/ref_chosen": -68.5, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -306.62646484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.320467472076416, "rewards_train/margins": 16.504388332366943, "rewards_train/rejected": -17.82485580444336, "step": 3349 }, { "epoch": 1.65, "learning_rate": 4.5987769445036494e-07, "loss": 0.0, "step": 3350 }, { "epoch": 1.65, "logps_train/chosen": -76.82734680175781, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -307.0805358886719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1510934829711914, "rewards_train/margins": 16.81491756439209, "rewards_train/rejected": -17.96601104736328, "step": 3350 }, { "epoch": 1.65, "learning_rate": 4.596071871006113e-07, "loss": 0.0, "step": 3351 }, { "epoch": 1.65, "logps_train/chosen": -76.06733703613281, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -306.40155029296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.181514024734497, "rewards_train/margins": 17.092137098312378, "rewards_train/rejected": -18.273651123046875, "step": 3351 }, { "epoch": 1.65, "learning_rate": 4.5933669165085297e-07, "loss": 0.0003, "step": 3352 }, { "epoch": 1.65, "logps_train/chosen": -73.84962463378906, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -307.87835693359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8024427890777588, "rewards_train/margins": 16.968888998031616, "rewards_train/rejected": -17.771331787109375, "step": 3352 }, { "epoch": 1.65, "learning_rate": 4.5906620818077954e-07, "loss": 0.0, "step": 3353 }, { "epoch": 1.65, "logps_train/chosen": -75.02134704589844, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -306.54473876953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0223982334136963, "rewards_train/margins": 17.108834981918335, "rewards_train/rejected": -18.13123321533203, "step": 3353 }, { "epoch": 1.65, "learning_rate": 4.5879573677007756e-07, "loss": 0.0, "step": 3354 }, { "epoch": 1.65, "logps_train/chosen": -76.62908172607422, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -295.2667236328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.193084478378296, "rewards_train/margins": 16.235641717910767, "rewards_train/rejected": -17.428726196289062, "step": 3354 }, { "epoch": 1.65, "learning_rate": 4.585252774984294e-07, "loss": 0.0, "step": 3355 }, { "epoch": 1.65, "logps_train/chosen": -72.95291137695312, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -309.2540283203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.91916823387146, "rewards_train/margins": 17.310335397720337, "rewards_train/rejected": -18.229503631591797, "step": 3355 }, { "epoch": 1.65, "learning_rate": 4.582548304455143e-07, "loss": 0.0, "step": 3356 }, { "epoch": 1.65, "logps_train/chosen": -80.01705932617188, "logps_train/ref_chosen": -67.6875, "logps_train/ref_rejected": -131.125, "logps_train/rejected": -317.7898254394531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2351527214050293, "rewards_train/margins": 17.43181848526001, "rewards_train/rejected": -18.66697120666504, "step": 3356 }, { "epoch": 1.65, "learning_rate": 4.5798439569100803e-07, "loss": 0.0, "step": 3357 }, { "epoch": 1.65, "logps_train/chosen": -76.53858947753906, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -304.64044189453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1834970712661743, "rewards_train/margins": 16.662185072898865, "rewards_train/rejected": -17.84568214416504, "step": 3357 }, { "epoch": 1.65, "learning_rate": 4.5771397331458216e-07, "loss": 0.0003, "step": 3358 }, { "epoch": 1.65, "logps_train/chosen": -83.23675537109375, "logps_train/ref_chosen": -67.375, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -315.2344970703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5827572345733643, "rewards_train/margins": 17.093912839889526, "rewards_train/rejected": -18.67667007446289, "step": 3358 }, { "epoch": 1.65, "learning_rate": 4.574435633959052e-07, "loss": 0.0, "step": 3359 }, { "epoch": 1.65, "logps_train/chosen": -83.55418395996094, "logps_train/ref_chosen": -67.4375, "logps_train/ref_rejected": -133.5, "logps_train/rejected": -322.4046630859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6120595932006836, "rewards_train/margins": 17.276551246643066, "rewards_train/rejected": -18.88861083984375, "step": 3359 }, { "epoch": 1.65, "learning_rate": 4.5717316601464155e-07, "loss": 0.0, "step": 3360 }, { "epoch": 1.65, "logps_train/chosen": -76.43002319335938, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -132.75, "logps_train/rejected": -318.4234924316406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0104339122772217, "rewards_train/margins": 17.55134892463684, "rewards_train/rejected": -18.561782836914062, "step": 3360 }, { "epoch": 1.65, "learning_rate": 4.569027812504522e-07, "loss": 0.0, "step": 3361 }, { "epoch": 1.65, "logps_train/chosen": -71.7170181274414, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -296.99957275390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8085668087005615, "rewards_train/margins": 16.344759225845337, "rewards_train/rejected": -17.1533260345459, "step": 3361 }, { "epoch": 1.66, "learning_rate": 4.5663240918299446e-07, "loss": 0.0001, "step": 3362 }, { "epoch": 1.66, "logps_train/chosen": -74.06455993652344, "logps_train/ref_chosen": -61.75, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -294.1170654296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2306504249572754, "rewards_train/margins": 15.865235805511475, "rewards_train/rejected": -17.09588623046875, "step": 3362 }, { "epoch": 1.66, "learning_rate": 4.5636204989192126e-07, "loss": 0.0001, "step": 3363 }, { "epoch": 1.66, "logps_train/chosen": -80.06024932861328, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -301.89117431640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.608563780784607, "rewards_train/margins": 16.17122709751129, "rewards_train/rejected": -17.7797908782959, "step": 3363 }, { "epoch": 1.66, "learning_rate": 4.56091703456883e-07, "loss": 0.0, "step": 3364 }, { "epoch": 1.66, "logps_train/chosen": -71.35964965820312, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -132.625, "logps_train/rejected": -317.132568359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7240020036697388, "rewards_train/margins": 17.720112681388855, "rewards_train/rejected": -18.444114685058594, "step": 3364 }, { "epoch": 1.66, "learning_rate": 4.5582136995752496e-07, "loss": 0.0, "step": 3365 }, { "epoch": 1.66, "logps_train/chosen": -74.96864318847656, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -296.84271240234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1061421632766724, "rewards_train/margins": 16.166507601737976, "rewards_train/rejected": -17.27264976501465, "step": 3365 }, { "epoch": 1.66, "learning_rate": 4.5555104947348927e-07, "loss": 0.0003, "step": 3366 }, { "epoch": 1.66, "logps_train/chosen": -76.69717407226562, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -304.40972900390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1408601999282837, "rewards_train/margins": 16.32559907436371, "rewards_train/rejected": -17.466459274291992, "step": 3366 }, { "epoch": 1.66, "learning_rate": 4.5528074208441475e-07, "loss": 0.0, "step": 3367 }, { "epoch": 1.66, "logps_train/chosen": -73.60903930664062, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -306.566650390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9633945226669312, "rewards_train/margins": 17.13848841190338, "rewards_train/rejected": -18.101882934570312, "step": 3367 }, { "epoch": 1.66, "learning_rate": 4.550104478699351e-07, "loss": 0.0015, "step": 3368 }, { "epoch": 1.66, "logps_train/chosen": -75.74849700927734, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -131.625, "logps_train/rejected": -312.5580139160156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9824184775352478, "rewards_train/margins": 17.108344614505768, "rewards_train/rejected": -18.090763092041016, "step": 3368 }, { "epoch": 1.66, "learning_rate": 4.547401669096812e-07, "loss": 0.0, "step": 3369 }, { "epoch": 1.66, "logps_train/chosen": -75.71514129638672, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -303.6661376953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3087456226348877, "rewards_train/margins": 16.820757627487183, "rewards_train/rejected": -18.12950325012207, "step": 3369 }, { "epoch": 1.66, "learning_rate": 4.544698992832794e-07, "loss": 0.0, "step": 3370 }, { "epoch": 1.66, "logps_train/chosen": -79.96260070800781, "logps_train/ref_chosen": -66.9375, "logps_train/ref_rejected": -130.625, "logps_train/rejected": -311.2129821777344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.302900791168213, "rewards_train/margins": 16.755311489105225, "rewards_train/rejected": -18.058212280273438, "step": 3370 }, { "epoch": 1.66, "learning_rate": 4.541996450703525e-07, "loss": 0.0, "step": 3371 }, { "epoch": 1.66, "logps_train/chosen": -71.33451843261719, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -122.1875, "logps_train/rejected": -298.1059265136719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7475147843360901, "rewards_train/margins": 16.846918046474457, "rewards_train/rejected": -17.594432830810547, "step": 3371 }, { "epoch": 1.66, "learning_rate": 4.539294043505194e-07, "loss": 0.0, "step": 3372 }, { "epoch": 1.66, "logps_train/chosen": -74.79512023925781, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -306.63629150390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1855177879333496, "rewards_train/margins": 16.88426446914673, "rewards_train/rejected": -18.069782257080078, "step": 3372 }, { "epoch": 1.66, "learning_rate": 4.5365917720339445e-07, "loss": 0.0, "step": 3373 }, { "epoch": 1.66, "logps_train/chosen": -76.10102081298828, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -309.7752380371094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2222115993499756, "rewards_train/margins": 17.093691110610962, "rewards_train/rejected": -18.315902709960938, "step": 3373 }, { "epoch": 1.66, "learning_rate": 4.5338896370858876e-07, "loss": 0.0, "step": 3374 }, { "epoch": 1.66, "logps_train/chosen": -73.5985336303711, "logps_train/ref_chosen": -63.03125, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -300.15863037109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0562645196914673, "rewards_train/margins": 16.613115668296814, "rewards_train/rejected": -17.66938018798828, "step": 3374 }, { "epoch": 1.66, "learning_rate": 4.531187639457088e-07, "loss": 0.0, "step": 3375 }, { "epoch": 1.66, "logps_train/chosen": -77.60748291015625, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -309.0364074707031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3598201274871826, "rewards_train/margins": 16.95637059211731, "rewards_train/rejected": -18.316190719604492, "step": 3375 }, { "epoch": 1.66, "learning_rate": 4.5284857799435724e-07, "loss": 0.0, "step": 3376 }, { "epoch": 1.66, "logps_train/chosen": -81.66548156738281, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -130.375, "logps_train/rejected": -312.90057373046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4864214658737183, "rewards_train/margins": 16.765501141548157, "rewards_train/rejected": -18.251922607421875, "step": 3376 }, { "epoch": 1.66, "learning_rate": 4.525784059341329e-07, "loss": 0.0, "step": 3377 }, { "epoch": 1.66, "logps_train/chosen": -73.87742614746094, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -301.588623046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9533671736717224, "rewards_train/margins": 16.754762709140778, "rewards_train/rejected": -17.7081298828125, "step": 3377 }, { "epoch": 1.66, "learning_rate": 4.5230824784463007e-07, "loss": 0.0, "step": 3378 }, { "epoch": 1.66, "logps_train/chosen": -75.4158935546875, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -298.47662353515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1549196243286133, "rewards_train/margins": 16.29484462738037, "rewards_train/rejected": -17.449764251708984, "step": 3378 }, { "epoch": 1.66, "learning_rate": 4.5203810380543946e-07, "loss": 0.0001, "step": 3379 }, { "epoch": 1.66, "logps_train/chosen": -72.9439468383789, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -299.75396728515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8773050308227539, "rewards_train/margins": 16.393893241882324, "rewards_train/rejected": -17.271198272705078, "step": 3379 }, { "epoch": 1.66, "learning_rate": 4.5176797389614676e-07, "loss": 0.0, "step": 3380 }, { "epoch": 1.66, "logps_train/chosen": -77.81564331054688, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -301.3863525390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3198455572128296, "rewards_train/margins": 16.40882647037506, "rewards_train/rejected": -17.72867202758789, "step": 3380 }, { "epoch": 1.66, "learning_rate": 4.514978581963345e-07, "loss": 0.0, "step": 3381 }, { "epoch": 1.66, "logps_train/chosen": -79.72897338867188, "logps_train/ref_chosen": -67.375, "logps_train/ref_rejected": -134.0, "logps_train/rejected": -319.3785400390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2371556758880615, "rewards_train/margins": 17.302845239639282, "rewards_train/rejected": -18.540000915527344, "step": 3381 }, { "epoch": 1.67, "learning_rate": 4.5122775678558077e-07, "loss": 0.0, "step": 3382 }, { "epoch": 1.67, "logps_train/chosen": -71.80599975585938, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -308.12646484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8678553104400635, "rewards_train/margins": 17.254361867904663, "rewards_train/rejected": -18.122217178344727, "step": 3382 }, { "epoch": 1.67, "learning_rate": 4.5095766974345863e-07, "loss": 0.0, "step": 3383 }, { "epoch": 1.67, "logps_train/chosen": -73.82299041748047, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -295.0321044921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.060716986656189, "rewards_train/margins": 16.295915246009827, "rewards_train/rejected": -17.356632232666016, "step": 3383 }, { "epoch": 1.67, "learning_rate": 4.5068759714953825e-07, "loss": 0.0001, "step": 3384 }, { "epoch": 1.67, "logps_train/chosen": -71.15805053710938, "logps_train/ref_chosen": -61.90625, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -310.2558288574219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9242040514945984, "rewards_train/margins": 17.598890125751495, "rewards_train/rejected": -18.523094177246094, "step": 3384 }, { "epoch": 1.67, "learning_rate": 4.5041753908338425e-07, "loss": 0.0, "step": 3385 }, { "epoch": 1.67, "logps_train/chosen": -74.05722045898438, "logps_train/ref_chosen": -62.8125, "logps_train/ref_rejected": -122.125, "logps_train/rejected": -289.9934997558594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1242283582687378, "rewards_train/margins": 15.661107182502747, "rewards_train/rejected": -16.785335540771484, "step": 3385 }, { "epoch": 1.67, "learning_rate": 4.5014749562455795e-07, "loss": 0.0, "step": 3386 }, { "epoch": 1.67, "logps_train/chosen": -69.40717315673828, "logps_train/ref_chosen": -61.3125, "logps_train/ref_rejected": -120.625, "logps_train/rejected": -296.2925720214844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8095406293869019, "rewards_train/margins": 16.761466145515442, "rewards_train/rejected": -17.571006774902344, "step": 3386 }, { "epoch": 1.67, "learning_rate": 4.4987746685261576e-07, "loss": 0.0, "step": 3387 }, { "epoch": 1.67, "logps_train/chosen": -74.04493713378906, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -310.8533020019531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9996108412742615, "rewards_train/margins": 17.292604506015778, "rewards_train/rejected": -18.29221534729004, "step": 3387 }, { "epoch": 1.67, "learning_rate": 4.4960745284710996e-07, "loss": 0.0, "step": 3388 }, { "epoch": 1.67, "logps_train/chosen": -77.08281707763672, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -303.2597351074219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.243706226348877, "rewards_train/margins": 16.650527477264404, "rewards_train/rejected": -17.89423370361328, "step": 3388 }, { "epoch": 1.67, "learning_rate": 4.4933745368758873e-07, "loss": 0.0, "step": 3389 }, { "epoch": 1.67, "logps_train/chosen": -75.27621459960938, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -308.7156982421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0186376571655273, "rewards_train/margins": 17.385499000549316, "rewards_train/rejected": -18.404136657714844, "step": 3389 }, { "epoch": 1.67, "learning_rate": 4.490674694535954e-07, "loss": 0.0, "step": 3390 }, { "epoch": 1.67, "logps_train/chosen": -73.65435791015625, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -296.6474609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9842338562011719, "rewards_train/margins": 16.45458221435547, "rewards_train/rejected": -17.43881607055664, "step": 3390 }, { "epoch": 1.67, "learning_rate": 4.487975002246694e-07, "loss": 0.0, "step": 3391 }, { "epoch": 1.67, "logps_train/chosen": -73.7603988647461, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -307.8961486816406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9573878049850464, "rewards_train/margins": 17.237550854682922, "rewards_train/rejected": -18.19493865966797, "step": 3391 }, { "epoch": 1.67, "learning_rate": 4.4852754608034515e-07, "loss": 0.0, "step": 3392 }, { "epoch": 1.67, "logps_train/chosen": -78.90482330322266, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -131.5, "logps_train/rejected": -316.80902099609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3793007135391235, "rewards_train/margins": 17.149454474449158, "rewards_train/rejected": -18.52875518798828, "step": 3392 }, { "epoch": 1.67, "learning_rate": 4.482576071001532e-07, "loss": 0.0, "step": 3393 }, { "epoch": 1.67, "logps_train/chosen": -77.8154296875, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -313.542724609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.419677972793579, "rewards_train/margins": 17.21838355064392, "rewards_train/rejected": -18.6380615234375, "step": 3393 }, { "epoch": 1.67, "learning_rate": 4.479876833636195e-07, "loss": 0.0001, "step": 3394 }, { "epoch": 1.67, "logps_train/chosen": -81.953857421875, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -306.47222900390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5817137956619263, "rewards_train/margins": 16.50799071788788, "rewards_train/rejected": -18.089704513549805, "step": 3394 }, { "epoch": 1.67, "learning_rate": 4.4771777495026505e-07, "loss": 0.0, "step": 3395 }, { "epoch": 1.67, "logps_train/chosen": -72.58056640625, "logps_train/ref_chosen": -62.34375, "logps_train/ref_rejected": -122.1875, "logps_train/rejected": -303.0833740234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0240228176116943, "rewards_train/margins": 17.067419290542603, "rewards_train/rejected": -18.091442108154297, "step": 3395 }, { "epoch": 1.67, "learning_rate": 4.474478819396071e-07, "loss": 0.0, "step": 3396 }, { "epoch": 1.67, "logps_train/chosen": -76.05363464355469, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -308.6129455566406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1384937763214111, "rewards_train/margins": 17.000777006149292, "rewards_train/rejected": -18.139270782470703, "step": 3396 }, { "epoch": 1.67, "learning_rate": 4.4717800441115744e-07, "loss": 0.0, "step": 3397 }, { "epoch": 1.67, "logps_train/chosen": -78.80252075195312, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -307.2134094238281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4297150373458862, "rewards_train/margins": 16.86276924610138, "rewards_train/rejected": -18.292484283447266, "step": 3397 }, { "epoch": 1.67, "learning_rate": 4.4690814244442427e-07, "loss": 0.0, "step": 3398 }, { "epoch": 1.67, "logps_train/chosen": -72.38740539550781, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -301.105712890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8445513248443604, "rewards_train/margins": 16.838088750839233, "rewards_train/rejected": -17.682640075683594, "step": 3398 }, { "epoch": 1.67, "learning_rate": 4.466382961189107e-07, "loss": 0.0, "step": 3399 }, { "epoch": 1.67, "logps_train/chosen": -76.97381591796875, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -310.29949951171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1448431015014648, "rewards_train/margins": 17.270407676696777, "rewards_train/rejected": -18.415250778198242, "step": 3399 }, { "epoch": 1.67, "learning_rate": 4.4636846551411505e-07, "loss": 0.0001, "step": 3400 }, { "epoch": 1.67, "logps_train/chosen": -75.19721984863281, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -131.75, "logps_train/rejected": -318.5224914550781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8372998237609863, "rewards_train/margins": 17.846591472625732, "rewards_train/rejected": -18.68389129638672, "step": 3400 }, { "epoch": 1.67, "learning_rate": 4.460986507095314e-07, "loss": 0.0, "step": 3401 }, { "epoch": 1.67, "logps_train/chosen": -79.78651428222656, "logps_train/ref_chosen": -68.75, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -311.1807861328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1058974266052246, "rewards_train/margins": 17.283274173736572, "rewards_train/rejected": -18.389171600341797, "step": 3401 }, { "epoch": 1.68, "learning_rate": 4.458288517846489e-07, "loss": 0.0, "step": 3402 }, { "epoch": 1.68, "logps_train/chosen": -80.25215911865234, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -309.6614990234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6021201610565186, "rewards_train/margins": 16.612761735916138, "rewards_train/rejected": -18.214881896972656, "step": 3402 }, { "epoch": 1.68, "learning_rate": 4.4555906881895215e-07, "loss": 0.0001, "step": 3403 }, { "epoch": 1.68, "logps_train/chosen": -79.07615661621094, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -307.85589599609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3572742938995361, "rewards_train/margins": 17.020308256149292, "rewards_train/rejected": -18.377582550048828, "step": 3403 }, { "epoch": 1.68, "learning_rate": 4.452893018919213e-07, "loss": 0.0001, "step": 3404 }, { "epoch": 1.68, "logps_train/chosen": -72.60218811035156, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -302.5501708984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8988908529281616, "rewards_train/margins": 16.75144064426422, "rewards_train/rejected": -17.650331497192383, "step": 3404 }, { "epoch": 1.68, "learning_rate": 4.45019551083031e-07, "loss": 0.0001, "step": 3405 }, { "epoch": 1.68, "logps_train/chosen": -75.70487213134766, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -127.5625, "logps_train/rejected": -313.1363525390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1976847648620605, "rewards_train/margins": 17.35862398147583, "rewards_train/rejected": -18.55630874633789, "step": 3405 }, { "epoch": 1.68, "learning_rate": 4.4474981647175217e-07, "loss": 0.0001, "step": 3406 }, { "epoch": 1.68, "logps_train/chosen": -70.04629516601562, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -299.38616943359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7047760486602783, "rewards_train/margins": 16.945561170578003, "rewards_train/rejected": -17.65033721923828, "step": 3406 }, { "epoch": 1.68, "learning_rate": 4.444800981375501e-07, "loss": 0.0, "step": 3407 }, { "epoch": 1.68, "logps_train/chosen": -76.7045669555664, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -305.9146728515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1607646942138672, "rewards_train/margins": 16.896867752075195, "rewards_train/rejected": -18.057632446289062, "step": 3407 }, { "epoch": 1.68, "learning_rate": 4.4421039615988577e-07, "loss": 0.0, "step": 3408 }, { "epoch": 1.68, "logps_train/chosen": -75.36722564697266, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -302.0859680175781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2157264947891235, "rewards_train/margins": 16.658302664756775, "rewards_train/rejected": -17.8740291595459, "step": 3408 }, { "epoch": 1.68, "learning_rate": 4.4394071061821523e-07, "loss": 0.0001, "step": 3409 }, { "epoch": 1.68, "logps_train/chosen": -75.25758361816406, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -319.8095703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0981214046478271, "rewards_train/margins": 17.786253213882446, "rewards_train/rejected": -18.884374618530273, "step": 3409 }, { "epoch": 1.68, "learning_rate": 4.4367104159198955e-07, "loss": 0.0, "step": 3410 }, { "epoch": 1.68, "logps_train/chosen": -80.33087158203125, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -317.6587219238281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5279121398925781, "rewards_train/margins": 17.399776458740234, "rewards_train/rejected": -18.927688598632812, "step": 3410 }, { "epoch": 1.68, "learning_rate": 4.434013891606552e-07, "loss": 0.0, "step": 3411 }, { "epoch": 1.68, "logps_train/chosen": -73.05425262451172, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -122.125, "logps_train/rejected": -300.4486083984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9070854187011719, "rewards_train/margins": 16.92229461669922, "rewards_train/rejected": -17.82938003540039, "step": 3411 }, { "epoch": 1.68, "learning_rate": 4.431317534036534e-07, "loss": 0.0, "step": 3412 }, { "epoch": 1.68, "logps_train/chosen": -77.0049819946289, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -301.7726135253906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.348008394241333, "rewards_train/margins": 16.670462369918823, "rewards_train/rejected": -18.018470764160156, "step": 3412 }, { "epoch": 1.68, "learning_rate": 4.4286213440042084e-07, "loss": 0.0, "step": 3413 }, { "epoch": 1.68, "logps_train/chosen": -76.46444702148438, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -315.85211181640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1449309587478638, "rewards_train/margins": 17.702879786491394, "rewards_train/rejected": -18.847810745239258, "step": 3413 }, { "epoch": 1.68, "learning_rate": 4.425925322303892e-07, "loss": 0.0, "step": 3414 }, { "epoch": 1.68, "logps_train/chosen": -75.64556884765625, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -298.397705078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9953670501708984, "rewards_train/margins": 16.422239303588867, "rewards_train/rejected": -17.417606353759766, "step": 3414 }, { "epoch": 1.68, "learning_rate": 4.423229469729847e-07, "loss": 0.0, "step": 3415 }, { "epoch": 1.68, "logps_train/chosen": -76.88282012939453, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -311.857666015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.177686333656311, "rewards_train/margins": 17.243040442466736, "rewards_train/rejected": -18.420726776123047, "step": 3415 }, { "epoch": 1.68, "learning_rate": 4.420533787076294e-07, "loss": 0.0, "step": 3416 }, { "epoch": 1.68, "logps_train/chosen": -76.12967681884766, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -304.25628662109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0782999992370605, "rewards_train/margins": 16.846352100372314, "rewards_train/rejected": -17.924652099609375, "step": 3416 }, { "epoch": 1.68, "learning_rate": 4.4178382751373966e-07, "loss": 0.0001, "step": 3417 }, { "epoch": 1.68, "logps_train/chosen": -72.41065979003906, "logps_train/ref_chosen": -62.15625, "logps_train/ref_rejected": -120.8125, "logps_train/rejected": -299.9713134765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.026099443435669, "rewards_train/margins": 16.88631510734558, "rewards_train/rejected": -17.91241455078125, "step": 3417 }, { "epoch": 1.68, "learning_rate": 4.41514293470727e-07, "loss": 0.0001, "step": 3418 }, { "epoch": 1.68, "logps_train/chosen": -80.67595672607422, "logps_train/ref_chosen": -69.5, "logps_train/ref_rejected": -135.625, "logps_train/rejected": -328.3460693359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.117302417755127, "rewards_train/margins": 18.15051031112671, "rewards_train/rejected": -19.267812728881836, "step": 3418 }, { "epoch": 1.68, "learning_rate": 4.412447766579984e-07, "loss": 0.0001, "step": 3419 }, { "epoch": 1.68, "logps_train/chosen": -69.28057098388672, "logps_train/ref_chosen": -62.09375, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -304.93756103515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7186332941055298, "rewards_train/margins": 17.297096610069275, "rewards_train/rejected": -18.015729904174805, "step": 3419 }, { "epoch": 1.68, "learning_rate": 4.409752771549549e-07, "loss": 0.0005, "step": 3420 }, { "epoch": 1.68, "logps_train/chosen": -78.26563262939453, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -300.64459228515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.457764744758606, "rewards_train/margins": 16.045557856559753, "rewards_train/rejected": -17.50332260131836, "step": 3420 }, { "epoch": 1.68, "learning_rate": 4.4070579504099314e-07, "loss": 0.0, "step": 3421 }, { "epoch": 1.68, "logps_train/chosen": -72.98890686035156, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -298.3524169921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8495256900787354, "rewards_train/margins": 16.431419134140015, "rewards_train/rejected": -17.28094482421875, "step": 3421 }, { "epoch": 1.68, "learning_rate": 4.404363303955042e-07, "loss": 0.0, "step": 3422 }, { "epoch": 1.68, "logps_train/chosen": -71.44520568847656, "logps_train/ref_chosen": -61.84375, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -293.58819580078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9606345891952515, "rewards_train/margins": 16.19632875919342, "rewards_train/rejected": -17.156963348388672, "step": 3422 }, { "epoch": 1.69, "learning_rate": 4.4016688329787414e-07, "loss": 0.0003, "step": 3423 }, { "epoch": 1.69, "logps_train/chosen": -71.84410095214844, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -300.89599609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6277691721916199, "rewards_train/margins": 17.052261650562286, "rewards_train/rejected": -17.680030822753906, "step": 3423 }, { "epoch": 1.69, "learning_rate": 4.398974538274842e-07, "loss": 0.0, "step": 3424 }, { "epoch": 1.69, "logps_train/chosen": -75.05484771728516, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -297.32208251953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.275504231452942, "rewards_train/margins": 16.132046580314636, "rewards_train/rejected": -17.407550811767578, "step": 3424 }, { "epoch": 1.69, "learning_rate": 4.396280420637098e-07, "loss": 0.0, "step": 3425 }, { "epoch": 1.69, "logps_train/chosen": -74.94339752197266, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -304.6878967285156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2209265232086182, "rewards_train/margins": 16.754750967025757, "rewards_train/rejected": -17.975677490234375, "step": 3425 }, { "epoch": 1.69, "learning_rate": 4.3935864808592165e-07, "loss": 0.0, "step": 3426 }, { "epoch": 1.69, "logps_train/chosen": -77.50633239746094, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -309.20367431640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1846177577972412, "rewards_train/margins": 16.794050455093384, "rewards_train/rejected": -17.978668212890625, "step": 3426 }, { "epoch": 1.69, "learning_rate": 4.390892719734849e-07, "loss": 0.0, "step": 3427 }, { "epoch": 1.69, "logps_train/chosen": -76.21131134033203, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -121.375, "logps_train/rejected": -300.87579345703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2381720542907715, "rewards_train/margins": 16.715620517730713, "rewards_train/rejected": -17.953792572021484, "step": 3427 }, { "epoch": 1.69, "learning_rate": 4.3881991380575985e-07, "loss": 0.0, "step": 3428 }, { "epoch": 1.69, "logps_train/chosen": -75.3167724609375, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -316.556396484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9457886815071106, "rewards_train/margins": 17.80008763074875, "rewards_train/rejected": -18.74587631225586, "step": 3428 }, { "epoch": 1.69, "learning_rate": 4.385505736621007e-07, "loss": 0.0, "step": 3429 }, { "epoch": 1.69, "logps_train/chosen": -81.18940734863281, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -131.125, "logps_train/rejected": -316.30517578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4786579608917236, "rewards_train/margins": 17.04004168510437, "rewards_train/rejected": -18.518699645996094, "step": 3429 }, { "epoch": 1.69, "learning_rate": 4.3828125162185725e-07, "loss": 0.0, "step": 3430 }, { "epoch": 1.69, "logps_train/chosen": -73.17521667480469, "logps_train/ref_chosen": -61.8125, "logps_train/ref_rejected": -119.9375, "logps_train/rejected": -283.583251953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1356371641159058, "rewards_train/margins": 15.232404828071594, "rewards_train/rejected": -16.3680419921875, "step": 3430 }, { "epoch": 1.69, "learning_rate": 4.3801194776437377e-07, "loss": 0.0001, "step": 3431 }, { "epoch": 1.69, "logps_train/chosen": -79.25241088867188, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -309.3856201171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3933076858520508, "rewards_train/margins": 17.097399711608887, "rewards_train/rejected": -18.490707397460938, "step": 3431 }, { "epoch": 1.69, "learning_rate": 4.377426621689885e-07, "loss": 0.0, "step": 3432 }, { "epoch": 1.69, "logps_train/chosen": -75.34442138671875, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -299.65350341796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.053827166557312, "rewards_train/margins": 16.453954815864563, "rewards_train/rejected": -17.507781982421875, "step": 3432 }, { "epoch": 1.69, "learning_rate": 4.3747339491503534e-07, "loss": 0.0002, "step": 3433 }, { "epoch": 1.69, "logps_train/chosen": -74.27678680419922, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -299.81024169921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0636646747589111, "rewards_train/margins": 16.343533277511597, "rewards_train/rejected": -17.407197952270508, "step": 3433 }, { "epoch": 1.69, "learning_rate": 4.372041460818417e-07, "loss": 0.0, "step": 3434 }, { "epoch": 1.69, "logps_train/chosen": -76.59501647949219, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -310.61749267578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.385819673538208, "rewards_train/margins": 16.89609408378601, "rewards_train/rejected": -18.28191375732422, "step": 3434 }, { "epoch": 1.69, "learning_rate": 4.3693491574873024e-07, "loss": 0.0, "step": 3435 }, { "epoch": 1.69, "logps_train/chosen": -77.9161148071289, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -300.83502197265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2938085794448853, "rewards_train/margins": 16.414008498191833, "rewards_train/rejected": -17.70781707763672, "step": 3435 }, { "epoch": 1.69, "learning_rate": 4.366657039950186e-07, "loss": 0.0, "step": 3436 }, { "epoch": 1.69, "logps_train/chosen": -72.02560424804688, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -304.3062744140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.79557865858078, "rewards_train/margins": 17.066687881946564, "rewards_train/rejected": -17.862266540527344, "step": 3436 }, { "epoch": 1.69, "learning_rate": 4.3639651090001757e-07, "loss": 0.0, "step": 3437 }, { "epoch": 1.69, "logps_train/chosen": -79.72917175292969, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -315.34661865234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4748704433441162, "rewards_train/margins": 17.343385934829712, "rewards_train/rejected": -18.818256378173828, "step": 3437 }, { "epoch": 1.69, "learning_rate": 4.361273365430337e-07, "loss": 0.0, "step": 3438 }, { "epoch": 1.69, "logps_train/chosen": -75.52273559570312, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -309.9851989746094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1386744976043701, "rewards_train/margins": 17.226788759231567, "rewards_train/rejected": -18.365463256835938, "step": 3438 }, { "epoch": 1.69, "learning_rate": 4.3585818100336744e-07, "loss": 0.0, "step": 3439 }, { "epoch": 1.69, "logps_train/chosen": -73.30231475830078, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -120.0625, "logps_train/rejected": -293.19097900390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9686106443405151, "rewards_train/margins": 16.347410321235657, "rewards_train/rejected": -17.316020965576172, "step": 3439 }, { "epoch": 1.69, "learning_rate": 4.3585818100336744e-07, "loss": 0.0045, "step": 3440 }, { "epoch": 1.69, "logps_train/chosen": -73.50485229492188, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -297.133056640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1213340759277344, "rewards_train/margins": 16.283233642578125, "rewards_train/rejected": -17.40456771850586, "step": 3440 }, { "epoch": 1.69, "learning_rate": 4.355890443603139e-07, "loss": 0.0, "step": 3441 }, { "epoch": 1.69, "logps_train/chosen": -78.71171569824219, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -315.5506896972656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2616498470306396, "rewards_train/margins": 17.568809747695923, "rewards_train/rejected": -18.830459594726562, "step": 3441 }, { "epoch": 1.69, "learning_rate": 4.3531992669316264e-07, "loss": 0.0, "step": 3442 }, { "epoch": 1.69, "logps_train/chosen": -73.6104507446289, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -304.39617919921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.110849380493164, "rewards_train/margins": 17.014802932739258, "rewards_train/rejected": -18.125652313232422, "step": 3442 }, { "epoch": 1.7, "learning_rate": 4.350508280811973e-07, "loss": 0.0002, "step": 3443 }, { "epoch": 1.7, "logps_train/chosen": -76.07563018798828, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -308.20965576171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2419867515563965, "rewards_train/margins": 17.31564950942993, "rewards_train/rejected": -18.557636260986328, "step": 3443 }, { "epoch": 1.7, "learning_rate": 4.3478174860369634e-07, "loss": 0.0, "step": 3444 }, { "epoch": 1.7, "logps_train/chosen": -78.4456787109375, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -303.37847900390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4947631359100342, "rewards_train/margins": 16.613982439041138, "rewards_train/rejected": -18.108745574951172, "step": 3444 }, { "epoch": 1.7, "learning_rate": 4.345126883399322e-07, "loss": 0.0, "step": 3445 }, { "epoch": 1.7, "logps_train/chosen": -75.23429870605469, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -308.09832763671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.039957880973816, "rewards_train/margins": 17.145753026008606, "rewards_train/rejected": -18.185710906982422, "step": 3445 }, { "epoch": 1.7, "learning_rate": 4.3424364736917195e-07, "loss": 0.0, "step": 3446 }, { "epoch": 1.7, "logps_train/chosen": -78.05068969726562, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -306.4033508300781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.137588620185852, "rewards_train/margins": 16.80318820476532, "rewards_train/rejected": -17.940776824951172, "step": 3446 }, { "epoch": 1.7, "learning_rate": 4.3397462577067707e-07, "loss": 0.0, "step": 3447 }, { "epoch": 1.7, "logps_train/chosen": -72.63719940185547, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -305.08367919921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8199699521064758, "rewards_train/margins": 17.277754962444305, "rewards_train/rejected": -18.09772491455078, "step": 3447 }, { "epoch": 1.7, "learning_rate": 4.3370562362370285e-07, "loss": 0.0, "step": 3448 }, { "epoch": 1.7, "logps_train/chosen": -73.83675384521484, "logps_train/ref_chosen": -61.375, "logps_train/ref_rejected": -119.4375, "logps_train/rejected": -298.3983154296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2456870079040527, "rewards_train/margins": 16.65049409866333, "rewards_train/rejected": -17.896181106567383, "step": 3448 }, { "epoch": 1.7, "learning_rate": 4.3343664100749946e-07, "loss": 0.0002, "step": 3449 }, { "epoch": 1.7, "logps_train/chosen": -76.7210693359375, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -300.28997802734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0625369548797607, "rewards_train/margins": 16.551032304763794, "rewards_train/rejected": -17.613569259643555, "step": 3449 }, { "epoch": 1.7, "learning_rate": 4.3316767800131036e-07, "loss": 0.0, "step": 3450 }, { "epoch": 1.7, "logps_train/chosen": -81.06962585449219, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -131.75, "logps_train/rejected": -324.7374572753906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5448532104492188, "rewards_train/margins": 17.75301742553711, "rewards_train/rejected": -19.297870635986328, "step": 3450 }, { "epoch": 1.7, "learning_rate": 4.328987346843745e-07, "loss": 0.0, "step": 3451 }, { "epoch": 1.7, "logps_train/chosen": -79.32630157470703, "logps_train/ref_chosen": -67.25, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -306.34075927734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2105106115341187, "rewards_train/margins": 16.80208170413971, "rewards_train/rejected": -18.012592315673828, "step": 3451 }, { "epoch": 1.7, "learning_rate": 4.326298111359244e-07, "loss": 0.0, "step": 3452 }, { "epoch": 1.7, "logps_train/chosen": -75.16246795654297, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -307.82574462890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9461781978607178, "rewards_train/margins": 16.843136072158813, "rewards_train/rejected": -17.78931427001953, "step": 3452 }, { "epoch": 1.7, "learning_rate": 4.3236090743518625e-07, "loss": 0.0, "step": 3453 }, { "epoch": 1.7, "logps_train/chosen": -75.56655883789062, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -303.9916076660156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1157865524291992, "rewards_train/margins": 16.686108589172363, "rewards_train/rejected": -17.801895141601562, "step": 3453 }, { "epoch": 1.7, "learning_rate": 4.3209202366138136e-07, "loss": 0.0001, "step": 3454 }, { "epoch": 1.7, "logps_train/chosen": -72.84092712402344, "logps_train/ref_chosen": -61.9375, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -307.5247497558594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0917106866836548, "rewards_train/margins": 17.189184546470642, "rewards_train/rejected": -18.280895233154297, "step": 3454 }, { "epoch": 1.7, "learning_rate": 4.318231598937244e-07, "loss": 0.0, "step": 3455 }, { "epoch": 1.7, "logps_train/chosen": -77.84490203857422, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -309.4324035644531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.34379243850708, "rewards_train/margins": 17.008187770843506, "rewards_train/rejected": -18.351980209350586, "step": 3455 }, { "epoch": 1.7, "learning_rate": 4.3155431621142457e-07, "loss": 0.0001, "step": 3456 }, { "epoch": 1.7, "logps_train/chosen": -81.14967346191406, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -313.07305908203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.597657322883606, "rewards_train/margins": 17.013947367668152, "rewards_train/rejected": -18.611604690551758, "step": 3456 }, { "epoch": 1.7, "learning_rate": 4.312854926936852e-07, "loss": 0.0, "step": 3457 }, { "epoch": 1.7, "logps_train/chosen": -73.72691345214844, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -310.9680480957031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.077427864074707, "rewards_train/margins": 17.624945640563965, "rewards_train/rejected": -18.702373504638672, "step": 3457 }, { "epoch": 1.7, "learning_rate": 4.3101668941970314e-07, "loss": 0.0, "step": 3458 }, { "epoch": 1.7, "logps_train/chosen": -76.27545928955078, "logps_train/ref_chosen": -61.28125, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -312.89935302734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.499371886253357, "rewards_train/margins": 17.320494294166565, "rewards_train/rejected": -18.819866180419922, "step": 3458 }, { "epoch": 1.7, "learning_rate": 4.3074790646867004e-07, "loss": 0.0, "step": 3459 }, { "epoch": 1.7, "logps_train/chosen": -76.57644653320312, "logps_train/ref_chosen": -62.0, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -306.90435791015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4563260078430176, "rewards_train/margins": 16.71404218673706, "rewards_train/rejected": -18.170368194580078, "step": 3459 }, { "epoch": 1.7, "learning_rate": 4.304791439197709e-07, "loss": 0.0, "step": 3460 }, { "epoch": 1.7, "logps_train/chosen": -73.05496215820312, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -303.75091552734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9691194295883179, "rewards_train/margins": 16.93868887424469, "rewards_train/rejected": -17.907808303833008, "step": 3460 }, { "epoch": 1.7, "learning_rate": 4.30210401852185e-07, "loss": 0.0001, "step": 3461 }, { "epoch": 1.7, "logps_train/chosen": -78.22964477539062, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -315.66668701171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2888586521148682, "rewards_train/margins": 17.44284987449646, "rewards_train/rejected": -18.731708526611328, "step": 3461 }, { "epoch": 1.7, "learning_rate": 4.299416803450858e-07, "loss": 0.0, "step": 3462 }, { "epoch": 1.7, "logps_train/chosen": -77.96583557128906, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -311.78094482421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1131858825683594, "rewards_train/margins": 17.28278160095215, "rewards_train/rejected": -18.395967483520508, "step": 3462 }, { "epoch": 1.71, "learning_rate": 4.2967297947764015e-07, "loss": 0.0, "step": 3463 }, { "epoch": 1.71, "logps_train/chosen": -89.25680541992188, "logps_train/ref_chosen": -68.3125, "logps_train/ref_rejected": -135.0, "logps_train/rejected": -334.10906982421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0918426513671875, "rewards_train/margins": 17.82321548461914, "rewards_train/rejected": -19.915058135986328, "step": 3463 }, { "epoch": 1.71, "learning_rate": 4.2940429932900956e-07, "loss": 0.0, "step": 3464 }, { "epoch": 1.71, "logps_train/chosen": -74.7265396118164, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -312.4437561035156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8688450455665588, "rewards_train/margins": 17.48695558309555, "rewards_train/rejected": -18.35580062866211, "step": 3464 }, { "epoch": 1.71, "learning_rate": 4.291356399783484e-07, "loss": 0.0, "step": 3465 }, { "epoch": 1.71, "logps_train/chosen": -79.55101013183594, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -300.0682373046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4090077877044678, "rewards_train/margins": 16.159340143203735, "rewards_train/rejected": -17.568347930908203, "step": 3465 }, { "epoch": 1.71, "learning_rate": 4.288670015048062e-07, "loss": 0.0, "step": 3466 }, { "epoch": 1.71, "logps_train/chosen": -72.33154296875, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -307.12298583984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9055171012878418, "rewards_train/margins": 17.234711170196533, "rewards_train/rejected": -18.140228271484375, "step": 3466 }, { "epoch": 1.71, "learning_rate": 4.2859838398752514e-07, "loss": 0.0001, "step": 3467 }, { "epoch": 1.71, "logps_train/chosen": -73.48347473144531, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -311.7132873535156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9468580484390259, "rewards_train/margins": 17.461778044700623, "rewards_train/rejected": -18.40863609313965, "step": 3467 }, { "epoch": 1.71, "learning_rate": 4.2832978750564187e-07, "loss": 0.0, "step": 3468 }, { "epoch": 1.71, "logps_train/chosen": -74.01361083984375, "logps_train/ref_chosen": -63.15625, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -313.63128662109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.086322546005249, "rewards_train/margins": 17.434715509414673, "rewards_train/rejected": -18.521038055419922, "step": 3468 }, { "epoch": 1.71, "learning_rate": 4.280612121382871e-07, "loss": 0.0, "step": 3469 }, { "epoch": 1.71, "logps_train/chosen": -74.84431457519531, "logps_train/ref_chosen": -62.5625, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -309.7549133300781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.227205514907837, "rewards_train/margins": 17.331388235092163, "rewards_train/rejected": -18.55859375, "step": 3469 }, { "epoch": 1.71, "learning_rate": 4.277926579645845e-07, "loss": 0.0, "step": 3470 }, { "epoch": 1.71, "logps_train/chosen": -80.55043029785156, "logps_train/ref_chosen": -61.96875, "logps_train/ref_rejected": -118.6875, "logps_train/rejected": -299.3917236328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8585100173950195, "rewards_train/margins": 16.213866233825684, "rewards_train/rejected": -18.072376251220703, "step": 3470 }, { "epoch": 1.71, "learning_rate": 4.2752412506365213e-07, "loss": 0.0001, "step": 3471 }, { "epoch": 1.71, "logps_train/chosen": -79.8761978149414, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -298.840087890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3967994451522827, "rewards_train/margins": 16.05068552494049, "rewards_train/rejected": -17.447484970092773, "step": 3471 }, { "epoch": 1.71, "learning_rate": 4.272556135146015e-07, "loss": 0.0003, "step": 3472 }, { "epoch": 1.71, "logps_train/chosen": -78.03435516357422, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -130.375, "logps_train/rejected": -316.5191345214844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1746760606765747, "rewards_train/margins": 17.441595911979675, "rewards_train/rejected": -18.61627197265625, "step": 3472 }, { "epoch": 1.71, "learning_rate": 4.2698712339653803e-07, "loss": 0.0, "step": 3473 }, { "epoch": 1.71, "logps_train/chosen": -73.5655517578125, "logps_train/ref_chosen": -61.03125, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -298.9820861816406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2532098293304443, "rewards_train/margins": 16.394266366958618, "rewards_train/rejected": -17.647476196289062, "step": 3473 }, { "epoch": 1.71, "learning_rate": 4.267186547885607e-07, "loss": 0.0, "step": 3474 }, { "epoch": 1.71, "logps_train/chosen": -75.16609191894531, "logps_train/ref_chosen": -61.6875, "logps_train/ref_rejected": -121.375, "logps_train/rejected": -297.2015380859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.346833348274231, "rewards_train/margins": 16.2328919172287, "rewards_train/rejected": -17.57972526550293, "step": 3474 }, { "epoch": 1.71, "learning_rate": 4.264502077697622e-07, "loss": 0.0, "step": 3475 }, { "epoch": 1.71, "logps_train/chosen": -74.71583557128906, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -302.94586181640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.950196385383606, "rewards_train/margins": 16.889849543571472, "rewards_train/rejected": -17.840045928955078, "step": 3475 }, { "epoch": 1.71, "learning_rate": 4.2618178241922877e-07, "loss": 0.0, "step": 3476 }, { "epoch": 1.71, "logps_train/chosen": -74.62108612060547, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -304.18115234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0288076400756836, "rewards_train/margins": 16.862648963928223, "rewards_train/rejected": -17.891456604003906, "step": 3476 }, { "epoch": 1.71, "learning_rate": 4.2591337881604023e-07, "loss": 0.0, "step": 3477 }, { "epoch": 1.71, "logps_train/chosen": -71.42189025878906, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -308.405517578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8744640350341797, "rewards_train/margins": 17.528196334838867, "rewards_train/rejected": -18.402660369873047, "step": 3477 }, { "epoch": 1.71, "learning_rate": 4.2564499703927026e-07, "loss": 0.0, "step": 3478 }, { "epoch": 1.71, "logps_train/chosen": -81.09468078613281, "logps_train/ref_chosen": -68.0625, "logps_train/ref_rejected": -135.0, "logps_train/rejected": -323.953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3047808408737183, "rewards_train/margins": 17.593021512031555, "rewards_train/rejected": -18.897802352905273, "step": 3478 }, { "epoch": 1.71, "learning_rate": 4.2537663716798595e-07, "loss": 0.0, "step": 3479 }, { "epoch": 1.71, "logps_train/chosen": -79.68241119384766, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -323.30035400390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4505162239074707, "rewards_train/margins": 18.10246992111206, "rewards_train/rejected": -19.55298614501953, "step": 3479 }, { "epoch": 1.71, "learning_rate": 4.251082992812478e-07, "loss": 0.0, "step": 3480 }, { "epoch": 1.71, "logps_train/chosen": -72.80191040039062, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -306.25848388671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9717926383018494, "rewards_train/margins": 17.06821972131729, "rewards_train/rejected": -18.04001235961914, "step": 3480 }, { "epoch": 1.71, "learning_rate": 4.2483998345811023e-07, "loss": 0.0, "step": 3481 }, { "epoch": 1.71, "logps_train/chosen": -78.75867462158203, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -131.875, "logps_train/rejected": -314.7625427246094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.193347692489624, "rewards_train/margins": 17.09340500831604, "rewards_train/rejected": -18.286752700805664, "step": 3481 }, { "epoch": 1.71, "learning_rate": 4.245716897776204e-07, "loss": 0.0, "step": 3482 }, { "epoch": 1.71, "logps_train/chosen": -75.79393005371094, "logps_train/ref_chosen": -61.9375, "logps_train/ref_rejected": -120.3125, "logps_train/rejected": -299.11846923828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3860822916030884, "rewards_train/margins": 16.493096709251404, "rewards_train/rejected": -17.879179000854492, "step": 3482 }, { "epoch": 1.71, "learning_rate": 4.2430341831881983e-07, "loss": 0.0001, "step": 3483 }, { "epoch": 1.71, "logps_train/chosen": -75.5833740234375, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -302.7041015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.013757348060608, "rewards_train/margins": 16.951427817344666, "rewards_train/rejected": -17.965185165405273, "step": 3483 }, { "epoch": 1.72, "learning_rate": 4.240351691607432e-07, "loss": 0.0001, "step": 3484 }, { "epoch": 1.72, "logps_train/chosen": -75.93511962890625, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -135.625, "logps_train/rejected": -330.6885986328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.067291259765625, "rewards_train/margins": 18.443073272705078, "rewards_train/rejected": -19.510364532470703, "step": 3484 }, { "epoch": 1.72, "learning_rate": 4.237669423824181e-07, "loss": 0.0, "step": 3485 }, { "epoch": 1.72, "logps_train/chosen": -77.17657470703125, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -305.2747497558594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3533508777618408, "rewards_train/margins": 16.816410779953003, "rewards_train/rejected": -18.169761657714844, "step": 3485 }, { "epoch": 1.72, "learning_rate": 4.2349873806286634e-07, "loss": 0.0002, "step": 3486 }, { "epoch": 1.72, "logps_train/chosen": -77.32352447509766, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -312.19268798828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.265189528465271, "rewards_train/margins": 17.31472074985504, "rewards_train/rejected": -18.579910278320312, "step": 3486 }, { "epoch": 1.72, "learning_rate": 4.2323055628110246e-07, "loss": 0.0, "step": 3487 }, { "epoch": 1.72, "logps_train/chosen": -75.36616516113281, "logps_train/ref_chosen": -62.78125, "logps_train/ref_rejected": -121.6875, "logps_train/rejected": -301.31829833984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2574169635772705, "rewards_train/margins": 16.70615267753601, "rewards_train/rejected": -17.96356964111328, "step": 3487 }, { "epoch": 1.72, "learning_rate": 4.2296239711613466e-07, "loss": 0.0, "step": 3488 }, { "epoch": 1.72, "logps_train/chosen": -77.28846740722656, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -318.85064697265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2234269380569458, "rewards_train/margins": 17.584880709648132, "rewards_train/rejected": -18.808307647705078, "step": 3488 }, { "epoch": 1.72, "learning_rate": 4.2269426064696464e-07, "loss": 0.0, "step": 3489 }, { "epoch": 1.72, "logps_train/chosen": -76.91152954101562, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -301.7679443359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4658117294311523, "rewards_train/margins": 16.451804161071777, "rewards_train/rejected": -17.91761589050293, "step": 3489 }, { "epoch": 1.72, "learning_rate": 4.224261469525869e-07, "loss": 0.0009, "step": 3490 }, { "epoch": 1.72, "logps_train/chosen": -68.204345703125, "logps_train/ref_chosen": -62.5625, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -299.1957092285156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5631592869758606, "rewards_train/margins": 16.686489760875702, "rewards_train/rejected": -17.249649047851562, "step": 3490 }, { "epoch": 1.72, "learning_rate": 4.2215805611198994e-07, "loss": 0.0, "step": 3491 }, { "epoch": 1.72, "logps_train/chosen": -76.44462585449219, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -310.49945068359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2726850509643555, "rewards_train/margins": 17.438782691955566, "rewards_train/rejected": -18.711467742919922, "step": 3491 }, { "epoch": 1.72, "learning_rate": 4.218899882041548e-07, "loss": 0.0001, "step": 3492 }, { "epoch": 1.72, "logps_train/chosen": -78.61444854736328, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -316.5403747558594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3162786960601807, "rewards_train/margins": 17.559587240219116, "rewards_train/rejected": -18.875865936279297, "step": 3492 }, { "epoch": 1.72, "learning_rate": 4.2162194330805614e-07, "loss": 0.0, "step": 3493 }, { "epoch": 1.72, "logps_train/chosen": -75.47723388671875, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -323.4082336425781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0647649765014648, "rewards_train/margins": 18.30823802947998, "rewards_train/rejected": -19.373003005981445, "step": 3493 }, { "epoch": 1.72, "learning_rate": 4.213539215026619e-07, "loss": 0.0, "step": 3494 }, { "epoch": 1.72, "logps_train/chosen": -80.16212463378906, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -308.82586669921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5630145072937012, "rewards_train/margins": 16.848724842071533, "rewards_train/rejected": -18.411739349365234, "step": 3494 }, { "epoch": 1.72, "learning_rate": 4.210859228669331e-07, "loss": 0.0, "step": 3495 }, { "epoch": 1.72, "logps_train/chosen": -71.28700256347656, "logps_train/ref_chosen": -62.65625, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -298.0592346191406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8631249666213989, "rewards_train/margins": 16.68952739238739, "rewards_train/rejected": -17.55265235900879, "step": 3495 }, { "epoch": 1.72, "learning_rate": 4.208179474798239e-07, "loss": 0.0, "step": 3496 }, { "epoch": 1.72, "logps_train/chosen": -79.89830017089844, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -308.11370849609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.398032784461975, "rewards_train/margins": 16.866657614707947, "rewards_train/rejected": -18.264690399169922, "step": 3496 }, { "epoch": 1.72, "learning_rate": 4.205499954202817e-07, "loss": 0.0001, "step": 3497 }, { "epoch": 1.72, "logps_train/chosen": -75.57232666015625, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -306.50091552734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9180729389190674, "rewards_train/margins": 17.139683485031128, "rewards_train/rejected": -18.057756423950195, "step": 3497 }, { "epoch": 1.72, "learning_rate": 4.2028206676724687e-07, "loss": 0.0, "step": 3498 }, { "epoch": 1.72, "logps_train/chosen": -87.05422973632812, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -320.71112060546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0433623790740967, "rewards_train/margins": 17.161925554275513, "rewards_train/rejected": -19.20528793334961, "step": 3498 }, { "epoch": 1.72, "learning_rate": 4.200141615996532e-07, "loss": 0.0, "step": 3499 }, { "epoch": 1.72, "logps_train/chosen": -78.94559478759766, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -303.978515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.484403371810913, "rewards_train/margins": 16.513351678848267, "rewards_train/rejected": -17.99775505065918, "step": 3499 }, { "epoch": 1.72, "learning_rate": 4.1974627999642715e-07, "loss": 0.0002, "step": 3500 }, { "epoch": 1.72, "logps_train/chosen": -69.78797149658203, "logps_train/ref_chosen": -61.625, "logps_train/ref_rejected": -117.875, "logps_train/rejected": -296.401611328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8147830963134766, "rewards_train/margins": 17.036123275756836, "rewards_train/rejected": -17.850906372070312, "step": 3500 }, { "epoch": 1.72, "learning_rate": 4.1947842203648874e-07, "loss": 0.0, "step": 3501 }, { "epoch": 1.72, "logps_train/chosen": -77.9714584350586, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -308.158935546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3343532085418701, "rewards_train/margins": 16.903172731399536, "rewards_train/rejected": -18.237525939941406, "step": 3501 }, { "epoch": 1.72, "learning_rate": 4.192105877987504e-07, "loss": 0.0, "step": 3502 }, { "epoch": 1.72, "logps_train/chosen": -74.58534240722656, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -309.96783447265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0024797916412354, "rewards_train/margins": 17.30455756187439, "rewards_train/rejected": -18.307037353515625, "step": 3502 }, { "epoch": 1.72, "learning_rate": 4.1894277736211784e-07, "loss": 0.0, "step": 3503 }, { "epoch": 1.72, "logps_train/chosen": -77.33987426757812, "logps_train/ref_chosen": -63.59375, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -301.12493896484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3746368885040283, "rewards_train/margins": 16.363396406173706, "rewards_train/rejected": -17.738033294677734, "step": 3503 }, { "epoch": 1.73, "learning_rate": 4.186749908054904e-07, "loss": 0.0, "step": 3504 }, { "epoch": 1.73, "logps_train/chosen": -80.30720520019531, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -311.34259033203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4816970825195312, "rewards_train/margins": 16.989376068115234, "rewards_train/rejected": -18.471073150634766, "step": 3504 }, { "epoch": 1.73, "learning_rate": 4.184072282077592e-07, "loss": 0.0, "step": 3505 }, { "epoch": 1.73, "logps_train/chosen": -79.92532348632812, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -316.86175537109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5964391231536865, "rewards_train/margins": 17.307024240493774, "rewards_train/rejected": -18.90346336364746, "step": 3505 }, { "epoch": 1.73, "learning_rate": 4.1813948964780927e-07, "loss": 0.0, "step": 3506 }, { "epoch": 1.73, "logps_train/chosen": -81.35491943359375, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -320.86627197265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5935977697372437, "rewards_train/margins": 17.688148140907288, "rewards_train/rejected": -19.28174591064453, "step": 3506 }, { "epoch": 1.73, "learning_rate": 4.1787177520451796e-07, "loss": 0.0001, "step": 3507 }, { "epoch": 1.73, "logps_train/chosen": -78.12875366210938, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -121.8125, "logps_train/rejected": -305.15521240234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.368320345878601, "rewards_train/margins": 16.96580135822296, "rewards_train/rejected": -18.334121704101562, "step": 3507 }, { "epoch": 1.73, "learning_rate": 4.176040849567559e-07, "loss": 0.0001, "step": 3508 }, { "epoch": 1.73, "logps_train/chosen": -72.37716674804688, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -120.0, "logps_train/rejected": -290.3488464355469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.006246566772461, "rewards_train/margins": 16.02961540222168, "rewards_train/rejected": -17.03586196899414, "step": 3508 }, { "epoch": 1.73, "learning_rate": 4.1733641898338633e-07, "loss": 0.0, "step": 3509 }, { "epoch": 1.73, "logps_train/chosen": -75.27009582519531, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -123.3125, "logps_train/rejected": -301.3238830566406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1802810430526733, "rewards_train/margins": 16.622125267982483, "rewards_train/rejected": -17.802406311035156, "step": 3509 }, { "epoch": 1.73, "learning_rate": 4.1706877736326556e-07, "loss": 0.0, "step": 3510 }, { "epoch": 1.73, "logps_train/chosen": -79.18382263183594, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -308.48187255859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5347634553909302, "rewards_train/margins": 17.056198000907898, "rewards_train/rejected": -18.590961456298828, "step": 3510 }, { "epoch": 1.73, "learning_rate": 4.1680116017524265e-07, "loss": 0.0, "step": 3511 }, { "epoch": 1.73, "logps_train/chosen": -77.50813293457031, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -310.7803955078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4125803709030151, "rewards_train/margins": 16.946416974067688, "rewards_train/rejected": -18.358997344970703, "step": 3511 }, { "epoch": 1.73, "learning_rate": 4.1653356749815925e-07, "loss": 0.0, "step": 3512 }, { "epoch": 1.73, "logps_train/chosen": -72.91972351074219, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -298.6429443359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.068192958831787, "rewards_train/margins": 16.537020206451416, "rewards_train/rejected": -17.605213165283203, "step": 3512 }, { "epoch": 1.73, "learning_rate": 4.1626599941085015e-07, "loss": 0.0, "step": 3513 }, { "epoch": 1.73, "logps_train/chosen": -73.29008483886719, "logps_train/ref_chosen": -62.78125, "logps_train/ref_rejected": -120.8125, "logps_train/rejected": -297.73529052734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.051712989807129, "rewards_train/margins": 16.63675594329834, "rewards_train/rejected": -17.68846893310547, "step": 3513 }, { "epoch": 1.73, "learning_rate": 4.1599845599214263e-07, "loss": 0.0, "step": 3514 }, { "epoch": 1.73, "logps_train/chosen": -72.5699462890625, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -305.35797119140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9481087327003479, "rewards_train/margins": 16.97113412618637, "rewards_train/rejected": -17.91924285888672, "step": 3514 }, { "epoch": 1.73, "learning_rate": 4.1573093732085676e-07, "loss": 0.0, "step": 3515 }, { "epoch": 1.73, "logps_train/chosen": -74.67219543457031, "logps_train/ref_chosen": -62.21875, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -305.29644775390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2457348108291626, "rewards_train/margins": 16.931955695152283, "rewards_train/rejected": -18.177690505981445, "step": 3515 }, { "epoch": 1.73, "learning_rate": 4.1546344347580555e-07, "loss": 0.0001, "step": 3516 }, { "epoch": 1.73, "logps_train/chosen": -77.36289978027344, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -316.49517822265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1021596193313599, "rewards_train/margins": 17.615230441093445, "rewards_train/rejected": -18.717390060424805, "step": 3516 }, { "epoch": 1.73, "learning_rate": 4.1519597453579405e-07, "loss": 0.0, "step": 3517 }, { "epoch": 1.73, "logps_train/chosen": -72.11917114257812, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -317.23895263671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.686965823173523, "rewards_train/margins": 17.9933739900589, "rewards_train/rejected": -18.680339813232422, "step": 3517 }, { "epoch": 1.73, "learning_rate": 4.14928530579621e-07, "loss": 0.0, "step": 3518 }, { "epoch": 1.73, "logps_train/chosen": -77.4520034790039, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -316.20697021484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0753271579742432, "rewards_train/margins": 17.66617178916931, "rewards_train/rejected": -18.741498947143555, "step": 3518 }, { "epoch": 1.73, "learning_rate": 4.1466111168607667e-07, "loss": 0.0, "step": 3519 }, { "epoch": 1.73, "logps_train/chosen": -80.60159301757812, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -314.075439453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4863307476043701, "rewards_train/margins": 16.90588116645813, "rewards_train/rejected": -18.3922119140625, "step": 3519 }, { "epoch": 1.73, "learning_rate": 4.143937179339445e-07, "loss": 0.0, "step": 3520 }, { "epoch": 1.73, "logps_train/chosen": -74.45332336425781, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -310.32574462890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9124223589897156, "rewards_train/margins": 17.40877515077591, "rewards_train/rejected": -18.321197509765625, "step": 3520 }, { "epoch": 1.73, "learning_rate": 4.141263494020011e-07, "loss": 0.0, "step": 3521 }, { "epoch": 1.73, "logps_train/chosen": -73.79644775390625, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -312.06243896484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9638246297836304, "rewards_train/margins": 17.53831923007965, "rewards_train/rejected": -18.50214385986328, "step": 3521 }, { "epoch": 1.73, "learning_rate": 4.138590061690144e-07, "loss": 0.0, "step": 3522 }, { "epoch": 1.73, "logps_train/chosen": -76.76307678222656, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -312.59344482421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2896133661270142, "rewards_train/margins": 17.288875937461853, "rewards_train/rejected": -18.578489303588867, "step": 3522 }, { "epoch": 1.73, "learning_rate": 4.135916883137458e-07, "loss": 0.0, "step": 3523 }, { "epoch": 1.73, "logps_train/chosen": -71.83016967773438, "logps_train/ref_chosen": -61.5625, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -313.4183349609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0254004001617432, "rewards_train/margins": 17.6380136013031, "rewards_train/rejected": -18.663414001464844, "step": 3523 }, { "epoch": 1.74, "learning_rate": 4.133243959149488e-07, "loss": 0.0, "step": 3524 }, { "epoch": 1.74, "logps_train/chosen": -76.48970031738281, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -302.1627197265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1713087558746338, "rewards_train/margins": 16.691543340682983, "rewards_train/rejected": -17.862852096557617, "step": 3524 }, { "epoch": 1.74, "learning_rate": 4.1305712905136957e-07, "loss": 0.0, "step": 3525 }, { "epoch": 1.74, "logps_train/chosen": -75.38479614257812, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -301.67889404296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1412630081176758, "rewards_train/margins": 16.58492946624756, "rewards_train/rejected": -17.726192474365234, "step": 3525 }, { "epoch": 1.74, "learning_rate": 4.12789887801747e-07, "loss": 0.0001, "step": 3526 }, { "epoch": 1.74, "logps_train/chosen": -82.82791137695312, "logps_train/ref_chosen": -67.375, "logps_train/ref_rejected": -132.25, "logps_train/rejected": -322.10626220703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5459263324737549, "rewards_train/margins": 17.434231519699097, "rewards_train/rejected": -18.98015785217285, "step": 3526 }, { "epoch": 1.74, "learning_rate": 4.125226722448118e-07, "loss": 0.0, "step": 3527 }, { "epoch": 1.74, "logps_train/chosen": -78.03446960449219, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -296.036376953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4223442077636719, "rewards_train/margins": 15.933635711669922, "rewards_train/rejected": -17.355979919433594, "step": 3527 }, { "epoch": 1.74, "learning_rate": 4.1225548245928777e-07, "loss": 0.0002, "step": 3528 }, { "epoch": 1.74, "logps_train/chosen": -79.07667541503906, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -120.625, "logps_train/rejected": -300.5365295410156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5230000019073486, "rewards_train/margins": 16.466788053512573, "rewards_train/rejected": -17.989788055419922, "step": 3528 }, { "epoch": 1.74, "learning_rate": 4.1198831852389047e-07, "loss": 0.0, "step": 3529 }, { "epoch": 1.74, "logps_train/chosen": -82.58566284179688, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -325.31292724609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.728048324584961, "rewards_train/margins": 17.810619354248047, "rewards_train/rejected": -19.538667678833008, "step": 3529 }, { "epoch": 1.74, "learning_rate": 4.117211805173286e-07, "loss": 0.0, "step": 3530 }, { "epoch": 1.74, "logps_train/chosen": -78.92506408691406, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -317.0240478515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3473398685455322, "rewards_train/margins": 17.70311141014099, "rewards_train/rejected": -19.050451278686523, "step": 3530 }, { "epoch": 1.74, "learning_rate": 4.114540685183026e-07, "loss": 0.0, "step": 3531 }, { "epoch": 1.74, "logps_train/chosen": -81.71124267578125, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -324.407470703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5177068710327148, "rewards_train/margins": 17.967280387878418, "rewards_train/rejected": -19.484987258911133, "step": 3531 }, { "epoch": 1.74, "learning_rate": 4.1118698260550545e-07, "loss": 0.0, "step": 3532 }, { "epoch": 1.74, "logps_train/chosen": -71.24543762207031, "logps_train/ref_chosen": -62.40625, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -306.64398193359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.883625864982605, "rewards_train/margins": 17.25757896900177, "rewards_train/rejected": -18.141204833984375, "step": 3532 }, { "epoch": 1.74, "learning_rate": 4.1091992285762267e-07, "loss": 0.0, "step": 3533 }, { "epoch": 1.74, "logps_train/chosen": -74.50048828125, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -310.41058349609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9817874431610107, "rewards_train/margins": 17.557999849319458, "rewards_train/rejected": -18.53978729248047, "step": 3533 }, { "epoch": 1.74, "learning_rate": 4.106528893533314e-07, "loss": 0.0, "step": 3534 }, { "epoch": 1.74, "logps_train/chosen": -74.96356201171875, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -316.50714111328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0971380472183228, "rewards_train/margins": 17.564221739768982, "rewards_train/rejected": -18.661359786987305, "step": 3534 }, { "epoch": 1.74, "learning_rate": 4.10385882171302e-07, "loss": 0.0, "step": 3535 }, { "epoch": 1.74, "logps_train/chosen": -73.33668518066406, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -311.8377685546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9436782598495483, "rewards_train/margins": 17.2723251581192, "rewards_train/rejected": -18.21600341796875, "step": 3535 }, { "epoch": 1.74, "learning_rate": 4.1011890139019653e-07, "loss": 0.0, "step": 3536 }, { "epoch": 1.74, "logps_train/chosen": -76.89079284667969, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -121.3125, "logps_train/rejected": -305.33154296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3989430665969849, "rewards_train/margins": 17.00467097759247, "rewards_train/rejected": -18.403614044189453, "step": 3536 }, { "epoch": 1.74, "learning_rate": 4.09851947088669e-07, "loss": 0.0, "step": 3537 }, { "epoch": 1.74, "logps_train/chosen": -77.74788665771484, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -317.5890808105469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3485924005508423, "rewards_train/margins": 17.934240698814392, "rewards_train/rejected": -19.282833099365234, "step": 3537 }, { "epoch": 1.74, "learning_rate": 4.095850193453664e-07, "loss": 0.0, "step": 3538 }, { "epoch": 1.74, "logps_train/chosen": -78.70155334472656, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -310.0674133300781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4659810066223145, "rewards_train/margins": 17.104480266571045, "rewards_train/rejected": -18.57046127319336, "step": 3538 }, { "epoch": 1.74, "learning_rate": 4.09318118238927e-07, "loss": 0.0, "step": 3539 }, { "epoch": 1.74, "logps_train/chosen": -79.12905883789062, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -318.3296203613281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3085116147994995, "rewards_train/margins": 17.507455945014954, "rewards_train/rejected": -18.815967559814453, "step": 3539 }, { "epoch": 1.74, "learning_rate": 4.090512438479818e-07, "loss": 0.0, "step": 3540 }, { "epoch": 1.74, "logps_train/chosen": -78.41535949707031, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -317.97418212890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.437434434890747, "rewards_train/margins": 17.367403745651245, "rewards_train/rejected": -18.804838180541992, "step": 3540 }, { "epoch": 1.74, "learning_rate": 4.0878439625115405e-07, "loss": 0.0006, "step": 3541 }, { "epoch": 1.74, "logps_train/chosen": -79.57557678222656, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -314.1025695800781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.503651738166809, "rewards_train/margins": 17.444300532341003, "rewards_train/rejected": -18.947952270507812, "step": 3541 }, { "epoch": 1.74, "learning_rate": 4.085175755270586e-07, "loss": 0.0, "step": 3542 }, { "epoch": 1.74, "logps_train/chosen": -80.5296630859375, "logps_train/ref_chosen": -68.3125, "logps_train/ref_rejected": -132.0, "logps_train/rejected": -324.79248046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2212767601013184, "rewards_train/margins": 18.057581424713135, "rewards_train/rejected": -19.278858184814453, "step": 3542 }, { "epoch": 1.74, "learning_rate": 4.0825078175430274e-07, "loss": 0.0, "step": 3543 }, { "epoch": 1.74, "logps_train/chosen": -76.33319091796875, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -302.51654052734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1886417865753174, "rewards_train/margins": 16.697239637374878, "rewards_train/rejected": -17.885881423950195, "step": 3543 }, { "epoch": 1.74, "learning_rate": 4.0798401501148554e-07, "loss": 0.0001, "step": 3544 }, { "epoch": 1.74, "logps_train/chosen": -79.0323486328125, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -308.07440185546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4506478309631348, "rewards_train/margins": 16.70532751083374, "rewards_train/rejected": -18.155975341796875, "step": 3544 }, { "epoch": 1.75, "learning_rate": 4.0771727537719856e-07, "loss": 0.0002, "step": 3545 }, { "epoch": 1.75, "logps_train/chosen": -76.26637268066406, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -300.9183349609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1845965385437012, "rewards_train/margins": 16.453819751739502, "rewards_train/rejected": -17.638416290283203, "step": 3545 }, { "epoch": 1.75, "learning_rate": 4.074505629300249e-07, "loss": 0.0, "step": 3546 }, { "epoch": 1.75, "logps_train/chosen": -85.3868637084961, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -325.900390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.959291696548462, "rewards_train/margins": 17.666879415512085, "rewards_train/rejected": -19.626171112060547, "step": 3546 }, { "epoch": 1.75, "learning_rate": 4.071838777485398e-07, "loss": 0.0, "step": 3547 }, { "epoch": 1.75, "logps_train/chosen": -77.7800521850586, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -318.72698974609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.353005290031433, "rewards_train/margins": 17.70856201648712, "rewards_train/rejected": -19.061567306518555, "step": 3547 }, { "epoch": 1.75, "learning_rate": 4.069172199113108e-07, "loss": 0.0, "step": 3548 }, { "epoch": 1.75, "logps_train/chosen": -78.06050109863281, "logps_train/ref_chosen": -62.5625, "logps_train/ref_rejected": -120.0, "logps_train/rejected": -300.7763671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5482375621795654, "rewards_train/margins": 16.530524492263794, "rewards_train/rejected": -18.07876205444336, "step": 3548 }, { "epoch": 1.75, "learning_rate": 4.066505894968968e-07, "loss": 0.0003, "step": 3549 }, { "epoch": 1.75, "logps_train/chosen": -74.35394287109375, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -310.38671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9794129133224487, "rewards_train/margins": 17.42185890674591, "rewards_train/rejected": -18.40127182006836, "step": 3549 }, { "epoch": 1.75, "learning_rate": 4.063839865838492e-07, "loss": 0.0001, "step": 3550 }, { "epoch": 1.75, "logps_train/chosen": -76.64945220947266, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -315.6079406738281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.177201271057129, "rewards_train/margins": 17.572556495666504, "rewards_train/rejected": -18.749757766723633, "step": 3550 }, { "epoch": 1.75, "learning_rate": 4.061174112507106e-07, "loss": 0.0001, "step": 3551 }, { "epoch": 1.75, "logps_train/chosen": -77.11680603027344, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -304.78680419921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.403282642364502, "rewards_train/margins": 16.82251501083374, "rewards_train/rejected": -18.225797653198242, "step": 3551 }, { "epoch": 1.75, "learning_rate": 4.058508635760162e-07, "loss": 0.0, "step": 3552 }, { "epoch": 1.75, "logps_train/chosen": -77.32943725585938, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -314.33770751953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.196664810180664, "rewards_train/margins": 17.3864688873291, "rewards_train/rejected": -18.583133697509766, "step": 3552 }, { "epoch": 1.75, "learning_rate": 4.055843436382929e-07, "loss": 0.0, "step": 3553 }, { "epoch": 1.75, "logps_train/chosen": -75.64312744140625, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -122.0625, "logps_train/rejected": -305.637939453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1773500442504883, "rewards_train/margins": 17.178580284118652, "rewards_train/rejected": -18.35593032836914, "step": 3553 }, { "epoch": 1.75, "learning_rate": 4.053178515160589e-07, "loss": 0.0, "step": 3554 }, { "epoch": 1.75, "logps_train/chosen": -75.9915542602539, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -321.0170593261719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.058188796043396, "rewards_train/margins": 18.12203276157379, "rewards_train/rejected": -19.180221557617188, "step": 3554 }, { "epoch": 1.75, "learning_rate": 4.050513872878248e-07, "loss": 0.0, "step": 3555 }, { "epoch": 1.75, "logps_train/chosen": -76.20890808105469, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -308.05364990234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2012622356414795, "rewards_train/margins": 17.002299547195435, "rewards_train/rejected": -18.203561782836914, "step": 3555 }, { "epoch": 1.75, "learning_rate": 4.0478495103209256e-07, "loss": 0.0, "step": 3556 }, { "epoch": 1.75, "logps_train/chosen": -76.24639892578125, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -311.36767578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2296206951141357, "rewards_train/margins": 17.389275789260864, "rewards_train/rejected": -18.618896484375, "step": 3556 }, { "epoch": 1.75, "learning_rate": 4.0451854282735624e-07, "loss": 0.0, "step": 3557 }, { "epoch": 1.75, "logps_train/chosen": -76.76161193847656, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -300.51123046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.281263828277588, "rewards_train/margins": 16.130359172821045, "rewards_train/rejected": -17.411623001098633, "step": 3557 }, { "epoch": 1.75, "learning_rate": 4.0425216275210167e-07, "loss": 0.0001, "step": 3558 }, { "epoch": 1.75, "logps_train/chosen": -77.54730224609375, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -301.23211669921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2813905477523804, "rewards_train/margins": 16.616329789161682, "rewards_train/rejected": -17.897720336914062, "step": 3558 }, { "epoch": 1.75, "learning_rate": 4.0398581088480585e-07, "loss": 0.0001, "step": 3559 }, { "epoch": 1.75, "logps_train/chosen": -69.32298278808594, "logps_train/ref_chosen": -60.46875, "logps_train/ref_rejected": -119.0625, "logps_train/rejected": -289.7261962890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8857654333114624, "rewards_train/margins": 16.178897500038147, "rewards_train/rejected": -17.06466293334961, "step": 3559 }, { "epoch": 1.75, "learning_rate": 4.037194873039382e-07, "loss": 0.0, "step": 3560 }, { "epoch": 1.75, "logps_train/chosen": -71.19566345214844, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -303.71527099609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8119001388549805, "rewards_train/margins": 16.902695655822754, "rewards_train/rejected": -17.714595794677734, "step": 3560 }, { "epoch": 1.75, "learning_rate": 4.034531920879591e-07, "loss": 0.0, "step": 3561 }, { "epoch": 1.75, "logps_train/chosen": -72.28406524658203, "logps_train/ref_chosen": -61.84375, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -305.75872802734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0449838638305664, "rewards_train/margins": 17.15286350250244, "rewards_train/rejected": -18.197847366333008, "step": 3561 }, { "epoch": 1.75, "learning_rate": 4.031869253153211e-07, "loss": 0.0001, "step": 3562 }, { "epoch": 1.75, "logps_train/chosen": -73.41297912597656, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -320.9989013671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9476937055587769, "rewards_train/margins": 18.128073811531067, "rewards_train/rejected": -19.075767517089844, "step": 3562 }, { "epoch": 1.75, "learning_rate": 4.029206870644683e-07, "loss": 0.0001, "step": 3563 }, { "epoch": 1.75, "logps_train/chosen": -76.02888488769531, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -311.8817138671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.147810935974121, "rewards_train/margins": 17.148076057434082, "rewards_train/rejected": -18.295886993408203, "step": 3563 }, { "epoch": 1.75, "learning_rate": 4.02654477413836e-07, "loss": 0.0, "step": 3564 }, { "epoch": 1.75, "logps_train/chosen": -78.81898498535156, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -315.25048828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3325822353363037, "rewards_train/margins": 17.439930200576782, "rewards_train/rejected": -18.772512435913086, "step": 3564 }, { "epoch": 1.76, "learning_rate": 4.0238829644185167e-07, "loss": 0.0, "step": 3565 }, { "epoch": 1.76, "logps_train/chosen": -74.57429504394531, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -312.2037658691406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9784252047538757, "rewards_train/margins": 17.224470913410187, "rewards_train/rejected": -18.202896118164062, "step": 3565 }, { "epoch": 1.76, "learning_rate": 4.021221442269338e-07, "loss": 0.0, "step": 3566 }, { "epoch": 1.76, "logps_train/chosen": -75.28765869140625, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -302.072021484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.148004174232483, "rewards_train/margins": 16.698944449424744, "rewards_train/rejected": -17.846948623657227, "step": 3566 }, { "epoch": 1.76, "learning_rate": 4.0185602084749266e-07, "loss": 0.0, "step": 3567 }, { "epoch": 1.76, "logps_train/chosen": -71.54638671875, "logps_train/ref_chosen": -61.34375, "logps_train/ref_rejected": -120.0, "logps_train/rejected": -292.3768615722656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0195560455322266, "rewards_train/margins": 16.219743728637695, "rewards_train/rejected": -17.239299774169922, "step": 3567 }, { "epoch": 1.76, "learning_rate": 4.015899263819302e-07, "loss": 0.0002, "step": 3568 }, { "epoch": 1.76, "logps_train/chosen": -74.09107971191406, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -312.4017333984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1582287549972534, "rewards_train/margins": 17.429356694221497, "rewards_train/rejected": -18.58758544921875, "step": 3568 }, { "epoch": 1.76, "learning_rate": 4.013238609086392e-07, "loss": 0.0, "step": 3569 }, { "epoch": 1.76, "logps_train/chosen": -76.65780639648438, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -315.2618408203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2321377992630005, "rewards_train/margins": 17.51264750957489, "rewards_train/rejected": -18.74478530883789, "step": 3569 }, { "epoch": 1.76, "learning_rate": 4.0105782450600495e-07, "loss": 0.0007, "step": 3570 }, { "epoch": 1.76, "logps_train/chosen": -78.19242095947266, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -305.5914306640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4022502899169922, "rewards_train/margins": 16.6169490814209, "rewards_train/rejected": -18.01919937133789, "step": 3570 }, { "epoch": 1.76, "learning_rate": 4.00791817252403e-07, "loss": 0.0, "step": 3571 }, { "epoch": 1.76, "logps_train/chosen": -81.4969482421875, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -311.6859130859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4914919137954712, "rewards_train/margins": 17.005616545677185, "rewards_train/rejected": -18.497108459472656, "step": 3571 }, { "epoch": 1.76, "learning_rate": 4.0052583922620104e-07, "loss": 0.0, "step": 3572 }, { "epoch": 1.76, "logps_train/chosen": -76.44464111328125, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -314.71392822265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0707831382751465, "rewards_train/margins": 17.556467533111572, "rewards_train/rejected": -18.62725067138672, "step": 3572 }, { "epoch": 1.76, "learning_rate": 4.002598905057584e-07, "loss": 0.0, "step": 3573 }, { "epoch": 1.76, "logps_train/chosen": -78.29815673828125, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -314.9599914550781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.142072081565857, "rewards_train/margins": 17.624436020851135, "rewards_train/rejected": -18.766508102416992, "step": 3573 }, { "epoch": 1.76, "learning_rate": 3.999939711694248e-07, "loss": 0.0, "step": 3574 }, { "epoch": 1.76, "logps_train/chosen": -77.5166244506836, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -307.540283203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2211939096450806, "rewards_train/margins": 17.15407693386078, "rewards_train/rejected": -18.37527084350586, "step": 3574 }, { "epoch": 1.76, "learning_rate": 3.997280812955422e-07, "loss": 0.0, "step": 3575 }, { "epoch": 1.76, "logps_train/chosen": -81.53113555908203, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -319.64495849609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.692371129989624, "rewards_train/margins": 17.630913019180298, "rewards_train/rejected": -19.323284149169922, "step": 3575 }, { "epoch": 1.76, "learning_rate": 3.9946222096244337e-07, "loss": 0.0, "step": 3576 }, { "epoch": 1.76, "logps_train/chosen": -82.00953674316406, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -307.99169921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7360368967056274, "rewards_train/margins": 16.772069334983826, "rewards_train/rejected": -18.508106231689453, "step": 3576 }, { "epoch": 1.76, "learning_rate": 3.9919639024845267e-07, "loss": 0.0, "step": 3577 }, { "epoch": 1.76, "logps_train/chosen": -74.66189575195312, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -121.25, "logps_train/rejected": -299.66729736328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1428500413894653, "rewards_train/margins": 16.702885508537292, "rewards_train/rejected": -17.845735549926758, "step": 3577 }, { "epoch": 1.76, "learning_rate": 3.9893058923188565e-07, "loss": 0.0003, "step": 3578 }, { "epoch": 1.76, "logps_train/chosen": -75.032470703125, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -305.47540283203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9728276133537292, "rewards_train/margins": 16.68447905778885, "rewards_train/rejected": -17.657306671142578, "step": 3578 }, { "epoch": 1.76, "learning_rate": 3.98664817991049e-07, "loss": 0.0, "step": 3579 }, { "epoch": 1.76, "logps_train/chosen": -83.49722290039062, "logps_train/ref_chosen": -68.0, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -320.8380126953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5484037399291992, "rewards_train/margins": 17.65609836578369, "rewards_train/rejected": -19.20450210571289, "step": 3579 }, { "epoch": 1.76, "learning_rate": 3.983990766042408e-07, "loss": 0.0, "step": 3580 }, { "epoch": 1.76, "logps_train/chosen": -73.29552459716797, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -304.4154052734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1157829761505127, "rewards_train/margins": 16.899975538253784, "rewards_train/rejected": -18.015758514404297, "step": 3580 }, { "epoch": 1.76, "learning_rate": 3.9813336514975014e-07, "loss": 0.0, "step": 3581 }, { "epoch": 1.76, "logps_train/chosen": -73.30831909179688, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -304.0659484863281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9617403745651245, "rewards_train/margins": 16.882940411567688, "rewards_train/rejected": -17.844680786132812, "step": 3581 }, { "epoch": 1.76, "learning_rate": 3.978676837058575e-07, "loss": 0.0, "step": 3582 }, { "epoch": 1.76, "logps_train/chosen": -76.13218688964844, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -309.74224853515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.053257942199707, "rewards_train/margins": 17.26588726043701, "rewards_train/rejected": -18.31914520263672, "step": 3582 }, { "epoch": 1.76, "learning_rate": 3.9760203235083446e-07, "loss": 0.0, "step": 3583 }, { "epoch": 1.76, "logps_train/chosen": -81.85786437988281, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -314.98388671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7856407165527344, "rewards_train/margins": 17.105031967163086, "rewards_train/rejected": -18.89067268371582, "step": 3583 }, { "epoch": 1.76, "learning_rate": 3.973364111629436e-07, "loss": 0.0001, "step": 3584 }, { "epoch": 1.76, "logps_train/chosen": -76.26277160644531, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -315.98760986328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.163924217224121, "rewards_train/margins": 17.767991065979004, "rewards_train/rejected": -18.931915283203125, "step": 3584 }, { "epoch": 1.77, "learning_rate": 3.970708202204389e-07, "loss": 0.0, "step": 3585 }, { "epoch": 1.77, "logps_train/chosen": -84.51287841796875, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -121.0625, "logps_train/rejected": -303.2830505371094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9226257801055908, "rewards_train/margins": 16.29674220085144, "rewards_train/rejected": -18.21936798095703, "step": 3585 }, { "epoch": 1.77, "learning_rate": 3.9680525960156484e-07, "loss": 0.0001, "step": 3586 }, { "epoch": 1.77, "logps_train/chosen": -76.70162200927734, "logps_train/ref_chosen": -62.4375, "logps_train/ref_rejected": -119.8125, "logps_train/rejected": -302.22967529296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4256796836853027, "rewards_train/margins": 16.814085483551025, "rewards_train/rejected": -18.239765167236328, "step": 3586 }, { "epoch": 1.77, "learning_rate": 3.9653972938455793e-07, "loss": 0.0, "step": 3587 }, { "epoch": 1.77, "logps_train/chosen": -76.9837646484375, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -312.8329772949219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0283567905426025, "rewards_train/margins": 17.426963567733765, "rewards_train/rejected": -18.455320358276367, "step": 3587 }, { "epoch": 1.77, "learning_rate": 3.9627422964764465e-07, "loss": 0.0, "step": 3588 }, { "epoch": 1.77, "logps_train/chosen": -74.46434020996094, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -121.375, "logps_train/rejected": -293.9429931640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2085431814193726, "rewards_train/margins": 16.05216085910797, "rewards_train/rejected": -17.260704040527344, "step": 3588 }, { "epoch": 1.77, "learning_rate": 3.960087604690432e-07, "loss": 0.0, "step": 3589 }, { "epoch": 1.77, "logps_train/chosen": -80.41871643066406, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -320.4215087890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.524098515510559, "rewards_train/margins": 17.56141173839569, "rewards_train/rejected": -19.08551025390625, "step": 3589 }, { "epoch": 1.77, "learning_rate": 3.9574332192696284e-07, "loss": 0.0, "step": 3590 }, { "epoch": 1.77, "logps_train/chosen": -77.57884216308594, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -320.2964782714844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2818831205368042, "rewards_train/margins": 17.900013089179993, "rewards_train/rejected": -19.181896209716797, "step": 3590 }, { "epoch": 1.77, "learning_rate": 3.954779140996032e-07, "loss": 0.0, "step": 3591 }, { "epoch": 1.77, "logps_train/chosen": -81.36017608642578, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -314.58489990234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4893869161605835, "rewards_train/margins": 17.23873007297516, "rewards_train/rejected": -18.728116989135742, "step": 3591 }, { "epoch": 1.77, "learning_rate": 3.9521253706515546e-07, "loss": 0.0, "step": 3592 }, { "epoch": 1.77, "logps_train/chosen": -76.08966064453125, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -300.7210998535156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.174493432044983, "rewards_train/margins": 16.525984168052673, "rewards_train/rejected": -17.700477600097656, "step": 3592 }, { "epoch": 1.77, "learning_rate": 3.949471909018012e-07, "loss": 0.0, "step": 3593 }, { "epoch": 1.77, "logps_train/chosen": -73.68302917480469, "logps_train/ref_chosen": -62.15625, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -305.00360107421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.153165578842163, "rewards_train/margins": 16.88532853126526, "rewards_train/rejected": -18.038494110107422, "step": 3593 }, { "epoch": 1.77, "learning_rate": 3.946818756877134e-07, "loss": 0.0, "step": 3594 }, { "epoch": 1.77, "logps_train/chosen": -82.22683715820312, "logps_train/ref_chosen": -67.6875, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -314.5289306640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4565218687057495, "rewards_train/margins": 17.185728192329407, "rewards_train/rejected": -18.642250061035156, "step": 3594 }, { "epoch": 1.77, "learning_rate": 3.944165915010558e-07, "loss": 0.0, "step": 3595 }, { "epoch": 1.77, "logps_train/chosen": -74.9603271484375, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -122.3125, "logps_train/rejected": -302.36407470703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0161007642745972, "rewards_train/margins": 16.99027931690216, "rewards_train/rejected": -18.006380081176758, "step": 3595 }, { "epoch": 1.77, "learning_rate": 3.941513384199827e-07, "loss": 0.0, "step": 3596 }, { "epoch": 1.77, "logps_train/chosen": -74.27790832519531, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -308.0599365234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0425853729248047, "rewards_train/margins": 17.097684860229492, "rewards_train/rejected": -18.140270233154297, "step": 3596 }, { "epoch": 1.77, "learning_rate": 3.938861165226397e-07, "loss": 0.0, "step": 3597 }, { "epoch": 1.77, "logps_train/chosen": -71.37562561035156, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -309.2310791015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8879779577255249, "rewards_train/margins": 17.50050699710846, "rewards_train/rejected": -18.388484954833984, "step": 3597 }, { "epoch": 1.77, "learning_rate": 3.9362092588716264e-07, "loss": 0.0, "step": 3598 }, { "epoch": 1.77, "logps_train/chosen": -80.52751922607422, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -312.05230712890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4787285327911377, "rewards_train/margins": 16.96566081047058, "rewards_train/rejected": -18.44438934326172, "step": 3598 }, { "epoch": 1.77, "learning_rate": 3.9335576659167863e-07, "loss": 0.0, "step": 3599 }, { "epoch": 1.77, "logps_train/chosen": -82.53019714355469, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -313.4666748046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7383708953857422, "rewards_train/margins": 16.82001495361328, "rewards_train/rejected": -18.558385848999023, "step": 3599 }, { "epoch": 1.77, "learning_rate": 3.930906387143055e-07, "loss": 0.0, "step": 3600 }, { "epoch": 1.77, "logps_train/chosen": -76.13883209228516, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -317.2506408691406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2340490818023682, "rewards_train/margins": 17.741307973861694, "rewards_train/rejected": -18.975357055664062, "step": 3600 }, { "epoch": 1.77, "learning_rate": 3.9282554233315154e-07, "loss": 0.0, "step": 3601 }, { "epoch": 1.77, "logps_train/chosen": -76.97721099853516, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -307.31512451171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3103193044662476, "rewards_train/margins": 16.700832962989807, "rewards_train/rejected": -18.011152267456055, "step": 3601 }, { "epoch": 1.77, "learning_rate": 3.9256047752631615e-07, "loss": 0.0001, "step": 3602 }, { "epoch": 1.77, "logps_train/chosen": -83.42857360839844, "logps_train/ref_chosen": -67.75, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -324.5687255859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5694684982299805, "rewards_train/margins": 17.785256385803223, "rewards_train/rejected": -19.354724884033203, "step": 3602 }, { "epoch": 1.77, "learning_rate": 3.922954443718889e-07, "loss": 0.0, "step": 3603 }, { "epoch": 1.77, "logps_train/chosen": -77.49883270263672, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -302.3264465332031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2775444984436035, "rewards_train/margins": 16.72892713546753, "rewards_train/rejected": -18.006471633911133, "step": 3603 }, { "epoch": 1.77, "learning_rate": 3.9203044294795037e-07, "loss": 0.0, "step": 3604 }, { "epoch": 1.77, "logps_train/chosen": -76.44600677490234, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -316.96710205078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2737512588500977, "rewards_train/margins": 17.679694175720215, "rewards_train/rejected": -18.953445434570312, "step": 3604 }, { "epoch": 1.77, "learning_rate": 3.9176547333257214e-07, "loss": 0.0, "step": 3605 }, { "epoch": 1.77, "logps_train/chosen": -78.91102600097656, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -312.75537109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3382710218429565, "rewards_train/margins": 17.46490204334259, "rewards_train/rejected": -18.803173065185547, "step": 3605 }, { "epoch": 1.78, "learning_rate": 3.915005356038157e-07, "loss": 0.0, "step": 3606 }, { "epoch": 1.78, "logps_train/chosen": -74.24415588378906, "logps_train/ref_chosen": -60.375, "logps_train/ref_rejected": -121.0625, "logps_train/rejected": -296.9640808105469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.388453483581543, "rewards_train/margins": 16.203511238098145, "rewards_train/rejected": -17.591964721679688, "step": 3606 }, { "epoch": 1.78, "learning_rate": 3.9123562983973367e-07, "loss": 0.0, "step": 3607 }, { "epoch": 1.78, "logps_train/chosen": -78.00790405273438, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -133.375, "logps_train/rejected": -328.482177734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1436131000518799, "rewards_train/margins": 18.365492582321167, "rewards_train/rejected": -19.509105682373047, "step": 3607 }, { "epoch": 1.78, "learning_rate": 3.9097075611836894e-07, "loss": 0.0, "step": 3608 }, { "epoch": 1.78, "logps_train/chosen": -78.9013671875, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -319.33148193359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.389648675918579, "rewards_train/margins": 17.614104986190796, "rewards_train/rejected": -19.003753662109375, "step": 3608 }, { "epoch": 1.78, "learning_rate": 3.9070591451775507e-07, "loss": 0.0, "step": 3609 }, { "epoch": 1.78, "logps_train/chosen": -79.17086791992188, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -131.125, "logps_train/rejected": -322.94476318359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2524383068084717, "rewards_train/margins": 17.93354344367981, "rewards_train/rejected": -19.18598175048828, "step": 3609 }, { "epoch": 1.78, "learning_rate": 3.9044110511591644e-07, "loss": 0.0001, "step": 3610 }, { "epoch": 1.78, "logps_train/chosen": -76.69383239746094, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -316.28485107421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2987778186798096, "rewards_train/margins": 17.550902128219604, "rewards_train/rejected": -18.849679946899414, "step": 3610 }, { "epoch": 1.78, "learning_rate": 3.9017632799086743e-07, "loss": 0.0, "step": 3611 }, { "epoch": 1.78, "logps_train/chosen": -82.41505432128906, "logps_train/ref_chosen": -67.625, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -321.5967712402344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.476515293121338, "rewards_train/margins": 17.910604000091553, "rewards_train/rejected": -19.38711929321289, "step": 3611 }, { "epoch": 1.78, "learning_rate": 3.8991158322061333e-07, "loss": 0.0, "step": 3612 }, { "epoch": 1.78, "logps_train/chosen": -71.1922378540039, "logps_train/ref_chosen": -59.78125, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -310.48828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1408307552337646, "rewards_train/margins": 17.54403328895569, "rewards_train/rejected": -18.684864044189453, "step": 3612 }, { "epoch": 1.78, "learning_rate": 3.896468708831496e-07, "loss": 0.0, "step": 3613 }, { "epoch": 1.78, "logps_train/chosen": -75.3777847290039, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -307.4090576171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.078940510749817, "rewards_train/margins": 16.948925852775574, "rewards_train/rejected": -18.02786636352539, "step": 3613 }, { "epoch": 1.78, "learning_rate": 3.893821910564623e-07, "loss": 0.0001, "step": 3614 }, { "epoch": 1.78, "logps_train/chosen": -78.22819519042969, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -322.783935546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4185709953308105, "rewards_train/margins": 18.35049867630005, "rewards_train/rejected": -19.76906967163086, "step": 3614 }, { "epoch": 1.78, "learning_rate": 3.891175438185281e-07, "loss": 0.0, "step": 3615 }, { "epoch": 1.78, "logps_train/chosen": -76.37742614746094, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -316.0659484863281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2980446815490723, "rewards_train/margins": 17.6505446434021, "rewards_train/rejected": -18.948589324951172, "step": 3615 }, { "epoch": 1.78, "learning_rate": 3.888529292473135e-07, "loss": 0.0, "step": 3616 }, { "epoch": 1.78, "logps_train/chosen": -76.42617797851562, "logps_train/ref_chosen": -61.65625, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -306.79705810546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4776521921157837, "rewards_train/margins": 16.589162230491638, "rewards_train/rejected": -18.066814422607422, "step": 3616 }, { "epoch": 1.78, "learning_rate": 3.885883474207762e-07, "loss": 0.0019, "step": 3617 }, { "epoch": 1.78, "logps_train/chosen": -71.95382690429688, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -304.8461608886719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9058322310447693, "rewards_train/margins": 17.144701063632965, "rewards_train/rejected": -18.050533294677734, "step": 3617 }, { "epoch": 1.78, "learning_rate": 3.8832379841686323e-07, "loss": 0.0, "step": 3618 }, { "epoch": 1.78, "logps_train/chosen": -77.24598693847656, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -315.65203857421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3165913820266724, "rewards_train/margins": 17.473222613334656, "rewards_train/rejected": -18.789813995361328, "step": 3618 }, { "epoch": 1.78, "learning_rate": 3.880592823135129e-07, "loss": 0.0, "step": 3619 }, { "epoch": 1.78, "logps_train/chosen": -72.95235443115234, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -299.73175048828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9900104403495789, "rewards_train/margins": 16.34347039461136, "rewards_train/rejected": -17.333480834960938, "step": 3619 }, { "epoch": 1.78, "learning_rate": 3.8779479918865356e-07, "loss": 0.0001, "step": 3620 }, { "epoch": 1.78, "logps_train/chosen": -76.66413879394531, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -131.5, "logps_train/rejected": -322.63983154296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0948801040649414, "rewards_train/margins": 18.020028114318848, "rewards_train/rejected": -19.11490821838379, "step": 3620 }, { "epoch": 1.78, "learning_rate": 3.875303491202032e-07, "loss": 0.0, "step": 3621 }, { "epoch": 1.78, "logps_train/chosen": -78.35952758789062, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -315.8343200683594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2400293350219727, "rewards_train/margins": 17.53568935394287, "rewards_train/rejected": -18.775718688964844, "step": 3621 }, { "epoch": 1.78, "learning_rate": 3.8726593218607123e-07, "loss": 0.0001, "step": 3622 }, { "epoch": 1.78, "logps_train/chosen": -72.38314056396484, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -308.48956298828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8886066675186157, "rewards_train/margins": 17.569332480430603, "rewards_train/rejected": -18.45793914794922, "step": 3622 }, { "epoch": 1.78, "learning_rate": 3.870015484641561e-07, "loss": 0.0, "step": 3623 }, { "epoch": 1.78, "logps_train/chosen": -76.3742446899414, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -308.73187255859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3469951152801514, "rewards_train/margins": 17.075507402420044, "rewards_train/rejected": -18.422502517700195, "step": 3623 }, { "epoch": 1.78, "learning_rate": 3.8673719803234716e-07, "loss": 0.0, "step": 3624 }, { "epoch": 1.78, "logps_train/chosen": -79.9791259765625, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -316.2327575683594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3221797943115234, "rewards_train/margins": 17.624582290649414, "rewards_train/rejected": -18.946762084960938, "step": 3624 }, { "epoch": 1.78, "learning_rate": 3.864728809685244e-07, "loss": 0.0, "step": 3625 }, { "epoch": 1.78, "logps_train/chosen": -76.16703796386719, "logps_train/ref_chosen": -61.96875, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -306.90545654296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.419829249382019, "rewards_train/margins": 17.047330021858215, "rewards_train/rejected": -18.467159271240234, "step": 3625 }, { "epoch": 1.79, "learning_rate": 3.862085973505566e-07, "loss": 0.0, "step": 3626 }, { "epoch": 1.79, "logps_train/chosen": -75.60945129394531, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -303.952392578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2205157279968262, "rewards_train/margins": 16.72047472000122, "rewards_train/rejected": -17.940990447998047, "step": 3626 }, { "epoch": 1.79, "learning_rate": 3.85944347256304e-07, "loss": 0.0, "step": 3627 }, { "epoch": 1.79, "logps_train/chosen": -71.76513671875, "logps_train/ref_chosen": -62.21875, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -312.88800048828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9541013240814209, "rewards_train/margins": 17.68206238746643, "rewards_train/rejected": -18.63616371154785, "step": 3627 }, { "epoch": 1.79, "learning_rate": 3.8568013076361623e-07, "loss": 0.0, "step": 3628 }, { "epoch": 1.79, "logps_train/chosen": -71.77886962890625, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -310.7664489746094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9025453329086304, "rewards_train/margins": 17.975563645362854, "rewards_train/rejected": -18.878108978271484, "step": 3628 }, { "epoch": 1.79, "learning_rate": 3.8541594795033347e-07, "loss": 0.0, "step": 3629 }, { "epoch": 1.79, "logps_train/chosen": -76.35897827148438, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -303.26043701171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1494718790054321, "rewards_train/margins": 16.811679244041443, "rewards_train/rejected": -17.961151123046875, "step": 3629 }, { "epoch": 1.79, "learning_rate": 3.851517988942855e-07, "loss": 0.0002, "step": 3630 }, { "epoch": 1.79, "logps_train/chosen": -80.24755859375, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -316.819091796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.523144245147705, "rewards_train/margins": 17.4536395072937, "rewards_train/rejected": -18.976783752441406, "step": 3630 }, { "epoch": 1.79, "learning_rate": 3.848876836732925e-07, "loss": 0.0, "step": 3631 }, { "epoch": 1.79, "logps_train/chosen": -82.45535278320312, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -310.0889892578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7153596878051758, "rewards_train/margins": 16.6195650100708, "rewards_train/rejected": -18.334924697875977, "step": 3631 }, { "epoch": 1.79, "learning_rate": 3.846236023651648e-07, "loss": 0.0014, "step": 3632 }, { "epoch": 1.79, "logps_train/chosen": -70.53968048095703, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -310.81964111328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6529427766799927, "rewards_train/margins": 17.609830737113953, "rewards_train/rejected": -18.262773513793945, "step": 3632 }, { "epoch": 1.79, "learning_rate": 3.8435955504770226e-07, "loss": 0.0, "step": 3633 }, { "epoch": 1.79, "logps_train/chosen": -74.08015441894531, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -311.3214111328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0607984066009521, "rewards_train/margins": 17.658405542373657, "rewards_train/rejected": -18.71920394897461, "step": 3633 }, { "epoch": 1.79, "learning_rate": 3.840955417986953e-07, "loss": 0.0, "step": 3634 }, { "epoch": 1.79, "logps_train/chosen": -74.22297668457031, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -320.15472412109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8739089965820312, "rewards_train/margins": 18.195810317993164, "rewards_train/rejected": -19.069719314575195, "step": 3634 }, { "epoch": 1.79, "learning_rate": 3.8383156269592356e-07, "loss": 0.0, "step": 3635 }, { "epoch": 1.79, "logps_train/chosen": -78.03790283203125, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -318.4787902832031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3043025732040405, "rewards_train/margins": 17.84094202518463, "rewards_train/rejected": -19.145244598388672, "step": 3635 }, { "epoch": 1.79, "learning_rate": 3.8356761781715743e-07, "loss": 0.0, "step": 3636 }, { "epoch": 1.79, "logps_train/chosen": -76.80440521240234, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -314.2069396972656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0565147399902344, "rewards_train/margins": 17.66730308532715, "rewards_train/rejected": -18.723817825317383, "step": 3636 }, { "epoch": 1.79, "learning_rate": 3.833037072401569e-07, "loss": 0.0, "step": 3637 }, { "epoch": 1.79, "logps_train/chosen": -79.64384460449219, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -315.6456298828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.575517177581787, "rewards_train/margins": 17.40848207473755, "rewards_train/rejected": -18.983999252319336, "step": 3637 }, { "epoch": 1.79, "learning_rate": 3.8303983104267134e-07, "loss": 0.0, "step": 3638 }, { "epoch": 1.79, "logps_train/chosen": -70.75041198730469, "logps_train/ref_chosen": -61.375, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -319.411376953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9394458532333374, "rewards_train/margins": 18.264730095863342, "rewards_train/rejected": -19.20417594909668, "step": 3638 }, { "epoch": 1.79, "learning_rate": 3.8277598930244116e-07, "loss": 0.0, "step": 3639 }, { "epoch": 1.79, "logps_train/chosen": -74.61666870117188, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -120.9375, "logps_train/rejected": -303.14544677734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1520966291427612, "rewards_train/margins": 17.069430470466614, "rewards_train/rejected": -18.221527099609375, "step": 3639 }, { "epoch": 1.79, "learning_rate": 3.8251218209719526e-07, "loss": 0.0, "step": 3640 }, { "epoch": 1.79, "logps_train/chosen": -81.68093872070312, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -322.88909912109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6662869453430176, "rewards_train/margins": 17.712172985076904, "rewards_train/rejected": -19.378459930419922, "step": 3640 }, { "epoch": 1.79, "learning_rate": 3.822484095046533e-07, "loss": 0.0, "step": 3641 }, { "epoch": 1.79, "logps_train/chosen": -68.29502868652344, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -307.4664001464844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5278429388999939, "rewards_train/margins": 17.50561386346817, "rewards_train/rejected": -18.033456802368164, "step": 3641 }, { "epoch": 1.79, "learning_rate": 3.8198467160252464e-07, "loss": 0.0, "step": 3642 }, { "epoch": 1.79, "logps_train/chosen": -72.77885437011719, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -122.0625, "logps_train/rejected": -294.2792053222656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9461463689804077, "rewards_train/margins": 16.27552568912506, "rewards_train/rejected": -17.22167205810547, "step": 3642 }, { "epoch": 1.79, "learning_rate": 3.8172096846850787e-07, "loss": 0.0, "step": 3643 }, { "epoch": 1.79, "logps_train/chosen": -74.86209869384766, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -306.8067321777344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2237589359283447, "rewards_train/margins": 17.111406087875366, "rewards_train/rejected": -18.33516502380371, "step": 3643 }, { "epoch": 1.79, "learning_rate": 3.8145730018029205e-07, "loss": 0.0, "step": 3644 }, { "epoch": 1.79, "logps_train/chosen": -70.87454986572266, "logps_train/ref_chosen": -61.28125, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -307.92596435546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9584508538246155, "rewards_train/margins": 17.30006355047226, "rewards_train/rejected": -18.258514404296875, "step": 3644 }, { "epoch": 1.79, "learning_rate": 3.8119366681555535e-07, "loss": 0.0, "step": 3645 }, { "epoch": 1.79, "logps_train/chosen": -78.46812438964844, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -121.125, "logps_train/rejected": -299.57794189453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.309849739074707, "rewards_train/margins": 16.53764247894287, "rewards_train/rejected": -17.847492218017578, "step": 3645 }, { "epoch": 1.8, "learning_rate": 3.809300684519662e-07, "loss": 0.0, "step": 3646 }, { "epoch": 1.8, "logps_train/chosen": -81.88041687011719, "logps_train/ref_chosen": -70.75, "logps_train/ref_rejected": -136.125, "logps_train/rejected": -330.90960693359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1162643432617188, "rewards_train/margins": 18.363370895385742, "rewards_train/rejected": -19.47963523864746, "step": 3646 }, { "epoch": 1.8, "learning_rate": 3.806665051671823e-07, "loss": 0.0, "step": 3647 }, { "epoch": 1.8, "logps_train/chosen": -76.7177734375, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -301.73553466796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3106930255889893, "rewards_train/margins": 16.36144518852234, "rewards_train/rejected": -17.672138214111328, "step": 3647 }, { "epoch": 1.8, "learning_rate": 3.8040297703885117e-07, "loss": 0.0, "step": 3648 }, { "epoch": 1.8, "logps_train/chosen": -76.18853759765625, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -312.2310791015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0871644020080566, "rewards_train/margins": 17.467488765716553, "rewards_train/rejected": -18.55465316772461, "step": 3648 }, { "epoch": 1.8, "learning_rate": 3.801394841446101e-07, "loss": 0.0001, "step": 3649 }, { "epoch": 1.8, "logps_train/chosen": -79.74363708496094, "logps_train/ref_chosen": -67.3125, "logps_train/ref_rejected": -132.5, "logps_train/rejected": -325.5025634765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2411614656448364, "rewards_train/margins": 18.059093356132507, "rewards_train/rejected": -19.300254821777344, "step": 3649 }, { "epoch": 1.8, "learning_rate": 3.7987602656208574e-07, "loss": 0.0, "step": 3650 }, { "epoch": 1.8, "logps_train/chosen": -69.58436584472656, "logps_train/ref_chosen": -62.78125, "logps_train/ref_rejected": -120.6875, "logps_train/rejected": -296.406005859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6801162362098694, "rewards_train/margins": 16.89349216222763, "rewards_train/rejected": -17.5736083984375, "step": 3650 }, { "epoch": 1.8, "learning_rate": 3.7961260436889446e-07, "loss": 0.0, "step": 3651 }, { "epoch": 1.8, "logps_train/chosen": -72.02177429199219, "logps_train/ref_chosen": -62.59375, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -308.5572814941406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.942803144454956, "rewards_train/margins": 17.393590211868286, "rewards_train/rejected": -18.336393356323242, "step": 3651 }, { "epoch": 1.8, "learning_rate": 3.7934921764264245e-07, "loss": 0.0, "step": 3652 }, { "epoch": 1.8, "logps_train/chosen": -81.23629760742188, "logps_train/ref_chosen": -67.25, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -312.1529541015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.395845890045166, "rewards_train/margins": 17.14972448348999, "rewards_train/rejected": -18.545570373535156, "step": 3652 }, { "epoch": 1.8, "learning_rate": 3.7908586646092486e-07, "loss": 0.0003, "step": 3653 }, { "epoch": 1.8, "logps_train/chosen": -77.13275146484375, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -306.29364013671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1644949913024902, "rewards_train/margins": 16.82365846633911, "rewards_train/rejected": -17.9881534576416, "step": 3653 }, { "epoch": 1.8, "learning_rate": 3.788225509013272e-07, "loss": 0.0, "step": 3654 }, { "epoch": 1.8, "logps_train/chosen": -77.88700866699219, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -121.0625, "logps_train/rejected": -297.5484619140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3868944644927979, "rewards_train/margins": 16.260040044784546, "rewards_train/rejected": -17.646934509277344, "step": 3654 }, { "epoch": 1.8, "learning_rate": 3.785592710414235e-07, "loss": 0.0, "step": 3655 }, { "epoch": 1.8, "logps_train/chosen": -71.9124526977539, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -323.1148376464844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8423684239387512, "rewards_train/margins": 18.46520870923996, "rewards_train/rejected": -19.30757713317871, "step": 3655 }, { "epoch": 1.8, "learning_rate": 3.7829602695877785e-07, "loss": 0.0, "step": 3656 }, { "epoch": 1.8, "logps_train/chosen": -76.41062927246094, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -301.3208923339844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2935526371002197, "rewards_train/margins": 16.456016302108765, "rewards_train/rejected": -17.749568939208984, "step": 3656 }, { "epoch": 1.8, "learning_rate": 3.7803281873094423e-07, "loss": 0.0, "step": 3657 }, { "epoch": 1.8, "logps_train/chosen": -70.38223266601562, "logps_train/ref_chosen": -59.34375, "logps_train/ref_rejected": -119.6875, "logps_train/rejected": -300.6820068359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1036524772644043, "rewards_train/margins": 16.99706506729126, "rewards_train/rejected": -18.100717544555664, "step": 3657 }, { "epoch": 1.8, "learning_rate": 3.77769646435465e-07, "loss": 0.0, "step": 3658 }, { "epoch": 1.8, "logps_train/chosen": -76.78997802734375, "logps_train/ref_chosen": -60.875, "logps_train/ref_rejected": -117.5, "logps_train/rejected": -299.70916748046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5904723405838013, "rewards_train/margins": 16.62756049633026, "rewards_train/rejected": -18.218032836914062, "step": 3658 }, { "epoch": 1.8, "learning_rate": 3.775065101498728e-07, "loss": 0.0001, "step": 3659 }, { "epoch": 1.8, "logps_train/chosen": -74.220703125, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -309.7854309082031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9600580334663391, "rewards_train/margins": 17.301881968975067, "rewards_train/rejected": -18.261940002441406, "step": 3659 }, { "epoch": 1.8, "learning_rate": 3.772434099516892e-07, "loss": 0.0, "step": 3660 }, { "epoch": 1.8, "logps_train/chosen": -81.087158203125, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -315.43975830078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6308842897415161, "rewards_train/margins": 17.271100878715515, "rewards_train/rejected": -18.90198516845703, "step": 3660 }, { "epoch": 1.8, "learning_rate": 3.769803459184253e-07, "loss": 0.0, "step": 3661 }, { "epoch": 1.8, "logps_train/chosen": -76.6900863647461, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -309.14141845703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2372701168060303, "rewards_train/margins": 17.01446557044983, "rewards_train/rejected": -18.25173568725586, "step": 3661 }, { "epoch": 1.8, "learning_rate": 3.767173181275818e-07, "loss": 0.0, "step": 3662 }, { "epoch": 1.8, "logps_train/chosen": -79.39588928222656, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -317.2098388671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.329921007156372, "rewards_train/margins": 17.57133936882019, "rewards_train/rejected": -18.901260375976562, "step": 3662 }, { "epoch": 1.8, "learning_rate": 3.7645432665664814e-07, "loss": 0.0, "step": 3663 }, { "epoch": 1.8, "logps_train/chosen": -80.55508422851562, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -320.6401062011719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4842681884765625, "rewards_train/margins": 17.76563262939453, "rewards_train/rejected": -19.249900817871094, "step": 3663 }, { "epoch": 1.8, "learning_rate": 3.7619137158310355e-07, "loss": 0.0, "step": 3664 }, { "epoch": 1.8, "logps_train/chosen": -77.54124450683594, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -316.12213134765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2313220500946045, "rewards_train/margins": 17.644173860549927, "rewards_train/rejected": -18.87549591064453, "step": 3664 }, { "epoch": 1.8, "learning_rate": 3.7592845298441624e-07, "loss": 0.0, "step": 3665 }, { "epoch": 1.8, "logps_train/chosen": -73.18958282470703, "logps_train/ref_chosen": -60.5, "logps_train/ref_rejected": -119.8125, "logps_train/rejected": -296.98126220703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.269763708114624, "rewards_train/margins": 16.44555115699768, "rewards_train/rejected": -17.715314865112305, "step": 3665 }, { "epoch": 1.81, "learning_rate": 3.75665570938044e-07, "loss": 0.0001, "step": 3666 }, { "epoch": 1.81, "logps_train/chosen": -75.8092041015625, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -134.75, "logps_train/rejected": -327.3244323730469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1251826286315918, "rewards_train/margins": 18.134945392608643, "rewards_train/rejected": -19.260128021240234, "step": 3666 }, { "epoch": 1.81, "learning_rate": 3.7540272552143335e-07, "loss": 0.0001, "step": 3667 }, { "epoch": 1.81, "logps_train/chosen": -77.0458755493164, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -131.375, "logps_train/rejected": -332.1594543457031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1730444431304932, "rewards_train/margins": 18.908040761947632, "rewards_train/rejected": -20.081085205078125, "step": 3667 }, { "epoch": 1.81, "learning_rate": 3.751399168120206e-07, "loss": 0.0, "step": 3668 }, { "epoch": 1.81, "logps_train/chosen": -71.09011840820312, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -306.7753601074219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6897245645523071, "rewards_train/margins": 17.44894540309906, "rewards_train/rejected": -18.138669967651367, "step": 3668 }, { "epoch": 1.81, "learning_rate": 3.748771448872311e-07, "loss": 0.0, "step": 3669 }, { "epoch": 1.81, "logps_train/chosen": -78.34797668457031, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -318.26129150390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2611157894134521, "rewards_train/margins": 17.61139702796936, "rewards_train/rejected": -18.872512817382812, "step": 3669 }, { "epoch": 1.81, "learning_rate": 3.746144098244787e-07, "loss": 0.0, "step": 3670 }, { "epoch": 1.81, "logps_train/chosen": -71.51800537109375, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -307.8287353515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.745990514755249, "rewards_train/margins": 17.484001398086548, "rewards_train/rejected": -18.229991912841797, "step": 3670 }, { "epoch": 1.81, "learning_rate": 3.7435171170116755e-07, "loss": 0.0, "step": 3671 }, { "epoch": 1.81, "logps_train/chosen": -80.0093994140625, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -307.8492736816406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3805782794952393, "rewards_train/margins": 16.946049451828003, "rewards_train/rejected": -18.326627731323242, "step": 3671 }, { "epoch": 1.81, "learning_rate": 3.7408905059468974e-07, "loss": 0.0002, "step": 3672 }, { "epoch": 1.81, "logps_train/chosen": -74.82459259033203, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -304.14581298828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0522345304489136, "rewards_train/margins": 17.061617016792297, "rewards_train/rejected": -18.11385154724121, "step": 3672 }, { "epoch": 1.81, "learning_rate": 3.738264265824271e-07, "loss": 0.0, "step": 3673 }, { "epoch": 1.81, "logps_train/chosen": -73.94095611572266, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -302.337890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9127965569496155, "rewards_train/margins": 16.900239408016205, "rewards_train/rejected": -17.81303596496582, "step": 3673 }, { "epoch": 1.81, "learning_rate": 3.73563839741751e-07, "loss": 0.0008, "step": 3674 }, { "epoch": 1.81, "logps_train/chosen": -77.72103881835938, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -308.3626708984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.124155044555664, "rewards_train/margins": 17.14873695373535, "rewards_train/rejected": -18.272891998291016, "step": 3674 }, { "epoch": 1.81, "learning_rate": 3.733012901500206e-07, "loss": 0.0, "step": 3675 }, { "epoch": 1.81, "logps_train/chosen": -80.04254913330078, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -315.6634521484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6321357488632202, "rewards_train/margins": 17.37522280216217, "rewards_train/rejected": -19.00735855102539, "step": 3675 }, { "epoch": 1.81, "learning_rate": 3.7303877788458517e-07, "loss": 0.0001, "step": 3676 }, { "epoch": 1.81, "logps_train/chosen": -77.6646728515625, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -315.8746337890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.210754156112671, "rewards_train/margins": 17.486377954483032, "rewards_train/rejected": -18.697132110595703, "step": 3676 }, { "epoch": 1.81, "learning_rate": 3.7277630302278234e-07, "loss": 0.0, "step": 3677 }, { "epoch": 1.81, "logps_train/chosen": -75.6234359741211, "logps_train/ref_chosen": -61.90625, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -303.3707275390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3706936836242676, "rewards_train/margins": 16.498216152191162, "rewards_train/rejected": -17.86890983581543, "step": 3677 }, { "epoch": 1.81, "learning_rate": 3.7251386564193915e-07, "loss": 0.0, "step": 3678 }, { "epoch": 1.81, "logps_train/chosen": -79.53506469726562, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -314.4365234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5098540782928467, "rewards_train/margins": 17.341120958328247, "rewards_train/rejected": -18.850975036621094, "step": 3678 }, { "epoch": 1.81, "learning_rate": 3.722514658193715e-07, "loss": 0.0, "step": 3679 }, { "epoch": 1.81, "logps_train/chosen": -71.18986511230469, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -306.1244201660156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9078536033630371, "rewards_train/margins": 17.390230655670166, "rewards_train/rejected": -18.298084259033203, "step": 3679 }, { "epoch": 1.81, "learning_rate": 3.719891036323839e-07, "loss": 0.0, "step": 3680 }, { "epoch": 1.81, "logps_train/chosen": -75.49651336669922, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -121.5625, "logps_train/rejected": -300.6458740234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.132952332496643, "rewards_train/margins": 16.776315093040466, "rewards_train/rejected": -17.90926742553711, "step": 3680 }, { "epoch": 1.81, "learning_rate": 3.7172677915827035e-07, "loss": 0.0, "step": 3681 }, { "epoch": 1.81, "logps_train/chosen": -81.510986328125, "logps_train/ref_chosen": -67.3125, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -320.3056640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4211174249649048, "rewards_train/margins": 17.720775961875916, "rewards_train/rejected": -19.14189338684082, "step": 3681 }, { "epoch": 1.81, "learning_rate": 3.71464492474313e-07, "loss": 0.0, "step": 3682 }, { "epoch": 1.81, "logps_train/chosen": -81.94737243652344, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -313.2281494140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6379750967025757, "rewards_train/margins": 17.170925736427307, "rewards_train/rejected": -18.808900833129883, "step": 3682 }, { "epoch": 1.81, "learning_rate": 3.712022436577835e-07, "loss": 0.0, "step": 3683 }, { "epoch": 1.81, "logps_train/chosen": -77.52001953125, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -136.0, "logps_train/rejected": -333.93798828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1348624229431152, "rewards_train/margins": 18.663723468780518, "rewards_train/rejected": -19.798585891723633, "step": 3683 }, { "epoch": 1.81, "learning_rate": 3.709400327859423e-07, "loss": 0.0, "step": 3684 }, { "epoch": 1.81, "logps_train/chosen": -75.08657836914062, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -309.44232177734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1804345846176147, "rewards_train/margins": 17.403839707374573, "rewards_train/rejected": -18.584274291992188, "step": 3684 }, { "epoch": 1.81, "learning_rate": 3.7067785993603816e-07, "loss": 0.0, "step": 3685 }, { "epoch": 1.81, "logps_train/chosen": -73.43146514892578, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -301.3247985839844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.011115312576294, "rewards_train/margins": 16.681130170822144, "rewards_train/rejected": -17.692245483398438, "step": 3685 }, { "epoch": 1.81, "learning_rate": 3.704157251853093e-07, "loss": 0.0, "step": 3686 }, { "epoch": 1.81, "logps_train/chosen": -78.27559661865234, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -308.71307373046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2958698272705078, "rewards_train/margins": 16.98451805114746, "rewards_train/rejected": -18.28038787841797, "step": 3686 }, { "epoch": 1.82, "learning_rate": 3.701536286109819e-07, "loss": 0.0, "step": 3687 }, { "epoch": 1.82, "logps_train/chosen": -80.50563049316406, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -318.3873291015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5466086864471436, "rewards_train/margins": 17.764193773269653, "rewards_train/rejected": -19.310802459716797, "step": 3687 }, { "epoch": 1.82, "learning_rate": 3.6989157029027185e-07, "loss": 0.0, "step": 3688 }, { "epoch": 1.82, "logps_train/chosen": -78.78838348388672, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -309.58465576171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2726372480392456, "rewards_train/margins": 17.06097710132599, "rewards_train/rejected": -18.333614349365234, "step": 3688 }, { "epoch": 1.82, "learning_rate": 3.696295503003833e-07, "loss": 0.0, "step": 3689 }, { "epoch": 1.82, "logps_train/chosen": -73.88108825683594, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -132.375, "logps_train/rejected": -320.752685546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8499251008033752, "rewards_train/margins": 17.982277810573578, "rewards_train/rejected": -18.832202911376953, "step": 3689 }, { "epoch": 1.82, "learning_rate": 3.693675687185086e-07, "loss": 0.0001, "step": 3690 }, { "epoch": 1.82, "logps_train/chosen": -73.91202545166016, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -121.125, "logps_train/rejected": -302.63250732421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0410560369491577, "rewards_train/margins": 17.112821221351624, "rewards_train/rejected": -18.15387725830078, "step": 3690 }, { "epoch": 1.82, "learning_rate": 3.6910562562183003e-07, "loss": 0.0, "step": 3691 }, { "epoch": 1.82, "logps_train/chosen": -82.56205749511719, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -311.60394287109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.679398775100708, "rewards_train/margins": 16.8377845287323, "rewards_train/rejected": -18.517183303833008, "step": 3691 }, { "epoch": 1.82, "learning_rate": 3.6884372108751704e-07, "loss": 0.0001, "step": 3692 }, { "epoch": 1.82, "logps_train/chosen": -82.80975341796875, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -320.89508056640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.631096601486206, "rewards_train/margins": 17.494885683059692, "rewards_train/rejected": -19.1259822845459, "step": 3692 }, { "epoch": 1.82, "learning_rate": 3.68581855192729e-07, "loss": 0.0, "step": 3693 }, { "epoch": 1.82, "logps_train/chosen": -80.83070373535156, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -319.75225830078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6127089262008667, "rewards_train/margins": 17.73839581012726, "rewards_train/rejected": -19.351104736328125, "step": 3693 }, { "epoch": 1.82, "learning_rate": 3.6832002801461317e-07, "loss": 0.0, "step": 3694 }, { "epoch": 1.82, "logps_train/chosen": -76.17082214355469, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -313.7304382324219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2202556133270264, "rewards_train/margins": 17.413777589797974, "rewards_train/rejected": -18.634033203125, "step": 3694 }, { "epoch": 1.82, "learning_rate": 3.680582396303056e-07, "loss": 0.0001, "step": 3695 }, { "epoch": 1.82, "logps_train/chosen": -79.7882080078125, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -307.6328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6501091718673706, "rewards_train/margins": 16.608582615852356, "rewards_train/rejected": -18.258691787719727, "step": 3695 }, { "epoch": 1.82, "learning_rate": 3.67796490116931e-07, "loss": 0.0001, "step": 3696 }, { "epoch": 1.82, "logps_train/chosen": -85.56306457519531, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -323.4691162109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0175371170043945, "rewards_train/margins": 17.622292518615723, "rewards_train/rejected": -19.639829635620117, "step": 3696 }, { "epoch": 1.82, "learning_rate": 3.675347795516024e-07, "loss": 0.0003, "step": 3697 }, { "epoch": 1.82, "logps_train/chosen": -74.37200927734375, "logps_train/ref_chosen": -61.8125, "logps_train/ref_rejected": -120.125, "logps_train/rejected": -296.5645751953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2568055391311646, "rewards_train/margins": 16.38671100139618, "rewards_train/rejected": -17.643516540527344, "step": 3697 }, { "epoch": 1.82, "learning_rate": 3.6727310801142163e-07, "loss": 0.0003, "step": 3698 }, { "epoch": 1.82, "logps_train/chosen": -76.8629379272461, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -295.3495178222656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3368065357208252, "rewards_train/margins": 15.99775767326355, "rewards_train/rejected": -17.334564208984375, "step": 3698 }, { "epoch": 1.82, "learning_rate": 3.670114755734789e-07, "loss": 0.0, "step": 3699 }, { "epoch": 1.82, "logps_train/chosen": -72.96675872802734, "logps_train/ref_chosen": -61.78125, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -308.50628662109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1188440322875977, "rewards_train/margins": 17.30741786956787, "rewards_train/rejected": -18.42626190185547, "step": 3699 }, { "epoch": 1.82, "learning_rate": 3.6674988231485275e-07, "loss": 0.0001, "step": 3700 }, { "epoch": 1.82, "logps_train/chosen": -78.16758728027344, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -302.8750915527344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.507237434387207, "rewards_train/margins": 16.544676780700684, "rewards_train/rejected": -18.05191421508789, "step": 3700 }, { "epoch": 1.82, "learning_rate": 3.6648832831261055e-07, "loss": 0.0001, "step": 3701 }, { "epoch": 1.82, "logps_train/chosen": -78.960205078125, "logps_train/ref_chosen": -62.65625, "logps_train/ref_rejected": -123.1875, "logps_train/rejected": -308.41253662109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6300783157348633, "rewards_train/margins": 16.889981269836426, "rewards_train/rejected": -18.52005958557129, "step": 3701 }, { "epoch": 1.82, "learning_rate": 3.662268136438076e-07, "loss": 0.0001, "step": 3702 }, { "epoch": 1.82, "logps_train/chosen": -75.50701904296875, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -318.56951904296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1569032669067383, "rewards_train/margins": 17.803030967712402, "rewards_train/rejected": -18.95993423461914, "step": 3702 }, { "epoch": 1.82, "learning_rate": 3.6596533838548805e-07, "loss": 0.0, "step": 3703 }, { "epoch": 1.82, "logps_train/chosen": -78.90406799316406, "logps_train/ref_chosen": -67.6875, "logps_train/ref_rejected": -130.625, "logps_train/rejected": -315.00433349609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1219500303268433, "rewards_train/margins": 17.316572308540344, "rewards_train/rejected": -18.438522338867188, "step": 3703 }, { "epoch": 1.82, "learning_rate": 3.657039026146843e-07, "loss": 0.0, "step": 3704 }, { "epoch": 1.82, "logps_train/chosen": -83.3995361328125, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -131.875, "logps_train/rejected": -324.9932861328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7790166139602661, "rewards_train/margins": 17.534618258476257, "rewards_train/rejected": -19.313634872436523, "step": 3704 }, { "epoch": 1.82, "learning_rate": 3.6544250640841695e-07, "loss": 0.0001, "step": 3705 }, { "epoch": 1.82, "logps_train/chosen": -75.76091003417969, "logps_train/ref_chosen": -62.4375, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -310.211669921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.331608533859253, "rewards_train/margins": 17.328816652297974, "rewards_train/rejected": -18.660425186157227, "step": 3705 }, { "epoch": 1.82, "learning_rate": 3.651811498436954e-07, "loss": 0.0, "step": 3706 }, { "epoch": 1.82, "logps_train/chosen": -76.7005615234375, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -120.8125, "logps_train/rejected": -306.06011962890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3170280456542969, "rewards_train/margins": 17.20636558532715, "rewards_train/rejected": -18.523393630981445, "step": 3706 }, { "epoch": 1.83, "learning_rate": 3.649198329975166e-07, "loss": 0.0, "step": 3707 }, { "epoch": 1.83, "logps_train/chosen": -77.37863159179688, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -317.24346923828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1601775884628296, "rewards_train/margins": 17.67979395389557, "rewards_train/rejected": -18.8399715423584, "step": 3707 }, { "epoch": 1.83, "learning_rate": 3.646585559468666e-07, "loss": 0.0, "step": 3708 }, { "epoch": 1.83, "logps_train/chosen": -78.71804809570312, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -324.5635070800781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4345982074737549, "rewards_train/margins": 18.048216581344604, "rewards_train/rejected": -19.48281478881836, "step": 3708 }, { "epoch": 1.83, "learning_rate": 3.643973187687192e-07, "loss": 0.0006, "step": 3709 }, { "epoch": 1.83, "logps_train/chosen": -74.36680603027344, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -310.90985107421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.030137538909912, "rewards_train/margins": 17.32842493057251, "rewards_train/rejected": -18.358562469482422, "step": 3709 }, { "epoch": 1.83, "learning_rate": 3.641361215400367e-07, "loss": 0.0, "step": 3710 }, { "epoch": 1.83, "logps_train/chosen": -78.40117645263672, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -316.0556945800781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3047666549682617, "rewards_train/margins": 17.56545352935791, "rewards_train/rejected": -18.870220184326172, "step": 3710 }, { "epoch": 1.83, "learning_rate": 3.6387496433776963e-07, "loss": 0.0001, "step": 3711 }, { "epoch": 1.83, "logps_train/chosen": -76.79646301269531, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -304.73626708984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2779855728149414, "rewards_train/margins": 16.997591972351074, "rewards_train/rejected": -18.275577545166016, "step": 3711 }, { "epoch": 1.83, "learning_rate": 3.636138472388565e-07, "loss": 0.0, "step": 3712 }, { "epoch": 1.83, "logps_train/chosen": -78.45057678222656, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -309.33563232421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4057027101516724, "rewards_train/margins": 16.851495623588562, "rewards_train/rejected": -18.257198333740234, "step": 3712 }, { "epoch": 1.83, "learning_rate": 3.6335277032022443e-07, "loss": 0.0, "step": 3713 }, { "epoch": 1.83, "logps_train/chosen": -74.38954162597656, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -298.2486572265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1008198261260986, "rewards_train/margins": 16.28400683403015, "rewards_train/rejected": -17.38482666015625, "step": 3713 }, { "epoch": 1.83, "learning_rate": 3.630917336587881e-07, "loss": 0.0004, "step": 3714 }, { "epoch": 1.83, "logps_train/chosen": -75.14450073242188, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -313.1936950683594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0467500686645508, "rewards_train/margins": 17.80875301361084, "rewards_train/rejected": -18.85550308227539, "step": 3714 }, { "epoch": 1.83, "learning_rate": 3.62830737331451e-07, "loss": 0.0001, "step": 3715 }, { "epoch": 1.83, "logps_train/chosen": -72.49353790283203, "logps_train/ref_chosen": -62.0625, "logps_train/ref_rejected": -120.1875, "logps_train/rejected": -295.2982177734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0442752838134766, "rewards_train/margins": 16.469924926757812, "rewards_train/rejected": -17.51420021057129, "step": 3715 }, { "epoch": 1.83, "learning_rate": 3.6256978141510427e-07, "loss": 0.0002, "step": 3716 }, { "epoch": 1.83, "logps_train/chosen": -73.82742309570312, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -308.40289306640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9822049140930176, "rewards_train/margins": 17.557109355926514, "rewards_train/rejected": -18.53931427001953, "step": 3716 }, { "epoch": 1.83, "learning_rate": 3.6230886598662713e-07, "loss": 0.0001, "step": 3717 }, { "epoch": 1.83, "logps_train/chosen": -77.4720458984375, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -314.0647277832031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2059931755065918, "rewards_train/margins": 17.650578022003174, "rewards_train/rejected": -18.856571197509766, "step": 3717 }, { "epoch": 1.83, "learning_rate": 3.620479911228875e-07, "loss": 0.0, "step": 3718 }, { "epoch": 1.83, "logps_train/chosen": -77.37259674072266, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -314.45428466796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3641642332077026, "rewards_train/margins": 17.46544086933136, "rewards_train/rejected": -18.829605102539062, "step": 3718 }, { "epoch": 1.83, "learning_rate": 3.6178715690074015e-07, "loss": 0.0, "step": 3719 }, { "epoch": 1.83, "logps_train/chosen": -80.11692810058594, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -310.58355712890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.632054090499878, "rewards_train/margins": 16.868830919265747, "rewards_train/rejected": -18.500885009765625, "step": 3719 }, { "epoch": 1.83, "learning_rate": 3.6152636339702914e-07, "loss": 0.0, "step": 3720 }, { "epoch": 1.83, "logps_train/chosen": -72.64311981201172, "logps_train/ref_chosen": -63.03125, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -311.2071228027344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.959135890007019, "rewards_train/margins": 17.603863835334778, "rewards_train/rejected": -18.562999725341797, "step": 3720 }, { "epoch": 1.83, "learning_rate": 3.612656106885861e-07, "loss": 0.0001, "step": 3721 }, { "epoch": 1.83, "logps_train/chosen": -83.23661041259766, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -322.9215393066406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7604780197143555, "rewards_train/margins": 17.790757179260254, "rewards_train/rejected": -19.55123519897461, "step": 3721 }, { "epoch": 1.83, "learning_rate": 3.6100489885222997e-07, "loss": 0.0, "step": 3722 }, { "epoch": 1.83, "logps_train/chosen": -75.85086822509766, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -314.920166015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2318153381347656, "rewards_train/margins": 17.793407440185547, "rewards_train/rejected": -19.025222778320312, "step": 3722 }, { "epoch": 1.83, "learning_rate": 3.6074422796476885e-07, "loss": 0.0, "step": 3723 }, { "epoch": 1.83, "logps_train/chosen": -80.70182037353516, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -312.9216003417969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6358559131622314, "rewards_train/margins": 17.253620862960815, "rewards_train/rejected": -18.889476776123047, "step": 3723 }, { "epoch": 1.83, "learning_rate": 3.604835981029976e-07, "loss": 0.0001, "step": 3724 }, { "epoch": 1.83, "logps_train/chosen": -75.76740264892578, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -315.244873046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9440256357192993, "rewards_train/margins": 17.946427702903748, "rewards_train/rejected": -18.890453338623047, "step": 3724 }, { "epoch": 1.83, "learning_rate": 3.602230093436997e-07, "loss": 0.0, "step": 3725 }, { "epoch": 1.83, "logps_train/chosen": -76.87709045410156, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -303.10302734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4486463069915771, "rewards_train/margins": 16.59779191017151, "rewards_train/rejected": -18.046438217163086, "step": 3725 }, { "epoch": 1.83, "learning_rate": 3.5996246176364677e-07, "loss": 0.0001, "step": 3726 }, { "epoch": 1.83, "logps_train/chosen": -81.91696166992188, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -322.7119445800781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6294889450073242, "rewards_train/margins": 17.73843288421631, "rewards_train/rejected": -19.367921829223633, "step": 3726 }, { "epoch": 1.84, "learning_rate": 3.5970195543959726e-07, "loss": 0.0, "step": 3727 }, { "epoch": 1.84, "logps_train/chosen": -78.65838623046875, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -309.0802307128906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.401141881942749, "rewards_train/margins": 17.19301438331604, "rewards_train/rejected": -18.59415626525879, "step": 3727 }, { "epoch": 1.84, "learning_rate": 3.5944149044829854e-07, "loss": 0.0, "step": 3728 }, { "epoch": 1.84, "logps_train/chosen": -74.135009765625, "logps_train/ref_chosen": -61.84375, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -305.1025695800781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2302002906799316, "rewards_train/margins": 16.97302770614624, "rewards_train/rejected": -18.203227996826172, "step": 3728 }, { "epoch": 1.84, "learning_rate": 3.591810668664851e-07, "loss": 0.0002, "step": 3729 }, { "epoch": 1.84, "logps_train/chosen": -78.95314025878906, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -132.125, "logps_train/rejected": -322.1461181640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1948989629745483, "rewards_train/margins": 17.8100448846817, "rewards_train/rejected": -19.00494384765625, "step": 3729 }, { "epoch": 1.84, "learning_rate": 3.589206847708795e-07, "loss": 0.0, "step": 3730 }, { "epoch": 1.84, "logps_train/chosen": -72.63703918457031, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -311.8485107421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9015946388244629, "rewards_train/margins": 17.834432125091553, "rewards_train/rejected": -18.736026763916016, "step": 3730 }, { "epoch": 1.84, "learning_rate": 3.5866034423819227e-07, "loss": 0.0001, "step": 3731 }, { "epoch": 1.84, "logps_train/chosen": -72.90058898925781, "logps_train/ref_chosen": -62.40625, "logps_train/ref_rejected": -121.125, "logps_train/rejected": -300.8052978515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0500203371047974, "rewards_train/margins": 16.920743823051453, "rewards_train/rejected": -17.97076416015625, "step": 3731 }, { "epoch": 1.84, "learning_rate": 3.5840004534512123e-07, "loss": 0.0, "step": 3732 }, { "epoch": 1.84, "logps_train/chosen": -73.6496353149414, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -119.875, "logps_train/rejected": -300.11114501953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0268290042877197, "rewards_train/margins": 17.000011205673218, "rewards_train/rejected": -18.026840209960938, "step": 3732 }, { "epoch": 1.84, "learning_rate": 3.5813978816835243e-07, "loss": 0.0, "step": 3733 }, { "epoch": 1.84, "logps_train/chosen": -80.09392547607422, "logps_train/ref_chosen": -67.3125, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -308.8310241699219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2766287326812744, "rewards_train/margins": 17.02385687828064, "rewards_train/rejected": -18.300485610961914, "step": 3733 }, { "epoch": 1.84, "learning_rate": 3.5787957278455925e-07, "loss": 0.0, "step": 3734 }, { "epoch": 1.84, "logps_train/chosen": -77.02098083496094, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -309.6022644042969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1874494552612305, "rewards_train/margins": 17.249144554138184, "rewards_train/rejected": -18.436594009399414, "step": 3734 }, { "epoch": 1.84, "learning_rate": 3.5761939927040287e-07, "loss": 0.0, "step": 3735 }, { "epoch": 1.84, "logps_train/chosen": -79.50900268554688, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -319.6224670410156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4883275032043457, "rewards_train/margins": 17.657561779022217, "rewards_train/rejected": -19.145889282226562, "step": 3735 }, { "epoch": 1.84, "learning_rate": 3.5735926770253226e-07, "loss": 0.0007, "step": 3736 }, { "epoch": 1.84, "logps_train/chosen": -71.16655731201172, "logps_train/ref_chosen": -62.0625, "logps_train/ref_rejected": -123.1875, "logps_train/rejected": -308.875244140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9099178314208984, "rewards_train/margins": 17.656513214111328, "rewards_train/rejected": -18.566431045532227, "step": 3736 }, { "epoch": 1.84, "learning_rate": 3.5709917815758383e-07, "loss": 0.0, "step": 3737 }, { "epoch": 1.84, "logps_train/chosen": -81.0980224609375, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -121.875, "logps_train/rejected": -314.4884033203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7798700332641602, "rewards_train/margins": 17.482836723327637, "rewards_train/rejected": -19.262706756591797, "step": 3737 }, { "epoch": 1.84, "learning_rate": 3.56839130712182e-07, "loss": 0.0, "step": 3738 }, { "epoch": 1.84, "logps_train/chosen": -80.26791381835938, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -313.3467712402344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.481723427772522, "rewards_train/margins": 17.48059117794037, "rewards_train/rejected": -18.96231460571289, "step": 3738 }, { "epoch": 1.84, "learning_rate": 3.56579125442938e-07, "loss": 0.0001, "step": 3739 }, { "epoch": 1.84, "logps_train/chosen": -81.33365631103516, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -319.4358825683594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6325846910476685, "rewards_train/margins": 17.406023859977722, "rewards_train/rejected": -19.03860855102539, "step": 3739 }, { "epoch": 1.84, "learning_rate": 3.563191624264514e-07, "loss": 0.0, "step": 3740 }, { "epoch": 1.84, "logps_train/chosen": -73.51779174804688, "logps_train/ref_chosen": -62.875, "logps_train/ref_rejected": -127.5625, "logps_train/rejected": -317.037109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0633028745651245, "rewards_train/margins": 17.88259518146515, "rewards_train/rejected": -18.945898056030273, "step": 3740 }, { "epoch": 1.84, "learning_rate": 3.560592417393094e-07, "loss": 0.0, "step": 3741 }, { "epoch": 1.84, "logps_train/chosen": -86.55056762695312, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -324.0663146972656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.11008620262146, "rewards_train/margins": 17.42193627357483, "rewards_train/rejected": -19.53202247619629, "step": 3741 }, { "epoch": 1.84, "learning_rate": 3.557993634580858e-07, "loss": 0.0001, "step": 3742 }, { "epoch": 1.84, "logps_train/chosen": -80.91373443603516, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -319.9635009765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6897618770599365, "rewards_train/margins": 17.702289819717407, "rewards_train/rejected": -19.392051696777344, "step": 3742 }, { "epoch": 1.84, "learning_rate": 3.555395276593429e-07, "loss": 0.0, "step": 3743 }, { "epoch": 1.84, "logps_train/chosen": -77.67973327636719, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -312.9517822265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2951700687408447, "rewards_train/margins": 17.402644872665405, "rewards_train/rejected": -18.69781494140625, "step": 3743 }, { "epoch": 1.84, "learning_rate": 3.552797344196298e-07, "loss": 0.0, "step": 3744 }, { "epoch": 1.84, "logps_train/chosen": -78.23078918457031, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -305.71539306640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2448318004608154, "rewards_train/margins": 16.894282579421997, "rewards_train/rejected": -18.139114379882812, "step": 3744 }, { "epoch": 1.84, "learning_rate": 3.5501998381548346e-07, "loss": 0.0, "step": 3745 }, { "epoch": 1.84, "logps_train/chosen": -81.47687530517578, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -324.916015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4648752212524414, "rewards_train/margins": 18.124869346618652, "rewards_train/rejected": -19.589744567871094, "step": 3745 }, { "epoch": 1.84, "learning_rate": 3.5476027592342825e-07, "loss": 0.0, "step": 3746 }, { "epoch": 1.84, "logps_train/chosen": -70.631591796875, "logps_train/ref_chosen": -62.46875, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -305.7148132324219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8148195743560791, "rewards_train/margins": 17.516721487045288, "rewards_train/rejected": -18.331541061401367, "step": 3746 }, { "epoch": 1.84, "learning_rate": 3.545006108199758e-07, "loss": 0.0, "step": 3747 }, { "epoch": 1.84, "logps_train/chosen": -80.33869171142578, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -303.3360595703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.735822319984436, "rewards_train/margins": 16.262430548667908, "rewards_train/rejected": -17.998252868652344, "step": 3747 }, { "epoch": 1.85, "learning_rate": 3.542409885816252e-07, "loss": 0.0001, "step": 3748 }, { "epoch": 1.85, "logps_train/chosen": -79.95492553710938, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -315.3396301269531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.641000509262085, "rewards_train/margins": 17.047014474868774, "rewards_train/rejected": -18.68801498413086, "step": 3748 }, { "epoch": 1.85, "learning_rate": 3.5398140928486287e-07, "loss": 0.0, "step": 3749 }, { "epoch": 1.85, "logps_train/chosen": -77.43112182617188, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -312.2771911621094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1209933757781982, "rewards_train/margins": 17.335240602493286, "rewards_train/rejected": -18.456233978271484, "step": 3749 }, { "epoch": 1.85, "learning_rate": 3.5372187300616273e-07, "loss": 0.0, "step": 3750 }, { "epoch": 1.85, "logps_train/chosen": -75.04934692382812, "logps_train/ref_chosen": -62.65625, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -316.5367126464844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2388947010040283, "rewards_train/margins": 17.764191389083862, "rewards_train/rejected": -19.00308609008789, "step": 3750 }, { "epoch": 1.85, "learning_rate": 3.534623798219858e-07, "loss": 0.0, "step": 3751 }, { "epoch": 1.85, "logps_train/chosen": -77.10731506347656, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -315.4789733886719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.153017520904541, "rewards_train/margins": 17.795857906341553, "rewards_train/rejected": -18.948875427246094, "step": 3751 }, { "epoch": 1.85, "learning_rate": 3.532029298087805e-07, "loss": 0.0002, "step": 3752 }, { "epoch": 1.85, "logps_train/chosen": -77.476318359375, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -317.5564880371094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3310794830322266, "rewards_train/margins": 17.642635345458984, "rewards_train/rejected": -18.97371482849121, "step": 3752 }, { "epoch": 1.85, "learning_rate": 3.529435230429828e-07, "loss": 0.0, "step": 3753 }, { "epoch": 1.85, "logps_train/chosen": -73.9269027709961, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -121.25, "logps_train/rejected": -304.2133483886719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.051820993423462, "rewards_train/margins": 17.240655660629272, "rewards_train/rejected": -18.292476654052734, "step": 3753 }, { "epoch": 1.85, "learning_rate": 3.526841596010154e-07, "loss": 0.0001, "step": 3754 }, { "epoch": 1.85, "logps_train/chosen": -76.7182846069336, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -309.0286560058594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.202883005142212, "rewards_train/margins": 17.016290426254272, "rewards_train/rejected": -18.219173431396484, "step": 3754 }, { "epoch": 1.85, "learning_rate": 3.524248395592888e-07, "loss": 0.0, "step": 3755 }, { "epoch": 1.85, "logps_train/chosen": -77.90423583984375, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -318.77117919921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4137136936187744, "rewards_train/margins": 17.56174635887146, "rewards_train/rejected": -18.975460052490234, "step": 3755 }, { "epoch": 1.85, "learning_rate": 3.521655629942001e-07, "loss": 0.0001, "step": 3756 }, { "epoch": 1.85, "logps_train/chosen": -75.60774993896484, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -302.47052001953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1165857315063477, "rewards_train/margins": 16.930174827575684, "rewards_train/rejected": -18.04676055908203, "step": 3756 }, { "epoch": 1.85, "learning_rate": 3.5190632998213396e-07, "loss": 0.0, "step": 3757 }, { "epoch": 1.85, "logps_train/chosen": -77.57398223876953, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -319.77191162109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2795660495758057, "rewards_train/margins": 18.018232107162476, "rewards_train/rejected": -19.29779815673828, "step": 3757 }, { "epoch": 1.85, "learning_rate": 3.5164714059946264e-07, "loss": 0.0, "step": 3758 }, { "epoch": 1.85, "logps_train/chosen": -71.38249969482422, "logps_train/ref_chosen": -62.4375, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -298.9488525390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.895378828048706, "rewards_train/margins": 16.606780290603638, "rewards_train/rejected": -17.502159118652344, "step": 3758 }, { "epoch": 1.85, "learning_rate": 3.5138799492254456e-07, "loss": 0.0001, "step": 3759 }, { "epoch": 1.85, "logps_train/chosen": -78.21607971191406, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -315.766357421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4527114629745483, "rewards_train/margins": 17.432958245277405, "rewards_train/rejected": -18.885669708251953, "step": 3759 }, { "epoch": 1.85, "learning_rate": 3.511288930277261e-07, "loss": 0.0004, "step": 3760 }, { "epoch": 1.85, "logps_train/chosen": -81.92601776123047, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -317.80438232421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.537670373916626, "rewards_train/margins": 17.588128805160522, "rewards_train/rejected": -19.12579917907715, "step": 3760 }, { "epoch": 1.85, "learning_rate": 3.5086983499134015e-07, "loss": 0.0005, "step": 3761 }, { "epoch": 1.85, "logps_train/chosen": -79.66249084472656, "logps_train/ref_chosen": -61.6875, "logps_train/ref_rejected": -122.4375, "logps_train/rejected": -307.33392333984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7986221313476562, "rewards_train/margins": 16.68906593322754, "rewards_train/rejected": -18.487688064575195, "step": 3761 }, { "epoch": 1.85, "learning_rate": 3.5061082088970705e-07, "loss": 0.0, "step": 3762 }, { "epoch": 1.85, "logps_train/chosen": -82.29293060302734, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -127.5625, "logps_train/rejected": -317.43408203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6821738481521606, "rewards_train/margins": 17.30244529247284, "rewards_train/rejected": -18.984619140625, "step": 3762 }, { "epoch": 1.85, "learning_rate": 3.5035185079913434e-07, "loss": 0.0, "step": 3763 }, { "epoch": 1.85, "logps_train/chosen": -77.99807739257812, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -317.3965148925781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3609895706176758, "rewards_train/margins": 17.500048637390137, "rewards_train/rejected": -18.861038208007812, "step": 3763 }, { "epoch": 1.85, "learning_rate": 3.5009292479591604e-07, "loss": 0.0, "step": 3764 }, { "epoch": 1.85, "logps_train/chosen": -72.07044219970703, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -318.79913330078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7661753296852112, "rewards_train/margins": 18.227801263332367, "rewards_train/rejected": -18.993976593017578, "step": 3764 }, { "epoch": 1.85, "learning_rate": 3.4983404295633376e-07, "loss": 0.0, "step": 3765 }, { "epoch": 1.85, "logps_train/chosen": -75.77233123779297, "logps_train/ref_chosen": -61.40625, "logps_train/ref_rejected": -118.625, "logps_train/rejected": -299.00408935546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4373409748077393, "rewards_train/margins": 16.602278470993042, "rewards_train/rejected": -18.03961944580078, "step": 3765 }, { "epoch": 1.85, "learning_rate": 3.495752053566556e-07, "loss": 0.0, "step": 3766 }, { "epoch": 1.85, "logps_train/chosen": -80.46568298339844, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -131.875, "logps_train/rejected": -331.1253662109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4977891445159912, "rewards_train/margins": 18.42089867591858, "rewards_train/rejected": -19.91868782043457, "step": 3766 }, { "epoch": 1.85, "learning_rate": 3.49316412073137e-07, "loss": 0.0, "step": 3767 }, { "epoch": 1.85, "logps_train/chosen": -78.7098388671875, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -315.59893798828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4234745502471924, "rewards_train/margins": 17.53661799430847, "rewards_train/rejected": -18.960092544555664, "step": 3767 }, { "epoch": 1.86, "learning_rate": 3.4905766318202026e-07, "loss": 0.0, "step": 3768 }, { "epoch": 1.86, "logps_train/chosen": -75.3709487915039, "logps_train/ref_chosen": -61.3125, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -303.46966552734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4068948030471802, "rewards_train/margins": 16.717464327812195, "rewards_train/rejected": -18.124359130859375, "step": 3768 }, { "epoch": 1.86, "learning_rate": 3.4879895875953437e-07, "loss": 0.0, "step": 3769 }, { "epoch": 1.86, "logps_train/chosen": -77.32286834716797, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -312.8110046386719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3361446857452393, "rewards_train/margins": 17.425034284591675, "rewards_train/rejected": -18.761178970336914, "step": 3769 }, { "epoch": 1.86, "learning_rate": 3.485402988818956e-07, "loss": 0.0, "step": 3770 }, { "epoch": 1.86, "logps_train/chosen": -74.42231750488281, "logps_train/ref_chosen": -61.90625, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -303.5202331542969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2523884773254395, "rewards_train/margins": 16.73625898361206, "rewards_train/rejected": -17.9886474609375, "step": 3770 }, { "epoch": 1.86, "learning_rate": 3.482816836253066e-07, "loss": 0.0, "step": 3771 }, { "epoch": 1.86, "logps_train/chosen": -80.77922058105469, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -309.72296142578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.620036244392395, "rewards_train/margins": 16.961880564689636, "rewards_train/rejected": -18.58191680908203, "step": 3771 }, { "epoch": 1.86, "learning_rate": 3.4802311306595743e-07, "loss": 0.0001, "step": 3772 }, { "epoch": 1.86, "logps_train/chosen": -78.8558578491211, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -317.6238708496094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3627345561981201, "rewards_train/margins": 17.49691891670227, "rewards_train/rejected": -18.85965347290039, "step": 3772 }, { "epoch": 1.86, "learning_rate": 3.477645872800249e-07, "loss": 0.0, "step": 3773 }, { "epoch": 1.86, "logps_train/chosen": -74.31378173828125, "logps_train/ref_chosen": -61.46875, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -298.6205749511719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2845035791397095, "rewards_train/margins": 16.377359747886658, "rewards_train/rejected": -17.661863327026367, "step": 3773 }, { "epoch": 1.86, "learning_rate": 3.4750610634367184e-07, "loss": 0.0, "step": 3774 }, { "epoch": 1.86, "logps_train/chosen": -75.34530639648438, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -309.27880859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0635342597961426, "rewards_train/margins": 17.203071117401123, "rewards_train/rejected": -18.266605377197266, "step": 3774 }, { "epoch": 1.86, "learning_rate": 3.4724767033304923e-07, "loss": 0.0, "step": 3775 }, { "epoch": 1.86, "logps_train/chosen": -76.30533599853516, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -316.94873046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1671545505523682, "rewards_train/margins": 17.880595922470093, "rewards_train/rejected": -19.04775047302246, "step": 3775 }, { "epoch": 1.86, "learning_rate": 3.4698927932429344e-07, "loss": 0.0, "step": 3776 }, { "epoch": 1.86, "logps_train/chosen": -81.79424285888672, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -308.1568603515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6681445837020874, "rewards_train/margins": 16.775764107704163, "rewards_train/rejected": -18.44390869140625, "step": 3776 }, { "epoch": 1.86, "learning_rate": 3.467309333935283e-07, "loss": 0.0001, "step": 3777 }, { "epoch": 1.86, "logps_train/chosen": -79.16841125488281, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -316.74151611328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3327109813690186, "rewards_train/margins": 17.338950395584106, "rewards_train/rejected": -18.671661376953125, "step": 3777 }, { "epoch": 1.86, "learning_rate": 3.4647263261686474e-07, "loss": 0.0001, "step": 3778 }, { "epoch": 1.86, "logps_train/chosen": -73.23606872558594, "logps_train/ref_chosen": -62.84375, "logps_train/ref_rejected": -121.1875, "logps_train/rejected": -298.2003173828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0387190580368042, "rewards_train/margins": 16.66583263874054, "rewards_train/rejected": -17.704551696777344, "step": 3778 }, { "epoch": 1.86, "learning_rate": 3.4621437707039937e-07, "loss": 0.0001, "step": 3779 }, { "epoch": 1.86, "logps_train/chosen": -73.58168029785156, "logps_train/ref_chosen": -61.375, "logps_train/ref_rejected": -118.0625, "logps_train/rejected": -299.44354248046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2208139896392822, "rewards_train/margins": 16.920222997665405, "rewards_train/rejected": -18.141036987304688, "step": 3779 }, { "epoch": 1.86, "learning_rate": 3.4595616683021635e-07, "loss": 0.0, "step": 3780 }, { "epoch": 1.86, "logps_train/chosen": -69.07296752929688, "logps_train/ref_chosen": -59.71875, "logps_train/ref_rejected": -118.625, "logps_train/rejected": -296.41485595703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9350807666778564, "rewards_train/margins": 16.846930265426636, "rewards_train/rejected": -17.782011032104492, "step": 3780 }, { "epoch": 1.86, "learning_rate": 3.4569800197238586e-07, "loss": 0.0, "step": 3781 }, { "epoch": 1.86, "logps_train/chosen": -79.64671325683594, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -311.510009765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.51325523853302, "rewards_train/margins": 17.224417567253113, "rewards_train/rejected": -18.737672805786133, "step": 3781 }, { "epoch": 1.86, "learning_rate": 3.454398825729652e-07, "loss": 0.0, "step": 3782 }, { "epoch": 1.86, "logps_train/chosen": -74.38011169433594, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -311.8114013671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8939198851585388, "rewards_train/margins": 17.40489488840103, "rewards_train/rejected": -18.29881477355957, "step": 3782 }, { "epoch": 1.86, "learning_rate": 3.4518180870799817e-07, "loss": 0.0, "step": 3783 }, { "epoch": 1.86, "logps_train/chosen": -74.4300537109375, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -306.01568603515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0125854015350342, "rewards_train/margins": 17.094550371170044, "rewards_train/rejected": -18.107135772705078, "step": 3783 }, { "epoch": 1.86, "learning_rate": 3.4492378045351493e-07, "loss": 0.0, "step": 3784 }, { "epoch": 1.86, "logps_train/chosen": -78.61297607421875, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -311.165771484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4922064542770386, "rewards_train/margins": 17.25454342365265, "rewards_train/rejected": -18.746749877929688, "step": 3784 }, { "epoch": 1.86, "learning_rate": 3.446657978855325e-07, "loss": 0.0, "step": 3785 }, { "epoch": 1.86, "logps_train/chosen": -72.68856811523438, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -297.037109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9449315667152405, "rewards_train/margins": 16.580897748470306, "rewards_train/rejected": -17.525829315185547, "step": 3785 }, { "epoch": 1.86, "learning_rate": 3.44407861080054e-07, "loss": 0.0, "step": 3786 }, { "epoch": 1.86, "logps_train/chosen": -75.19857788085938, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -312.73358154296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0421721935272217, "rewards_train/margins": 17.47132420539856, "rewards_train/rejected": -18.51349639892578, "step": 3786 }, { "epoch": 1.86, "learning_rate": 3.4414997011306976e-07, "loss": 0.0, "step": 3787 }, { "epoch": 1.86, "logps_train/chosen": -74.47344970703125, "logps_train/ref_chosen": -62.0625, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -312.0810852050781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2395079135894775, "rewards_train/margins": 17.339502096176147, "rewards_train/rejected": -18.579010009765625, "step": 3787 }, { "epoch": 1.87, "learning_rate": 3.438921250605556e-07, "loss": 0.0, "step": 3788 }, { "epoch": 1.87, "logps_train/chosen": -79.87683868408203, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -132.25, "logps_train/rejected": -319.1665954589844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3970584869384766, "rewards_train/margins": 17.291086196899414, "rewards_train/rejected": -18.68814468383789, "step": 3788 }, { "epoch": 1.87, "learning_rate": 3.4363432599847495e-07, "loss": 0.002, "step": 3789 }, { "epoch": 1.87, "logps_train/chosen": -78.74014282226562, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -314.50726318359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5474510192871094, "rewards_train/margins": 17.6025447845459, "rewards_train/rejected": -19.149995803833008, "step": 3789 }, { "epoch": 1.87, "learning_rate": 3.433765730027771e-07, "loss": 0.0001, "step": 3790 }, { "epoch": 1.87, "logps_train/chosen": -79.54159545898438, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -310.5396728515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5692965984344482, "rewards_train/margins": 17.07011914253235, "rewards_train/rejected": -18.639415740966797, "step": 3790 }, { "epoch": 1.87, "learning_rate": 3.431188661493975e-07, "loss": 0.0001, "step": 3791 }, { "epoch": 1.87, "logps_train/chosen": -77.8345947265625, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -321.0048828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.312415361404419, "rewards_train/margins": 17.90838313102722, "rewards_train/rejected": -19.22079849243164, "step": 3791 }, { "epoch": 1.87, "learning_rate": 3.428612055142588e-07, "loss": 0.0, "step": 3792 }, { "epoch": 1.87, "logps_train/chosen": -76.38849639892578, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -302.6768493652344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1223454475402832, "rewards_train/margins": 16.646071910858154, "rewards_train/rejected": -17.768417358398438, "step": 3792 }, { "epoch": 1.87, "learning_rate": 3.4260359117326907e-07, "loss": 0.0003, "step": 3793 }, { "epoch": 1.87, "logps_train/chosen": -74.3402099609375, "logps_train/ref_chosen": -62.78125, "logps_train/ref_rejected": -122.3125, "logps_train/rejected": -300.5572509765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1546015739440918, "rewards_train/margins": 16.66890001296997, "rewards_train/rejected": -17.823501586914062, "step": 3793 }, { "epoch": 1.87, "learning_rate": 3.423460232023234e-07, "loss": 0.0, "step": 3794 }, { "epoch": 1.87, "logps_train/chosen": -81.01578521728516, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -131.125, "logps_train/rejected": -331.9036865234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.522330641746521, "rewards_train/margins": 18.55905592441559, "rewards_train/rejected": -20.08138656616211, "step": 3794 }, { "epoch": 1.87, "learning_rate": 3.420885016773033e-07, "loss": 0.0, "step": 3795 }, { "epoch": 1.87, "logps_train/chosen": -77.3903579711914, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -131.625, "logps_train/rejected": -324.44024658203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1955296993255615, "rewards_train/margins": 18.092926263809204, "rewards_train/rejected": -19.288455963134766, "step": 3795 }, { "epoch": 1.87, "learning_rate": 3.41831026674076e-07, "loss": 0.0, "step": 3796 }, { "epoch": 1.87, "logps_train/chosen": -77.9000015258789, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -322.17022705078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1554782390594482, "rewards_train/margins": 18.196458101272583, "rewards_train/rejected": -19.35193634033203, "step": 3796 }, { "epoch": 1.87, "learning_rate": 3.4157359826849566e-07, "loss": 0.0, "step": 3797 }, { "epoch": 1.87, "logps_train/chosen": -77.10283660888672, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -123.3125, "logps_train/rejected": -309.5440979003906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3358205556869507, "rewards_train/margins": 17.289095520973206, "rewards_train/rejected": -18.624916076660156, "step": 3797 }, { "epoch": 1.87, "learning_rate": 3.4131621653640217e-07, "loss": 0.0001, "step": 3798 }, { "epoch": 1.87, "logps_train/chosen": -79.94530487060547, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -322.04718017578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5865718126296997, "rewards_train/margins": 17.6679505109787, "rewards_train/rejected": -19.2545223236084, "step": 3798 }, { "epoch": 1.87, "learning_rate": 3.4105888155362203e-07, "loss": 0.0, "step": 3799 }, { "epoch": 1.87, "logps_train/chosen": -75.55948638916016, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -306.0760498046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1140543222427368, "rewards_train/margins": 17.116111159324646, "rewards_train/rejected": -18.230165481567383, "step": 3799 }, { "epoch": 1.87, "learning_rate": 3.4080159339596804e-07, "loss": 0.0, "step": 3800 }, { "epoch": 1.87, "logps_train/chosen": -77.6036376953125, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -315.3690490722656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.163342833518982, "rewards_train/margins": 17.70422613620758, "rewards_train/rejected": -18.867568969726562, "step": 3800 }, { "epoch": 1.87, "learning_rate": 3.405443521392388e-07, "loss": 0.0, "step": 3801 }, { "epoch": 1.87, "logps_train/chosen": -76.90027618408203, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -314.1881408691406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.319007396697998, "rewards_train/margins": 17.52671480178833, "rewards_train/rejected": -18.845722198486328, "step": 3801 }, { "epoch": 1.87, "learning_rate": 3.402871578592194e-07, "loss": 0.0, "step": 3802 }, { "epoch": 1.87, "logps_train/chosen": -78.56965637207031, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -314.62896728515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2704418897628784, "rewards_train/margins": 17.22741425037384, "rewards_train/rejected": -18.49785614013672, "step": 3802 }, { "epoch": 1.87, "learning_rate": 3.400300106316809e-07, "loss": 0.0, "step": 3803 }, { "epoch": 1.87, "logps_train/chosen": -75.02506256103516, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -317.0914611816406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0532383918762207, "rewards_train/margins": 17.64399290084839, "rewards_train/rejected": -18.69723129272461, "step": 3803 }, { "epoch": 1.87, "learning_rate": 3.397729105323808e-07, "loss": 0.0, "step": 3804 }, { "epoch": 1.87, "logps_train/chosen": -74.49107360839844, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -305.1927490234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1350207328796387, "rewards_train/margins": 17.11033296585083, "rewards_train/rejected": -18.24535369873047, "step": 3804 }, { "epoch": 1.87, "learning_rate": 3.3951585763706244e-07, "loss": 0.0, "step": 3805 }, { "epoch": 1.87, "logps_train/chosen": -72.64409637451172, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -293.63275146484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0142145156860352, "rewards_train/margins": 16.251112937927246, "rewards_train/rejected": -17.26532745361328, "step": 3805 }, { "epoch": 1.87, "learning_rate": 3.392588520214552e-07, "loss": 0.0, "step": 3806 }, { "epoch": 1.87, "logps_train/chosen": -81.79817199707031, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -322.9112854003906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.584406852722168, "rewards_train/margins": 17.913853645324707, "rewards_train/rejected": -19.498260498046875, "step": 3806 }, { "epoch": 1.87, "learning_rate": 3.390018937612751e-07, "loss": 0.0, "step": 3807 }, { "epoch": 1.87, "logps_train/chosen": -80.74632263183594, "logps_train/ref_chosen": -68.25, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -311.748779296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2479239702224731, "rewards_train/margins": 17.081398844718933, "rewards_train/rejected": -18.329322814941406, "step": 3807 }, { "epoch": 1.87, "learning_rate": 3.387449829322232e-07, "loss": 0.0001, "step": 3808 }, { "epoch": 1.87, "logps_train/chosen": -81.26934814453125, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -319.2888488769531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5706846714019775, "rewards_train/margins": 17.821237325668335, "rewards_train/rejected": -19.391921997070312, "step": 3808 }, { "epoch": 1.88, "learning_rate": 3.3848811960998735e-07, "loss": 0.0, "step": 3809 }, { "epoch": 1.88, "logps_train/chosen": -80.16539001464844, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -133.25, "logps_train/rejected": -325.34417724609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3335800170898438, "rewards_train/margins": 17.877696990966797, "rewards_train/rejected": -19.21127700805664, "step": 3809 }, { "epoch": 1.88, "learning_rate": 3.382313038702417e-07, "loss": 0.0, "step": 3810 }, { "epoch": 1.88, "logps_train/chosen": -73.17754364013672, "logps_train/ref_chosen": -62.34375, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -311.36138916015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0826468467712402, "rewards_train/margins": 17.378492832183838, "rewards_train/rejected": -18.461139678955078, "step": 3810 }, { "epoch": 1.88, "learning_rate": 3.379745357886452e-07, "loss": 0.0, "step": 3811 }, { "epoch": 1.88, "logps_train/chosen": -79.80447387695312, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -307.1261291503906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5664825439453125, "rewards_train/margins": 16.618057250976562, "rewards_train/rejected": -18.184539794921875, "step": 3811 }, { "epoch": 1.88, "learning_rate": 3.3771781544084385e-07, "loss": 0.0, "step": 3812 }, { "epoch": 1.88, "logps_train/chosen": -75.61125183105469, "logps_train/ref_chosen": -67.25, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -317.94390869140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8354417085647583, "rewards_train/margins": 18.089027285575867, "rewards_train/rejected": -18.924468994140625, "step": 3812 }, { "epoch": 1.88, "learning_rate": 3.374611429024691e-07, "loss": 0.0, "step": 3813 }, { "epoch": 1.88, "logps_train/chosen": -74.07276916503906, "logps_train/ref_chosen": -62.1875, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -321.37371826171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1873552799224854, "rewards_train/margins": 18.309394598007202, "rewards_train/rejected": -19.496749877929688, "step": 3813 }, { "epoch": 1.88, "learning_rate": 3.372045182491384e-07, "loss": 0.0, "step": 3814 }, { "epoch": 1.88, "logps_train/chosen": -78.79224395751953, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -311.7264099121094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2005138397216797, "rewards_train/margins": 17.114900588989258, "rewards_train/rejected": -18.315414428710938, "step": 3814 }, { "epoch": 1.88, "learning_rate": 3.3694794155645523e-07, "loss": 0.001, "step": 3815 }, { "epoch": 1.88, "logps_train/chosen": -78.46099853515625, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -313.10260009765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3260812759399414, "rewards_train/margins": 17.316502571105957, "rewards_train/rejected": -18.6425838470459, "step": 3815 }, { "epoch": 1.88, "learning_rate": 3.366914129000086e-07, "loss": 0.0, "step": 3816 }, { "epoch": 1.88, "logps_train/chosen": -79.23753356933594, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -321.8546142578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.516624927520752, "rewards_train/margins": 18.23446226119995, "rewards_train/rejected": -19.751087188720703, "step": 3816 }, { "epoch": 1.88, "learning_rate": 3.3643493235537373e-07, "loss": 0.0, "step": 3817 }, { "epoch": 1.88, "logps_train/chosen": -78.27991485595703, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -319.490966796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4764776229858398, "rewards_train/margins": 17.854552268981934, "rewards_train/rejected": -19.331029891967773, "step": 3817 }, { "epoch": 1.88, "learning_rate": 3.3617849999811143e-07, "loss": 0.0, "step": 3818 }, { "epoch": 1.88, "logps_train/chosen": -84.68416595458984, "logps_train/ref_chosen": -68.3125, "logps_train/ref_rejected": -131.625, "logps_train/rejected": -321.21478271484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6382410526275635, "rewards_train/margins": 17.31756567955017, "rewards_train/rejected": -18.955806732177734, "step": 3818 }, { "epoch": 1.88, "learning_rate": 3.359221159037685e-07, "loss": 0.0, "step": 3819 }, { "epoch": 1.88, "logps_train/chosen": -78.70025634765625, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -313.2938232421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2932922840118408, "rewards_train/margins": 17.13999629020691, "rewards_train/rejected": -18.43328857421875, "step": 3819 }, { "epoch": 1.88, "learning_rate": 3.356657801478775e-07, "loss": 0.0, "step": 3820 }, { "epoch": 1.88, "logps_train/chosen": -76.50711059570312, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -307.61346435546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3277616500854492, "rewards_train/margins": 17.02626323699951, "rewards_train/rejected": -18.35402488708496, "step": 3820 }, { "epoch": 1.88, "learning_rate": 3.3540949280595637e-07, "loss": 0.0, "step": 3821 }, { "epoch": 1.88, "logps_train/chosen": -75.1331787109375, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -302.7735595703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1192262172698975, "rewards_train/margins": 17.013696908950806, "rewards_train/rejected": -18.132923126220703, "step": 3821 }, { "epoch": 1.88, "learning_rate": 3.3515325395350957e-07, "loss": 0.0, "step": 3822 }, { "epoch": 1.88, "logps_train/chosen": -76.7176742553711, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -319.85821533203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2534568309783936, "rewards_train/margins": 18.13563847541809, "rewards_train/rejected": -19.389095306396484, "step": 3822 }, { "epoch": 1.88, "learning_rate": 3.348970636660261e-07, "loss": 0.0, "step": 3823 }, { "epoch": 1.88, "logps_train/chosen": -78.7358627319336, "logps_train/ref_chosen": -67.5, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -314.6079406738281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.120314598083496, "rewards_train/margins": 17.44789981842041, "rewards_train/rejected": -18.568214416503906, "step": 3823 }, { "epoch": 1.88, "learning_rate": 3.346409220189819e-07, "loss": 0.0, "step": 3824 }, { "epoch": 1.88, "logps_train/chosen": -78.580078125, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -312.89617919921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3551270961761475, "rewards_train/margins": 17.11242127418518, "rewards_train/rejected": -18.467548370361328, "step": 3824 }, { "epoch": 1.88, "learning_rate": 3.343848290878381e-07, "loss": 0.0, "step": 3825 }, { "epoch": 1.88, "logps_train/chosen": -76.66188049316406, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -311.44012451171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.247145414352417, "rewards_train/margins": 17.29833149909973, "rewards_train/rejected": -18.54547691345215, "step": 3825 }, { "epoch": 1.88, "learning_rate": 3.3412878494804086e-07, "loss": 0.0001, "step": 3826 }, { "epoch": 1.88, "logps_train/chosen": -74.19022369384766, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -309.1185302734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1946567296981812, "rewards_train/margins": 17.032724022865295, "rewards_train/rejected": -18.227380752563477, "step": 3826 }, { "epoch": 1.88, "learning_rate": 3.338727896750232e-07, "loss": 0.0005, "step": 3827 }, { "epoch": 1.88, "logps_train/chosen": -76.99617004394531, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -319.98486328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1237878799438477, "rewards_train/margins": 18.03958797454834, "rewards_train/rejected": -19.163375854492188, "step": 3827 }, { "epoch": 1.88, "learning_rate": 3.3361684334420235e-07, "loss": 0.0, "step": 3828 }, { "epoch": 1.88, "logps_train/chosen": -74.02123260498047, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -298.8203430175781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.132396936416626, "rewards_train/margins": 16.509990453720093, "rewards_train/rejected": -17.64238739013672, "step": 3828 }, { "epoch": 1.89, "learning_rate": 3.333609460309824e-07, "loss": 0.0, "step": 3829 }, { "epoch": 1.89, "logps_train/chosen": -78.053955078125, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -330.1294860839844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1802496910095215, "rewards_train/margins": 18.93445634841919, "rewards_train/rejected": -20.11470603942871, "step": 3829 }, { "epoch": 1.89, "learning_rate": 3.3310509781075204e-07, "loss": 0.0, "step": 3830 }, { "epoch": 1.89, "logps_train/chosen": -74.22150421142578, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -122.4375, "logps_train/rejected": -304.00018310546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0423645973205566, "rewards_train/margins": 17.11458730697632, "rewards_train/rejected": -18.156951904296875, "step": 3830 }, { "epoch": 1.89, "learning_rate": 3.32849298758886e-07, "loss": 0.0001, "step": 3831 }, { "epoch": 1.89, "logps_train/chosen": -78.15795135498047, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -325.25653076171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4713129997253418, "rewards_train/margins": 18.200825214385986, "rewards_train/rejected": -19.672138214111328, "step": 3831 }, { "epoch": 1.89, "learning_rate": 3.3259354895074456e-07, "loss": 0.0, "step": 3832 }, { "epoch": 1.89, "logps_train/chosen": -79.79830169677734, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -318.54266357421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3693807125091553, "rewards_train/margins": 17.805343866348267, "rewards_train/rejected": -19.174724578857422, "step": 3832 }, { "epoch": 1.89, "learning_rate": 3.3233784846167314e-07, "loss": 0.0, "step": 3833 }, { "epoch": 1.89, "logps_train/chosen": -78.74920654296875, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -314.9249267578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5311706066131592, "rewards_train/margins": 17.11708378791809, "rewards_train/rejected": -18.64825439453125, "step": 3833 }, { "epoch": 1.89, "learning_rate": 3.3208219736700293e-07, "loss": 0.0001, "step": 3834 }, { "epoch": 1.89, "logps_train/chosen": -78.49053955078125, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -314.9495849609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4859678745269775, "rewards_train/margins": 17.228177785873413, "rewards_train/rejected": -18.71414566040039, "step": 3834 }, { "epoch": 1.89, "learning_rate": 3.318265957420504e-07, "loss": 0.0001, "step": 3835 }, { "epoch": 1.89, "logps_train/chosen": -77.98963928222656, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -317.979736328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2775286436080933, "rewards_train/margins": 18.175573468208313, "rewards_train/rejected": -19.453102111816406, "step": 3835 }, { "epoch": 1.89, "learning_rate": 3.315710436621176e-07, "loss": 0.0, "step": 3836 }, { "epoch": 1.89, "logps_train/chosen": -79.09744262695312, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -306.10089111328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1961209774017334, "rewards_train/margins": 16.899708032608032, "rewards_train/rejected": -18.095829010009766, "step": 3836 }, { "epoch": 1.89, "learning_rate": 3.313155412024919e-07, "loss": 0.0003, "step": 3837 }, { "epoch": 1.89, "logps_train/chosen": -76.72859191894531, "logps_train/ref_chosen": -62.53125, "logps_train/ref_rejected": -122.125, "logps_train/rejected": -313.5584411621094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.421248197555542, "rewards_train/margins": 17.72629475593567, "rewards_train/rejected": -19.14754295349121, "step": 3837 }, { "epoch": 1.89, "learning_rate": 3.310600884384459e-07, "loss": 0.0001, "step": 3838 }, { "epoch": 1.89, "logps_train/chosen": -76.18603515625, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -321.7733154296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9624021053314209, "rewards_train/margins": 18.12078881263733, "rewards_train/rejected": -19.08319091796875, "step": 3838 }, { "epoch": 1.89, "learning_rate": 3.308046854452381e-07, "loss": 0.0, "step": 3839 }, { "epoch": 1.89, "logps_train/chosen": -80.05598449707031, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -312.8222351074219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5461254119873047, "rewards_train/margins": 17.258071899414062, "rewards_train/rejected": -18.804197311401367, "step": 3839 }, { "epoch": 1.89, "learning_rate": 3.305493322981112e-07, "loss": 0.0, "step": 3840 }, { "epoch": 1.89, "logps_train/chosen": -79.88821411132812, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -122.1875, "logps_train/rejected": -315.0340576171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5229523181915283, "rewards_train/margins": 17.76336359977722, "rewards_train/rejected": -19.28631591796875, "step": 3840 }, { "epoch": 1.89, "learning_rate": 3.3029402907229466e-07, "loss": 0.0, "step": 3841 }, { "epoch": 1.89, "logps_train/chosen": -78.25752258300781, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -314.7784423828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2097375392913818, "rewards_train/margins": 17.46200203895569, "rewards_train/rejected": -18.67173957824707, "step": 3841 }, { "epoch": 1.89, "learning_rate": 3.3003877584300246e-07, "loss": 0.0, "step": 3842 }, { "epoch": 1.89, "logps_train/chosen": -74.64228057861328, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -312.2613220214844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1322455406188965, "rewards_train/margins": 17.388563632965088, "rewards_train/rejected": -18.520809173583984, "step": 3842 }, { "epoch": 1.89, "learning_rate": 3.297835726854334e-07, "loss": 0.0001, "step": 3843 }, { "epoch": 1.89, "logps_train/chosen": -84.29905700683594, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -322.1072998046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7912824153900146, "rewards_train/margins": 17.363395929336548, "rewards_train/rejected": -19.154678344726562, "step": 3843 }, { "epoch": 1.89, "learning_rate": 3.2952841967477275e-07, "loss": 0.0, "step": 3844 }, { "epoch": 1.89, "logps_train/chosen": -75.26565551757812, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -303.9053039550781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1834499835968018, "rewards_train/margins": 16.793993711471558, "rewards_train/rejected": -17.97744369506836, "step": 3844 }, { "epoch": 1.89, "learning_rate": 3.2927331688618974e-07, "loss": 0.0, "step": 3845 }, { "epoch": 1.89, "logps_train/chosen": -75.15143585205078, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -307.239990234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1393866539001465, "rewards_train/margins": 17.14198350906372, "rewards_train/rejected": -18.281370162963867, "step": 3845 }, { "epoch": 1.89, "learning_rate": 3.290182643948396e-07, "loss": 0.0, "step": 3846 }, { "epoch": 1.89, "logps_train/chosen": -77.34378051757812, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -308.705810546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3790560960769653, "rewards_train/margins": 16.982638239860535, "rewards_train/rejected": -18.3616943359375, "step": 3846 }, { "epoch": 1.89, "learning_rate": 3.2876326227586266e-07, "loss": 0.0, "step": 3847 }, { "epoch": 1.89, "logps_train/chosen": -79.92801666259766, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -320.6358642578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.380643606185913, "rewards_train/margins": 17.81399941444397, "rewards_train/rejected": -19.194643020629883, "step": 3847 }, { "epoch": 1.89, "learning_rate": 3.28508310604384e-07, "loss": 0.0, "step": 3848 }, { "epoch": 1.89, "logps_train/chosen": -75.18746948242188, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -312.09765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.044479489326477, "rewards_train/margins": 17.488284945487976, "rewards_train/rejected": -18.532764434814453, "step": 3848 }, { "epoch": 1.9, "learning_rate": 3.282534094555143e-07, "loss": 0.0001, "step": 3849 }, { "epoch": 1.9, "logps_train/chosen": -74.1867904663086, "logps_train/ref_chosen": -62.40625, "logps_train/ref_rejected": -120.875, "logps_train/rejected": -305.9341735839844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1781275272369385, "rewards_train/margins": 17.33008360862732, "rewards_train/rejected": -18.508211135864258, "step": 3849 }, { "epoch": 1.9, "learning_rate": 3.2799855890434895e-07, "loss": 0.0, "step": 3850 }, { "epoch": 1.9, "logps_train/chosen": -81.90023040771484, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -314.845947265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.648763656616211, "rewards_train/margins": 17.29041862487793, "rewards_train/rejected": -18.93918228149414, "step": 3850 }, { "epoch": 1.9, "learning_rate": 3.2774375902596884e-07, "loss": 0.0, "step": 3851 }, { "epoch": 1.9, "logps_train/chosen": -72.99345397949219, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -313.20703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.814042866230011, "rewards_train/margins": 17.952612102031708, "rewards_train/rejected": -18.76665496826172, "step": 3851 }, { "epoch": 1.9, "learning_rate": 3.274890098954397e-07, "loss": 0.0, "step": 3852 }, { "epoch": 1.9, "logps_train/chosen": -73.53559875488281, "logps_train/ref_chosen": -63.03125, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -313.4712829589844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0500438213348389, "rewards_train/margins": 17.95353055000305, "rewards_train/rejected": -19.00357437133789, "step": 3852 }, { "epoch": 1.9, "learning_rate": 3.2723431158781225e-07, "loss": 0.0, "step": 3853 }, { "epoch": 1.9, "logps_train/chosen": -76.82485961914062, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -309.99835205078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.166641354560852, "rewards_train/margins": 17.13070285320282, "rewards_train/rejected": -18.297344207763672, "step": 3853 }, { "epoch": 1.9, "learning_rate": 3.269796641781225e-07, "loss": 0.0, "step": 3854 }, { "epoch": 1.9, "logps_train/chosen": -74.78270721435547, "logps_train/ref_chosen": -60.5625, "logps_train/ref_rejected": -120.3125, "logps_train/rejected": -298.9170227050781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4216299057006836, "rewards_train/margins": 16.437993049621582, "rewards_train/rejected": -17.859622955322266, "step": 3854 }, { "epoch": 1.9, "learning_rate": 3.26725067741391e-07, "loss": 0.0, "step": 3855 }, { "epoch": 1.9, "logps_train/chosen": -71.84050750732422, "logps_train/ref_chosen": -61.6875, "logps_train/ref_rejected": -120.8125, "logps_train/rejected": -295.6898498535156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0150076150894165, "rewards_train/margins": 16.472827315330505, "rewards_train/rejected": -17.487834930419922, "step": 3855 }, { "epoch": 1.9, "learning_rate": 3.2647052235262386e-07, "loss": 0.0, "step": 3856 }, { "epoch": 1.9, "logps_train/chosen": -66.6029052734375, "logps_train/ref_chosen": -59.625, "logps_train/ref_rejected": -120.875, "logps_train/rejected": -297.1114196777344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6997921466827393, "rewards_train/margins": 16.92443537712097, "rewards_train/rejected": -17.62422752380371, "step": 3856 }, { "epoch": 1.9, "learning_rate": 3.2621602808681194e-07, "loss": 0.0009, "step": 3857 }, { "epoch": 1.9, "logps_train/chosen": -80.15541076660156, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -131.875, "logps_train/rejected": -326.84674072265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3143205642700195, "rewards_train/margins": 18.1845121383667, "rewards_train/rejected": -19.49883270263672, "step": 3857 }, { "epoch": 1.9, "learning_rate": 3.2596158501893047e-07, "loss": 0.0003, "step": 3858 }, { "epoch": 1.9, "logps_train/chosen": -77.34263610839844, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -316.7786865234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0246937274932861, "rewards_train/margins": 18.021437406539917, "rewards_train/rejected": -19.046131134033203, "step": 3858 }, { "epoch": 1.9, "learning_rate": 3.257071932239408e-07, "loss": 0.0, "step": 3859 }, { "epoch": 1.9, "logps_train/chosen": -80.33152770996094, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -332.08013916015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.364549160003662, "rewards_train/margins": 18.89531946182251, "rewards_train/rejected": -20.259868621826172, "step": 3859 }, { "epoch": 1.9, "learning_rate": 3.254528527767878e-07, "loss": 0.0, "step": 3860 }, { "epoch": 1.9, "logps_train/chosen": -82.36753845214844, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -133.625, "logps_train/rejected": -324.65753173828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5552597045898438, "rewards_train/margins": 17.54320526123047, "rewards_train/rejected": -19.098464965820312, "step": 3860 }, { "epoch": 1.9, "learning_rate": 3.2519856375240206e-07, "loss": 0.0003, "step": 3861 }, { "epoch": 1.9, "logps_train/chosen": -78.83816528320312, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -314.6861267089844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3894802331924438, "rewards_train/margins": 17.414190649986267, "rewards_train/rejected": -18.80367088317871, "step": 3861 }, { "epoch": 1.9, "learning_rate": 3.2494432622569936e-07, "loss": 0.0, "step": 3862 }, { "epoch": 1.9, "logps_train/chosen": -79.06022644042969, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -316.2180480957031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.270867109298706, "rewards_train/margins": 17.790683031082153, "rewards_train/rejected": -19.06155014038086, "step": 3862 }, { "epoch": 1.9, "learning_rate": 3.2469014027157915e-07, "loss": 0.0, "step": 3863 }, { "epoch": 1.9, "logps_train/chosen": -78.72422790527344, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -317.8302917480469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3057727813720703, "rewards_train/margins": 17.663679122924805, "rewards_train/rejected": -18.969451904296875, "step": 3863 }, { "epoch": 1.9, "learning_rate": 3.244360059649266e-07, "loss": 0.0, "step": 3864 }, { "epoch": 1.9, "logps_train/chosen": -78.4302978515625, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -133.125, "logps_train/rejected": -326.0085754394531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2369751930236816, "rewards_train/margins": 18.045814037322998, "rewards_train/rejected": -19.28278923034668, "step": 3864 }, { "epoch": 1.9, "learning_rate": 3.2418192338061137e-07, "loss": 0.0, "step": 3865 }, { "epoch": 1.9, "logps_train/chosen": -75.44441986083984, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -304.11492919921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.253475546836853, "rewards_train/margins": 16.800695061683655, "rewards_train/rejected": -18.054170608520508, "step": 3865 }, { "epoch": 1.9, "learning_rate": 3.239278925934878e-07, "loss": 0.0, "step": 3866 }, { "epoch": 1.9, "logps_train/chosen": -76.70137023925781, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -320.26385498046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1578322649002075, "rewards_train/margins": 18.17187201976776, "rewards_train/rejected": -19.32970428466797, "step": 3866 }, { "epoch": 1.9, "learning_rate": 3.2367391367839524e-07, "loss": 0.0, "step": 3867 }, { "epoch": 1.9, "logps_train/chosen": -81.95449829101562, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -316.2046203613281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5479404926300049, "rewards_train/margins": 17.45592188835144, "rewards_train/rejected": -19.003862380981445, "step": 3867 }, { "epoch": 1.9, "learning_rate": 3.2341998671015753e-07, "loss": 0.0, "step": 3868 }, { "epoch": 1.9, "logps_train/chosen": -79.77130889892578, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -309.189697265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.387921929359436, "rewards_train/margins": 17.292913794517517, "rewards_train/rejected": -18.680835723876953, "step": 3868 }, { "epoch": 1.9, "learning_rate": 3.231661117635833e-07, "loss": 0.0, "step": 3869 }, { "epoch": 1.9, "logps_train/chosen": -73.84797668457031, "logps_train/ref_chosen": -62.21875, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -306.78521728515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1631183624267578, "rewards_train/margins": 17.112478256225586, "rewards_train/rejected": -18.275596618652344, "step": 3869 }, { "epoch": 1.91, "learning_rate": 3.2291228891346565e-07, "loss": 0.0001, "step": 3870 }, { "epoch": 1.91, "logps_train/chosen": -78.63639831542969, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -308.86077880859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4001147747039795, "rewards_train/margins": 16.96135449409485, "rewards_train/rejected": -18.361469268798828, "step": 3870 }, { "epoch": 1.91, "learning_rate": 3.226585182345829e-07, "loss": 0.0, "step": 3871 }, { "epoch": 1.91, "logps_train/chosen": -83.20771789550781, "logps_train/ref_chosen": -67.625, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -332.57354736328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5586130619049072, "rewards_train/margins": 18.696983098983765, "rewards_train/rejected": -20.255596160888672, "step": 3871 }, { "epoch": 1.91, "learning_rate": 3.2240479980169696e-07, "loss": 0.0, "step": 3872 }, { "epoch": 1.91, "logps_train/chosen": -72.38670349121094, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -312.15277099609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0028300285339355, "rewards_train/margins": 17.6749005317688, "rewards_train/rejected": -18.677730560302734, "step": 3872 }, { "epoch": 1.91, "learning_rate": 3.221511336895555e-07, "loss": 0.0, "step": 3873 }, { "epoch": 1.91, "logps_train/chosen": -85.48438262939453, "logps_train/ref_chosen": -68.125, "logps_train/ref_rejected": -134.5, "logps_train/rejected": -337.29754638671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7388190031051636, "rewards_train/margins": 18.545429348945618, "rewards_train/rejected": -20.28424835205078, "step": 3873 }, { "epoch": 1.91, "learning_rate": 3.2189751997289033e-07, "loss": 0.0, "step": 3874 }, { "epoch": 1.91, "logps_train/chosen": -76.14288330078125, "logps_train/ref_chosen": -62.84375, "logps_train/ref_rejected": -121.9375, "logps_train/rejected": -305.4017028808594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3293755054473877, "rewards_train/margins": 17.017239332199097, "rewards_train/rejected": -18.346614837646484, "step": 3874 }, { "epoch": 1.91, "learning_rate": 3.2164395872641723e-07, "loss": 0.0, "step": 3875 }, { "epoch": 1.91, "logps_train/chosen": -72.7450942993164, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -308.4874267578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9778296947479248, "rewards_train/margins": 17.48619771003723, "rewards_train/rejected": -18.464027404785156, "step": 3875 }, { "epoch": 1.91, "learning_rate": 3.213904500248378e-07, "loss": 0.0, "step": 3876 }, { "epoch": 1.91, "logps_train/chosen": -82.03599548339844, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -132.125, "logps_train/rejected": -332.03424072265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7182968854904175, "rewards_train/margins": 18.267451405525208, "rewards_train/rejected": -19.985748291015625, "step": 3876 }, { "epoch": 1.91, "learning_rate": 3.2113699394283674e-07, "loss": 0.0, "step": 3877 }, { "epoch": 1.91, "logps_train/chosen": -80.0657958984375, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -325.7162170410156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.40325927734375, "rewards_train/margins": 18.188091278076172, "rewards_train/rejected": -19.591350555419922, "step": 3877 }, { "epoch": 1.91, "learning_rate": 3.208835905550841e-07, "loss": 0.0, "step": 3878 }, { "epoch": 1.91, "logps_train/chosen": -76.41371154785156, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -320.11834716796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1880998611450195, "rewards_train/margins": 18.208109855651855, "rewards_train/rejected": -19.396209716796875, "step": 3878 }, { "epoch": 1.91, "learning_rate": 3.206302399362346e-07, "loss": 0.0, "step": 3879 }, { "epoch": 1.91, "logps_train/chosen": -80.6469955444336, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -313.5304260253906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.555129051208496, "rewards_train/margins": 17.368714332580566, "rewards_train/rejected": -18.923843383789062, "step": 3879 }, { "epoch": 1.91, "learning_rate": 3.203769421609266e-07, "loss": 0.0, "step": 3880 }, { "epoch": 1.91, "logps_train/chosen": -80.02336120605469, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -313.6235046386719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3187915086746216, "rewards_train/margins": 17.158745646476746, "rewards_train/rejected": -18.477537155151367, "step": 3880 }, { "epoch": 1.91, "learning_rate": 3.2012369730378355e-07, "loss": 0.0019, "step": 3881 }, { "epoch": 1.91, "logps_train/chosen": -77.43258666992188, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -122.1875, "logps_train/rejected": -309.0244140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3877415657043457, "rewards_train/margins": 17.2960467338562, "rewards_train/rejected": -18.683788299560547, "step": 3881 }, { "epoch": 1.91, "learning_rate": 3.198705054394129e-07, "loss": 0.0, "step": 3882 }, { "epoch": 1.91, "logps_train/chosen": -72.66990661621094, "logps_train/ref_chosen": -59.5625, "logps_train/ref_rejected": -116.1875, "logps_train/rejected": -300.67620849609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.310838222503662, "rewards_train/margins": 17.135982990264893, "rewards_train/rejected": -18.446821212768555, "step": 3882 }, { "epoch": 1.91, "learning_rate": 3.196173666424069e-07, "loss": 0.0001, "step": 3883 }, { "epoch": 1.91, "logps_train/chosen": -78.18418884277344, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -317.7985534667969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4519639015197754, "rewards_train/margins": 17.652501583099365, "rewards_train/rejected": -19.10446548461914, "step": 3883 }, { "epoch": 1.91, "learning_rate": 3.1936428098734193e-07, "loss": 0.0, "step": 3884 }, { "epoch": 1.91, "logps_train/chosen": -76.96208190917969, "logps_train/ref_chosen": -60.9375, "logps_train/ref_rejected": -122.6875, "logps_train/rejected": -308.8926696777344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6023118495941162, "rewards_train/margins": 17.01391053199768, "rewards_train/rejected": -18.616222381591797, "step": 3884 }, { "epoch": 1.91, "learning_rate": 3.191112485487786e-07, "loss": 0.0, "step": 3885 }, { "epoch": 1.91, "logps_train/chosen": -74.93014526367188, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -123.3125, "logps_train/rejected": -310.06097412109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0864713191986084, "rewards_train/margins": 17.58730149269104, "rewards_train/rejected": -18.67377281188965, "step": 3885 }, { "epoch": 1.91, "learning_rate": 3.188582694012621e-07, "loss": 0.0, "step": 3886 }, { "epoch": 1.91, "logps_train/chosen": -73.38155364990234, "logps_train/ref_chosen": -61.75, "logps_train/ref_rejected": -120.875, "logps_train/rejected": -303.63104248046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.163155198097229, "rewards_train/margins": 17.114697813987732, "rewards_train/rejected": -18.27785301208496, "step": 3886 }, { "epoch": 1.91, "learning_rate": 3.186053436193216e-07, "loss": 0.0, "step": 3887 }, { "epoch": 1.91, "logps_train/chosen": -78.94706726074219, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -310.4863586425781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.474686622619629, "rewards_train/margins": 17.09836483001709, "rewards_train/rejected": -18.57305145263672, "step": 3887 }, { "epoch": 1.91, "learning_rate": 3.1835247127747095e-07, "loss": 0.0, "step": 3888 }, { "epoch": 1.91, "logps_train/chosen": -76.57849884033203, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -309.05908203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3393924236297607, "rewards_train/margins": 16.93985676765442, "rewards_train/rejected": -18.27924919128418, "step": 3888 }, { "epoch": 1.91, "learning_rate": 3.180996524502081e-07, "loss": 0.0001, "step": 3889 }, { "epoch": 1.91, "logps_train/chosen": -71.92691802978516, "logps_train/ref_chosen": -61.25, "logps_train/ref_rejected": -121.25, "logps_train/rejected": -303.036865234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.069303274154663, "rewards_train/margins": 17.106699228286743, "rewards_train/rejected": -18.176002502441406, "step": 3889 }, { "epoch": 1.92, "learning_rate": 3.1784688721201493e-07, "loss": 0.0, "step": 3890 }, { "epoch": 1.92, "logps_train/chosen": -78.63322448730469, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -314.93988037109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.434220790863037, "rewards_train/margins": 17.46557855606079, "rewards_train/rejected": -18.899799346923828, "step": 3890 }, { "epoch": 1.92, "learning_rate": 3.17594175637358e-07, "loss": 0.0, "step": 3891 }, { "epoch": 1.92, "logps_train/chosen": -79.17340087890625, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -331.51922607421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2828668355941772, "rewards_train/margins": 18.96182692050934, "rewards_train/rejected": -20.244693756103516, "step": 3891 }, { "epoch": 1.92, "learning_rate": 3.1734151780068765e-07, "loss": 0.0, "step": 3892 }, { "epoch": 1.92, "logps_train/chosen": -78.6572265625, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -319.0833740234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3798339366912842, "rewards_train/margins": 17.784850358963013, "rewards_train/rejected": -19.164684295654297, "step": 3892 }, { "epoch": 1.92, "learning_rate": 3.1708891377643864e-07, "loss": 0.0001, "step": 3893 }, { "epoch": 1.92, "logps_train/chosen": -80.42940521240234, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -309.9224853515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.406075119972229, "rewards_train/margins": 17.171185851097107, "rewards_train/rejected": -18.577260971069336, "step": 3893 }, { "epoch": 1.92, "learning_rate": 3.168363636390301e-07, "loss": 0.0001, "step": 3894 }, { "epoch": 1.92, "logps_train/chosen": -76.33091735839844, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -322.48614501953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1926628351211548, "rewards_train/margins": 18.458879828453064, "rewards_train/rejected": -19.65154266357422, "step": 3894 }, { "epoch": 1.92, "learning_rate": 3.1658386746286464e-07, "loss": 0.0003, "step": 3895 }, { "epoch": 1.92, "logps_train/chosen": -74.63304138183594, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -134.375, "logps_train/rejected": -323.541015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0244373083114624, "rewards_train/margins": 17.88601267337799, "rewards_train/rejected": -18.910449981689453, "step": 3895 }, { "epoch": 1.92, "learning_rate": 3.1633142532232956e-07, "loss": 0.0, "step": 3896 }, { "epoch": 1.92, "logps_train/chosen": -77.9632568359375, "logps_train/ref_chosen": -63.03125, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -311.2706298828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4934940338134766, "rewards_train/margins": 17.246070861816406, "rewards_train/rejected": -18.739564895629883, "step": 3896 }, { "epoch": 1.92, "learning_rate": 3.1607903729179577e-07, "loss": 0.0, "step": 3897 }, { "epoch": 1.92, "logps_train/chosen": -82.99661254882812, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -323.00384521484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6130404472351074, "rewards_train/margins": 17.660099506378174, "rewards_train/rejected": -19.27313995361328, "step": 3897 }, { "epoch": 1.92, "learning_rate": 3.158267034456187e-07, "loss": 0.0001, "step": 3898 }, { "epoch": 1.92, "logps_train/chosen": -76.53492736816406, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -306.634765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2245627641677856, "rewards_train/margins": 16.94145095348358, "rewards_train/rejected": -18.166013717651367, "step": 3898 }, { "epoch": 1.92, "learning_rate": 3.1557442385813767e-07, "loss": 0.0, "step": 3899 }, { "epoch": 1.92, "logps_train/chosen": -77.65353393554688, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -314.89007568359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3062710762023926, "rewards_train/margins": 17.570823192596436, "rewards_train/rejected": -18.877094268798828, "step": 3899 }, { "epoch": 1.92, "learning_rate": 3.153221986036757e-07, "loss": 0.0, "step": 3900 }, { "epoch": 1.92, "logps_train/chosen": -80.42915344238281, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -321.5902099609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4670367240905762, "rewards_train/margins": 17.948235034942627, "rewards_train/rejected": -19.415271759033203, "step": 3900 }, { "epoch": 1.92, "learning_rate": 3.1507002775654026e-07, "loss": 0.0, "step": 3901 }, { "epoch": 1.92, "logps_train/chosen": -79.13636779785156, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -311.81939697265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.433851718902588, "rewards_train/margins": 17.055219173431396, "rewards_train/rejected": -18.489070892333984, "step": 3901 }, { "epoch": 1.92, "learning_rate": 3.1481791139102244e-07, "loss": 0.0004, "step": 3902 }, { "epoch": 1.92, "logps_train/chosen": -79.49630737304688, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -313.2696533203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6413791179656982, "rewards_train/margins": 17.358582735061646, "rewards_train/rejected": -18.999961853027344, "step": 3902 }, { "epoch": 1.92, "learning_rate": 3.145658495813974e-07, "loss": 0.0, "step": 3903 }, { "epoch": 1.92, "logps_train/chosen": -73.26445007324219, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -312.49468994140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8157509565353394, "rewards_train/margins": 17.988600850105286, "rewards_train/rejected": -18.804351806640625, "step": 3903 }, { "epoch": 1.92, "learning_rate": 3.143138424019246e-07, "loss": 0.0, "step": 3904 }, { "epoch": 1.92, "logps_train/chosen": -77.95164489746094, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -316.6287841796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3062483072280884, "rewards_train/margins": 17.430850386619568, "rewards_train/rejected": -18.737098693847656, "step": 3904 }, { "epoch": 1.92, "learning_rate": 3.1406188992684655e-07, "loss": 0.0, "step": 3905 }, { "epoch": 1.92, "logps_train/chosen": -72.53927612304688, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -313.62481689453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7147676348686218, "rewards_train/margins": 18.020078480243683, "rewards_train/rejected": -18.734846115112305, "step": 3905 }, { "epoch": 1.92, "learning_rate": 3.1380999223039063e-07, "loss": 0.0, "step": 3906 }, { "epoch": 1.92, "logps_train/chosen": -75.14677429199219, "logps_train/ref_chosen": -62.0, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -307.08203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3149703741073608, "rewards_train/margins": 17.08620035648346, "rewards_train/rejected": -18.40117073059082, "step": 3906 }, { "epoch": 1.92, "learning_rate": 3.135581493867672e-07, "loss": 0.0, "step": 3907 }, { "epoch": 1.92, "logps_train/chosen": -79.24325561523438, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -311.97845458984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.325692892074585, "rewards_train/margins": 17.285829782485962, "rewards_train/rejected": -18.611522674560547, "step": 3907 }, { "epoch": 1.92, "learning_rate": 3.133063614701712e-07, "loss": 0.0, "step": 3908 }, { "epoch": 1.92, "logps_train/chosen": -74.46186828613281, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -311.07464599609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1728479862213135, "rewards_train/margins": 17.383350133895874, "rewards_train/rejected": -18.556198120117188, "step": 3908 }, { "epoch": 1.92, "learning_rate": 3.130546285547807e-07, "loss": 0.0, "step": 3909 }, { "epoch": 1.92, "logps_train/chosen": -78.97286987304688, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -317.81622314453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.378000020980835, "rewards_train/margins": 17.706310510635376, "rewards_train/rejected": -19.08431053161621, "step": 3909 }, { "epoch": 1.93, "learning_rate": 3.1280295071475796e-07, "loss": 0.0, "step": 3910 }, { "epoch": 1.93, "logps_train/chosen": -72.27427673339844, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -119.125, "logps_train/rejected": -305.3863830566406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9956897497177124, "rewards_train/margins": 17.626054406166077, "rewards_train/rejected": -18.62174415588379, "step": 3910 }, { "epoch": 1.93, "learning_rate": 3.125513280242494e-07, "loss": 0.0002, "step": 3911 }, { "epoch": 1.93, "logps_train/chosen": -73.73636627197266, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -311.0908508300781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.876859188079834, "rewards_train/margins": 17.624903202056885, "rewards_train/rejected": -18.50176239013672, "step": 3911 }, { "epoch": 1.93, "learning_rate": 3.1229976055738425e-07, "loss": 0.0, "step": 3912 }, { "epoch": 1.93, "logps_train/chosen": -73.97477722167969, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -310.1321716308594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1716471910476685, "rewards_train/margins": 17.431315302848816, "rewards_train/rejected": -18.602962493896484, "step": 3912 }, { "epoch": 1.93, "learning_rate": 3.1204824838827635e-07, "loss": 0.0002, "step": 3913 }, { "epoch": 1.93, "logps_train/chosen": -80.71525573730469, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -321.73907470703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4078541994094849, "rewards_train/margins": 17.71546733379364, "rewards_train/rejected": -19.123321533203125, "step": 3913 }, { "epoch": 1.93, "learning_rate": 3.1179679159102267e-07, "loss": 0.0, "step": 3914 }, { "epoch": 1.93, "logps_train/chosen": -76.04376220703125, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -307.95452880859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3524227142333984, "rewards_train/margins": 16.982091903686523, "rewards_train/rejected": -18.334514617919922, "step": 3914 }, { "epoch": 1.93, "learning_rate": 3.1154539023970404e-07, "loss": 0.0001, "step": 3915 }, { "epoch": 1.93, "logps_train/chosen": -79.19114685058594, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -325.99578857421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4229726791381836, "rewards_train/margins": 18.16772174835205, "rewards_train/rejected": -19.590694427490234, "step": 3915 }, { "epoch": 1.93, "learning_rate": 3.1129404440838527e-07, "loss": 0.0, "step": 3916 }, { "epoch": 1.93, "logps_train/chosen": -73.2080307006836, "logps_train/ref_chosen": -61.59375, "logps_train/ref_rejected": -127.5625, "logps_train/rejected": -311.58001708984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1612815856933594, "rewards_train/margins": 17.242618560791016, "rewards_train/rejected": -18.403900146484375, "step": 3916 }, { "epoch": 1.93, "learning_rate": 3.110427541711142e-07, "loss": 0.0, "step": 3917 }, { "epoch": 1.93, "logps_train/chosen": -74.85016632080078, "logps_train/ref_chosen": -62.65625, "logps_train/ref_rejected": -119.125, "logps_train/rejected": -303.2601623535156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2179510593414307, "rewards_train/margins": 17.1967351436615, "rewards_train/rejected": -18.41468620300293, "step": 3917 }, { "epoch": 1.93, "learning_rate": 3.107915196019229e-07, "loss": 0.0, "step": 3918 }, { "epoch": 1.93, "logps_train/chosen": -74.12394714355469, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -312.0921325683594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1420328617095947, "rewards_train/margins": 17.47093939781189, "rewards_train/rejected": -18.612972259521484, "step": 3918 }, { "epoch": 1.93, "learning_rate": 3.105403407748266e-07, "loss": 0.0, "step": 3919 }, { "epoch": 1.93, "logps_train/chosen": -79.14391326904297, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -313.56256103515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.305797815322876, "rewards_train/margins": 17.479366064071655, "rewards_train/rejected": -18.78516387939453, "step": 3919 }, { "epoch": 1.93, "learning_rate": 3.1028921776382427e-07, "loss": 0.0, "step": 3920 }, { "epoch": 1.93, "logps_train/chosen": -75.77466583251953, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -121.3125, "logps_train/rejected": -302.8054504394531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2328135967254639, "rewards_train/margins": 16.919215440750122, "rewards_train/rejected": -18.152029037475586, "step": 3920 }, { "epoch": 1.93, "learning_rate": 3.1003815064289864e-07, "loss": 0.0, "step": 3921 }, { "epoch": 1.93, "logps_train/chosen": -74.85253143310547, "logps_train/ref_chosen": -62.15625, "logps_train/ref_rejected": -123.1875, "logps_train/rejected": -309.54852294921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2689934968948364, "rewards_train/margins": 17.37062442302704, "rewards_train/rejected": -18.639617919921875, "step": 3921 }, { "epoch": 1.93, "learning_rate": 3.0978713948601556e-07, "loss": 0.0001, "step": 3922 }, { "epoch": 1.93, "logps_train/chosen": -78.97708129882812, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -303.4228820800781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4848178625106812, "rewards_train/margins": 16.579490303993225, "rewards_train/rejected": -18.064308166503906, "step": 3922 }, { "epoch": 1.93, "learning_rate": 3.095361843671249e-07, "loss": 0.0, "step": 3923 }, { "epoch": 1.93, "logps_train/chosen": -77.98626708984375, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -315.9024658203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3220640420913696, "rewards_train/margins": 17.353728652000427, "rewards_train/rejected": -18.675792694091797, "step": 3923 }, { "epoch": 1.93, "learning_rate": 3.0928528536015924e-07, "loss": 0.0, "step": 3924 }, { "epoch": 1.93, "logps_train/chosen": -81.79042053222656, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -325.9420166015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5226458311080933, "rewards_train/margins": 17.976537823677063, "rewards_train/rejected": -19.499183654785156, "step": 3924 }, { "epoch": 1.93, "learning_rate": 3.090344425390355e-07, "loss": 0.0, "step": 3925 }, { "epoch": 1.93, "logps_train/chosen": -77.35008239746094, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -313.33038330078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.344041109085083, "rewards_train/margins": 17.33123278617859, "rewards_train/rejected": -18.675273895263672, "step": 3925 }, { "epoch": 1.93, "learning_rate": 3.0878365597765375e-07, "loss": 0.0001, "step": 3926 }, { "epoch": 1.93, "logps_train/chosen": -76.66218566894531, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -304.7717590332031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2074782848358154, "rewards_train/margins": 16.85309910774231, "rewards_train/rejected": -18.060577392578125, "step": 3926 }, { "epoch": 1.93, "learning_rate": 3.08532925749897e-07, "loss": 0.0, "step": 3927 }, { "epoch": 1.93, "logps_train/chosen": -82.71788024902344, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -131.625, "logps_train/rejected": -328.45343017578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.681016445159912, "rewards_train/margins": 18.001047611236572, "rewards_train/rejected": -19.682064056396484, "step": 3927 }, { "epoch": 1.93, "learning_rate": 3.0828225192963255e-07, "loss": 0.0, "step": 3928 }, { "epoch": 1.93, "logps_train/chosen": -76.61650085449219, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -132.25, "logps_train/rejected": -329.9171142578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0443156957626343, "rewards_train/margins": 18.716827034950256, "rewards_train/rejected": -19.76114273071289, "step": 3928 }, { "epoch": 1.93, "learning_rate": 3.080316345907101e-07, "loss": 0.0, "step": 3929 }, { "epoch": 1.93, "logps_train/chosen": -82.40152740478516, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -316.2099609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.602506399154663, "rewards_train/margins": 17.503156900405884, "rewards_train/rejected": -19.105663299560547, "step": 3929 }, { "epoch": 1.94, "learning_rate": 3.077810738069634e-07, "loss": 0.0003, "step": 3930 }, { "epoch": 1.94, "logps_train/chosen": -75.55098724365234, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -131.5, "logps_train/rejected": -321.2903747558594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.033174753189087, "rewards_train/margins": 17.94312834739685, "rewards_train/rejected": -18.976303100585938, "step": 3930 }, { "epoch": 1.94, "learning_rate": 3.075305696522097e-07, "loss": 0.0001, "step": 3931 }, { "epoch": 1.94, "logps_train/chosen": -78.89183044433594, "logps_train/ref_chosen": -61.6875, "logps_train/ref_rejected": -120.9375, "logps_train/rejected": -309.39837646484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7196033000946045, "rewards_train/margins": 17.126630067825317, "rewards_train/rejected": -18.846233367919922, "step": 3931 }, { "epoch": 1.94, "learning_rate": 3.0728012220024875e-07, "loss": 0.0, "step": 3932 }, { "epoch": 1.94, "logps_train/chosen": -72.30990600585938, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -300.08319091796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.839853048324585, "rewards_train/margins": 16.747568368911743, "rewards_train/rejected": -17.587421417236328, "step": 3932 }, { "epoch": 1.94, "learning_rate": 3.0702973152486433e-07, "loss": 0.0, "step": 3933 }, { "epoch": 1.94, "logps_train/chosen": -83.74998474121094, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -313.2958679199219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.985692024230957, "rewards_train/margins": 16.909323692321777, "rewards_train/rejected": -18.895015716552734, "step": 3933 }, { "epoch": 1.94, "learning_rate": 3.06779397699823e-07, "loss": 0.0, "step": 3934 }, { "epoch": 1.94, "logps_train/chosen": -76.33195495605469, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -314.422119140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1801199913024902, "rewards_train/margins": 17.63694429397583, "rewards_train/rejected": -18.81706428527832, "step": 3934 }, { "epoch": 1.94, "learning_rate": 3.0652912079887485e-07, "loss": 0.0, "step": 3935 }, { "epoch": 1.94, "logps_train/chosen": -70.76345825195312, "logps_train/ref_chosen": -59.59375, "logps_train/ref_rejected": -118.9375, "logps_train/rejected": -305.2132873535156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1170687675476074, "rewards_train/margins": 17.508751392364502, "rewards_train/rejected": -18.62582015991211, "step": 3935 }, { "epoch": 1.94, "learning_rate": 3.062789008957534e-07, "loss": 0.0, "step": 3936 }, { "epoch": 1.94, "logps_train/chosen": -73.83897399902344, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -120.0625, "logps_train/rejected": -309.4983825683594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.053624153137207, "rewards_train/margins": 17.890501976013184, "rewards_train/rejected": -18.94412612915039, "step": 3936 }, { "epoch": 1.94, "learning_rate": 3.060287380641748e-07, "loss": 0.0, "step": 3937 }, { "epoch": 1.94, "logps_train/chosen": -79.95828247070312, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -318.2877502441406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3572537899017334, "rewards_train/margins": 17.526745080947876, "rewards_train/rejected": -18.88399887084961, "step": 3937 }, { "epoch": 1.94, "learning_rate": 3.057786323778389e-07, "loss": 0.0, "step": 3938 }, { "epoch": 1.94, "logps_train/chosen": -82.47442626953125, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -322.025634765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7030571699142456, "rewards_train/margins": 17.9859801530838, "rewards_train/rejected": -19.689037322998047, "step": 3938 }, { "epoch": 1.94, "learning_rate": 3.055285839104284e-07, "loss": 0.0, "step": 3939 }, { "epoch": 1.94, "logps_train/chosen": -79.65797424316406, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -316.60797119140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6711682081222534, "rewards_train/margins": 17.519999623298645, "rewards_train/rejected": -19.1911678314209, "step": 3939 }, { "epoch": 1.94, "learning_rate": 3.052785927356093e-07, "loss": 0.0, "step": 3940 }, { "epoch": 1.94, "logps_train/chosen": -77.0350341796875, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -301.59307861328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.393737554550171, "rewards_train/margins": 16.622703790664673, "rewards_train/rejected": -18.016441345214844, "step": 3940 }, { "epoch": 1.94, "learning_rate": 3.050286589270309e-07, "loss": 0.0002, "step": 3941 }, { "epoch": 1.94, "logps_train/chosen": -77.12545776367188, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -314.53033447265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2473118305206299, "rewards_train/margins": 17.750256299972534, "rewards_train/rejected": -18.997568130493164, "step": 3941 }, { "epoch": 1.94, "learning_rate": 3.0477878255832503e-07, "loss": 0.0, "step": 3942 }, { "epoch": 1.94, "logps_train/chosen": -73.33552551269531, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -313.5223693847656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.005330204963684, "rewards_train/margins": 17.79832351207733, "rewards_train/rejected": -18.803653717041016, "step": 3942 }, { "epoch": 1.94, "learning_rate": 3.045289637031073e-07, "loss": 0.0, "step": 3943 }, { "epoch": 1.94, "logps_train/chosen": -69.16522216796875, "logps_train/ref_chosen": -61.0, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -310.75164794921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8150568008422852, "rewards_train/margins": 17.827296257019043, "rewards_train/rejected": -18.642353057861328, "step": 3943 }, { "epoch": 1.94, "learning_rate": 3.0427920243497554e-07, "loss": 0.0, "step": 3944 }, { "epoch": 1.94, "logps_train/chosen": -78.58287811279297, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -316.497802734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3297722339630127, "rewards_train/margins": 17.61385416984558, "rewards_train/rejected": -18.943626403808594, "step": 3944 }, { "epoch": 1.94, "learning_rate": 3.040294988275116e-07, "loss": 0.0, "step": 3945 }, { "epoch": 1.94, "logps_train/chosen": -78.63107299804688, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -323.9154052734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3935272693634033, "rewards_train/margins": 18.20299220085144, "rewards_train/rejected": -19.596519470214844, "step": 3945 }, { "epoch": 1.94, "learning_rate": 3.037798529542799e-07, "loss": 0.0, "step": 3946 }, { "epoch": 1.94, "logps_train/chosen": -80.07574462890625, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -319.9665832519531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.497027039527893, "rewards_train/margins": 17.780587553977966, "rewards_train/rejected": -19.27761459350586, "step": 3946 }, { "epoch": 1.94, "learning_rate": 3.0353026488882727e-07, "loss": 0.0, "step": 3947 }, { "epoch": 1.94, "logps_train/chosen": -75.2357177734375, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -314.25439453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8833370804786682, "rewards_train/margins": 18.130336701869965, "rewards_train/rejected": -19.013673782348633, "step": 3947 }, { "epoch": 1.94, "learning_rate": 3.032807347046844e-07, "loss": 0.0, "step": 3948 }, { "epoch": 1.94, "logps_train/chosen": -76.96536254882812, "logps_train/ref_chosen": -61.78125, "logps_train/ref_rejected": -120.4375, "logps_train/rejected": -302.7655944824219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5189979076385498, "rewards_train/margins": 16.714593172073364, "rewards_train/rejected": -18.233591079711914, "step": 3948 }, { "epoch": 1.94, "learning_rate": 3.030312624753645e-07, "loss": 0.0, "step": 3949 }, { "epoch": 1.94, "logps_train/chosen": -75.79487609863281, "logps_train/ref_chosen": -62.0, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -319.9804992675781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3798291683197021, "rewards_train/margins": 17.704303979873657, "rewards_train/rejected": -19.08413314819336, "step": 3949 }, { "epoch": 1.94, "learning_rate": 3.027818482743638e-07, "loss": 0.0, "step": 3950 }, { "epoch": 1.94, "logps_train/chosen": -77.74594116210938, "logps_train/ref_chosen": -67.875, "logps_train/ref_rejected": -130.375, "logps_train/rejected": -324.9391784667969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9864591360092163, "rewards_train/margins": 18.47328245639801, "rewards_train/rejected": -19.459741592407227, "step": 3950 }, { "epoch": 1.95, "learning_rate": 3.0253249217516137e-07, "loss": 0.0, "step": 3951 }, { "epoch": 1.95, "logps_train/chosen": -76.2029800415039, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -312.8683166503906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1790382862091064, "rewards_train/margins": 17.659746885299683, "rewards_train/rejected": -18.83878517150879, "step": 3951 }, { "epoch": 1.95, "learning_rate": 3.022831942512192e-07, "loss": 0.0, "step": 3952 }, { "epoch": 1.95, "logps_train/chosen": -79.69792938232422, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -312.291015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4600763320922852, "rewards_train/margins": 17.04329204559326, "rewards_train/rejected": -18.503368377685547, "step": 3952 }, { "epoch": 1.95, "learning_rate": 3.0203395457598215e-07, "loss": 0.0004, "step": 3953 }, { "epoch": 1.95, "logps_train/chosen": -78.159423828125, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -310.72393798828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.472339153289795, "rewards_train/margins": 17.400347232818604, "rewards_train/rejected": -18.8726863861084, "step": 3953 }, { "epoch": 1.95, "learning_rate": 3.0178477322287787e-07, "loss": 0.0, "step": 3954 }, { "epoch": 1.95, "logps_train/chosen": -74.75362396240234, "logps_train/ref_chosen": -62.4375, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -306.59027099609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2298545837402344, "rewards_train/margins": 16.82301902770996, "rewards_train/rejected": -18.052873611450195, "step": 3954 }, { "epoch": 1.95, "learning_rate": 3.01535650265317e-07, "loss": 0.0, "step": 3955 }, { "epoch": 1.95, "logps_train/chosen": -81.7649917602539, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -320.2309875488281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7618505954742432, "rewards_train/margins": 17.540448904037476, "rewards_train/rejected": -19.30229949951172, "step": 3955 }, { "epoch": 1.95, "learning_rate": 3.0128658577669265e-07, "loss": 0.0, "step": 3956 }, { "epoch": 1.95, "logps_train/chosen": -76.59715270996094, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -304.6849365234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3357901573181152, "rewards_train/margins": 16.87254762649536, "rewards_train/rejected": -18.208337783813477, "step": 3956 }, { "epoch": 1.95, "learning_rate": 3.0103757983038103e-07, "loss": 0.0, "step": 3957 }, { "epoch": 1.95, "logps_train/chosen": -73.2093505859375, "logps_train/ref_chosen": -61.84375, "logps_train/ref_rejected": -121.9375, "logps_train/rejected": -306.8397216796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1360225677490234, "rewards_train/margins": 17.352394104003906, "rewards_train/rejected": -18.48841667175293, "step": 3957 }, { "epoch": 1.95, "learning_rate": 3.0078863249974096e-07, "loss": 0.0, "step": 3958 }, { "epoch": 1.95, "logps_train/chosen": -78.57540130615234, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -334.3093566894531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4027060270309448, "rewards_train/margins": 19.125593304634094, "rewards_train/rejected": -20.52829933166504, "step": 3958 }, { "epoch": 1.95, "learning_rate": 3.00539743858114e-07, "loss": 0.0, "step": 3959 }, { "epoch": 1.95, "logps_train/chosen": -75.26345825195312, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -309.8506164550781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0829859972000122, "rewards_train/margins": 17.43215262889862, "rewards_train/rejected": -18.515138626098633, "step": 3959 }, { "epoch": 1.95, "learning_rate": 3.002909139788246e-07, "loss": 0.0, "step": 3960 }, { "epoch": 1.95, "logps_train/chosen": -78.05810546875, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -131.5, "logps_train/rejected": -318.14837646484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1647465229034424, "rewards_train/margins": 17.497942686080933, "rewards_train/rejected": -18.662689208984375, "step": 3960 }, { "epoch": 1.95, "learning_rate": 3.000421429351792e-07, "loss": 0.0, "step": 3961 }, { "epoch": 1.95, "logps_train/chosen": -80.14950561523438, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -308.3419494628906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4424412250518799, "rewards_train/margins": 16.88091540336609, "rewards_train/rejected": -18.32335662841797, "step": 3961 }, { "epoch": 1.95, "learning_rate": 2.9979343080046775e-07, "loss": 0.0, "step": 3962 }, { "epoch": 1.95, "logps_train/chosen": -77.3169174194336, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -304.398681640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.132814645767212, "rewards_train/margins": 16.896703481674194, "rewards_train/rejected": -18.029518127441406, "step": 3962 }, { "epoch": 1.95, "learning_rate": 2.995447776479628e-07, "loss": 0.0002, "step": 3963 }, { "epoch": 1.95, "logps_train/chosen": -85.53626251220703, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -323.64453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8514293432235718, "rewards_train/margins": 17.751989006996155, "rewards_train/rejected": -19.603418350219727, "step": 3963 }, { "epoch": 1.95, "learning_rate": 2.9929618355091887e-07, "loss": 0.0, "step": 3964 }, { "epoch": 1.95, "logps_train/chosen": -81.97856140136719, "logps_train/ref_chosen": -67.9375, "logps_train/ref_rejected": -135.0, "logps_train/rejected": -326.5789794921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4062542915344238, "rewards_train/margins": 17.748374462127686, "rewards_train/rejected": -19.15462875366211, "step": 3964 }, { "epoch": 1.95, "learning_rate": 2.990476485825736e-07, "loss": 0.0, "step": 3965 }, { "epoch": 1.95, "logps_train/chosen": -75.21073913574219, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -307.29754638671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.004252314567566, "rewards_train/margins": 17.175454258918762, "rewards_train/rejected": -18.179706573486328, "step": 3965 }, { "epoch": 1.95, "learning_rate": 2.987991728161469e-07, "loss": 0.0001, "step": 3966 }, { "epoch": 1.95, "logps_train/chosen": -78.33983612060547, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -120.0, "logps_train/rejected": -301.29168701171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4814445972442627, "rewards_train/margins": 16.644549131393433, "rewards_train/rejected": -18.125993728637695, "step": 3966 }, { "epoch": 1.95, "learning_rate": 2.985507563248416e-07, "loss": 0.0, "step": 3967 }, { "epoch": 1.95, "logps_train/chosen": -80.32674407958984, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -313.6752624511719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4637778997421265, "rewards_train/margins": 17.405215859413147, "rewards_train/rejected": -18.868993759155273, "step": 3967 }, { "epoch": 1.95, "learning_rate": 2.983023991818429e-07, "loss": 0.0, "step": 3968 }, { "epoch": 1.95, "logps_train/chosen": -79.67628479003906, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -319.423828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3812024593353271, "rewards_train/margins": 17.86665177345276, "rewards_train/rejected": -19.247854232788086, "step": 3968 }, { "epoch": 1.95, "learning_rate": 2.980541014603183e-07, "loss": 0.0, "step": 3969 }, { "epoch": 1.95, "logps_train/chosen": -76.73983001708984, "logps_train/ref_chosen": -62.59375, "logps_train/ref_rejected": -122.0625, "logps_train/rejected": -304.92333984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4148517847061157, "rewards_train/margins": 16.87015950679779, "rewards_train/rejected": -18.285011291503906, "step": 3969 }, { "epoch": 1.95, "learning_rate": 2.978058632334182e-07, "loss": 0.0001, "step": 3970 }, { "epoch": 1.95, "logps_train/chosen": -76.15957641601562, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -310.7003173828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1624417304992676, "rewards_train/margins": 17.30299711227417, "rewards_train/rejected": -18.465438842773438, "step": 3970 }, { "epoch": 1.96, "learning_rate": 2.975576845742751e-07, "loss": 0.0, "step": 3971 }, { "epoch": 1.96, "logps_train/chosen": -80.51324462890625, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -315.0641784667969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.67539644241333, "rewards_train/margins": 17.21051549911499, "rewards_train/rejected": -18.88591194152832, "step": 3971 }, { "epoch": 1.96, "learning_rate": 2.9730956555600404e-07, "loss": 0.0, "step": 3972 }, { "epoch": 1.96, "logps_train/chosen": -80.5865249633789, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -319.3199462890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5766210556030273, "rewards_train/margins": 17.71104145050049, "rewards_train/rejected": -19.287662506103516, "step": 3972 }, { "epoch": 1.96, "learning_rate": 2.970615062517029e-07, "loss": 0.0, "step": 3973 }, { "epoch": 1.96, "logps_train/chosen": -73.86689758300781, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -315.32647705078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.950068473815918, "rewards_train/margins": 17.7134370803833, "rewards_train/rejected": -18.66350555419922, "step": 3973 }, { "epoch": 1.96, "learning_rate": 2.9681350673445124e-07, "loss": 0.0, "step": 3974 }, { "epoch": 1.96, "logps_train/chosen": -76.1822509765625, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -316.19854736328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3046993017196655, "rewards_train/margins": 17.540059685707092, "rewards_train/rejected": -18.844758987426758, "step": 3974 }, { "epoch": 1.96, "learning_rate": 2.9656556707731174e-07, "loss": 0.0, "step": 3975 }, { "epoch": 1.96, "logps_train/chosen": -73.293701171875, "logps_train/ref_chosen": -63.03125, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -315.30474853515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0265862941741943, "rewards_train/margins": 17.897249460220337, "rewards_train/rejected": -18.92383575439453, "step": 3975 }, { "epoch": 1.96, "learning_rate": 2.9631768735332865e-07, "loss": 0.0, "step": 3976 }, { "epoch": 1.96, "logps_train/chosen": -82.25175476074219, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -318.1708679199219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6112110614776611, "rewards_train/margins": 17.491034269332886, "rewards_train/rejected": -19.102245330810547, "step": 3976 }, { "epoch": 1.96, "learning_rate": 2.960698676355293e-07, "loss": 0.0001, "step": 3977 }, { "epoch": 1.96, "logps_train/chosen": -73.28376770019531, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -324.5377502441406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.789851188659668, "rewards_train/margins": 18.68521213531494, "rewards_train/rejected": -19.47506332397461, "step": 3977 }, { "epoch": 1.96, "learning_rate": 2.9582210799692324e-07, "loss": 0.0, "step": 3978 }, { "epoch": 1.96, "logps_train/chosen": -76.46823120117188, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -319.89056396484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1953582763671875, "rewards_train/margins": 18.289548873901367, "rewards_train/rejected": -19.484907150268555, "step": 3978 }, { "epoch": 1.96, "learning_rate": 2.955744085105016e-07, "loss": 0.0, "step": 3979 }, { "epoch": 1.96, "logps_train/chosen": -79.99954223632812, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -306.49664306640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3299832344055176, "rewards_train/margins": 16.90142011642456, "rewards_train/rejected": -18.231403350830078, "step": 3979 }, { "epoch": 1.96, "learning_rate": 2.9532676924923893e-07, "loss": 0.0, "step": 3980 }, { "epoch": 1.96, "logps_train/chosen": -75.06404113769531, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -305.7950439453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1724679470062256, "rewards_train/margins": 17.15029740333557, "rewards_train/rejected": -18.322765350341797, "step": 3980 }, { "epoch": 1.96, "learning_rate": 2.95079190286091e-07, "loss": 0.0001, "step": 3981 }, { "epoch": 1.96, "logps_train/chosen": -76.4698486328125, "logps_train/ref_chosen": -60.5625, "logps_train/ref_rejected": -120.0, "logps_train/rejected": -299.3974609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.591516137123108, "rewards_train/margins": 16.34720265865326, "rewards_train/rejected": -17.938718795776367, "step": 3981 }, { "epoch": 1.96, "learning_rate": 2.9483167169399614e-07, "loss": 0.0004, "step": 3982 }, { "epoch": 1.96, "logps_train/chosen": -76.72129821777344, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -313.84197998046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.251183032989502, "rewards_train/margins": 17.637406826019287, "rewards_train/rejected": -18.88858985900879, "step": 3982 }, { "epoch": 1.96, "learning_rate": 2.9458421354587564e-07, "loss": 0.0, "step": 3983 }, { "epoch": 1.96, "logps_train/chosen": -76.95806884765625, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -316.24853515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1518622636795044, "rewards_train/margins": 17.885247111320496, "rewards_train/rejected": -19.037109375, "step": 3983 }, { "epoch": 1.96, "learning_rate": 2.9433681591463164e-07, "loss": 0.0, "step": 3984 }, { "epoch": 1.96, "logps_train/chosen": -80.06737518310547, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -310.30792236328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3964836597442627, "rewards_train/margins": 17.153446912765503, "rewards_train/rejected": -18.549930572509766, "step": 3984 }, { "epoch": 1.96, "learning_rate": 2.940894788731496e-07, "loss": 0.0, "step": 3985 }, { "epoch": 1.96, "logps_train/chosen": -81.07522583007812, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -317.62725830078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5921905040740967, "rewards_train/margins": 17.513453722000122, "rewards_train/rejected": -19.10564422607422, "step": 3985 }, { "epoch": 1.96, "learning_rate": 2.9384220249429644e-07, "loss": 0.0001, "step": 3986 }, { "epoch": 1.96, "logps_train/chosen": -79.37227630615234, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -308.8984069824219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.474239706993103, "rewards_train/margins": 16.872878670692444, "rewards_train/rejected": -18.347118377685547, "step": 3986 }, { "epoch": 1.96, "learning_rate": 2.935949868509215e-07, "loss": 0.0, "step": 3987 }, { "epoch": 1.96, "logps_train/chosen": -79.41751098632812, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -328.23992919921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.35825514793396, "rewards_train/margins": 18.44259476661682, "rewards_train/rejected": -19.80084991455078, "step": 3987 }, { "epoch": 1.96, "learning_rate": 2.9334783201585623e-07, "loss": 0.0, "step": 3988 }, { "epoch": 1.96, "logps_train/chosen": -76.96863555908203, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -325.68048095703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.329969048500061, "rewards_train/margins": 18.28212583065033, "rewards_train/rejected": -19.61209487915039, "step": 3988 }, { "epoch": 1.96, "learning_rate": 2.9310073806191406e-07, "loss": 0.0, "step": 3989 }, { "epoch": 1.96, "logps_train/chosen": -75.50604248046875, "logps_train/ref_chosen": -63.15625, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -311.1033020019531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2340024709701538, "rewards_train/margins": 17.4971262216568, "rewards_train/rejected": -18.731128692626953, "step": 3989 }, { "epoch": 1.96, "learning_rate": 2.9285370506189066e-07, "loss": 0.0, "step": 3990 }, { "epoch": 1.96, "logps_train/chosen": -83.32359313964844, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -329.8262939453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.718979835510254, "rewards_train/margins": 18.244215965270996, "rewards_train/rejected": -19.96319580078125, "step": 3990 }, { "epoch": 1.97, "learning_rate": 2.926067330885634e-07, "loss": 0.0, "step": 3991 }, { "epoch": 1.97, "logps_train/chosen": -74.80891418457031, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -310.5091247558594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1273759603500366, "rewards_train/margins": 17.506497025489807, "rewards_train/rejected": -18.633872985839844, "step": 3991 }, { "epoch": 1.97, "learning_rate": 2.9235982221469225e-07, "loss": 0.0, "step": 3992 }, { "epoch": 1.97, "logps_train/chosen": -74.62149047851562, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -321.7174987792969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8776276111602783, "rewards_train/margins": 18.344221830368042, "rewards_train/rejected": -19.22184944152832, "step": 3992 }, { "epoch": 1.97, "learning_rate": 2.9211297251301823e-07, "loss": 0.0, "step": 3993 }, { "epoch": 1.97, "logps_train/chosen": -74.93215942382812, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -316.603271484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2212427854537964, "rewards_train/margins": 17.96261990070343, "rewards_train/rejected": -19.183862686157227, "step": 3993 }, { "epoch": 1.97, "learning_rate": 2.918661840562655e-07, "loss": 0.0, "step": 3994 }, { "epoch": 1.97, "logps_train/chosen": -73.7356948852539, "logps_train/ref_chosen": -62.0625, "logps_train/ref_rejected": -119.0, "logps_train/rejected": -300.40484619140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1680034399032593, "rewards_train/margins": 16.973361611366272, "rewards_train/rejected": -18.14136505126953, "step": 3994 }, { "epoch": 1.97, "learning_rate": 2.9161945691713943e-07, "loss": 0.0001, "step": 3995 }, { "epoch": 1.97, "logps_train/chosen": -75.58967590332031, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -122.125, "logps_train/rejected": -307.7998962402344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.320051908493042, "rewards_train/margins": 17.247681856155396, "rewards_train/rejected": -18.567733764648438, "step": 3995 }, { "epoch": 1.97, "learning_rate": 2.9137279116832727e-07, "loss": 0.0, "step": 3996 }, { "epoch": 1.97, "logps_train/chosen": -76.75810241699219, "logps_train/ref_chosen": -63.59375, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -302.4539794921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.316191554069519, "rewards_train/margins": 16.78360092639923, "rewards_train/rejected": -18.09979248046875, "step": 3996 }, { "epoch": 1.97, "learning_rate": 2.9112618688249866e-07, "loss": 0.0, "step": 3997 }, { "epoch": 1.97, "logps_train/chosen": -77.38639068603516, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -131.375, "logps_train/rejected": -326.732421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0534343719482422, "rewards_train/margins": 18.481769561767578, "rewards_train/rejected": -19.53520393371582, "step": 3997 }, { "epoch": 1.97, "learning_rate": 2.908796441323049e-07, "loss": 0.0, "step": 3998 }, { "epoch": 1.97, "logps_train/chosen": -75.46994018554688, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -322.509521484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1726288795471191, "rewards_train/margins": 18.27031660079956, "rewards_train/rejected": -19.44294548034668, "step": 3998 }, { "epoch": 1.97, "learning_rate": 2.9063316299037896e-07, "loss": 0.0, "step": 3999 }, { "epoch": 1.97, "logps_train/chosen": -81.9388427734375, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -326.8929748535156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6029658317565918, "rewards_train/margins": 18.309963703155518, "rewards_train/rejected": -19.91292953491211, "step": 3999 }, { "epoch": 1.97, "learning_rate": 2.903867435293361e-07, "loss": 0.0001, "step": 4000 }, { "epoch": 1.97, "logps_train/chosen": -78.08457946777344, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -312.77423095703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.343956470489502, "rewards_train/margins": 17.659785747528076, "rewards_train/rejected": -19.003742218017578, "step": 4000 }, { "epoch": 1.97, "learning_rate": 2.90140385821773e-07, "loss": 0.0, "step": 4001 }, { "epoch": 1.97, "logps_train/chosen": -76.7419204711914, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -319.4718017578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1218000650405884, "rewards_train/margins": 18.280070662498474, "rewards_train/rejected": -19.401870727539062, "step": 4001 }, { "epoch": 1.97, "learning_rate": 2.898940899402683e-07, "loss": 0.0, "step": 4002 }, { "epoch": 1.97, "logps_train/chosen": -75.08562469482422, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -306.54547119140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.189592719078064, "rewards_train/margins": 17.152848839759827, "rewards_train/rejected": -18.34244155883789, "step": 4002 }, { "epoch": 1.97, "learning_rate": 2.896478559573825e-07, "loss": 0.0001, "step": 4003 }, { "epoch": 1.97, "logps_train/chosen": -74.92103576660156, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -324.7446594238281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8826801776885986, "rewards_train/margins": 18.561318159103394, "rewards_train/rejected": -19.443998336791992, "step": 4003 }, { "epoch": 1.97, "learning_rate": 2.8940168394565744e-07, "loss": 0.0, "step": 4004 }, { "epoch": 1.97, "logps_train/chosen": -72.17788696289062, "logps_train/ref_chosen": -61.09375, "logps_train/ref_rejected": -121.375, "logps_train/rejected": -301.47479248046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1081453561782837, "rewards_train/margins": 16.903300642967224, "rewards_train/rejected": -18.011445999145508, "step": 4004 }, { "epoch": 1.97, "learning_rate": 2.891555739776177e-07, "loss": 0.0, "step": 4005 }, { "epoch": 1.97, "logps_train/chosen": -81.06999969482422, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -313.69110107421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5281919240951538, "rewards_train/margins": 17.024412751197815, "rewards_train/rejected": -18.55260467529297, "step": 4005 }, { "epoch": 1.97, "learning_rate": 2.8890952612576844e-07, "loss": 0.0, "step": 4006 }, { "epoch": 1.97, "logps_train/chosen": -83.52259826660156, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -324.7078857421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.746790885925293, "rewards_train/margins": 17.992941856384277, "rewards_train/rejected": -19.73973274230957, "step": 4006 }, { "epoch": 1.97, "learning_rate": 2.886635404625973e-07, "loss": 0.0, "step": 4007 }, { "epoch": 1.97, "logps_train/chosen": -76.87842559814453, "logps_train/ref_chosen": -62.65625, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -306.72747802734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.421973705291748, "rewards_train/margins": 16.736422061920166, "rewards_train/rejected": -18.158395767211914, "step": 4007 }, { "epoch": 1.97, "learning_rate": 2.88417617060573e-07, "loss": 0.0001, "step": 4008 }, { "epoch": 1.97, "logps_train/chosen": -78.4547119140625, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -331.0090026855469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.219543218612671, "rewards_train/margins": 19.09048867225647, "rewards_train/rejected": -20.31003189086914, "step": 4008 }, { "epoch": 1.97, "learning_rate": 2.881717559921465e-07, "loss": 0.0, "step": 4009 }, { "epoch": 1.97, "logps_train/chosen": -73.90076446533203, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -316.77044677734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0366343259811401, "rewards_train/margins": 17.966388821601868, "rewards_train/rejected": -19.003023147583008, "step": 4009 }, { "epoch": 1.97, "learning_rate": 2.879259573297501e-07, "loss": 0.0, "step": 4010 }, { "epoch": 1.97, "logps_train/chosen": -77.8882827758789, "logps_train/ref_chosen": -62.0625, "logps_train/ref_rejected": -117.25, "logps_train/rejected": -300.94390869140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5828710794448853, "rewards_train/margins": 16.783393263816833, "rewards_train/rejected": -18.36626434326172, "step": 4010 }, { "epoch": 1.97, "learning_rate": 2.8768022114579756e-07, "loss": 0.0001, "step": 4011 }, { "epoch": 1.97, "logps_train/chosen": -77.52682495117188, "logps_train/ref_chosen": -61.53125, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -313.09002685546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5988736152648926, "rewards_train/margins": 17.35661268234253, "rewards_train/rejected": -18.955486297607422, "step": 4011 }, { "epoch": 1.98, "learning_rate": 2.874345475126847e-07, "loss": 0.0, "step": 4012 }, { "epoch": 1.98, "logps_train/chosen": -75.628662109375, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -306.2607116699219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1356698274612427, "rewards_train/margins": 17.254073977470398, "rewards_train/rejected": -18.38974380493164, "step": 4012 }, { "epoch": 1.98, "learning_rate": 2.8718893650278846e-07, "loss": 0.0, "step": 4013 }, { "epoch": 1.98, "logps_train/chosen": -77.98921203613281, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -121.0625, "logps_train/rejected": -304.0535888671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3610787391662598, "rewards_train/margins": 16.937249660491943, "rewards_train/rejected": -18.298328399658203, "step": 4013 }, { "epoch": 1.98, "learning_rate": 2.869433881884674e-07, "loss": 0.0, "step": 4014 }, { "epoch": 1.98, "logps_train/chosen": -79.4565658569336, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -320.7977600097656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3396015167236328, "rewards_train/margins": 17.957849502563477, "rewards_train/rejected": -19.29745101928711, "step": 4014 }, { "epoch": 1.98, "learning_rate": 2.8669790264206194e-07, "loss": 0.0, "step": 4015 }, { "epoch": 1.98, "logps_train/chosen": -82.40779876708984, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -317.4440612792969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7563557624816895, "rewards_train/margins": 17.493030071258545, "rewards_train/rejected": -19.249385833740234, "step": 4015 }, { "epoch": 1.98, "learning_rate": 2.864524799358937e-07, "loss": 0.0, "step": 4016 }, { "epoch": 1.98, "logps_train/chosen": -75.29351806640625, "logps_train/ref_chosen": -62.21875, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -312.8111877441406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3069887161254883, "rewards_train/margins": 17.87657070159912, "rewards_train/rejected": -19.18355941772461, "step": 4016 }, { "epoch": 1.98, "learning_rate": 2.862071201422659e-07, "loss": 0.0, "step": 4017 }, { "epoch": 1.98, "logps_train/chosen": -80.25413513183594, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -328.0762939453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.532591700553894, "rewards_train/margins": 18.289780735969543, "rewards_train/rejected": -19.822372436523438, "step": 4017 }, { "epoch": 1.98, "learning_rate": 2.8596182333346284e-07, "loss": 0.0, "step": 4018 }, { "epoch": 1.98, "logps_train/chosen": -76.62147521972656, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -311.787109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2659560441970825, "rewards_train/margins": 17.431506037712097, "rewards_train/rejected": -18.69746208190918, "step": 4018 }, { "epoch": 1.98, "learning_rate": 2.8571658958175124e-07, "loss": 0.0, "step": 4019 }, { "epoch": 1.98, "logps_train/chosen": -78.25480651855469, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -314.0981750488281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3970626592636108, "rewards_train/margins": 17.475988030433655, "rewards_train/rejected": -18.873050689697266, "step": 4019 }, { "epoch": 1.98, "learning_rate": 2.8547141895937814e-07, "loss": 0.0, "step": 4020 }, { "epoch": 1.98, "logps_train/chosen": -67.0676498413086, "logps_train/ref_chosen": -59.8125, "logps_train/ref_rejected": -122.8125, "logps_train/rejected": -305.48284912109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7261493802070618, "rewards_train/margins": 17.541276156902313, "rewards_train/rejected": -18.267425537109375, "step": 4020 }, { "epoch": 1.98, "learning_rate": 2.852263115385725e-07, "loss": 0.0, "step": 4021 }, { "epoch": 1.98, "logps_train/chosen": -82.04913330078125, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -315.6722412109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.49534273147583, "rewards_train/margins": 17.261680126190186, "rewards_train/rejected": -18.757022857666016, "step": 4021 }, { "epoch": 1.98, "learning_rate": 2.8498126739154505e-07, "loss": 0.0001, "step": 4022 }, { "epoch": 1.98, "logps_train/chosen": -78.50860595703125, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -314.0643310546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.51240873336792, "rewards_train/margins": 17.503302097320557, "rewards_train/rejected": -19.015710830688477, "step": 4022 }, { "epoch": 1.98, "learning_rate": 2.8473628659048675e-07, "loss": 0.0, "step": 4023 }, { "epoch": 1.98, "logps_train/chosen": -77.32035827636719, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -305.71099853515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3891161680221558, "rewards_train/margins": 16.88496220111847, "rewards_train/rejected": -18.274078369140625, "step": 4023 }, { "epoch": 1.98, "learning_rate": 2.84491369207571e-07, "loss": 0.0, "step": 4024 }, { "epoch": 1.98, "logps_train/chosen": -77.61348724365234, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -321.1184387207031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2697718143463135, "rewards_train/margins": 18.13787055015564, "rewards_train/rejected": -19.407642364501953, "step": 4024 }, { "epoch": 1.98, "learning_rate": 2.8424651531495245e-07, "loss": 0.0, "step": 4025 }, { "epoch": 1.98, "logps_train/chosen": -78.08329010009766, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -327.3293151855469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3215614557266235, "rewards_train/margins": 18.512247443199158, "rewards_train/rejected": -19.83380889892578, "step": 4025 }, { "epoch": 1.98, "learning_rate": 2.840017249847659e-07, "loss": 0.0, "step": 4026 }, { "epoch": 1.98, "logps_train/chosen": -71.81593322753906, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -325.47589111328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8800313472747803, "rewards_train/margins": 19.048956155776978, "rewards_train/rejected": -19.928987503051758, "step": 4026 }, { "epoch": 1.98, "learning_rate": 2.8375699828912894e-07, "loss": 0.0, "step": 4027 }, { "epoch": 1.98, "logps_train/chosen": -74.55984497070312, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -309.1805419921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1643584966659546, "rewards_train/margins": 17.504523873329163, "rewards_train/rejected": -18.668882369995117, "step": 4027 }, { "epoch": 1.98, "learning_rate": 2.8351233530013943e-07, "loss": 0.0, "step": 4028 }, { "epoch": 1.98, "logps_train/chosen": -79.28437805175781, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -311.8392639160156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5483598709106445, "rewards_train/margins": 17.19254970550537, "rewards_train/rejected": -18.740909576416016, "step": 4028 }, { "epoch": 1.98, "learning_rate": 2.832677360898768e-07, "loss": 0.0001, "step": 4029 }, { "epoch": 1.98, "logps_train/chosen": -77.30766296386719, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -131.625, "logps_train/rejected": -322.3754577636719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.097464680671692, "rewards_train/margins": 17.975043177604675, "rewards_train/rejected": -19.072507858276367, "step": 4029 }, { "epoch": 1.98, "learning_rate": 2.8302320073040124e-07, "loss": 0.0, "step": 4030 }, { "epoch": 1.98, "logps_train/chosen": -75.49826049804688, "logps_train/ref_chosen": -61.5625, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -303.5513000488281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3929409980773926, "rewards_train/margins": 16.7219557762146, "rewards_train/rejected": -18.114896774291992, "step": 4030 }, { "epoch": 1.98, "learning_rate": 2.8277872929375515e-07, "loss": 0.0, "step": 4031 }, { "epoch": 1.98, "logps_train/chosen": -75.4505615234375, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -305.4524841308594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1920777559280396, "rewards_train/margins": 17.150781512260437, "rewards_train/rejected": -18.342859268188477, "step": 4031 }, { "epoch": 1.99, "learning_rate": 2.825343218519611e-07, "loss": 0.0001, "step": 4032 }, { "epoch": 1.99, "logps_train/chosen": -81.9504623413086, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -315.4527282714844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8029568195343018, "rewards_train/margins": 17.197394132614136, "rewards_train/rejected": -19.000350952148438, "step": 4032 }, { "epoch": 1.99, "learning_rate": 2.822899784770231e-07, "loss": 0.0, "step": 4033 }, { "epoch": 1.99, "logps_train/chosen": -79.87002563476562, "logps_train/ref_chosen": -63.15625, "logps_train/ref_rejected": -120.75, "logps_train/rejected": -307.03692626953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6707422733306885, "rewards_train/margins": 16.955119848251343, "rewards_train/rejected": -18.62586212158203, "step": 4033 }, { "epoch": 1.99, "learning_rate": 2.820456992409269e-07, "loss": 0.0001, "step": 4034 }, { "epoch": 1.99, "logps_train/chosen": -79.75808715820312, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -322.1361083984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5613559484481812, "rewards_train/margins": 18.04385530948639, "rewards_train/rejected": -19.60521125793457, "step": 4034 }, { "epoch": 1.99, "learning_rate": 2.81801484215638e-07, "loss": 0.0001, "step": 4035 }, { "epoch": 1.99, "logps_train/chosen": -73.73446655273438, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -307.3570556640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8898041248321533, "rewards_train/margins": 17.44292140007019, "rewards_train/rejected": -18.332725524902344, "step": 4035 }, { "epoch": 1.99, "learning_rate": 2.8155733347310417e-07, "loss": 0.0, "step": 4036 }, { "epoch": 1.99, "logps_train/chosen": -73.93620300292969, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -315.46923828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0360033512115479, "rewards_train/margins": 18.09207320213318, "rewards_train/rejected": -19.128076553344727, "step": 4036 }, { "epoch": 1.99, "learning_rate": 2.813132470852543e-07, "loss": 0.0, "step": 4037 }, { "epoch": 1.99, "logps_train/chosen": -79.44896697998047, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -316.601806640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3639150857925415, "rewards_train/margins": 17.74489915370941, "rewards_train/rejected": -19.108814239501953, "step": 4037 }, { "epoch": 1.99, "learning_rate": 2.810692251239971e-07, "loss": 0.0, "step": 4038 }, { "epoch": 1.99, "logps_train/chosen": -81.5491714477539, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -315.10198974609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6338720321655273, "rewards_train/margins": 17.498204231262207, "rewards_train/rejected": -19.132076263427734, "step": 4038 }, { "epoch": 1.99, "learning_rate": 2.808252676612237e-07, "loss": 0.0001, "step": 4039 }, { "epoch": 1.99, "logps_train/chosen": -72.9754638671875, "logps_train/ref_chosen": -62.59375, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -313.63714599609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0387574434280396, "rewards_train/margins": 17.909480929374695, "rewards_train/rejected": -18.948238372802734, "step": 4039 }, { "epoch": 1.99, "learning_rate": 2.805813747688053e-07, "loss": 0.0, "step": 4040 }, { "epoch": 1.99, "logps_train/chosen": -77.12754821777344, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -316.4720458984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3798439502716064, "rewards_train/margins": 18.066433668136597, "rewards_train/rejected": -19.446277618408203, "step": 4040 }, { "epoch": 1.99, "learning_rate": 2.8033754651859436e-07, "loss": 0.0, "step": 4041 }, { "epoch": 1.99, "logps_train/chosen": -81.70650482177734, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -308.4986877441406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7914998531341553, "rewards_train/margins": 16.72174859046936, "rewards_train/rejected": -18.513248443603516, "step": 4041 }, { "epoch": 1.99, "learning_rate": 2.800937829824246e-07, "loss": 0.0, "step": 4042 }, { "epoch": 1.99, "logps_train/chosen": -78.96627044677734, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -321.90087890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3713345527648926, "rewards_train/margins": 18.24448537826538, "rewards_train/rejected": -19.615819931030273, "step": 4042 }, { "epoch": 1.99, "learning_rate": 2.798500842321103e-07, "loss": 0.0002, "step": 4043 }, { "epoch": 1.99, "logps_train/chosen": -77.49345397949219, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -318.64654541015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2680950164794922, "rewards_train/margins": 18.154766082763672, "rewards_train/rejected": -19.422861099243164, "step": 4043 }, { "epoch": 1.99, "learning_rate": 2.7960645033944664e-07, "loss": 0.0, "step": 4044 }, { "epoch": 1.99, "logps_train/chosen": -82.47725677490234, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -324.4306640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6397664546966553, "rewards_train/margins": 17.878008127212524, "rewards_train/rejected": -19.51777458190918, "step": 4044 }, { "epoch": 1.99, "learning_rate": 2.7936288137620974e-07, "loss": 0.0, "step": 4045 }, { "epoch": 1.99, "logps_train/chosen": -74.2255630493164, "logps_train/ref_chosen": -62.34375, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -315.2249755859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.188718557357788, "rewards_train/margins": 18.030163049697876, "rewards_train/rejected": -19.218881607055664, "step": 4045 }, { "epoch": 1.99, "learning_rate": 2.791193774141569e-07, "loss": 0.0002, "step": 4046 }, { "epoch": 1.99, "logps_train/chosen": -80.16448974609375, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -327.37353515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5577573776245117, "rewards_train/margins": 18.272419929504395, "rewards_train/rejected": -19.830177307128906, "step": 4046 }, { "epoch": 1.99, "learning_rate": 2.78875938525026e-07, "loss": 0.0, "step": 4047 }, { "epoch": 1.99, "logps_train/chosen": -77.20087432861328, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -307.2887878417969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.227412223815918, "rewards_train/margins": 16.860451698303223, "rewards_train/rejected": -18.08786392211914, "step": 4047 }, { "epoch": 1.99, "learning_rate": 2.7863256478053555e-07, "loss": 0.0001, "step": 4048 }, { "epoch": 1.99, "logps_train/chosen": -81.76316833496094, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -323.75238037109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6739730834960938, "rewards_train/margins": 17.839839935302734, "rewards_train/rejected": -19.513813018798828, "step": 4048 }, { "epoch": 1.99, "learning_rate": 2.7838925625238537e-07, "loss": 0.0, "step": 4049 }, { "epoch": 1.99, "logps_train/chosen": -86.71458435058594, "logps_train/ref_chosen": -68.0625, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -325.6942138671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.867332935333252, "rewards_train/margins": 17.84193181991577, "rewards_train/rejected": -19.709264755249023, "step": 4049 }, { "epoch": 1.99, "learning_rate": 2.781460130122557e-07, "loss": 0.0, "step": 4050 }, { "epoch": 1.99, "logps_train/chosen": -79.36630249023438, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -311.6898498535156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2949559688568115, "rewards_train/margins": 17.340582132339478, "rewards_train/rejected": -18.63553810119629, "step": 4050 }, { "epoch": 1.99, "learning_rate": 2.7790283513180736e-07, "loss": 0.0, "step": 4051 }, { "epoch": 1.99, "logps_train/chosen": -79.01826477050781, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -117.1875, "logps_train/rejected": -290.23077392578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5828320980072021, "rewards_train/margins": 15.720764398574829, "rewards_train/rejected": -17.30359649658203, "step": 4051 }, { "epoch": 2.0, "learning_rate": 2.7765972268268273e-07, "loss": 0.0, "step": 4052 }, { "epoch": 2.0, "logps_train/chosen": -84.55000305175781, "logps_train/ref_chosen": -68.5625, "logps_train/ref_rejected": -132.375, "logps_train/rejected": -335.137451171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5994830131530762, "rewards_train/margins": 18.680372714996338, "rewards_train/rejected": -20.279855728149414, "step": 4052 }, { "epoch": 2.0, "learning_rate": 2.7741667573650406e-07, "loss": 0.0, "step": 4053 }, { "epoch": 2.0, "logps_train/chosen": -78.8614730834961, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -325.3182373046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5137357711791992, "rewards_train/margins": 18.432982444763184, "rewards_train/rejected": -19.946718215942383, "step": 4053 }, { "epoch": 2.0, "learning_rate": 2.7717369436487463e-07, "loss": 0.0001, "step": 4054 }, { "epoch": 2.0, "logps_train/chosen": -71.35215759277344, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -312.19598388671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8594838380813599, "rewards_train/margins": 17.859430193901062, "rewards_train/rejected": -18.718914031982422, "step": 4054 }, { "epoch": 2.0, "learning_rate": 2.769307786393785e-07, "loss": 0.0, "step": 4055 }, { "epoch": 2.0, "logps_train/chosen": -78.65130615234375, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -311.63543701171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5902771949768066, "rewards_train/margins": 17.299344539642334, "rewards_train/rejected": -18.88962173461914, "step": 4055 }, { "epoch": 2.0, "learning_rate": 2.766879286315799e-07, "loss": 0.0, "step": 4056 }, { "epoch": 2.0, "logps_train/chosen": -76.75469207763672, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -317.545166015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1158500909805298, "rewards_train/margins": 17.734663367271423, "rewards_train/rejected": -18.850513458251953, "step": 4056 }, { "epoch": 2.0, "learning_rate": 2.764451444130246e-07, "loss": 0.0, "step": 4057 }, { "epoch": 2.0, "logps_train/chosen": -74.2298355102539, "logps_train/ref_chosen": -62.5625, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -330.7829895019531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.165122389793396, "rewards_train/margins": 18.860005021095276, "rewards_train/rejected": -20.025127410888672, "step": 4057 }, { "epoch": 2.0, "learning_rate": 2.762024260552383e-07, "loss": 0.0, "step": 4058 }, { "epoch": 2.0, "logps_train/chosen": -76.7056884765625, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -321.23126220703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.182018518447876, "rewards_train/margins": 18.062204122543335, "rewards_train/rejected": -19.24422264099121, "step": 4058 }, { "epoch": 2.0, "learning_rate": 2.7595977362972745e-07, "loss": 0.0001, "step": 4059 }, { "epoch": 2.0, "logps_train/chosen": -80.19671630859375, "logps_train/ref_chosen": -66.9375, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -314.35467529296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3283137083053589, "rewards_train/margins": 17.466673016548157, "rewards_train/rejected": -18.794986724853516, "step": 4059 }, { "epoch": 2.0, "learning_rate": 2.7571718720797877e-07, "loss": 0.0, "step": 4060 }, { "epoch": 2.0, "logps_train/chosen": -81.63119506835938, "logps_train/ref_chosen": -68.0625, "logps_train/ref_rejected": -132.25, "logps_train/rejected": -330.41571044921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3595058917999268, "rewards_train/margins": 18.459065198898315, "rewards_train/rejected": -19.818571090698242, "step": 4060 }, { "epoch": 2.0, "learning_rate": 2.7547466686146036e-07, "loss": 0.0, "step": 4061 }, { "epoch": 2.0, "logps_train/chosen": -76.68798828125, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -316.31707763671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1848630905151367, "rewards_train/margins": 17.855534553527832, "rewards_train/rejected": -19.04039764404297, "step": 4061 }, { "epoch": 2.0, "learning_rate": 2.752322126616202e-07, "loss": 0.0, "step": 4062 }, { "epoch": 2.0, "logps_train/chosen": -77.11100769042969, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -317.73345947265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3152999877929688, "rewards_train/margins": 17.897300720214844, "rewards_train/rejected": -19.212600708007812, "step": 4062 }, { "epoch": 2.0, "learning_rate": 2.749898246798866e-07, "loss": 0.0, "step": 4063 }, { "epoch": 2.0, "logps_train/chosen": -75.8868408203125, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -312.076416015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.314368486404419, "rewards_train/margins": 17.232969999313354, "rewards_train/rejected": -18.547338485717773, "step": 4063 }, { "epoch": 2.0, "learning_rate": 2.747475029876692e-07, "loss": 0.0001, "step": 4064 }, { "epoch": 2.0, "logps_train/chosen": -79.31533813476562, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -318.2783203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5856846570968628, "rewards_train/margins": 17.636876225471497, "rewards_train/rejected": -19.22256088256836, "step": 4064 }, { "epoch": 2.0, "learning_rate": 2.7450524765635737e-07, "loss": 0.0, "step": 4065 }, { "epoch": 2.0, "logps_train/chosen": -79.52823638916016, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -324.45538330078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4343668222427368, "rewards_train/margins": 17.942716002464294, "rewards_train/rejected": -19.37708282470703, "step": 4065 }, { "epoch": 2.0, "learning_rate": 2.7426305875732093e-07, "loss": 0.0, "step": 4066 }, { "epoch": 2.0, "logps_train/chosen": -72.5888442993164, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -121.8125, "logps_train/rejected": -308.406494140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9401346445083618, "rewards_train/margins": 17.721314787864685, "rewards_train/rejected": -18.661449432373047, "step": 4066 }, { "epoch": 2.0, "learning_rate": 2.7402093636191084e-07, "loss": 0.0, "step": 4067 }, { "epoch": 2.0, "logps_train/chosen": -77.42454528808594, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -311.351806640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4209704399108887, "rewards_train/margins": 17.351173877716064, "rewards_train/rejected": -18.772144317626953, "step": 4067 }, { "epoch": 2.0, "learning_rate": 2.737788805414577e-07, "loss": 0.0, "step": 4068 }, { "epoch": 2.0, "logps_train/chosen": -80.96177673339844, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -324.5726318359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4786968231201172, "rewards_train/margins": 18.102102279663086, "rewards_train/rejected": -19.580799102783203, "step": 4068 }, { "epoch": 2.0, "learning_rate": 2.7353689136727285e-07, "loss": 0.0, "step": 4069 }, { "epoch": 2.0, "logps_train/chosen": -78.49154663085938, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -323.76287841796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4100438356399536, "rewards_train/margins": 18.32845103740692, "rewards_train/rejected": -19.738494873046875, "step": 4069 }, { "epoch": 2.0, "learning_rate": 2.7329496891064777e-07, "loss": 0.0, "step": 4070 }, { "epoch": 2.0, "logps_train/chosen": -77.5113525390625, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -313.73077392578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4047484397888184, "rewards_train/margins": 17.6007981300354, "rewards_train/rejected": -19.00554656982422, "step": 4070 }, { "epoch": 2.0, "learning_rate": 2.7305311324285503e-07, "loss": 0.0, "step": 4071 }, { "epoch": 2.0, "logps_train/chosen": -80.45388793945312, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -326.59326171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.745389461517334, "rewards_train/margins": 18.27101755142212, "rewards_train/rejected": -20.016407012939453, "step": 4071 }, { "epoch": 2.0, "learning_rate": 2.7281132443514603e-07, "loss": 0.0, "step": 4072 }, { "epoch": 2.0, "logps_train/chosen": -80.8139419555664, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -325.0950012207031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.608102798461914, "rewards_train/margins": 17.98703956604004, "rewards_train/rejected": -19.595142364501953, "step": 4072 }, { "epoch": 2.01, "learning_rate": 2.7256960255875396e-07, "loss": 0.0, "step": 4073 }, { "epoch": 2.01, "logps_train/chosen": -81.99911499023438, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -134.625, "logps_train/rejected": -336.9673156738281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4852137565612793, "rewards_train/margins": 18.75175142288208, "rewards_train/rejected": -20.23696517944336, "step": 4073 }, { "epoch": 2.01, "learning_rate": 2.7232794768489186e-07, "loss": 0.0, "step": 4074 }, { "epoch": 2.01, "logps_train/chosen": -78.48457336425781, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -320.3896484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4359571933746338, "rewards_train/margins": 17.597244024276733, "rewards_train/rejected": -19.033201217651367, "step": 4074 }, { "epoch": 2.01, "learning_rate": 2.7208635988475235e-07, "loss": 0.0001, "step": 4075 }, { "epoch": 2.01, "logps_train/chosen": -77.82166290283203, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -314.7608642578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3944952487945557, "rewards_train/margins": 17.558154821395874, "rewards_train/rejected": -18.95265007019043, "step": 4075 }, { "epoch": 2.01, "learning_rate": 2.7184483922950936e-07, "loss": 0.0, "step": 4076 }, { "epoch": 2.01, "logps_train/chosen": -82.28944396972656, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -309.2583312988281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6207900047302246, "rewards_train/margins": 16.930628299713135, "rewards_train/rejected": -18.55141830444336, "step": 4076 }, { "epoch": 2.01, "learning_rate": 2.716033857903162e-07, "loss": 0.0, "step": 4077 }, { "epoch": 2.01, "logps_train/chosen": -78.99813079833984, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -317.72564697265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.337850570678711, "rewards_train/margins": 17.57451057434082, "rewards_train/rejected": -18.91236114501953, "step": 4077 }, { "epoch": 2.01, "learning_rate": 2.713619996383066e-07, "loss": 0.0, "step": 4078 }, { "epoch": 2.01, "logps_train/chosen": -79.27662658691406, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -130.375, "logps_train/rejected": -321.552978515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2699474096298218, "rewards_train/margins": 17.845409035682678, "rewards_train/rejected": -19.1153564453125, "step": 4078 }, { "epoch": 2.01, "learning_rate": 2.711206808445949e-07, "loss": 0.0, "step": 4079 }, { "epoch": 2.01, "logps_train/chosen": -73.72537231445312, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -303.23748779296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9576447010040283, "rewards_train/margins": 17.224060773849487, "rewards_train/rejected": -18.181705474853516, "step": 4079 }, { "epoch": 2.01, "learning_rate": 2.7087942948027503e-07, "loss": 0.0, "step": 4080 }, { "epoch": 2.01, "logps_train/chosen": -70.79524230957031, "logps_train/ref_chosen": -60.5625, "logps_train/ref_rejected": -119.1875, "logps_train/rejected": -300.55572509765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0234448909759521, "rewards_train/margins": 17.110740900039673, "rewards_train/rejected": -18.134185791015625, "step": 4080 }, { "epoch": 2.01, "learning_rate": 2.7063824561642127e-07, "loss": 0.0, "step": 4081 }, { "epoch": 2.01, "logps_train/chosen": -91.73683166503906, "logps_train/ref_chosen": -68.5, "logps_train/ref_rejected": -133.0, "logps_train/rejected": -332.63006591796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.325098991394043, "rewards_train/margins": 17.63722324371338, "rewards_train/rejected": -19.962322235107422, "step": 4081 }, { "epoch": 2.01, "learning_rate": 2.703971293240879e-07, "loss": 0.0, "step": 4082 }, { "epoch": 2.01, "logps_train/chosen": -82.76478576660156, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -327.3893737792969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7520155906677246, "rewards_train/margins": 17.968563556671143, "rewards_train/rejected": -19.720579147338867, "step": 4082 }, { "epoch": 2.01, "learning_rate": 2.7015608067430964e-07, "loss": 0.0, "step": 4083 }, { "epoch": 2.01, "logps_train/chosen": -75.068603515625, "logps_train/ref_chosen": -62.65625, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -306.22296142578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2416259050369263, "rewards_train/margins": 17.013192057609558, "rewards_train/rejected": -18.254817962646484, "step": 4083 }, { "epoch": 2.01, "learning_rate": 2.69915099738101e-07, "loss": 0.0, "step": 4084 }, { "epoch": 2.01, "logps_train/chosen": -74.07455444335938, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -310.2655029296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0615811347961426, "rewards_train/margins": 17.573562145233154, "rewards_train/rejected": -18.635143280029297, "step": 4084 }, { "epoch": 2.01, "learning_rate": 2.6967418658645633e-07, "loss": 0.0, "step": 4085 }, { "epoch": 2.01, "logps_train/chosen": -77.68359375, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -316.4339599609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3237794637680054, "rewards_train/margins": 17.902477860450745, "rewards_train/rejected": -19.22625732421875, "step": 4085 }, { "epoch": 2.01, "learning_rate": 2.6943334129035087e-07, "loss": 0.0, "step": 4086 }, { "epoch": 2.01, "logps_train/chosen": -74.94511413574219, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -123.1875, "logps_train/rejected": -312.2886962890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2008593082427979, "rewards_train/margins": 17.710090398788452, "rewards_train/rejected": -18.91094970703125, "step": 4086 }, { "epoch": 2.01, "learning_rate": 2.691925639207385e-07, "loss": 0.0001, "step": 4087 }, { "epoch": 2.01, "logps_train/chosen": -77.48872375488281, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -313.2880859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2844195365905762, "rewards_train/margins": 17.81758165359497, "rewards_train/rejected": -19.102001190185547, "step": 4087 }, { "epoch": 2.01, "learning_rate": 2.689518545485542e-07, "loss": 0.0, "step": 4088 }, { "epoch": 2.01, "logps_train/chosen": -79.13865661621094, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -319.2385559082031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.345799446105957, "rewards_train/margins": 18.001980781555176, "rewards_train/rejected": -19.347780227661133, "step": 4088 }, { "epoch": 2.01, "learning_rate": 2.6871121324471304e-07, "loss": 0.0001, "step": 4089 }, { "epoch": 2.01, "logps_train/chosen": -75.3631820678711, "logps_train/ref_chosen": -62.46875, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -315.11962890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2909083366394043, "rewards_train/margins": 17.65669870376587, "rewards_train/rejected": -18.947607040405273, "step": 4089 }, { "epoch": 2.01, "learning_rate": 2.684706400801087e-07, "loss": 0.0, "step": 4090 }, { "epoch": 2.01, "logps_train/chosen": -76.47058868408203, "logps_train/ref_chosen": -62.0625, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -314.9188232421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.439393162727356, "rewards_train/margins": 17.591551661491394, "rewards_train/rejected": -19.03094482421875, "step": 4090 }, { "epoch": 2.01, "learning_rate": 2.682301351256163e-07, "loss": 0.0, "step": 4091 }, { "epoch": 2.01, "logps_train/chosen": -76.74067687988281, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -318.0322265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.118159532546997, "rewards_train/margins": 18.228031873703003, "rewards_train/rejected": -19.34619140625, "step": 4091 }, { "epoch": 2.01, "learning_rate": 2.6798969845209e-07, "loss": 0.0, "step": 4092 }, { "epoch": 2.01, "logps_train/chosen": -75.51899719238281, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -308.2176208496094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.187300205230713, "rewards_train/margins": 17.14266347885132, "rewards_train/rejected": -18.32996368408203, "step": 4092 }, { "epoch": 2.02, "learning_rate": 2.6774933013036393e-07, "loss": 0.0002, "step": 4093 }, { "epoch": 2.02, "logps_train/chosen": -80.18396759033203, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -330.09075927734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.509070634841919, "rewards_train/margins": 18.43120837211609, "rewards_train/rejected": -19.940279006958008, "step": 4093 }, { "epoch": 2.02, "learning_rate": 2.6750903023125256e-07, "loss": 0.0, "step": 4094 }, { "epoch": 2.02, "logps_train/chosen": -79.3045654296875, "logps_train/ref_chosen": -63.15625, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -325.5199279785156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6153688430786133, "rewards_train/margins": 18.313969612121582, "rewards_train/rejected": -19.929338455200195, "step": 4094 }, { "epoch": 2.02, "learning_rate": 2.6726879882554967e-07, "loss": 0.0, "step": 4095 }, { "epoch": 2.02, "logps_train/chosen": -76.33983612060547, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -313.04693603515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1709465980529785, "rewards_train/margins": 17.8211989402771, "rewards_train/rejected": -18.992145538330078, "step": 4095 }, { "epoch": 2.02, "learning_rate": 2.6702863598402904e-07, "loss": 0.0, "step": 4096 }, { "epoch": 2.02, "logps_train/chosen": -78.935302734375, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -325.3426818847656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3744139671325684, "rewards_train/margins": 18.49188470840454, "rewards_train/rejected": -19.86629867553711, "step": 4096 }, { "epoch": 2.02, "learning_rate": 2.6678854177744413e-07, "loss": 0.0, "step": 4097 }, { "epoch": 2.02, "logps_train/chosen": -80.53630065917969, "logps_train/ref_chosen": -67.6875, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -320.5830078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2823901176452637, "rewards_train/margins": 17.789095401763916, "rewards_train/rejected": -19.07148551940918, "step": 4097 }, { "epoch": 2.02, "learning_rate": 2.6654851627652865e-07, "loss": 0.0, "step": 4098 }, { "epoch": 2.02, "logps_train/chosen": -77.14102172851562, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -312.6358642578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3261628150939941, "rewards_train/margins": 17.48996114730835, "rewards_train/rejected": -18.816123962402344, "step": 4098 }, { "epoch": 2.02, "learning_rate": 2.663085595519956e-07, "loss": 0.0, "step": 4099 }, { "epoch": 2.02, "logps_train/chosen": -82.94939422607422, "logps_train/ref_chosen": -68.125, "logps_train/ref_rejected": -133.0, "logps_train/rejected": -336.494873046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4805353879928589, "rewards_train/margins": 18.866220593452454, "rewards_train/rejected": -20.346755981445312, "step": 4099 }, { "epoch": 2.02, "learning_rate": 2.660686716745375e-07, "loss": 0.0, "step": 4100 }, { "epoch": 2.02, "logps_train/chosen": -76.04910278320312, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -315.98211669921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2686314582824707, "rewards_train/margins": 17.546184062957764, "rewards_train/rejected": -18.814815521240234, "step": 4100 }, { "epoch": 2.02, "learning_rate": 2.658288527148275e-07, "loss": 0.0, "step": 4101 }, { "epoch": 2.02, "logps_train/chosen": -71.80258178710938, "logps_train/ref_chosen": -61.09375, "logps_train/ref_rejected": -120.4375, "logps_train/rejected": -301.4271240234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0721521377563477, "rewards_train/margins": 17.027642250061035, "rewards_train/rejected": -18.099794387817383, "step": 4101 }, { "epoch": 2.02, "learning_rate": 2.6558910274351764e-07, "loss": 0.0001, "step": 4102 }, { "epoch": 2.02, "logps_train/chosen": -81.20648956298828, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -316.3897399902344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.558637261390686, "rewards_train/margins": 17.228969931602478, "rewards_train/rejected": -18.787607192993164, "step": 4102 }, { "epoch": 2.02, "learning_rate": 2.6534942183123964e-07, "loss": 0.0001, "step": 4103 }, { "epoch": 2.02, "logps_train/chosen": -80.16558837890625, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -123.3125, "logps_train/rejected": -314.78045654296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.604938268661499, "rewards_train/margins": 17.541271448135376, "rewards_train/rejected": -19.146209716796875, "step": 4103 }, { "epoch": 2.02, "learning_rate": 2.6510981004860547e-07, "loss": 0.0, "step": 4104 }, { "epoch": 2.02, "logps_train/chosen": -76.79047393798828, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -309.9276123046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2693796157836914, "rewards_train/margins": 17.3439359664917, "rewards_train/rejected": -18.61331558227539, "step": 4104 }, { "epoch": 2.02, "learning_rate": 2.6487026746620633e-07, "loss": 0.0, "step": 4105 }, { "epoch": 2.02, "logps_train/chosen": -83.07884216308594, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -131.375, "logps_train/rejected": -329.245849609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6697494983673096, "rewards_train/margins": 18.119778394699097, "rewards_train/rejected": -19.789527893066406, "step": 4105 }, { "epoch": 2.02, "learning_rate": 2.64630794154613e-07, "loss": 0.0, "step": 4106 }, { "epoch": 2.02, "logps_train/chosen": -79.01593017578125, "logps_train/ref_chosen": -66.9375, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -317.9759216308594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2096500396728516, "rewards_train/margins": 17.988576889038086, "rewards_train/rejected": -19.198226928710938, "step": 4106 }, { "epoch": 2.02, "learning_rate": 2.6439139018437585e-07, "loss": 0.0001, "step": 4107 }, { "epoch": 2.02, "logps_train/chosen": -78.05824279785156, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -315.66497802734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4722546339035034, "rewards_train/margins": 17.566510319709778, "rewards_train/rejected": -19.03876495361328, "step": 4107 }, { "epoch": 2.02, "learning_rate": 2.6415205562602504e-07, "loss": 0.0, "step": 4108 }, { "epoch": 2.02, "logps_train/chosen": -89.98902893066406, "logps_train/ref_chosen": -68.5625, "logps_train/ref_rejected": -133.625, "logps_train/rejected": -330.7928466796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.1444101333618164, "rewards_train/margins": 17.570713996887207, "rewards_train/rejected": -19.715124130249023, "step": 4108 }, { "epoch": 2.02, "learning_rate": 2.6391279055006986e-07, "loss": 0.0, "step": 4109 }, { "epoch": 2.02, "logps_train/chosen": -76.83341979980469, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -313.83502197265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3693771362304688, "rewards_train/margins": 17.715984344482422, "rewards_train/rejected": -19.08536148071289, "step": 4109 }, { "epoch": 2.02, "learning_rate": 2.6367359502699986e-07, "loss": 0.0001, "step": 4110 }, { "epoch": 2.02, "logps_train/chosen": -77.8102798461914, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -327.54901123046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1238012313842773, "rewards_train/margins": 18.851414680480957, "rewards_train/rejected": -19.975215911865234, "step": 4110 }, { "epoch": 2.02, "learning_rate": 2.6343446912728346e-07, "loss": 0.0, "step": 4111 }, { "epoch": 2.02, "logps_train/chosen": -81.21195220947266, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -325.8689270019531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5430700778961182, "rewards_train/margins": 18.135717153549194, "rewards_train/rejected": -19.678787231445312, "step": 4111 }, { "epoch": 2.02, "learning_rate": 2.631954129213685e-07, "loss": 0.0, "step": 4112 }, { "epoch": 2.02, "logps_train/chosen": -84.17678833007812, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -323.21258544921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7414580583572388, "rewards_train/margins": 17.933316111564636, "rewards_train/rejected": -19.674774169921875, "step": 4112 }, { "epoch": 2.03, "learning_rate": 2.6295642647968304e-07, "loss": 0.0, "step": 4113 }, { "epoch": 2.03, "logps_train/chosen": -79.03653717041016, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -310.89984130859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6083412170410156, "rewards_train/margins": 17.014358520507812, "rewards_train/rejected": -18.622699737548828, "step": 4113 }, { "epoch": 2.03, "learning_rate": 2.627175098726338e-07, "loss": 0.0, "step": 4114 }, { "epoch": 2.03, "logps_train/chosen": -85.75045776367188, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -327.17254638671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9210420846939087, "rewards_train/margins": 18.08166253566742, "rewards_train/rejected": -20.002704620361328, "step": 4114 }, { "epoch": 2.03, "learning_rate": 2.6247866317060707e-07, "loss": 0.0, "step": 4115 }, { "epoch": 2.03, "logps_train/chosen": -78.05812072753906, "logps_train/ref_chosen": -67.0625, "logps_train/ref_rejected": -132.875, "logps_train/rejected": -327.76812744140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0989272594451904, "rewards_train/margins": 18.39214253425598, "rewards_train/rejected": -19.491069793701172, "step": 4115 }, { "epoch": 2.03, "learning_rate": 2.6223988644396915e-07, "loss": 0.0001, "step": 4116 }, { "epoch": 2.03, "logps_train/chosen": -71.34471893310547, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -310.4307556152344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7833003401756287, "rewards_train/margins": 17.894979536533356, "rewards_train/rejected": -18.678279876708984, "step": 4116 }, { "epoch": 2.03, "learning_rate": 2.62001179763065e-07, "loss": 0.0, "step": 4117 }, { "epoch": 2.03, "logps_train/chosen": -74.95133972167969, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -317.6156005859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1894700527191162, "rewards_train/margins": 17.928926706314087, "rewards_train/rejected": -19.118396759033203, "step": 4117 }, { "epoch": 2.03, "learning_rate": 2.617625431982193e-07, "loss": 0.0, "step": 4118 }, { "epoch": 2.03, "logps_train/chosen": -75.57073211669922, "logps_train/ref_chosen": -60.1875, "logps_train/ref_rejected": -120.625, "logps_train/rejected": -312.1388854980469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5395925045013428, "rewards_train/margins": 17.61399483680725, "rewards_train/rejected": -19.153587341308594, "step": 4118 }, { "epoch": 2.03, "learning_rate": 2.615239768197357e-07, "loss": 0.0, "step": 4119 }, { "epoch": 2.03, "logps_train/chosen": -80.83987426757812, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -311.925537109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6813514232635498, "rewards_train/margins": 17.21945309638977, "rewards_train/rejected": -18.90080451965332, "step": 4119 }, { "epoch": 2.03, "learning_rate": 2.6128548069789797e-07, "loss": 0.0003, "step": 4120 }, { "epoch": 2.03, "logps_train/chosen": -76.58877563476562, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -315.52740478515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2895169258117676, "rewards_train/margins": 17.847400188446045, "rewards_train/rejected": -19.136917114257812, "step": 4120 }, { "epoch": 2.03, "learning_rate": 2.6104705490296834e-07, "loss": 0.0, "step": 4121 }, { "epoch": 2.03, "logps_train/chosen": -78.90301513671875, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -315.97979736328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5440609455108643, "rewards_train/margins": 17.452208280563354, "rewards_train/rejected": -18.99626922607422, "step": 4121 }, { "epoch": 2.03, "learning_rate": 2.6080869950518856e-07, "loss": 0.0, "step": 4122 }, { "epoch": 2.03, "logps_train/chosen": -79.68201446533203, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -314.7362060546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4611210823059082, "rewards_train/margins": 17.62040662765503, "rewards_train/rejected": -19.081527709960938, "step": 4122 }, { "epoch": 2.03, "learning_rate": 2.6057041457478033e-07, "loss": 0.0, "step": 4123 }, { "epoch": 2.03, "logps_train/chosen": -75.82492065429688, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -313.9666442871094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2843968868255615, "rewards_train/margins": 17.696789026260376, "rewards_train/rejected": -18.981185913085938, "step": 4123 }, { "epoch": 2.03, "learning_rate": 2.6033220018194314e-07, "loss": 0.0001, "step": 4124 }, { "epoch": 2.03, "logps_train/chosen": -76.65774536132812, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -320.03350830078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1247097253799438, "rewards_train/margins": 17.97419774532318, "rewards_train/rejected": -19.098907470703125, "step": 4124 }, { "epoch": 2.03, "learning_rate": 2.6009405639685706e-07, "loss": 0.0, "step": 4125 }, { "epoch": 2.03, "logps_train/chosen": -76.31443786621094, "logps_train/ref_chosen": -60.875, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -308.86187744140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5454820394515991, "rewards_train/margins": 16.885922074317932, "rewards_train/rejected": -18.43140411376953, "step": 4125 }, { "epoch": 2.03, "learning_rate": 2.598559832896812e-07, "loss": 0.0, "step": 4126 }, { "epoch": 2.03, "logps_train/chosen": -76.87317657470703, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -318.254150390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.130969524383545, "rewards_train/margins": 17.85831117630005, "rewards_train/rejected": -18.989280700683594, "step": 4126 }, { "epoch": 2.03, "learning_rate": 2.596179809305526e-07, "loss": 0.0, "step": 4127 }, { "epoch": 2.03, "logps_train/chosen": -74.18062591552734, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -331.9240417480469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1010701656341553, "rewards_train/margins": 19.167457818984985, "rewards_train/rejected": -20.26852798461914, "step": 4127 }, { "epoch": 2.03, "learning_rate": 2.5938004938958915e-07, "loss": 0.0, "step": 4128 }, { "epoch": 2.03, "logps_train/chosen": -69.62507629394531, "logps_train/ref_chosen": -62.1875, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -313.1275329589844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7426345348358154, "rewards_train/margins": 18.0796902179718, "rewards_train/rejected": -18.822324752807617, "step": 4128 }, { "epoch": 2.03, "learning_rate": 2.5914218873688677e-07, "loss": 0.0, "step": 4129 }, { "epoch": 2.03, "logps_train/chosen": -75.24449157714844, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -121.875, "logps_train/rejected": -305.22088623046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1197131872177124, "rewards_train/margins": 17.216729760169983, "rewards_train/rejected": -18.336442947387695, "step": 4129 }, { "epoch": 2.03, "learning_rate": 2.5890439904252057e-07, "loss": 0.0, "step": 4130 }, { "epoch": 2.03, "logps_train/chosen": -75.43594360351562, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -120.625, "logps_train/rejected": -308.3043518066406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.19132399559021, "rewards_train/margins": 17.574609994888306, "rewards_train/rejected": -18.765933990478516, "step": 4130 }, { "epoch": 2.03, "learning_rate": 2.5866668037654553e-07, "loss": 0.0, "step": 4131 }, { "epoch": 2.03, "logps_train/chosen": -79.45569610595703, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -319.66845703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5973761081695557, "rewards_train/margins": 17.92669653892517, "rewards_train/rejected": -19.524072647094727, "step": 4131 }, { "epoch": 2.03, "learning_rate": 2.584290328089949e-07, "loss": 0.0, "step": 4132 }, { "epoch": 2.03, "logps_train/chosen": -76.5097885131836, "logps_train/ref_chosen": -61.5, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -307.23516845703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4996603727340698, "rewards_train/margins": 16.974245190620422, "rewards_train/rejected": -18.473905563354492, "step": 4132 }, { "epoch": 2.03, "learning_rate": 2.5819145640988125e-07, "loss": 0.0001, "step": 4133 }, { "epoch": 2.03, "logps_train/chosen": -78.82411193847656, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -322.2938537597656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3426165580749512, "rewards_train/margins": 17.92290163040161, "rewards_train/rejected": -19.265518188476562, "step": 4133 }, { "epoch": 2.04, "learning_rate": 2.579539512491961e-07, "loss": 0.0, "step": 4134 }, { "epoch": 2.04, "logps_train/chosen": -80.91500854492188, "logps_train/ref_chosen": -67.875, "logps_train/ref_rejected": -132.25, "logps_train/rejected": -324.707275390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3020474910736084, "rewards_train/margins": 17.94758629798889, "rewards_train/rejected": -19.2496337890625, "step": 4134 }, { "epoch": 2.04, "learning_rate": 2.5771651739691024e-07, "loss": 0.0, "step": 4135 }, { "epoch": 2.04, "logps_train/chosen": -73.76185607910156, "logps_train/ref_chosen": -61.0625, "logps_train/ref_rejected": -120.0625, "logps_train/rejected": -304.189697265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.271277666091919, "rewards_train/margins": 17.141247510910034, "rewards_train/rejected": -18.412525177001953, "step": 4135 }, { "epoch": 2.04, "learning_rate": 2.574791549229733e-07, "loss": 0.0, "step": 4136 }, { "epoch": 2.04, "logps_train/chosen": -83.02157592773438, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -130.625, "logps_train/rejected": -326.7904357910156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8338953256607056, "rewards_train/margins": 17.785088896751404, "rewards_train/rejected": -19.61898422241211, "step": 4136 }, { "epoch": 2.04, "learning_rate": 2.572418638973136e-07, "loss": 0.0, "step": 4137 }, { "epoch": 2.04, "logps_train/chosen": -78.95991516113281, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -312.1134338378906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5482865571975708, "rewards_train/margins": 17.32394587993622, "rewards_train/rejected": -18.87223243713379, "step": 4137 }, { "epoch": 2.04, "learning_rate": 2.570046443898393e-07, "loss": 0.0, "step": 4138 }, { "epoch": 2.04, "logps_train/chosen": -78.20111083984375, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -316.8708801269531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2355897426605225, "rewards_train/margins": 17.746031045913696, "rewards_train/rejected": -18.98162078857422, "step": 4138 }, { "epoch": 2.04, "learning_rate": 2.5676749647043597e-07, "loss": 0.0, "step": 4139 }, { "epoch": 2.04, "logps_train/chosen": -77.81227111816406, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -314.9517822265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4298596382141113, "rewards_train/margins": 17.705358028411865, "rewards_train/rejected": -19.135217666625977, "step": 4139 }, { "epoch": 2.04, "learning_rate": 2.565304202089695e-07, "loss": 0.0, "step": 4140 }, { "epoch": 2.04, "logps_train/chosen": -78.58085632324219, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -315.4478759765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4675582647323608, "rewards_train/margins": 17.45300829410553, "rewards_train/rejected": -18.92056655883789, "step": 4140 }, { "epoch": 2.04, "learning_rate": 2.562934156752845e-07, "loss": 0.0001, "step": 4141 }, { "epoch": 2.04, "logps_train/chosen": -76.79306030273438, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -319.557373046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1987879276275635, "rewards_train/margins": 18.229068517684937, "rewards_train/rejected": -19.4278564453125, "step": 4141 }, { "epoch": 2.04, "learning_rate": 2.560564829392033e-07, "loss": 0.0, "step": 4142 }, { "epoch": 2.04, "logps_train/chosen": -78.08638000488281, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -311.1239013671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3576606512069702, "rewards_train/margins": 17.40199625492096, "rewards_train/rejected": -18.75965690612793, "step": 4142 }, { "epoch": 2.04, "learning_rate": 2.5581962207052854e-07, "loss": 0.0003, "step": 4143 }, { "epoch": 2.04, "logps_train/chosen": -82.61161804199219, "logps_train/ref_chosen": -67.8125, "logps_train/ref_rejected": -133.5, "logps_train/rejected": -338.1253662109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.479374647140503, "rewards_train/margins": 18.98086667060852, "rewards_train/rejected": -20.460241317749023, "step": 4143 }, { "epoch": 2.04, "learning_rate": 2.5558283313904075e-07, "loss": 0.0, "step": 4144 }, { "epoch": 2.04, "logps_train/chosen": -72.55167388916016, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -315.9383544921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9335856437683105, "rewards_train/margins": 18.091402530670166, "rewards_train/rejected": -19.024988174438477, "step": 4144 }, { "epoch": 2.04, "learning_rate": 2.5534611621449935e-07, "loss": 0.0, "step": 4145 }, { "epoch": 2.04, "logps_train/chosen": -83.2904052734375, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -320.0434875488281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7185918092727661, "rewards_train/margins": 17.7352694272995, "rewards_train/rejected": -19.453861236572266, "step": 4145 }, { "epoch": 2.04, "learning_rate": 2.5510947136664324e-07, "loss": 0.0, "step": 4146 }, { "epoch": 2.04, "logps_train/chosen": -73.44528198242188, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -308.1882629394531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0460909605026245, "rewards_train/margins": 17.320390820503235, "rewards_train/rejected": -18.36648178100586, "step": 4146 }, { "epoch": 2.04, "learning_rate": 2.5487289866518936e-07, "loss": 0.0, "step": 4147 }, { "epoch": 2.04, "logps_train/chosen": -76.728271484375, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -302.9820861816406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.217553973197937, "rewards_train/margins": 16.90458118915558, "rewards_train/rejected": -18.122135162353516, "step": 4147 }, { "epoch": 2.04, "learning_rate": 2.5463639817983353e-07, "loss": 0.0, "step": 4148 }, { "epoch": 2.04, "logps_train/chosen": -80.93875122070312, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -312.1435546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.656033992767334, "rewards_train/margins": 17.149824619293213, "rewards_train/rejected": -18.805858612060547, "step": 4148 }, { "epoch": 2.04, "learning_rate": 2.5439996998025026e-07, "loss": 0.0, "step": 4149 }, { "epoch": 2.04, "logps_train/chosen": -80.07064056396484, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -313.44256591796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.529939889907837, "rewards_train/margins": 17.412952184677124, "rewards_train/rejected": -18.94289207458496, "step": 4149 }, { "epoch": 2.04, "learning_rate": 2.541636141360933e-07, "loss": 0.0005, "step": 4150 }, { "epoch": 2.04, "logps_train/chosen": -74.08982849121094, "logps_train/ref_chosen": -62.8125, "logps_train/ref_rejected": -121.9375, "logps_train/rejected": -307.36480712890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.126096487045288, "rewards_train/margins": 17.417173624038696, "rewards_train/rejected": -18.543270111083984, "step": 4150 }, { "epoch": 2.04, "learning_rate": 2.539273307169944e-07, "loss": 0.0, "step": 4151 }, { "epoch": 2.04, "logps_train/chosen": -77.89218139648438, "logps_train/ref_chosen": -62.875, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -312.847900390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5004491806030273, "rewards_train/margins": 17.48253345489502, "rewards_train/rejected": -18.982982635498047, "step": 4151 }, { "epoch": 2.04, "learning_rate": 2.536911197925642e-07, "loss": 0.0, "step": 4152 }, { "epoch": 2.04, "logps_train/chosen": -74.10682678222656, "logps_train/ref_chosen": -62.21875, "logps_train/ref_rejected": -113.375, "logps_train/rejected": -292.04815673828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1877827644348145, "rewards_train/margins": 16.679433345794678, "rewards_train/rejected": -17.867216110229492, "step": 4152 }, { "epoch": 2.04, "learning_rate": 2.534549814323923e-07, "loss": 0.0, "step": 4153 }, { "epoch": 2.04, "logps_train/chosen": -74.4422607421875, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -318.0047302246094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0544805526733398, "rewards_train/margins": 17.90858745574951, "rewards_train/rejected": -18.96306800842285, "step": 4153 }, { "epoch": 2.05, "learning_rate": 2.5321891570604645e-07, "loss": 0.0, "step": 4154 }, { "epoch": 2.05, "logps_train/chosen": -76.44232177734375, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -313.9128723144531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1342713832855225, "rewards_train/margins": 17.783972024917603, "rewards_train/rejected": -18.918243408203125, "step": 4154 }, { "epoch": 2.05, "learning_rate": 2.529829226830733e-07, "loss": 0.0, "step": 4155 }, { "epoch": 2.05, "logps_train/chosen": -75.05941009521484, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -312.58837890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9509114623069763, "rewards_train/margins": 18.001677572727203, "rewards_train/rejected": -18.95258903503418, "step": 4155 }, { "epoch": 2.05, "learning_rate": 2.5274700243299796e-07, "loss": 0.0, "step": 4156 }, { "epoch": 2.05, "logps_train/chosen": -79.3778076171875, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -318.2818908691406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.44085693359375, "rewards_train/margins": 17.7354793548584, "rewards_train/rejected": -19.17633628845215, "step": 4156 }, { "epoch": 2.05, "learning_rate": 2.5251115502532394e-07, "loss": 0.0, "step": 4157 }, { "epoch": 2.05, "logps_train/chosen": -80.0859375, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -313.33294677734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5330071449279785, "rewards_train/margins": 17.656880855560303, "rewards_train/rejected": -19.18988800048828, "step": 4157 }, { "epoch": 2.05, "learning_rate": 2.5227538052953377e-07, "loss": 0.0, "step": 4158 }, { "epoch": 2.05, "logps_train/chosen": -80.35493469238281, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -317.05096435546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7453569173812866, "rewards_train/margins": 17.67087233066559, "rewards_train/rejected": -19.416229248046875, "step": 4158 }, { "epoch": 2.05, "learning_rate": 2.520396790150881e-07, "loss": 0.0, "step": 4159 }, { "epoch": 2.05, "logps_train/chosen": -79.39867401123047, "logps_train/ref_chosen": -67.25, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -325.01214599609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2168693542480469, "rewards_train/margins": 18.254558563232422, "rewards_train/rejected": -19.47142791748047, "step": 4159 }, { "epoch": 2.05, "learning_rate": 2.5180405055142617e-07, "loss": 0.0, "step": 4160 }, { "epoch": 2.05, "logps_train/chosen": -78.02239227294922, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -312.757080078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3871515989303589, "rewards_train/margins": 17.347688794136047, "rewards_train/rejected": -18.734840393066406, "step": 4160 }, { "epoch": 2.05, "learning_rate": 2.515684952079655e-07, "loss": 0.0, "step": 4161 }, { "epoch": 2.05, "logps_train/chosen": -83.95346069335938, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -324.44775390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8640953302383423, "rewards_train/margins": 18.07794415950775, "rewards_train/rejected": -19.942039489746094, "step": 4161 }, { "epoch": 2.05, "learning_rate": 2.513330130541028e-07, "loss": 0.0, "step": 4162 }, { "epoch": 2.05, "logps_train/chosen": -75.65830993652344, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -120.4375, "logps_train/rejected": -303.8639221191406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9980084300041199, "rewards_train/margins": 17.34565955400467, "rewards_train/rejected": -18.34366798400879, "step": 4162 }, { "epoch": 2.05, "learning_rate": 2.510976041592123e-07, "loss": 0.0, "step": 4163 }, { "epoch": 2.05, "logps_train/chosen": -79.616455078125, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -318.2225341796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4135010242462158, "rewards_train/margins": 17.77037501335144, "rewards_train/rejected": -19.183876037597656, "step": 4163 }, { "epoch": 2.05, "learning_rate": 2.508622685926469e-07, "loss": 0.0, "step": 4164 }, { "epoch": 2.05, "logps_train/chosen": -80.91340637207031, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -319.6466064453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5588207244873047, "rewards_train/margins": 18.02800941467285, "rewards_train/rejected": -19.586830139160156, "step": 4164 }, { "epoch": 2.05, "learning_rate": 2.506270064237386e-07, "loss": 0.0, "step": 4165 }, { "epoch": 2.05, "logps_train/chosen": -84.15570831298828, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -310.47991943359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7270939350128174, "rewards_train/margins": 16.679686307907104, "rewards_train/rejected": -18.406780242919922, "step": 4165 }, { "epoch": 2.05, "learning_rate": 2.503918177217969e-07, "loss": 0.0003, "step": 4166 }, { "epoch": 2.05, "logps_train/chosen": -72.10600280761719, "logps_train/ref_chosen": -62.78125, "logps_train/ref_rejected": -123.3125, "logps_train/rejected": -309.3414611816406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9325735569000244, "rewards_train/margins": 17.671544790267944, "rewards_train/rejected": -18.60411834716797, "step": 4166 }, { "epoch": 2.05, "learning_rate": 2.5015670255610975e-07, "loss": 0.0, "step": 4167 }, { "epoch": 2.05, "logps_train/chosen": -73.7475357055664, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -303.2679443359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9982889890670776, "rewards_train/margins": 17.184951901435852, "rewards_train/rejected": -18.18324089050293, "step": 4167 }, { "epoch": 2.05, "learning_rate": 2.4992166099594414e-07, "loss": 0.0, "step": 4168 }, { "epoch": 2.05, "logps_train/chosen": -75.82438659667969, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -314.0720520019531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.310662031173706, "rewards_train/margins": 17.43531346321106, "rewards_train/rejected": -18.745975494384766, "step": 4168 }, { "epoch": 2.05, "learning_rate": 2.496866931105447e-07, "loss": 0.0, "step": 4169 }, { "epoch": 2.05, "logps_train/chosen": -83.81964111328125, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -319.26629638671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7573550939559937, "rewards_train/margins": 17.431289315223694, "rewards_train/rejected": -19.188644409179688, "step": 4169 }, { "epoch": 2.05, "learning_rate": 2.494517989691345e-07, "loss": 0.0, "step": 4170 }, { "epoch": 2.05, "logps_train/chosen": -75.78279113769531, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -310.92999267578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2690503597259521, "rewards_train/margins": 17.44055199623108, "rewards_train/rejected": -18.70960235595703, "step": 4170 }, { "epoch": 2.05, "learning_rate": 2.4921697864091473e-07, "loss": 0.0, "step": 4171 }, { "epoch": 2.05, "logps_train/chosen": -75.48233032226562, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -320.2452697753906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.121963620185852, "rewards_train/margins": 18.080005526542664, "rewards_train/rejected": -19.201969146728516, "step": 4171 }, { "epoch": 2.05, "learning_rate": 2.4898223219506554e-07, "loss": 0.0, "step": 4172 }, { "epoch": 2.05, "logps_train/chosen": -75.212890625, "logps_train/ref_chosen": -61.6875, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -316.73046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3532229661941528, "rewards_train/margins": 17.74238097667694, "rewards_train/rejected": -19.095603942871094, "step": 4172 }, { "epoch": 2.05, "learning_rate": 2.4874755970074447e-07, "loss": 0.0, "step": 4173 }, { "epoch": 2.05, "logps_train/chosen": -80.06048583984375, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -312.2294921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3812930583953857, "rewards_train/margins": 17.33647847175598, "rewards_train/rejected": -18.717771530151367, "step": 4173 }, { "epoch": 2.06, "learning_rate": 2.4851296122708756e-07, "loss": 0.0001, "step": 4174 }, { "epoch": 2.06, "logps_train/chosen": -82.47576904296875, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -323.17626953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.65919828414917, "rewards_train/margins": 17.641438007354736, "rewards_train/rejected": -19.300636291503906, "step": 4174 }, { "epoch": 2.06, "learning_rate": 2.4827843684320964e-07, "loss": 0.0, "step": 4175 }, { "epoch": 2.06, "logps_train/chosen": -78.69839477539062, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -310.300537109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5636868476867676, "rewards_train/margins": 17.106650829315186, "rewards_train/rejected": -18.670337677001953, "step": 4175 }, { "epoch": 2.06, "learning_rate": 2.480439866182024e-07, "loss": 0.0001, "step": 4176 }, { "epoch": 2.06, "logps_train/chosen": -74.36785125732422, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -311.2595520019531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0553884506225586, "rewards_train/margins": 17.636239051818848, "rewards_train/rejected": -18.691627502441406, "step": 4176 }, { "epoch": 2.06, "learning_rate": 2.478096106211368e-07, "loss": 0.0001, "step": 4177 }, { "epoch": 2.06, "logps_train/chosen": -80.0623779296875, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -320.373779296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.615466594696045, "rewards_train/margins": 17.75584840774536, "rewards_train/rejected": -19.371315002441406, "step": 4177 }, { "epoch": 2.06, "learning_rate": 2.4757530892106206e-07, "loss": 0.0, "step": 4178 }, { "epoch": 2.06, "logps_train/chosen": -79.5224838256836, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -316.35614013671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3622827529907227, "rewards_train/margins": 17.523770332336426, "rewards_train/rejected": -18.88605308532715, "step": 4178 }, { "epoch": 2.06, "learning_rate": 2.4734108158700415e-07, "loss": 0.0001, "step": 4179 }, { "epoch": 2.06, "logps_train/chosen": -81.02043151855469, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -320.03814697265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.419670581817627, "rewards_train/margins": 17.99244260787964, "rewards_train/rejected": -19.412113189697266, "step": 4179 }, { "epoch": 2.06, "learning_rate": 2.471069286879688e-07, "loss": 0.0001, "step": 4180 }, { "epoch": 2.06, "logps_train/chosen": -74.56243896484375, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -313.9063720703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.035736322402954, "rewards_train/margins": 17.488494634628296, "rewards_train/rejected": -18.52423095703125, "step": 4180 }, { "epoch": 2.06, "learning_rate": 2.468728502929386e-07, "loss": 0.0, "step": 4181 }, { "epoch": 2.06, "logps_train/chosen": -72.89649200439453, "logps_train/ref_chosen": -61.78125, "logps_train/ref_rejected": -120.1875, "logps_train/rejected": -301.059326171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1124029159545898, "rewards_train/margins": 16.97551441192627, "rewards_train/rejected": -18.08791732788086, "step": 4181 }, { "epoch": 2.06, "learning_rate": 2.4663884647087465e-07, "loss": 0.0, "step": 4182 }, { "epoch": 2.06, "logps_train/chosen": -78.21426391601562, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -320.4307861328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3326082229614258, "rewards_train/margins": 17.74826717376709, "rewards_train/rejected": -19.080875396728516, "step": 4182 }, { "epoch": 2.06, "learning_rate": 2.4640491729071635e-07, "loss": 0.0, "step": 4183 }, { "epoch": 2.06, "logps_train/chosen": -80.03260803222656, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -133.375, "logps_train/rejected": -332.7235107421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4148329496383667, "rewards_train/margins": 18.52450954914093, "rewards_train/rejected": -19.939342498779297, "step": 4183 }, { "epoch": 2.06, "learning_rate": 2.4617106282138053e-07, "loss": 0.0, "step": 4184 }, { "epoch": 2.06, "logps_train/chosen": -78.46041870117188, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -310.97509765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2910120487213135, "rewards_train/margins": 17.255571126937866, "rewards_train/rejected": -18.54658317565918, "step": 4184 }, { "epoch": 2.06, "learning_rate": 2.459372831317624e-07, "loss": 0.0001, "step": 4185 }, { "epoch": 2.06, "logps_train/chosen": -73.58499145507812, "logps_train/ref_chosen": -60.5, "logps_train/ref_rejected": -120.5, "logps_train/rejected": -303.286865234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3078641891479492, "rewards_train/margins": 16.974629402160645, "rewards_train/rejected": -18.282493591308594, "step": 4185 }, { "epoch": 2.06, "learning_rate": 2.4570357829073484e-07, "loss": 0.0, "step": 4186 }, { "epoch": 2.06, "logps_train/chosen": -76.22693634033203, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -316.42193603515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0584845542907715, "rewards_train/margins": 17.766424655914307, "rewards_train/rejected": -18.824909210205078, "step": 4186 }, { "epoch": 2.06, "learning_rate": 2.4546994836714925e-07, "loss": 0.0, "step": 4187 }, { "epoch": 2.06, "logps_train/chosen": -76.9287109375, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -322.77081298828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0088376998901367, "rewards_train/margins": 18.3888521194458, "rewards_train/rejected": -19.397689819335938, "step": 4187 }, { "epoch": 2.06, "learning_rate": 2.4523639342983437e-07, "loss": 0.0, "step": 4188 }, { "epoch": 2.06, "logps_train/chosen": -82.517822265625, "logps_train/ref_chosen": -68.0, "logps_train/ref_rejected": -133.875, "logps_train/rejected": -321.5648193359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.454028844833374, "rewards_train/margins": 17.313732385635376, "rewards_train/rejected": -18.76776123046875, "step": 4188 }, { "epoch": 2.06, "learning_rate": 2.450029135475969e-07, "loss": 0.0, "step": 4189 }, { "epoch": 2.06, "logps_train/chosen": -77.58407592773438, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -316.08428955078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3649513721466064, "rewards_train/margins": 17.731171369552612, "rewards_train/rejected": -19.09612274169922, "step": 4189 }, { "epoch": 2.06, "learning_rate": 2.447695087892221e-07, "loss": 0.0001, "step": 4190 }, { "epoch": 2.06, "logps_train/chosen": -76.05450439453125, "logps_train/ref_chosen": -62.875, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -314.85626220703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3176573514938354, "rewards_train/margins": 17.41659939289093, "rewards_train/rejected": -18.734256744384766, "step": 4190 }, { "epoch": 2.06, "learning_rate": 2.445361792234719e-07, "loss": 0.0, "step": 4191 }, { "epoch": 2.06, "logps_train/chosen": -77.14324951171875, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -120.5, "logps_train/rejected": -305.915283203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.372430682182312, "rewards_train/margins": 17.166658520698547, "rewards_train/rejected": -18.53908920288086, "step": 4191 }, { "epoch": 2.06, "learning_rate": 2.443029249190873e-07, "loss": 0.0, "step": 4192 }, { "epoch": 2.06, "logps_train/chosen": -78.55789184570312, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -324.4595031738281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3932888507843018, "rewards_train/margins": 17.974243879318237, "rewards_train/rejected": -19.36753273010254, "step": 4192 }, { "epoch": 2.06, "learning_rate": 2.4406974594478634e-07, "loss": 0.0, "step": 4193 }, { "epoch": 2.06, "logps_train/chosen": -74.74320220947266, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -131.125, "logps_train/rejected": -324.1864013671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9909223318099976, "rewards_train/margins": 18.3102365732193, "rewards_train/rejected": -19.301158905029297, "step": 4193 }, { "epoch": 2.06, "learning_rate": 2.4383664236926506e-07, "loss": 0.0, "step": 4194 }, { "epoch": 2.06, "logps_train/chosen": -80.70113372802734, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -327.603759765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5314905643463135, "rewards_train/margins": 18.394805669784546, "rewards_train/rejected": -19.92629623413086, "step": 4194 }, { "epoch": 2.07, "learning_rate": 2.436036142611976e-07, "loss": 0.0, "step": 4195 }, { "epoch": 2.07, "logps_train/chosen": -81.34530639648438, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -322.16357421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.591463565826416, "rewards_train/margins": 17.76561689376831, "rewards_train/rejected": -19.357080459594727, "step": 4195 }, { "epoch": 2.07, "learning_rate": 2.4337066168923553e-07, "loss": 0.0, "step": 4196 }, { "epoch": 2.07, "logps_train/chosen": -70.38530731201172, "logps_train/ref_chosen": -61.875, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -303.2740173339844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.851079523563385, "rewards_train/margins": 17.211821019649506, "rewards_train/rejected": -18.06290054321289, "step": 4196 }, { "epoch": 2.07, "learning_rate": 2.431377847220082e-07, "loss": 0.0001, "step": 4197 }, { "epoch": 2.07, "logps_train/chosen": -75.59058380126953, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -307.532470703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1449227333068848, "rewards_train/margins": 17.05012083053589, "rewards_train/rejected": -18.195043563842773, "step": 4197 }, { "epoch": 2.07, "learning_rate": 2.4290498342812254e-07, "loss": 0.0, "step": 4198 }, { "epoch": 2.07, "logps_train/chosen": -80.55497741699219, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -325.57122802734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4479296207427979, "rewards_train/margins": 18.146743535995483, "rewards_train/rejected": -19.59467315673828, "step": 4198 }, { "epoch": 2.07, "learning_rate": 2.4267225787616376e-07, "loss": 0.0, "step": 4199 }, { "epoch": 2.07, "logps_train/chosen": -80.76496124267578, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -317.5489501953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.627911925315857, "rewards_train/margins": 17.627276062965393, "rewards_train/rejected": -19.25518798828125, "step": 4199 }, { "epoch": 2.07, "learning_rate": 2.4243960813469417e-07, "loss": 0.0, "step": 4200 }, { "epoch": 2.07, "logps_train/chosen": -79.83504486083984, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -313.3586120605469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.417488932609558, "rewards_train/margins": 17.348937153816223, "rewards_train/rejected": -18.76642608642578, "step": 4200 }, { "epoch": 2.07, "learning_rate": 2.422070342722538e-07, "loss": 0.0, "step": 4201 }, { "epoch": 2.07, "logps_train/chosen": -71.99945068359375, "logps_train/ref_chosen": -62.09375, "logps_train/ref_rejected": -120.9375, "logps_train/rejected": -301.57257080078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9896917343139648, "rewards_train/margins": 17.074837684631348, "rewards_train/rejected": -18.064529418945312, "step": 4201 }, { "epoch": 2.07, "learning_rate": 2.4197453635736085e-07, "loss": 0.0, "step": 4202 }, { "epoch": 2.07, "logps_train/chosen": -76.37187957763672, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -317.4781494140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2948538064956665, "rewards_train/margins": 17.879573225975037, "rewards_train/rejected": -19.174427032470703, "step": 4202 }, { "epoch": 2.07, "learning_rate": 2.417421144585106e-07, "loss": 0.0, "step": 4203 }, { "epoch": 2.07, "logps_train/chosen": -74.91471099853516, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -320.35015869140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9932288527488708, "rewards_train/margins": 18.126358091831207, "rewards_train/rejected": -19.119586944580078, "step": 4203 }, { "epoch": 2.07, "learning_rate": 2.415097686441759e-07, "loss": 0.0, "step": 4204 }, { "epoch": 2.07, "logps_train/chosen": -82.00422668457031, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -317.1799011230469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5274245738983154, "rewards_train/margins": 17.79544949531555, "rewards_train/rejected": -19.322874069213867, "step": 4204 }, { "epoch": 2.07, "learning_rate": 2.412774989828078e-07, "loss": 0.0, "step": 4205 }, { "epoch": 2.07, "logps_train/chosen": -72.14022064208984, "logps_train/ref_chosen": -60.96875, "logps_train/ref_rejected": -120.5625, "logps_train/rejected": -305.40069580078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.117586612701416, "rewards_train/margins": 17.36437749862671, "rewards_train/rejected": -18.481964111328125, "step": 4205 }, { "epoch": 2.07, "learning_rate": 2.410453055428343e-07, "loss": 0.0, "step": 4206 }, { "epoch": 2.07, "logps_train/chosen": -82.09496307373047, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -322.549560546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5644768476486206, "rewards_train/margins": 17.762354969978333, "rewards_train/rejected": -19.326831817626953, "step": 4206 }, { "epoch": 2.07, "learning_rate": 2.4081318839266115e-07, "loss": 0.0, "step": 4207 }, { "epoch": 2.07, "logps_train/chosen": -80.68125915527344, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -330.0630187988281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6296000480651855, "rewards_train/margins": 18.360687732696533, "rewards_train/rejected": -19.99028778076172, "step": 4207 }, { "epoch": 2.07, "learning_rate": 2.4058114760067154e-07, "loss": 0.0, "step": 4208 }, { "epoch": 2.07, "logps_train/chosen": -71.70140075683594, "logps_train/ref_chosen": -61.125, "logps_train/ref_rejected": -119.5625, "logps_train/rejected": -295.8424377441406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0570542812347412, "rewards_train/margins": 16.57162308692932, "rewards_train/rejected": -17.628677368164062, "step": 4208 }, { "epoch": 2.07, "learning_rate": 2.4034918323522624e-07, "loss": 0.0, "step": 4209 }, { "epoch": 2.07, "logps_train/chosen": -77.9681625366211, "logps_train/ref_chosen": -62.8125, "logps_train/ref_rejected": -122.6875, "logps_train/rejected": -314.58966064453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5157127380371094, "rewards_train/margins": 17.67386817932129, "rewards_train/rejected": -19.1895809173584, "step": 4209 }, { "epoch": 2.07, "learning_rate": 2.401172953646638e-07, "loss": 0.0, "step": 4210 }, { "epoch": 2.07, "logps_train/chosen": -80.0583724975586, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -315.509033203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2957299947738647, "rewards_train/margins": 17.71913778781891, "rewards_train/rejected": -19.014867782592773, "step": 4210 }, { "epoch": 2.07, "learning_rate": 2.3988548405729976e-07, "loss": 0.0, "step": 4211 }, { "epoch": 2.07, "logps_train/chosen": -72.93224334716797, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -320.2055969238281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9412226676940918, "rewards_train/margins": 18.152775287628174, "rewards_train/rejected": -19.093997955322266, "step": 4211 }, { "epoch": 2.07, "learning_rate": 2.396537493814273e-07, "loss": 0.0, "step": 4212 }, { "epoch": 2.07, "logps_train/chosen": -74.02705383300781, "logps_train/ref_chosen": -60.53125, "logps_train/ref_rejected": -121.9375, "logps_train/rejected": -310.73272705078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3516311645507812, "rewards_train/margins": 17.524181365966797, "rewards_train/rejected": -18.875812530517578, "step": 4212 }, { "epoch": 2.07, "learning_rate": 2.394220914053169e-07, "loss": 0.0, "step": 4213 }, { "epoch": 2.07, "logps_train/chosen": -82.43122863769531, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -134.0, "logps_train/rejected": -336.27294921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6344319581985474, "rewards_train/margins": 18.592180132865906, "rewards_train/rejected": -20.226612091064453, "step": 4213 }, { "epoch": 2.07, "learning_rate": 2.391905101972168e-07, "loss": 0.0, "step": 4214 }, { "epoch": 2.07, "logps_train/chosen": -73.80345916748047, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -303.22528076171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1506098508834839, "rewards_train/margins": 17.030367016792297, "rewards_train/rejected": -18.18097686767578, "step": 4214 }, { "epoch": 2.08, "learning_rate": 2.389590058253523e-07, "loss": 0.0, "step": 4215 }, { "epoch": 2.08, "logps_train/chosen": -81.72987365722656, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -310.11737060546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5498428344726562, "rewards_train/margins": 17.049394607543945, "rewards_train/rejected": -18.5992374420166, "step": 4215 }, { "epoch": 2.08, "learning_rate": 2.3872757835792597e-07, "loss": 0.0, "step": 4216 }, { "epoch": 2.08, "logps_train/chosen": -80.90641784667969, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -330.48248291015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4522632360458374, "rewards_train/margins": 18.615760445594788, "rewards_train/rejected": -20.068023681640625, "step": 4216 }, { "epoch": 2.08, "learning_rate": 2.384962278631182e-07, "loss": 0.0001, "step": 4217 }, { "epoch": 2.08, "logps_train/chosen": -83.80033874511719, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -320.17974853515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8386279344558716, "rewards_train/margins": 17.2957524061203, "rewards_train/rejected": -19.134380340576172, "step": 4217 }, { "epoch": 2.08, "learning_rate": 2.3826495440908628e-07, "loss": 0.0001, "step": 4218 }, { "epoch": 2.08, "logps_train/chosen": -71.30349731445312, "logps_train/ref_chosen": -61.28125, "logps_train/ref_rejected": -119.75, "logps_train/rejected": -304.58935546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0015897750854492, "rewards_train/margins": 17.48185634613037, "rewards_train/rejected": -18.48344612121582, "step": 4218 }, { "epoch": 2.08, "learning_rate": 2.380337580639647e-07, "loss": 0.0001, "step": 4219 }, { "epoch": 2.08, "logps_train/chosen": -80.11415100097656, "logps_train/ref_chosen": -67.75, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -310.6109619140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2372938394546509, "rewards_train/margins": 17.047725081443787, "rewards_train/rejected": -18.285018920898438, "step": 4219 }, { "epoch": 2.08, "learning_rate": 2.3780263889586594e-07, "loss": 0.0, "step": 4220 }, { "epoch": 2.08, "logps_train/chosen": -73.10565948486328, "logps_train/ref_chosen": -62.1875, "logps_train/ref_rejected": -118.125, "logps_train/rejected": -298.13079833984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0914251804351807, "rewards_train/margins": 16.91056990623474, "rewards_train/rejected": -18.001995086669922, "step": 4220 }, { "epoch": 2.08, "learning_rate": 2.3757159697287893e-07, "loss": 0.0, "step": 4221 }, { "epoch": 2.08, "logps_train/chosen": -76.34318542480469, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -322.005615234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1957937479019165, "rewards_train/margins": 18.298030257225037, "rewards_train/rejected": -19.493824005126953, "step": 4221 }, { "epoch": 2.08, "learning_rate": 2.3734063236307035e-07, "loss": 0.0, "step": 4222 }, { "epoch": 2.08, "logps_train/chosen": -77.25172424316406, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -316.8855285644531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0969493389129639, "rewards_train/margins": 17.58105969429016, "rewards_train/rejected": -18.678009033203125, "step": 4222 }, { "epoch": 2.08, "learning_rate": 2.3710974513448357e-07, "loss": 0.0, "step": 4223 }, { "epoch": 2.08, "logps_train/chosen": -74.18653106689453, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -312.765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.076465368270874, "rewards_train/margins": 17.802147150039673, "rewards_train/rejected": -18.878612518310547, "step": 4223 }, { "epoch": 2.08, "learning_rate": 2.3687893535513997e-07, "loss": 0.0, "step": 4224 }, { "epoch": 2.08, "logps_train/chosen": -77.29192352294922, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -118.25, "logps_train/rejected": -302.27972412109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4390308856964111, "rewards_train/margins": 16.96618914604187, "rewards_train/rejected": -18.40522003173828, "step": 4224 }, { "epoch": 2.08, "learning_rate": 2.3664820309303758e-07, "loss": 0.0, "step": 4225 }, { "epoch": 2.08, "logps_train/chosen": -76.80403900146484, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -303.5557861328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1613123416900635, "rewards_train/margins": 16.696414709091187, "rewards_train/rejected": -17.85772705078125, "step": 4225 }, { "epoch": 2.08, "learning_rate": 2.3641754841615136e-07, "loss": 0.0001, "step": 4226 }, { "epoch": 2.08, "logps_train/chosen": -82.89714050292969, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -316.91510009765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.686833381652832, "rewards_train/margins": 17.277138710021973, "rewards_train/rejected": -18.963972091674805, "step": 4226 }, { "epoch": 2.08, "learning_rate": 2.3618697139243433e-07, "loss": 0.0, "step": 4227 }, { "epoch": 2.08, "logps_train/chosen": -73.48869323730469, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -302.40631103515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.084831714630127, "rewards_train/margins": 16.861266613006592, "rewards_train/rejected": -17.94609832763672, "step": 4227 }, { "epoch": 2.08, "learning_rate": 2.359564720898153e-07, "loss": 0.001, "step": 4228 }, { "epoch": 2.08, "logps_train/chosen": -84.83622741699219, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -317.2901306152344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7679250240325928, "rewards_train/margins": 17.292534112930298, "rewards_train/rejected": -19.06045913696289, "step": 4228 }, { "epoch": 2.08, "learning_rate": 2.357260505762015e-07, "loss": 0.0002, "step": 4229 }, { "epoch": 2.08, "logps_train/chosen": -74.76528930664062, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -310.7872619628906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2521638870239258, "rewards_train/margins": 17.320849418640137, "rewards_train/rejected": -18.573013305664062, "step": 4229 }, { "epoch": 2.08, "learning_rate": 2.3549570691947644e-07, "loss": 0.0, "step": 4230 }, { "epoch": 2.08, "logps_train/chosen": -78.81965637207031, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -311.21258544921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5263996124267578, "rewards_train/margins": 17.375377655029297, "rewards_train/rejected": -18.901777267456055, "step": 4230 }, { "epoch": 2.08, "learning_rate": 2.3526544118750074e-07, "loss": 0.0001, "step": 4231 }, { "epoch": 2.08, "logps_train/chosen": -73.70278930664062, "logps_train/ref_chosen": -62.40625, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -307.528076171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1296056509017944, "rewards_train/margins": 17.219982743263245, "rewards_train/rejected": -18.34958839416504, "step": 4231 }, { "epoch": 2.08, "learning_rate": 2.3503525344811265e-07, "loss": 0.0, "step": 4232 }, { "epoch": 2.08, "logps_train/chosen": -78.56251525878906, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -312.64874267578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.401759147644043, "rewards_train/margins": 17.48240375518799, "rewards_train/rejected": -18.88416290283203, "step": 4232 }, { "epoch": 2.08, "learning_rate": 2.348051437691268e-07, "loss": 0.0, "step": 4233 }, { "epoch": 2.08, "logps_train/chosen": -76.60147094726562, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -324.8857727050781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.310098648071289, "rewards_train/margins": 18.405332565307617, "rewards_train/rejected": -19.715431213378906, "step": 4233 }, { "epoch": 2.08, "learning_rate": 2.3457511221833503e-07, "loss": 0.0, "step": 4234 }, { "epoch": 2.08, "logps_train/chosen": -78.4100341796875, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -316.3064270019531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.502013921737671, "rewards_train/margins": 17.506508111953735, "rewards_train/rejected": -19.008522033691406, "step": 4234 }, { "epoch": 2.09, "learning_rate": 2.3434515886350609e-07, "loss": 0.0001, "step": 4235 }, { "epoch": 2.09, "logps_train/chosen": -77.67613220214844, "logps_train/ref_chosen": -62.84375, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -314.6370849609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4827980995178223, "rewards_train/margins": 17.526078701019287, "rewards_train/rejected": -19.00887680053711, "step": 4235 }, { "epoch": 2.09, "learning_rate": 2.341152837723861e-07, "loss": 0.0, "step": 4236 }, { "epoch": 2.09, "logps_train/chosen": -78.2117919921875, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -322.8022766113281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3291386365890503, "rewards_train/margins": 18.277066349983215, "rewards_train/rejected": -19.606204986572266, "step": 4236 }, { "epoch": 2.09, "learning_rate": 2.338854870126976e-07, "loss": 0.0, "step": 4237 }, { "epoch": 2.09, "logps_train/chosen": -74.67591857910156, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -309.4570007324219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2433732748031616, "rewards_train/margins": 17.26990568637848, "rewards_train/rejected": -18.51327896118164, "step": 4237 }, { "epoch": 2.09, "learning_rate": 2.336557686521402e-07, "loss": 0.0, "step": 4238 }, { "epoch": 2.09, "logps_train/chosen": -78.76329803466797, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -308.19451904296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4945428371429443, "rewards_train/margins": 17.08770203590393, "rewards_train/rejected": -18.582244873046875, "step": 4238 }, { "epoch": 2.09, "learning_rate": 2.334261287583909e-07, "loss": 0.0001, "step": 4239 }, { "epoch": 2.09, "logps_train/chosen": -73.65692901611328, "logps_train/ref_chosen": -61.25, "logps_train/ref_rejected": -119.1875, "logps_train/rejected": -306.45465087890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2413523197174072, "rewards_train/margins": 17.485950231552124, "rewards_train/rejected": -18.72730255126953, "step": 4239 }, { "epoch": 2.09, "learning_rate": 2.3319656739910248e-07, "loss": 0.0, "step": 4240 }, { "epoch": 2.09, "logps_train/chosen": -73.77862548828125, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -305.9837646484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0293270349502563, "rewards_train/margins": 17.36631429195404, "rewards_train/rejected": -18.395641326904297, "step": 4240 }, { "epoch": 2.09, "learning_rate": 2.329670846419056e-07, "loss": 0.0, "step": 4241 }, { "epoch": 2.09, "logps_train/chosen": -82.41783142089844, "logps_train/ref_chosen": -67.25, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -327.7230224609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5162458419799805, "rewards_train/margins": 18.252495765686035, "rewards_train/rejected": -19.768741607666016, "step": 4241 }, { "epoch": 2.09, "learning_rate": 2.3273768055440785e-07, "loss": 0.0, "step": 4242 }, { "epoch": 2.09, "logps_train/chosen": -82.79360961914062, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -321.6836242675781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6133702993392944, "rewards_train/margins": 17.775890946388245, "rewards_train/rejected": -19.38926124572754, "step": 4242 }, { "epoch": 2.09, "learning_rate": 2.3250835520419248e-07, "loss": 0.0, "step": 4243 }, { "epoch": 2.09, "logps_train/chosen": -75.24160766601562, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -322.530029296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9457912445068359, "rewards_train/margins": 18.62264060974121, "rewards_train/rejected": -19.568431854248047, "step": 4243 }, { "epoch": 2.09, "learning_rate": 2.3227910865882083e-07, "loss": 0.0, "step": 4244 }, { "epoch": 2.09, "logps_train/chosen": -80.00912475585938, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -312.35430908203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4798192977905273, "rewards_train/margins": 17.277291297912598, "rewards_train/rejected": -18.757110595703125, "step": 4244 }, { "epoch": 2.09, "learning_rate": 2.3204994098583024e-07, "loss": 0.0, "step": 4245 }, { "epoch": 2.09, "logps_train/chosen": -76.87671661376953, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -309.4378662109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4499521255493164, "rewards_train/margins": 17.330307960510254, "rewards_train/rejected": -18.78026008605957, "step": 4245 }, { "epoch": 2.09, "learning_rate": 2.3182085225273495e-07, "loss": 0.0001, "step": 4246 }, { "epoch": 2.09, "logps_train/chosen": -80.59577941894531, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -132.875, "logps_train/rejected": -329.5624694824219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3612384796142578, "rewards_train/margins": 18.308582305908203, "rewards_train/rejected": -19.66982078552246, "step": 4246 }, { "epoch": 2.09, "learning_rate": 2.3159184252702634e-07, "loss": 0.0003, "step": 4247 }, { "epoch": 2.09, "logps_train/chosen": -76.02246856689453, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -316.21295166015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1496100425720215, "rewards_train/margins": 17.907137393951416, "rewards_train/rejected": -19.056747436523438, "step": 4247 }, { "epoch": 2.09, "learning_rate": 2.3136291187617212e-07, "loss": 0.0, "step": 4248 }, { "epoch": 2.09, "logps_train/chosen": -81.90342712402344, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -322.36932373046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5932235717773438, "rewards_train/margins": 17.79659080505371, "rewards_train/rejected": -19.389814376831055, "step": 4248 }, { "epoch": 2.09, "learning_rate": 2.3113406036761674e-07, "loss": 0.0, "step": 4249 }, { "epoch": 2.09, "logps_train/chosen": -76.28557586669922, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -316.677490234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.353020191192627, "rewards_train/margins": 17.803744792938232, "rewards_train/rejected": -19.15676498413086, "step": 4249 }, { "epoch": 2.09, "learning_rate": 2.3090528806878125e-07, "loss": 0.0, "step": 4250 }, { "epoch": 2.09, "logps_train/chosen": -81.99528503417969, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -308.69146728515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7307536602020264, "rewards_train/margins": 16.7068989276886, "rewards_train/rejected": -18.437652587890625, "step": 4250 }, { "epoch": 2.09, "learning_rate": 2.3067659504706389e-07, "loss": 0.0, "step": 4251 }, { "epoch": 2.09, "logps_train/chosen": -74.47360229492188, "logps_train/ref_chosen": -61.75, "logps_train/ref_rejected": -119.5, "logps_train/rejected": -309.6829528808594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2723114490509033, "rewards_train/margins": 17.748425245285034, "rewards_train/rejected": -19.020736694335938, "step": 4251 }, { "epoch": 2.09, "learning_rate": 2.304479813698389e-07, "loss": 0.0, "step": 4252 }, { "epoch": 2.09, "logps_train/chosen": -73.54790496826172, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -120.5, "logps_train/rejected": -305.46875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0545463562011719, "rewards_train/margins": 17.441450119018555, "rewards_train/rejected": -18.495996475219727, "step": 4252 }, { "epoch": 2.09, "learning_rate": 2.3021944710445728e-07, "loss": 0.0001, "step": 4253 }, { "epoch": 2.09, "logps_train/chosen": -79.74168395996094, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -318.6126708984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3948230743408203, "rewards_train/margins": 17.929433822631836, "rewards_train/rejected": -19.324256896972656, "step": 4253 }, { "epoch": 2.09, "learning_rate": 2.299909923182472e-07, "loss": 0.0001, "step": 4254 }, { "epoch": 2.09, "logps_train/chosen": -80.79080200195312, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -326.2575378417969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.74094557762146, "rewards_train/margins": 18.06234574317932, "rewards_train/rejected": -19.80329132080078, "step": 4254 }, { "epoch": 2.1, "learning_rate": 2.2976261707851268e-07, "loss": 0.0, "step": 4255 }, { "epoch": 2.1, "logps_train/chosen": -82.58406066894531, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -318.47039794921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7044998407363892, "rewards_train/margins": 17.547327399253845, "rewards_train/rejected": -19.251827239990234, "step": 4255 }, { "epoch": 2.1, "learning_rate": 2.2953432145253455e-07, "loss": 0.0, "step": 4256 }, { "epoch": 2.1, "logps_train/chosen": -76.9101791381836, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -305.82427978515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3333027362823486, "rewards_train/margins": 16.956011533737183, "rewards_train/rejected": -18.28931427001953, "step": 4256 }, { "epoch": 2.1, "learning_rate": 2.293061055075707e-07, "loss": 0.0002, "step": 4257 }, { "epoch": 2.1, "logps_train/chosen": -84.11174011230469, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -316.8623962402344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.800675868988037, "rewards_train/margins": 17.451189517974854, "rewards_train/rejected": -19.25186538696289, "step": 4257 }, { "epoch": 2.1, "learning_rate": 2.2907796931085437e-07, "loss": 0.0001, "step": 4258 }, { "epoch": 2.1, "logps_train/chosen": -86.11869812011719, "logps_train/ref_chosen": -62.78125, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -324.115966796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.3341846466064453, "rewards_train/margins": 17.563642501831055, "rewards_train/rejected": -19.8978271484375, "step": 4258 }, { "epoch": 2.1, "learning_rate": 2.2884991292959654e-07, "loss": 0.0, "step": 4259 }, { "epoch": 2.1, "logps_train/chosen": -75.36569213867188, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -317.0220947265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0692834854125977, "rewards_train/margins": 17.810853004455566, "rewards_train/rejected": -18.880136489868164, "step": 4259 }, { "epoch": 2.1, "learning_rate": 2.286219364309841e-07, "loss": 0.0, "step": 4260 }, { "epoch": 2.1, "logps_train/chosen": -75.9516830444336, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -310.75091552734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1640652418136597, "rewards_train/margins": 17.39979660511017, "rewards_train/rejected": -18.563861846923828, "step": 4260 }, { "epoch": 2.1, "learning_rate": 2.2839403988218014e-07, "loss": 0.0002, "step": 4261 }, { "epoch": 2.1, "logps_train/chosen": -76.71302032470703, "logps_train/ref_chosen": -62.1875, "logps_train/ref_rejected": -120.0, "logps_train/rejected": -309.4603271484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.451672911643982, "rewards_train/margins": 17.496996521949768, "rewards_train/rejected": -18.94866943359375, "step": 4261 }, { "epoch": 2.1, "learning_rate": 2.2816622335032499e-07, "loss": 0.0, "step": 4262 }, { "epoch": 2.1, "logps_train/chosen": -75.28178405761719, "logps_train/ref_chosen": -62.65625, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -323.5828857421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.261772632598877, "rewards_train/margins": 18.381476879119873, "rewards_train/rejected": -19.64324951171875, "step": 4262 }, { "epoch": 2.1, "learning_rate": 2.2793848690253465e-07, "loss": 0.0, "step": 4263 }, { "epoch": 2.1, "logps_train/chosen": -77.79450988769531, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -323.5115966796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.245222806930542, "rewards_train/margins": 18.42126965522766, "rewards_train/rejected": -19.666492462158203, "step": 4263 }, { "epoch": 2.1, "learning_rate": 2.277108306059019e-07, "loss": 0.0, "step": 4264 }, { "epoch": 2.1, "logps_train/chosen": -81.55766296386719, "logps_train/ref_chosen": -62.65625, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -311.90985107421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8894572257995605, "rewards_train/margins": 16.80528688430786, "rewards_train/rejected": -18.694744110107422, "step": 4264 }, { "epoch": 2.1, "learning_rate": 2.274832545274956e-07, "loss": 0.0, "step": 4265 }, { "epoch": 2.1, "logps_train/chosen": -80.27073669433594, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -120.6875, "logps_train/rejected": -303.29815673828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6854233741760254, "rewards_train/margins": 16.575643062591553, "rewards_train/rejected": -18.261066436767578, "step": 4265 }, { "epoch": 2.1, "learning_rate": 2.2725575873436166e-07, "loss": 0.0003, "step": 4266 }, { "epoch": 2.1, "logps_train/chosen": -70.59649658203125, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -119.9375, "logps_train/rejected": -304.56256103515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6334292888641357, "rewards_train/margins": 17.832106828689575, "rewards_train/rejected": -18.46553611755371, "step": 4266 }, { "epoch": 2.1, "learning_rate": 2.2702834329352155e-07, "loss": 0.0001, "step": 4267 }, { "epoch": 2.1, "logps_train/chosen": -80.62641906738281, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -316.14813232421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4348344802856445, "rewards_train/margins": 17.72373104095459, "rewards_train/rejected": -19.158565521240234, "step": 4267 }, { "epoch": 2.1, "learning_rate": 2.2680100827197325e-07, "loss": 0.0, "step": 4268 }, { "epoch": 2.1, "logps_train/chosen": -74.3779067993164, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -310.3782958984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9656714200973511, "rewards_train/margins": 17.768253445625305, "rewards_train/rejected": -18.733924865722656, "step": 4268 }, { "epoch": 2.1, "learning_rate": 2.2657375373669158e-07, "loss": 0.0004, "step": 4269 }, { "epoch": 2.1, "logps_train/chosen": -79.65582275390625, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -131.5, "logps_train/rejected": -333.18243408203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3177309036254883, "rewards_train/margins": 18.84812068939209, "rewards_train/rejected": -20.165851593017578, "step": 4269 }, { "epoch": 2.1, "learning_rate": 2.2634657975462712e-07, "loss": 0.0, "step": 4270 }, { "epoch": 2.1, "logps_train/chosen": -79.85246276855469, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -313.87042236328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5808030366897583, "rewards_train/margins": 17.529285311698914, "rewards_train/rejected": -19.110088348388672, "step": 4270 }, { "epoch": 2.1, "learning_rate": 2.261194863927068e-07, "loss": 0.0001, "step": 4271 }, { "epoch": 2.1, "logps_train/chosen": -77.88743591308594, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -321.0306701660156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.381614089012146, "rewards_train/margins": 18.003483414649963, "rewards_train/rejected": -19.38509750366211, "step": 4271 }, { "epoch": 2.1, "learning_rate": 2.2589247371783365e-07, "loss": 0.0, "step": 4272 }, { "epoch": 2.1, "logps_train/chosen": -81.90658569335938, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -316.4791259765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7515959739685059, "rewards_train/margins": 17.537577152252197, "rewards_train/rejected": -19.289173126220703, "step": 4272 }, { "epoch": 2.1, "learning_rate": 2.2566554179688752e-07, "loss": 0.0001, "step": 4273 }, { "epoch": 2.1, "logps_train/chosen": -70.12303924560547, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -309.56915283203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5613276958465576, "rewards_train/margins": 17.913216829299927, "rewards_train/rejected": -18.474544525146484, "step": 4273 }, { "epoch": 2.1, "learning_rate": 2.25438690696724e-07, "loss": 0.0, "step": 4274 }, { "epoch": 2.1, "logps_train/chosen": -75.53532409667969, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -323.7368469238281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.195719838142395, "rewards_train/margins": 18.624743342399597, "rewards_train/rejected": -19.820463180541992, "step": 4274 }, { "epoch": 2.1, "learning_rate": 2.2521192048417464e-07, "loss": 0.0, "step": 4275 }, { "epoch": 2.1, "logps_train/chosen": -77.14668273925781, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -132.5, "logps_train/rejected": -325.54559326171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.32394540309906, "rewards_train/margins": 17.98251974582672, "rewards_train/rejected": -19.30646514892578, "step": 4275 }, { "epoch": 2.11, "learning_rate": 2.2498523122604807e-07, "loss": 0.0, "step": 4276 }, { "epoch": 2.11, "logps_train/chosen": -78.7469482421875, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -312.1707763671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5067750215530396, "rewards_train/margins": 17.13901126384735, "rewards_train/rejected": -18.64578628540039, "step": 4276 }, { "epoch": 2.11, "learning_rate": 2.247586229891278e-07, "loss": 0.0, "step": 4277 }, { "epoch": 2.11, "logps_train/chosen": -75.40253448486328, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -317.0032653808594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0348820686340332, "rewards_train/margins": 18.009389400482178, "rewards_train/rejected": -19.04427146911621, "step": 4277 }, { "epoch": 2.11, "learning_rate": 2.245320958401744e-07, "loss": 0.0, "step": 4278 }, { "epoch": 2.11, "logps_train/chosen": -77.51632690429688, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -313.2349853515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5155483484268188, "rewards_train/margins": 17.32611119747162, "rewards_train/rejected": -18.841659545898438, "step": 4278 }, { "epoch": 2.11, "learning_rate": 2.2430564984592476e-07, "loss": 0.0003, "step": 4279 }, { "epoch": 2.11, "logps_train/chosen": -77.42514038085938, "logps_train/ref_chosen": -62.8125, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -330.4913330078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4633147716522217, "rewards_train/margins": 18.83386540412903, "rewards_train/rejected": -20.29718017578125, "step": 4279 }, { "epoch": 2.11, "learning_rate": 2.2407928507309058e-07, "loss": 0.0, "step": 4280 }, { "epoch": 2.11, "logps_train/chosen": -74.05094909667969, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -132.75, "logps_train/rejected": -325.585205078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8448898196220398, "rewards_train/margins": 18.43443375825882, "rewards_train/rejected": -19.27932357788086, "step": 4280 }, { "epoch": 2.11, "learning_rate": 2.2385300158836112e-07, "loss": 0.0, "step": 4281 }, { "epoch": 2.11, "logps_train/chosen": -71.58477020263672, "logps_train/ref_chosen": -61.53125, "logps_train/ref_rejected": -116.5625, "logps_train/rejected": -303.1412353515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0042288303375244, "rewards_train/margins": 17.65173888206482, "rewards_train/rejected": -18.655967712402344, "step": 4281 }, { "epoch": 2.11, "learning_rate": 2.2362679945840074e-07, "loss": 0.0, "step": 4282 }, { "epoch": 2.11, "logps_train/chosen": -74.1348648071289, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -307.249755859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.026719093322754, "rewards_train/margins": 17.20987606048584, "rewards_train/rejected": -18.236595153808594, "step": 4282 }, { "epoch": 2.11, "learning_rate": 2.2340067874984992e-07, "loss": 0.0, "step": 4283 }, { "epoch": 2.11, "logps_train/chosen": -81.68970489501953, "logps_train/ref_chosen": -67.5625, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -320.646728515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4139413833618164, "rewards_train/margins": 17.749171257019043, "rewards_train/rejected": -19.16311264038086, "step": 4283 }, { "epoch": 2.11, "learning_rate": 2.231746395293258e-07, "loss": 0.0, "step": 4284 }, { "epoch": 2.11, "logps_train/chosen": -76.39866638183594, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -313.36468505859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.24103844165802, "rewards_train/margins": 17.454121470451355, "rewards_train/rejected": -18.695159912109375, "step": 4284 }, { "epoch": 2.11, "learning_rate": 2.229486818634208e-07, "loss": 0.0, "step": 4285 }, { "epoch": 2.11, "logps_train/chosen": -77.88763427734375, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -313.28875732421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.495746374130249, "rewards_train/margins": 17.443334817886353, "rewards_train/rejected": -18.9390811920166, "step": 4285 }, { "epoch": 2.11, "learning_rate": 2.2272280581870357e-07, "loss": 0.0, "step": 4286 }, { "epoch": 2.11, "logps_train/chosen": -72.59980773925781, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -120.9375, "logps_train/rejected": -299.40185546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9387896060943604, "rewards_train/margins": 16.91106390953064, "rewards_train/rejected": -17.849853515625, "step": 4286 }, { "epoch": 2.11, "learning_rate": 2.224970114617186e-07, "loss": 0.0, "step": 4287 }, { "epoch": 2.11, "logps_train/chosen": -79.79673767089844, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -319.5382080078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3716174364089966, "rewards_train/margins": 17.979958415031433, "rewards_train/rejected": -19.35157585144043, "step": 4287 }, { "epoch": 2.11, "learning_rate": 2.2227129885898677e-07, "loss": 0.0, "step": 4288 }, { "epoch": 2.11, "logps_train/chosen": -76.7633285522461, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -121.6875, "logps_train/rejected": -314.638671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2992818355560303, "rewards_train/margins": 17.993587732315063, "rewards_train/rejected": -19.292869567871094, "step": 4288 }, { "epoch": 2.11, "learning_rate": 2.220456680770043e-07, "loss": 0.0, "step": 4289 }, { "epoch": 2.11, "logps_train/chosen": -75.04960632324219, "logps_train/ref_chosen": -61.4375, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -306.9626159667969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3614544868469238, "rewards_train/margins": 16.84315824508667, "rewards_train/rejected": -18.204612731933594, "step": 4289 }, { "epoch": 2.11, "learning_rate": 2.2182011918224347e-07, "loss": 0.0, "step": 4290 }, { "epoch": 2.11, "logps_train/chosen": -83.2154541015625, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -320.86083984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.815441370010376, "rewards_train/margins": 17.701892614364624, "rewards_train/rejected": -19.517333984375, "step": 4290 }, { "epoch": 2.11, "learning_rate": 2.2159465224115293e-07, "loss": 0.0, "step": 4291 }, { "epoch": 2.11, "logps_train/chosen": -76.92172241210938, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -120.8125, "logps_train/rejected": -308.155517578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3452484607696533, "rewards_train/margins": 17.390080213546753, "rewards_train/rejected": -18.735328674316406, "step": 4291 }, { "epoch": 2.11, "learning_rate": 2.213692673201561e-07, "loss": 0.0, "step": 4292 }, { "epoch": 2.11, "logps_train/chosen": -80.37893676757812, "logps_train/ref_chosen": -67.75, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -321.94866943359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.266262412071228, "rewards_train/margins": 18.109952569007874, "rewards_train/rejected": -19.3762149810791, "step": 4292 }, { "epoch": 2.11, "learning_rate": 2.2114396448565326e-07, "loss": 0.0, "step": 4293 }, { "epoch": 2.11, "logps_train/chosen": -77.49552154541016, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -123.3125, "logps_train/rejected": -314.33074951171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.275479793548584, "rewards_train/margins": 17.827518939971924, "rewards_train/rejected": -19.102998733520508, "step": 4293 }, { "epoch": 2.11, "learning_rate": 2.2091874380402047e-07, "loss": 0.0, "step": 4294 }, { "epoch": 2.11, "logps_train/chosen": -80.6821060180664, "logps_train/ref_chosen": -67.625, "logps_train/ref_rejected": -132.25, "logps_train/rejected": -327.2988586425781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3077123165130615, "rewards_train/margins": 18.193805932998657, "rewards_train/rejected": -19.50151824951172, "step": 4294 }, { "epoch": 2.11, "learning_rate": 2.2069360534160863e-07, "loss": 0.0, "step": 4295 }, { "epoch": 2.11, "logps_train/chosen": -76.66874694824219, "logps_train/ref_chosen": -63.59375, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -316.4171142578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3070597648620605, "rewards_train/margins": 17.873466968536377, "rewards_train/rejected": -19.180526733398438, "step": 4295 }, { "epoch": 2.12, "learning_rate": 2.204685491647455e-07, "loss": 0.0, "step": 4296 }, { "epoch": 2.12, "logps_train/chosen": -76.69197082519531, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -307.7261657714844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2722725868225098, "rewards_train/margins": 17.248929500579834, "rewards_train/rejected": -18.521202087402344, "step": 4296 }, { "epoch": 2.12, "learning_rate": 2.2024357533973397e-07, "loss": 0.0, "step": 4297 }, { "epoch": 2.12, "logps_train/chosen": -80.59678649902344, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -319.53131103515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.54200279712677, "rewards_train/margins": 17.775383830070496, "rewards_train/rejected": -19.317386627197266, "step": 4297 }, { "epoch": 2.12, "learning_rate": 2.2001868393285273e-07, "loss": 0.0001, "step": 4298 }, { "epoch": 2.12, "logps_train/chosen": -76.40901947021484, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -312.77935791015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.427254319190979, "rewards_train/margins": 17.46903932094574, "rewards_train/rejected": -18.89629364013672, "step": 4298 }, { "epoch": 2.12, "learning_rate": 2.1979387501035663e-07, "loss": 0.0, "step": 4299 }, { "epoch": 2.12, "logps_train/chosen": -75.15278625488281, "logps_train/ref_chosen": -62.46875, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -308.33251953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2684528827667236, "rewards_train/margins": 17.387118101119995, "rewards_train/rejected": -18.65557098388672, "step": 4299 }, { "epoch": 2.12, "learning_rate": 2.195691486384757e-07, "loss": 0.0001, "step": 4300 }, { "epoch": 2.12, "logps_train/chosen": -74.90628814697266, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -320.2776794433594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.197709083557129, "rewards_train/margins": 18.01375102996826, "rewards_train/rejected": -19.21146011352539, "step": 4300 }, { "epoch": 2.12, "learning_rate": 2.1934450488341583e-07, "loss": 0.0, "step": 4301 }, { "epoch": 2.12, "logps_train/chosen": -74.15585327148438, "logps_train/ref_chosen": -62.09375, "logps_train/ref_rejected": -121.1875, "logps_train/rejected": -307.02362060546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2074307203292847, "rewards_train/margins": 17.375352025032043, "rewards_train/rejected": -18.582782745361328, "step": 4301 }, { "epoch": 2.12, "learning_rate": 2.191199438113585e-07, "loss": 0.0, "step": 4302 }, { "epoch": 2.12, "logps_train/chosen": -74.56825256347656, "logps_train/ref_chosen": -61.65625, "logps_train/ref_rejected": -119.0, "logps_train/rejected": -299.4213562011719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2913955450057983, "rewards_train/margins": 16.750299096107483, "rewards_train/rejected": -18.04169464111328, "step": 4302 }, { "epoch": 2.12, "learning_rate": 2.1889546548846116e-07, "loss": 0.0, "step": 4303 }, { "epoch": 2.12, "logps_train/chosen": -82.97666931152344, "logps_train/ref_chosen": -67.4375, "logps_train/ref_rejected": -132.5, "logps_train/rejected": -326.639892578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.551914930343628, "rewards_train/margins": 17.869401216506958, "rewards_train/rejected": -19.421316146850586, "step": 4303 }, { "epoch": 2.12, "learning_rate": 2.1867106998085655e-07, "loss": 0.0, "step": 4304 }, { "epoch": 2.12, "logps_train/chosen": -82.53765869140625, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -324.7820129394531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6743223667144775, "rewards_train/margins": 17.729856252670288, "rewards_train/rejected": -19.404178619384766, "step": 4304 }, { "epoch": 2.12, "learning_rate": 2.1844675735465284e-07, "loss": 0.0, "step": 4305 }, { "epoch": 2.12, "logps_train/chosen": -76.85099792480469, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -315.98114013671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2453049421310425, "rewards_train/margins": 17.663844227790833, "rewards_train/rejected": -18.909149169921875, "step": 4305 }, { "epoch": 2.12, "learning_rate": 2.1822252767593453e-07, "loss": 0.0001, "step": 4306 }, { "epoch": 2.12, "logps_train/chosen": -77.90364837646484, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -316.60882568359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4498381614685059, "rewards_train/margins": 17.721739292144775, "rewards_train/rejected": -19.17157745361328, "step": 4306 }, { "epoch": 2.12, "learning_rate": 2.1799838101076084e-07, "loss": 0.0, "step": 4307 }, { "epoch": 2.12, "logps_train/chosen": -85.26049041748047, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -329.0972900390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8357173204421997, "rewards_train/margins": 18.186022639274597, "rewards_train/rejected": -20.021739959716797, "step": 4307 }, { "epoch": 2.12, "learning_rate": 2.1777431742516688e-07, "loss": 0.0, "step": 4308 }, { "epoch": 2.12, "logps_train/chosen": -78.78118896484375, "logps_train/ref_chosen": -62.84375, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -305.40301513671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.59364652633667, "rewards_train/margins": 16.787474155426025, "rewards_train/rejected": -18.381120681762695, "step": 4308 }, { "epoch": 2.12, "learning_rate": 2.1755033698516373e-07, "loss": 0.0, "step": 4309 }, { "epoch": 2.12, "logps_train/chosen": -81.96385192871094, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -311.4887390136719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6006336212158203, "rewards_train/margins": 17.10942268371582, "rewards_train/rejected": -18.71005630493164, "step": 4309 }, { "epoch": 2.12, "learning_rate": 2.173264397567368e-07, "loss": 0.0, "step": 4310 }, { "epoch": 2.12, "logps_train/chosen": -77.79303741455078, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -312.68585205078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3322337865829468, "rewards_train/margins": 17.60002100467682, "rewards_train/rejected": -18.932254791259766, "step": 4310 }, { "epoch": 2.12, "learning_rate": 2.1710262580584838e-07, "loss": 0.0, "step": 4311 }, { "epoch": 2.12, "logps_train/chosen": -80.24049377441406, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -326.3394470214844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4196546077728271, "rewards_train/margins": 18.16516900062561, "rewards_train/rejected": -19.584823608398438, "step": 4311 }, { "epoch": 2.12, "learning_rate": 2.1687889519843532e-07, "loss": 0.0, "step": 4312 }, { "epoch": 2.12, "logps_train/chosen": -76.14968872070312, "logps_train/ref_chosen": -62.8125, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -322.546630859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3345000743865967, "rewards_train/margins": 18.22846531867981, "rewards_train/rejected": -19.562965393066406, "step": 4312 }, { "epoch": 2.12, "learning_rate": 2.1665524800041013e-07, "loss": 0.0, "step": 4313 }, { "epoch": 2.12, "logps_train/chosen": -76.57384490966797, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -120.625, "logps_train/rejected": -305.70098876953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.287316083908081, "rewards_train/margins": 17.21901249885559, "rewards_train/rejected": -18.506328582763672, "step": 4313 }, { "epoch": 2.12, "learning_rate": 2.1643168427766066e-07, "loss": 0.0, "step": 4314 }, { "epoch": 2.12, "logps_train/chosen": -72.97107696533203, "logps_train/ref_chosen": -61.0625, "logps_train/ref_rejected": -119.875, "logps_train/rejected": -303.77996826171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1912975311279297, "rewards_train/margins": 17.194902420043945, "rewards_train/rejected": -18.386199951171875, "step": 4314 }, { "epoch": 2.12, "learning_rate": 2.1620820409605062e-07, "loss": 0.0002, "step": 4315 }, { "epoch": 2.12, "logps_train/chosen": -80.62648010253906, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -120.25, "logps_train/rejected": -298.53350830078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5908706188201904, "rewards_train/margins": 16.237090349197388, "rewards_train/rejected": -17.827960968017578, "step": 4315 }, { "epoch": 2.13, "learning_rate": 2.1598480752141863e-07, "loss": 0.0, "step": 4316 }, { "epoch": 2.13, "logps_train/chosen": -84.979248046875, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -317.3850402832031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.01901912689209, "rewards_train/margins": 17.27983570098877, "rewards_train/rejected": -19.29885482788086, "step": 4316 }, { "epoch": 2.13, "learning_rate": 2.1576149461957865e-07, "loss": 0.0002, "step": 4317 }, { "epoch": 2.13, "logps_train/chosen": -78.62742614746094, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -311.22998046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4698225259780884, "rewards_train/margins": 17.45327317714691, "rewards_train/rejected": -18.923095703125, "step": 4317 }, { "epoch": 2.13, "learning_rate": 2.155382654563205e-07, "loss": 0.0001, "step": 4318 }, { "epoch": 2.13, "logps_train/chosen": -79.63134002685547, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -313.4631042480469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4055166244506836, "rewards_train/margins": 17.292500495910645, "rewards_train/rejected": -18.698017120361328, "step": 4318 }, { "epoch": 2.13, "learning_rate": 2.1531512009740877e-07, "loss": 0.0001, "step": 4319 }, { "epoch": 2.13, "logps_train/chosen": -77.25211334228516, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -322.0000305175781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3076330423355103, "rewards_train/margins": 18.042859435081482, "rewards_train/rejected": -19.350492477416992, "step": 4319 }, { "epoch": 2.13, "learning_rate": 2.150920586085835e-07, "loss": 0.0, "step": 4320 }, { "epoch": 2.13, "logps_train/chosen": -79.01134490966797, "logps_train/ref_chosen": -62.46875, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -318.7489013671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6551871299743652, "rewards_train/margins": 17.811649799346924, "rewards_train/rejected": -19.46683692932129, "step": 4320 }, { "epoch": 2.13, "learning_rate": 2.1486908105556046e-07, "loss": 0.0, "step": 4321 }, { "epoch": 2.13, "logps_train/chosen": -75.60137939453125, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -322.708740234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1564515829086304, "rewards_train/margins": 18.515251755714417, "rewards_train/rejected": -19.671703338623047, "step": 4321 }, { "epoch": 2.13, "learning_rate": 2.1464618750403012e-07, "loss": 0.0, "step": 4322 }, { "epoch": 2.13, "logps_train/chosen": -79.70826721191406, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -122.8125, "logps_train/rejected": -313.65594482421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6086204051971436, "rewards_train/margins": 17.47777485847473, "rewards_train/rejected": -19.086395263671875, "step": 4322 }, { "epoch": 2.13, "learning_rate": 2.1442337801965843e-07, "loss": 0.0, "step": 4323 }, { "epoch": 2.13, "logps_train/chosen": -77.7225112915039, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -132.5, "logps_train/rejected": -326.4363708496094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1153669357299805, "rewards_train/margins": 18.27680492401123, "rewards_train/rejected": -19.39217185974121, "step": 4323 }, { "epoch": 2.13, "learning_rate": 2.142006526680864e-07, "loss": 0.0, "step": 4324 }, { "epoch": 2.13, "logps_train/chosen": -81.81802368164062, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -320.41766357421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6417152881622314, "rewards_train/margins": 17.857178449630737, "rewards_train/rejected": -19.49889373779297, "step": 4324 }, { "epoch": 2.13, "learning_rate": 2.1397801151493078e-07, "loss": 0.0, "step": 4325 }, { "epoch": 2.13, "logps_train/chosen": -78.55726623535156, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -325.94598388671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.297377109527588, "rewards_train/margins": 18.928571224212646, "rewards_train/rejected": -20.225948333740234, "step": 4325 }, { "epoch": 2.13, "learning_rate": 2.13755454625783e-07, "loss": 0.0, "step": 4326 }, { "epoch": 2.13, "logps_train/chosen": -77.46812438964844, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -312.437255859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.245152235031128, "rewards_train/margins": 17.858485460281372, "rewards_train/rejected": -19.1036376953125, "step": 4326 }, { "epoch": 2.13, "learning_rate": 2.1353298206620957e-07, "loss": 0.0002, "step": 4327 }, { "epoch": 2.13, "logps_train/chosen": -73.57453155517578, "logps_train/ref_chosen": -61.4375, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -317.3847961425781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2140933275222778, "rewards_train/margins": 18.06066405773163, "rewards_train/rejected": -19.274757385253906, "step": 4327 }, { "epoch": 2.13, "learning_rate": 2.1331059390175305e-07, "loss": 0.0, "step": 4328 }, { "epoch": 2.13, "logps_train/chosen": -81.59056091308594, "logps_train/ref_chosen": -66.9375, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -322.25469970703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4673081636428833, "rewards_train/margins": 17.94136416912079, "rewards_train/rejected": -19.408672332763672, "step": 4328 }, { "epoch": 2.13, "learning_rate": 2.1308829019792968e-07, "loss": 0.0, "step": 4329 }, { "epoch": 2.13, "logps_train/chosen": -75.78309631347656, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -119.8125, "logps_train/rejected": -313.86944580078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1576311588287354, "rewards_train/margins": 18.24855399131775, "rewards_train/rejected": -19.406185150146484, "step": 4329 }, { "epoch": 2.13, "learning_rate": 2.1286607102023203e-07, "loss": 0.0001, "step": 4330 }, { "epoch": 2.13, "logps_train/chosen": -77.21966552734375, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -314.7901916503906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3509221076965332, "rewards_train/margins": 17.70671033859253, "rewards_train/rejected": -19.057632446289062, "step": 4330 }, { "epoch": 2.13, "learning_rate": 2.1264393643412775e-07, "loss": 0.0, "step": 4331 }, { "epoch": 2.13, "logps_train/chosen": -80.63148498535156, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -133.0, "logps_train/rejected": -332.3311767578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4081684350967407, "rewards_train/margins": 18.52319371700287, "rewards_train/rejected": -19.93136215209961, "step": 4331 }, { "epoch": 2.13, "learning_rate": 2.1242188650505843e-07, "loss": 0.0001, "step": 4332 }, { "epoch": 2.13, "logps_train/chosen": -73.47393798828125, "logps_train/ref_chosen": -62.84375, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -318.61737060546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.063629150390625, "rewards_train/margins": 18.34844970703125, "rewards_train/rejected": -19.412078857421875, "step": 4332 }, { "epoch": 2.13, "learning_rate": 2.1219992129844207e-07, "loss": 0.0, "step": 4333 }, { "epoch": 2.13, "logps_train/chosen": -77.22084045410156, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -121.9375, "logps_train/rejected": -311.41473388671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4484999179840088, "rewards_train/margins": 17.5002019405365, "rewards_train/rejected": -18.948701858520508, "step": 4333 }, { "epoch": 2.13, "learning_rate": 2.1197804087967085e-07, "loss": 0.0, "step": 4334 }, { "epoch": 2.13, "logps_train/chosen": -81.62054443359375, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -329.98040771484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5126402378082275, "rewards_train/margins": 18.4286630153656, "rewards_train/rejected": -19.941303253173828, "step": 4334 }, { "epoch": 2.13, "learning_rate": 2.1175624531411213e-07, "loss": 0.0004, "step": 4335 }, { "epoch": 2.13, "logps_train/chosen": -77.45549774169922, "logps_train/ref_chosen": -62.65625, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -315.0566711425781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4786070585250854, "rewards_train/margins": 17.359970211982727, "rewards_train/rejected": -18.838577270507812, "step": 4335 }, { "epoch": 2.13, "learning_rate": 2.1153453466710875e-07, "loss": 0.0, "step": 4336 }, { "epoch": 2.13, "logps_train/chosen": -78.35702514648438, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -319.1220703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4865329265594482, "rewards_train/margins": 17.838563203811646, "rewards_train/rejected": -19.325096130371094, "step": 4336 }, { "epoch": 2.14, "learning_rate": 2.113129090039779e-07, "loss": 0.0, "step": 4337 }, { "epoch": 2.14, "logps_train/chosen": -79.97003173828125, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -318.1479187011719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.32966947555542, "rewards_train/margins": 17.969400882720947, "rewards_train/rejected": -19.299070358276367, "step": 4337 }, { "epoch": 2.14, "learning_rate": 2.1109136839001206e-07, "loss": 0.0, "step": 4338 }, { "epoch": 2.14, "logps_train/chosen": -81.79629516601562, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -322.8182373046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4679838418960571, "rewards_train/margins": 17.9715074300766, "rewards_train/rejected": -19.439491271972656, "step": 4338 }, { "epoch": 2.14, "learning_rate": 2.1086991289047839e-07, "loss": 0.0003, "step": 4339 }, { "epoch": 2.14, "logps_train/chosen": -78.99891662597656, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -121.9375, "logps_train/rejected": -303.45550537109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3865127563476562, "rewards_train/margins": 16.767438888549805, "rewards_train/rejected": -18.15395164489746, "step": 4339 }, { "epoch": 2.14, "learning_rate": 2.106485425706196e-07, "loss": 0.0, "step": 4340 }, { "epoch": 2.14, "logps_train/chosen": -73.36853790283203, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -313.07086181640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8981817960739136, "rewards_train/margins": 18.121357083320618, "rewards_train/rejected": -19.01953887939453, "step": 4340 }, { "epoch": 2.14, "learning_rate": 2.104272574956526e-07, "loss": 0.0, "step": 4341 }, { "epoch": 2.14, "logps_train/chosen": -74.1888656616211, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -120.25, "logps_train/rejected": -311.2262268066406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0301172733306885, "rewards_train/margins": 18.071704626083374, "rewards_train/rejected": -19.101821899414062, "step": 4341 }, { "epoch": 2.14, "learning_rate": 2.1020605773076933e-07, "loss": 0.0, "step": 4342 }, { "epoch": 2.14, "logps_train/chosen": -73.89666748046875, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -312.21417236328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9560732841491699, "rewards_train/margins": 17.65045404434204, "rewards_train/rejected": -18.60652732849121, "step": 4342 }, { "epoch": 2.14, "learning_rate": 2.0998494334113732e-07, "loss": 0.0, "step": 4343 }, { "epoch": 2.14, "logps_train/chosen": -83.07209777832031, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -122.4375, "logps_train/rejected": -311.8759765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8587238788604736, "rewards_train/margins": 17.085758924484253, "rewards_train/rejected": -18.944482803344727, "step": 4343 }, { "epoch": 2.14, "learning_rate": 2.0976391439189756e-07, "loss": 0.0, "step": 4344 }, { "epoch": 2.14, "logps_train/chosen": -79.98295593261719, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -325.83770751953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.372490644454956, "rewards_train/margins": 18.198390245437622, "rewards_train/rejected": -19.570880889892578, "step": 4344 }, { "epoch": 2.14, "learning_rate": 2.0954297094816702e-07, "loss": 0.0, "step": 4345 }, { "epoch": 2.14, "logps_train/chosen": -79.63218688964844, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -305.039306640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5486679077148438, "rewards_train/margins": 16.678260803222656, "rewards_train/rejected": -18.2269287109375, "step": 4345 }, { "epoch": 2.14, "learning_rate": 2.093221130750376e-07, "loss": 0.0001, "step": 4346 }, { "epoch": 2.14, "logps_train/chosen": -74.89126586914062, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -317.76324462890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.07560133934021, "rewards_train/margins": 18.11654496192932, "rewards_train/rejected": -19.19214630126953, "step": 4346 }, { "epoch": 2.14, "learning_rate": 2.091013408375747e-07, "loss": 0.0, "step": 4347 }, { "epoch": 2.14, "logps_train/chosen": -75.06564331054688, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -317.41937255859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0724337100982666, "rewards_train/margins": 18.210227727890015, "rewards_train/rejected": -19.28266143798828, "step": 4347 }, { "epoch": 2.14, "learning_rate": 2.088806543008199e-07, "loss": 0.0, "step": 4348 }, { "epoch": 2.14, "logps_train/chosen": -80.11327362060547, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -121.1875, "logps_train/rejected": -301.97454833984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.357323169708252, "rewards_train/margins": 16.72093915939331, "rewards_train/rejected": -18.078262329101562, "step": 4348 }, { "epoch": 2.14, "learning_rate": 2.0866005352978872e-07, "loss": 0.0003, "step": 4349 }, { "epoch": 2.14, "logps_train/chosen": -79.22319030761719, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -321.8414306640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5933151245117188, "rewards_train/margins": 18.139705657958984, "rewards_train/rejected": -19.733020782470703, "step": 4349 }, { "epoch": 2.14, "learning_rate": 2.0843953858947172e-07, "loss": 0.0, "step": 4350 }, { "epoch": 2.14, "logps_train/chosen": -81.22106170654297, "logps_train/ref_chosen": -66.9375, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -324.98095703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4299187660217285, "rewards_train/margins": 18.101476192474365, "rewards_train/rejected": -19.531394958496094, "step": 4350 }, { "epoch": 2.14, "learning_rate": 2.082191095448338e-07, "loss": 0.0, "step": 4351 }, { "epoch": 2.14, "logps_train/chosen": -77.29342651367188, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -322.03192138671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2506803274154663, "rewards_train/margins": 18.13747465610504, "rewards_train/rejected": -19.388154983520508, "step": 4351 }, { "epoch": 2.14, "learning_rate": 2.0799876646081526e-07, "loss": 0.0, "step": 4352 }, { "epoch": 2.14, "logps_train/chosen": -81.23490905761719, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -324.02679443359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.408402681350708, "rewards_train/margins": 18.152090787887573, "rewards_train/rejected": -19.56049346923828, "step": 4352 }, { "epoch": 2.14, "learning_rate": 2.077785094023305e-07, "loss": 0.0, "step": 4353 }, { "epoch": 2.14, "logps_train/chosen": -80.89553833007812, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -316.6053466796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.595852255821228, "rewards_train/margins": 17.67430078983307, "rewards_train/rejected": -19.270153045654297, "step": 4353 }, { "epoch": 2.14, "learning_rate": 2.0755833843426852e-07, "loss": 0.0003, "step": 4354 }, { "epoch": 2.14, "logps_train/chosen": -72.74263000488281, "logps_train/ref_chosen": -62.1875, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -306.34234619140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0560015439987183, "rewards_train/margins": 17.322620511054993, "rewards_train/rejected": -18.37862205505371, "step": 4354 }, { "epoch": 2.14, "learning_rate": 2.0733825362149353e-07, "loss": 0.0, "step": 4355 }, { "epoch": 2.14, "logps_train/chosen": -77.87724304199219, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -320.88800048828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1776174306869507, "rewards_train/margins": 18.282274842262268, "rewards_train/rejected": -19.45989227294922, "step": 4355 }, { "epoch": 2.14, "learning_rate": 2.0711825502884384e-07, "loss": 0.0, "step": 4356 }, { "epoch": 2.14, "logps_train/chosen": -74.49401092529297, "logps_train/ref_chosen": -62.1875, "logps_train/ref_rejected": -120.9375, "logps_train/rejected": -310.0821533203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2297968864440918, "rewards_train/margins": 17.68388605117798, "rewards_train/rejected": -18.91368293762207, "step": 4356 }, { "epoch": 2.15, "learning_rate": 2.0689834272113233e-07, "loss": 0.0001, "step": 4357 }, { "epoch": 2.15, "logps_train/chosen": -77.90745544433594, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -308.6358642578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2741929292678833, "rewards_train/margins": 17.26649272441864, "rewards_train/rejected": -18.540685653686523, "step": 4357 }, { "epoch": 2.15, "learning_rate": 2.0667851676314702e-07, "loss": 0.0, "step": 4358 }, { "epoch": 2.15, "logps_train/chosen": -82.81422424316406, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -315.7724304199219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8135513067245483, "rewards_train/margins": 17.270917534828186, "rewards_train/rejected": -19.084468841552734, "step": 4358 }, { "epoch": 2.15, "learning_rate": 2.0645877721964993e-07, "loss": 0.0001, "step": 4359 }, { "epoch": 2.15, "logps_train/chosen": -79.83724975585938, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -315.59930419921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4484713077545166, "rewards_train/margins": 17.501399755477905, "rewards_train/rejected": -18.949871063232422, "step": 4359 }, { "epoch": 2.15, "learning_rate": 2.0623912415537787e-07, "loss": 0.0001, "step": 4360 }, { "epoch": 2.15, "logps_train/chosen": -79.72225952148438, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -131.75, "logps_train/rejected": -332.64990234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4055752754211426, "rewards_train/margins": 18.690375804901123, "rewards_train/rejected": -20.095951080322266, "step": 4360 }, { "epoch": 2.15, "learning_rate": 2.0601955763504204e-07, "loss": 0.0, "step": 4361 }, { "epoch": 2.15, "logps_train/chosen": -83.85401153564453, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -313.2504577636719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8072277307510376, "rewards_train/margins": 17.05263364315033, "rewards_train/rejected": -18.859861373901367, "step": 4361 }, { "epoch": 2.15, "learning_rate": 2.05800077723328e-07, "loss": 0.0, "step": 4362 }, { "epoch": 2.15, "logps_train/chosen": -79.15400695800781, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -326.681884765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5767292976379395, "rewards_train/margins": 18.33061933517456, "rewards_train/rejected": -19.9073486328125, "step": 4362 }, { "epoch": 2.15, "learning_rate": 2.0558068448489646e-07, "loss": 0.0, "step": 4363 }, { "epoch": 2.15, "logps_train/chosen": -84.46697998046875, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -314.7376708984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0602962970733643, "rewards_train/margins": 17.11474108695984, "rewards_train/rejected": -19.175037384033203, "step": 4363 }, { "epoch": 2.15, "learning_rate": 2.0536137798438196e-07, "loss": 0.0, "step": 4364 }, { "epoch": 2.15, "logps_train/chosen": -75.00529479980469, "logps_train/ref_chosen": -62.4375, "logps_train/ref_rejected": -122.3125, "logps_train/rejected": -312.8926696777344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.256901741027832, "rewards_train/margins": 17.798232078552246, "rewards_train/rejected": -19.055133819580078, "step": 4364 }, { "epoch": 2.15, "learning_rate": 2.0514215828639363e-07, "loss": 0.0006, "step": 4365 }, { "epoch": 2.15, "logps_train/chosen": -81.42595672607422, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -329.845947265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5762139558792114, "rewards_train/margins": 18.427913546562195, "rewards_train/rejected": -20.004127502441406, "step": 4365 }, { "epoch": 2.15, "learning_rate": 2.04923025455515e-07, "loss": 0.0, "step": 4366 }, { "epoch": 2.15, "logps_train/chosen": -79.19412231445312, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -315.8305358886719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5259549617767334, "rewards_train/margins": 17.699093103408813, "rewards_train/rejected": -19.225048065185547, "step": 4366 }, { "epoch": 2.15, "learning_rate": 2.0470397955630425e-07, "loss": 0.0, "step": 4367 }, { "epoch": 2.15, "logps_train/chosen": -79.68753814697266, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -325.37042236328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4537148475646973, "rewards_train/margins": 18.38782262802124, "rewards_train/rejected": -19.841537475585938, "step": 4367 }, { "epoch": 2.15, "learning_rate": 2.0448502065329382e-07, "loss": 0.0001, "step": 4368 }, { "epoch": 2.15, "logps_train/chosen": -74.74329376220703, "logps_train/ref_chosen": -62.59375, "logps_train/ref_rejected": -121.9375, "logps_train/rejected": -308.53326416015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2163699865341187, "rewards_train/margins": 17.446332573890686, "rewards_train/rejected": -18.662702560424805, "step": 4368 }, { "epoch": 2.15, "learning_rate": 2.0426614881099007e-07, "loss": 0.0003, "step": 4369 }, { "epoch": 2.15, "logps_train/chosen": -75.74740600585938, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -315.41607666015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1690280437469482, "rewards_train/margins": 17.898946046829224, "rewards_train/rejected": -19.067974090576172, "step": 4369 }, { "epoch": 2.15, "learning_rate": 2.0404736409387467e-07, "loss": 0.0, "step": 4370 }, { "epoch": 2.15, "logps_train/chosen": -75.97258758544922, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -317.9624328613281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3065357208251953, "rewards_train/margins": 18.096298217773438, "rewards_train/rejected": -19.402833938598633, "step": 4370 }, { "epoch": 2.15, "learning_rate": 2.0382866656640286e-07, "loss": 0.0, "step": 4371 }, { "epoch": 2.15, "logps_train/chosen": -83.20586395263672, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -335.63958740234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6945610046386719, "rewards_train/margins": 18.77193832397461, "rewards_train/rejected": -20.46649932861328, "step": 4371 }, { "epoch": 2.15, "learning_rate": 2.0361005629300415e-07, "loss": 0.0001, "step": 4372 }, { "epoch": 2.15, "logps_train/chosen": -80.5938949584961, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -323.43963623046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7625148296356201, "rewards_train/margins": 18.162993669509888, "rewards_train/rejected": -19.925508499145508, "step": 4372 }, { "epoch": 2.15, "learning_rate": 2.03391533338083e-07, "loss": 0.0, "step": 4373 }, { "epoch": 2.15, "logps_train/chosen": -82.3486328125, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -130.375, "logps_train/rejected": -329.31695556640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5757813453674316, "rewards_train/margins": 18.313092708587646, "rewards_train/rejected": -19.888874053955078, "step": 4373 }, { "epoch": 2.15, "learning_rate": 2.031730977660176e-07, "loss": 0.0, "step": 4374 }, { "epoch": 2.15, "logps_train/chosen": -77.51455688476562, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -319.2598876953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2335848808288574, "rewards_train/margins": 17.949142932891846, "rewards_train/rejected": -19.182727813720703, "step": 4374 }, { "epoch": 2.15, "learning_rate": 2.0295474964116045e-07, "loss": 0.0, "step": 4375 }, { "epoch": 2.15, "logps_train/chosen": -80.08650207519531, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -131.875, "logps_train/rejected": -328.1322937011719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.459822177886963, "rewards_train/margins": 18.167762279510498, "rewards_train/rejected": -19.62758445739746, "step": 4375 }, { "epoch": 2.15, "learning_rate": 2.0273648902783834e-07, "loss": 0.0, "step": 4376 }, { "epoch": 2.15, "logps_train/chosen": -72.41133880615234, "logps_train/ref_chosen": -61.03125, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -317.49896240234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.137203335762024, "rewards_train/margins": 18.196042895317078, "rewards_train/rejected": -19.3332462310791, "step": 4376 }, { "epoch": 2.16, "learning_rate": 2.0251831599035258e-07, "loss": 0.0, "step": 4377 }, { "epoch": 2.16, "logps_train/chosen": -79.75765991210938, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -324.7411804199219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6281580924987793, "rewards_train/margins": 18.22867441177368, "rewards_train/rejected": -19.85683250427246, "step": 4377 }, { "epoch": 2.16, "learning_rate": 2.0230023059297824e-07, "loss": 0.0, "step": 4378 }, { "epoch": 2.16, "logps_train/chosen": -73.3383560180664, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -317.13201904296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8024632334709167, "rewards_train/margins": 18.3216250538826, "rewards_train/rejected": -19.124088287353516, "step": 4378 }, { "epoch": 2.16, "learning_rate": 2.020822328999646e-07, "loss": 0.0002, "step": 4379 }, { "epoch": 2.16, "logps_train/chosen": -77.28277587890625, "logps_train/ref_chosen": -62.28125, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -304.8135986328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5002262592315674, "rewards_train/margins": 16.87917971611023, "rewards_train/rejected": -18.379405975341797, "step": 4379 }, { "epoch": 2.16, "learning_rate": 2.018643229755358e-07, "loss": 0.0, "step": 4380 }, { "epoch": 2.16, "logps_train/chosen": -81.70841979980469, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -322.7583923339844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.583292841911316, "rewards_train/margins": 17.72320854663849, "rewards_train/rejected": -19.306501388549805, "step": 4380 }, { "epoch": 2.16, "learning_rate": 2.0164650088388885e-07, "loss": 0.0, "step": 4381 }, { "epoch": 2.16, "logps_train/chosen": -83.22311401367188, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -328.532470703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6807582378387451, "rewards_train/margins": 18.362332582473755, "rewards_train/rejected": -20.0430908203125, "step": 4381 }, { "epoch": 2.16, "learning_rate": 2.0142876668919595e-07, "loss": 0.0, "step": 4382 }, { "epoch": 2.16, "logps_train/chosen": -71.40477752685547, "logps_train/ref_chosen": -61.375, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -308.98974609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0032223463058472, "rewards_train/margins": 17.6168452501297, "rewards_train/rejected": -18.620067596435547, "step": 4382 }, { "epoch": 2.16, "learning_rate": 2.0121112045560346e-07, "loss": 0.0, "step": 4383 }, { "epoch": 2.16, "logps_train/chosen": -73.57807922363281, "logps_train/ref_chosen": -61.03125, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -308.8980712890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2548301219940186, "rewards_train/margins": 17.411444902420044, "rewards_train/rejected": -18.666275024414062, "step": 4383 }, { "epoch": 2.16, "learning_rate": 2.0099356224723063e-07, "loss": 0.0, "step": 4384 }, { "epoch": 2.16, "logps_train/chosen": -78.3393325805664, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -308.3614196777344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4711159467697144, "rewards_train/margins": 17.08851158618927, "rewards_train/rejected": -18.559627532958984, "step": 4384 }, { "epoch": 2.16, "learning_rate": 2.007760921281722e-07, "loss": 0.0, "step": 4385 }, { "epoch": 2.16, "logps_train/chosen": -79.1506576538086, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -321.840087890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3077412843704224, "rewards_train/margins": 18.04989993572235, "rewards_train/rejected": -19.357641220092773, "step": 4385 }, { "epoch": 2.16, "learning_rate": 2.005587101624961e-07, "loss": 0.0, "step": 4386 }, { "epoch": 2.16, "logps_train/chosen": -79.70993041992188, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -322.52545166015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4285857677459717, "rewards_train/margins": 18.16443943977356, "rewards_train/rejected": -19.59302520751953, "step": 4386 }, { "epoch": 2.16, "learning_rate": 2.0034141641424436e-07, "loss": 0.0, "step": 4387 }, { "epoch": 2.16, "logps_train/chosen": -76.27978515625, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -318.4506530761719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.151513695716858, "rewards_train/margins": 17.957516074180603, "rewards_train/rejected": -19.10902976989746, "step": 4387 }, { "epoch": 2.16, "learning_rate": 2.0012421094743353e-07, "loss": 0.0, "step": 4388 }, { "epoch": 2.16, "logps_train/chosen": -78.61590576171875, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -316.6480407714844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.416473388671875, "rewards_train/margins": 17.546476364135742, "rewards_train/rejected": -18.962949752807617, "step": 4388 }, { "epoch": 2.16, "learning_rate": 1.999070938260537e-07, "loss": 0.0, "step": 4389 }, { "epoch": 2.16, "logps_train/chosen": -78.05403137207031, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -120.5, "logps_train/rejected": -308.181396484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4436358213424683, "rewards_train/margins": 17.32308781147003, "rewards_train/rejected": -18.7667236328125, "step": 4389 }, { "epoch": 2.16, "learning_rate": 1.9969006511406893e-07, "loss": 0.0001, "step": 4390 }, { "epoch": 2.16, "logps_train/chosen": -78.557861328125, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -327.9580078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5471923351287842, "rewards_train/margins": 18.479758501052856, "rewards_train/rejected": -20.02695083618164, "step": 4390 }, { "epoch": 2.16, "learning_rate": 1.994731248754173e-07, "loss": 0.0, "step": 4391 }, { "epoch": 2.16, "logps_train/chosen": -76.57327270507812, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -318.29632568359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3658723831176758, "rewards_train/margins": 17.95761013031006, "rewards_train/rejected": -19.323482513427734, "step": 4391 }, { "epoch": 2.16, "learning_rate": 1.9925627317401128e-07, "loss": 0.0, "step": 4392 }, { "epoch": 2.16, "logps_train/chosen": -79.36677551269531, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -315.71923828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4931720495224, "rewards_train/margins": 17.542230248451233, "rewards_train/rejected": -19.035402297973633, "step": 4392 }, { "epoch": 2.16, "learning_rate": 1.9903951007373616e-07, "loss": 0.0001, "step": 4393 }, { "epoch": 2.16, "logps_train/chosen": -77.99009704589844, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -316.4235534667969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2735698223114014, "rewards_train/margins": 17.7722065448761, "rewards_train/rejected": -19.0457763671875, "step": 4393 }, { "epoch": 2.16, "learning_rate": 1.9882283563845216e-07, "loss": 0.0005, "step": 4394 }, { "epoch": 2.16, "logps_train/chosen": -80.07131958007812, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -319.86968994140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5379667282104492, "rewards_train/margins": 17.517361640930176, "rewards_train/rejected": -19.055328369140625, "step": 4394 }, { "epoch": 2.16, "learning_rate": 1.9860624993199342e-07, "loss": 0.0001, "step": 4395 }, { "epoch": 2.16, "logps_train/chosen": -78.7777099609375, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -318.08935546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4369993209838867, "rewards_train/margins": 17.659436225891113, "rewards_train/rejected": -19.096435546875, "step": 4395 }, { "epoch": 2.16, "learning_rate": 1.9838975301816684e-07, "loss": 0.0, "step": 4396 }, { "epoch": 2.16, "logps_train/chosen": -75.61477661132812, "logps_train/ref_chosen": -62.03125, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -308.323486328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.357645034790039, "rewards_train/margins": 17.278024673461914, "rewards_train/rejected": -18.635669708251953, "step": 4396 }, { "epoch": 2.16, "learning_rate": 1.9817334496075444e-07, "loss": 0.0002, "step": 4397 }, { "epoch": 2.16, "logps_train/chosen": -77.18589782714844, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -316.4579772949219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.316465139389038, "rewards_train/margins": 17.863123178482056, "rewards_train/rejected": -19.179588317871094, "step": 4397 }, { "epoch": 2.17, "learning_rate": 1.979570258235112e-07, "loss": 0.0, "step": 4398 }, { "epoch": 2.17, "logps_train/chosen": -82.12614440917969, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -322.421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6405439376831055, "rewards_train/margins": 17.78406810760498, "rewards_train/rejected": -19.424612045288086, "step": 4398 }, { "epoch": 2.17, "learning_rate": 1.977407956701661e-07, "loss": 0.0, "step": 4399 }, { "epoch": 2.17, "logps_train/chosen": -77.4676513671875, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -316.45355224609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.293981909751892, "rewards_train/margins": 17.80640184879303, "rewards_train/rejected": -19.100383758544922, "step": 4399 }, { "epoch": 2.17, "learning_rate": 1.9752465456442234e-07, "loss": 0.0, "step": 4400 }, { "epoch": 2.17, "logps_train/chosen": -79.04415130615234, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -131.625, "logps_train/rejected": -332.50048828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4343470335006714, "rewards_train/margins": 18.656328320503235, "rewards_train/rejected": -20.090675354003906, "step": 4400 }, { "epoch": 2.17, "learning_rate": 1.973086025699564e-07, "loss": 0.0, "step": 4401 }, { "epoch": 2.17, "logps_train/chosen": -79.1539306640625, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -324.46014404296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4649535417556763, "rewards_train/margins": 18.046784281730652, "rewards_train/rejected": -19.511737823486328, "step": 4401 }, { "epoch": 2.17, "learning_rate": 1.9709263975041867e-07, "loss": 0.0, "step": 4402 }, { "epoch": 2.17, "logps_train/chosen": -77.88663482666016, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -130.375, "logps_train/rejected": -331.54937744140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3151772022247314, "rewards_train/margins": 18.797573804855347, "rewards_train/rejected": -20.112751007080078, "step": 4402 }, { "epoch": 2.17, "learning_rate": 1.96876766169433e-07, "loss": 0.0, "step": 4403 }, { "epoch": 2.17, "logps_train/chosen": -81.57608032226562, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -320.1474304199219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6627837419509888, "rewards_train/margins": 17.620562434196472, "rewards_train/rejected": -19.28334617614746, "step": 4403 }, { "epoch": 2.17, "learning_rate": 1.9666098189059755e-07, "loss": 0.0, "step": 4404 }, { "epoch": 2.17, "logps_train/chosen": -77.69473266601562, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -320.51214599609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2913239002227783, "rewards_train/margins": 18.174248456954956, "rewards_train/rejected": -19.465572357177734, "step": 4404 }, { "epoch": 2.17, "learning_rate": 1.9644528697748374e-07, "loss": 0.0, "step": 4405 }, { "epoch": 2.17, "logps_train/chosen": -83.4582748413086, "logps_train/ref_chosen": -68.125, "logps_train/ref_rejected": -130.625, "logps_train/rejected": -332.6119384765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.533132553100586, "rewards_train/margins": 18.663801193237305, "rewards_train/rejected": -20.19693374633789, "step": 4405 }, { "epoch": 2.17, "learning_rate": 1.9622968149363655e-07, "loss": 0.0, "step": 4406 }, { "epoch": 2.17, "logps_train/chosen": -74.0029067993164, "logps_train/ref_chosen": -62.4375, "logps_train/ref_rejected": -121.1875, "logps_train/rejected": -305.18389892578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.155564308166504, "rewards_train/margins": 17.242758750915527, "rewards_train/rejected": -18.39832305908203, "step": 4406 }, { "epoch": 2.17, "learning_rate": 1.960141655025751e-07, "loss": 0.0, "step": 4407 }, { "epoch": 2.17, "logps_train/chosen": -78.95087432861328, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -312.1173095703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6047066450119019, "rewards_train/margins": 17.344327092170715, "rewards_train/rejected": -18.949033737182617, "step": 4407 }, { "epoch": 2.17, "learning_rate": 1.957987390677917e-07, "loss": 0.0, "step": 4408 }, { "epoch": 2.17, "logps_train/chosen": -77.4466323852539, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -120.125, "logps_train/rejected": -301.5588684082031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3107280731201172, "rewards_train/margins": 16.830997467041016, "rewards_train/rejected": -18.141725540161133, "step": 4408 }, { "epoch": 2.17, "learning_rate": 1.9558340225275232e-07, "loss": 0.0, "step": 4409 }, { "epoch": 2.17, "logps_train/chosen": -80.67365264892578, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -321.22137451171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.773810625076294, "rewards_train/margins": 18.010777235031128, "rewards_train/rejected": -19.784587860107422, "step": 4409 }, { "epoch": 2.17, "learning_rate": 1.953681551208971e-07, "loss": 0.0, "step": 4410 }, { "epoch": 2.17, "logps_train/chosen": -75.17359924316406, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -322.85284423828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1021262407302856, "rewards_train/margins": 18.51176917552948, "rewards_train/rejected": -19.613895416259766, "step": 4410 }, { "epoch": 2.17, "learning_rate": 1.951529977356386e-07, "loss": 0.0, "step": 4411 }, { "epoch": 2.17, "logps_train/chosen": -82.18205261230469, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -326.00323486328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.632218599319458, "rewards_train/margins": 18.538225889205933, "rewards_train/rejected": -20.17044448852539, "step": 4411 }, { "epoch": 2.17, "learning_rate": 1.949379301603642e-07, "loss": 0.0, "step": 4412 }, { "epoch": 2.17, "logps_train/chosen": -77.06794738769531, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -306.3419189453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1889231204986572, "rewards_train/margins": 17.26675581932068, "rewards_train/rejected": -18.455678939819336, "step": 4412 }, { "epoch": 2.17, "learning_rate": 1.9472295245843406e-07, "loss": 0.0, "step": 4413 }, { "epoch": 2.17, "logps_train/chosen": -77.6495361328125, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -331.59381103515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3220332860946655, "rewards_train/margins": 19.058831810951233, "rewards_train/rejected": -20.3808650970459, "step": 4413 }, { "epoch": 2.17, "learning_rate": 1.9450806469318188e-07, "loss": 0.0, "step": 4414 }, { "epoch": 2.17, "logps_train/chosen": -74.98782348632812, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -317.1232604980469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0046418905258179, "rewards_train/margins": 18.29801619052887, "rewards_train/rejected": -19.302658081054688, "step": 4414 }, { "epoch": 2.17, "learning_rate": 1.9429326692791537e-07, "loss": 0.0, "step": 4415 }, { "epoch": 2.17, "logps_train/chosen": -84.63140869140625, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -338.285400390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.060797691345215, "rewards_train/margins": 18.939126014709473, "rewards_train/rejected": -20.999923706054688, "step": 4415 }, { "epoch": 2.17, "learning_rate": 1.9407855922591532e-07, "loss": 0.0, "step": 4416 }, { "epoch": 2.17, "logps_train/chosen": -80.77230072021484, "logps_train/ref_chosen": -61.59375, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -318.0877685546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9189298152923584, "rewards_train/margins": 17.370655298233032, "rewards_train/rejected": -19.28958511352539, "step": 4416 }, { "epoch": 2.17, "learning_rate": 1.9386394165043596e-07, "loss": 0.0, "step": 4417 }, { "epoch": 2.17, "logps_train/chosen": -79.04905700683594, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -330.0732116699219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3980945348739624, "rewards_train/margins": 18.846530556678772, "rewards_train/rejected": -20.244625091552734, "step": 4417 }, { "epoch": 2.18, "learning_rate": 1.9364941426470498e-07, "loss": 0.0, "step": 4418 }, { "epoch": 2.18, "logps_train/chosen": -75.83296203613281, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -317.6505432128906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1999956369400024, "rewards_train/margins": 17.901925444602966, "rewards_train/rejected": -19.10192108154297, "step": 4418 }, { "epoch": 2.18, "learning_rate": 1.9343497713192385e-07, "loss": 0.0, "step": 4419 }, { "epoch": 2.18, "logps_train/chosen": -79.5705795288086, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -328.61370849609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5279078483581543, "rewards_train/margins": 18.54889154434204, "rewards_train/rejected": -20.076799392700195, "step": 4419 }, { "epoch": 2.18, "learning_rate": 1.9322063031526719e-07, "loss": 0.0, "step": 4420 }, { "epoch": 2.18, "logps_train/chosen": -79.4593505859375, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -315.5523681640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.48626708984375, "rewards_train/margins": 17.768579483032227, "rewards_train/rejected": -19.254846572875977, "step": 4420 }, { "epoch": 2.18, "learning_rate": 1.9300637387788266e-07, "loss": 0.0, "step": 4421 }, { "epoch": 2.18, "logps_train/chosen": -84.00296020507812, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -320.486083984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7999546527862549, "rewards_train/margins": 17.307541608810425, "rewards_train/rejected": -19.10749626159668, "step": 4421 }, { "epoch": 2.18, "learning_rate": 1.9279220788289207e-07, "loss": 0.0, "step": 4422 }, { "epoch": 2.18, "logps_train/chosen": -71.42985534667969, "logps_train/ref_chosen": -62.09375, "logps_train/ref_rejected": -122.3125, "logps_train/rejected": -311.28619384765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9330977201461792, "rewards_train/margins": 17.965980648994446, "rewards_train/rejected": -18.899078369140625, "step": 4422 }, { "epoch": 2.18, "learning_rate": 1.9257813239339005e-07, "loss": 0.0, "step": 4423 }, { "epoch": 2.18, "logps_train/chosen": -74.98381042480469, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -309.26507568359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0839765071868896, "rewards_train/margins": 17.27505135536194, "rewards_train/rejected": -18.359027862548828, "step": 4423 }, { "epoch": 2.18, "learning_rate": 1.9236414747244445e-07, "loss": 0.0, "step": 4424 }, { "epoch": 2.18, "logps_train/chosen": -74.44317626953125, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -120.6875, "logps_train/rejected": -302.63348388671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.119756817817688, "rewards_train/margins": 17.07391345500946, "rewards_train/rejected": -18.19367027282715, "step": 4424 }, { "epoch": 2.18, "learning_rate": 1.9215025318309703e-07, "loss": 0.0, "step": 4425 }, { "epoch": 2.18, "logps_train/chosen": -81.80352020263672, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -328.14935302734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5773248672485352, "rewards_train/margins": 18.1892671585083, "rewards_train/rejected": -19.766592025756836, "step": 4425 }, { "epoch": 2.18, "learning_rate": 1.919364495883623e-07, "loss": 0.0, "step": 4426 }, { "epoch": 2.18, "logps_train/chosen": -76.88887786865234, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -121.6875, "logps_train/rejected": -308.2845458984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3165242671966553, "rewards_train/margins": 17.344743967056274, "rewards_train/rejected": -18.66126823425293, "step": 4426 }, { "epoch": 2.18, "learning_rate": 1.917227367512283e-07, "loss": 0.0, "step": 4427 }, { "epoch": 2.18, "logps_train/chosen": -75.26187133789062, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -317.07568359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2502102851867676, "rewards_train/margins": 17.951499462127686, "rewards_train/rejected": -19.201709747314453, "step": 4427 }, { "epoch": 2.18, "learning_rate": 1.9150911473465604e-07, "loss": 0.0, "step": 4428 }, { "epoch": 2.18, "logps_train/chosen": -79.8095703125, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -324.85333251953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4053703546524048, "rewards_train/margins": 18.269514441490173, "rewards_train/rejected": -19.674884796142578, "step": 4428 }, { "epoch": 2.18, "learning_rate": 1.9129558360158055e-07, "loss": 0.0001, "step": 4429 }, { "epoch": 2.18, "logps_train/chosen": -73.49122619628906, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -302.40234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0228044986724854, "rewards_train/margins": 16.94067358970642, "rewards_train/rejected": -17.963478088378906, "step": 4429 }, { "epoch": 2.18, "learning_rate": 1.9108214341490885e-07, "loss": 0.0004, "step": 4430 }, { "epoch": 2.18, "logps_train/chosen": -76.65327453613281, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -325.052734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1664254665374756, "rewards_train/margins": 18.753109216690063, "rewards_train/rejected": -19.91953468322754, "step": 4430 }, { "epoch": 2.18, "learning_rate": 1.9086879423752218e-07, "loss": 0.0, "step": 4431 }, { "epoch": 2.18, "logps_train/chosen": -77.09693145751953, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -133.75, "logps_train/rejected": -337.2737121582031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.287036657333374, "rewards_train/margins": 19.065529108047485, "rewards_train/rejected": -20.35256576538086, "step": 4431 }, { "epoch": 2.18, "learning_rate": 1.9065553613227504e-07, "loss": 0.0, "step": 4432 }, { "epoch": 2.18, "logps_train/chosen": -78.83677673339844, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -317.068115234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4463239908218384, "rewards_train/margins": 18.083922743797302, "rewards_train/rejected": -19.53024673461914, "step": 4432 }, { "epoch": 2.18, "learning_rate": 1.90442369161994e-07, "loss": 0.0, "step": 4433 }, { "epoch": 2.18, "logps_train/chosen": -67.7690200805664, "logps_train/ref_chosen": -58.78125, "logps_train/ref_rejected": -117.0625, "logps_train/rejected": -302.20660400390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8997292518615723, "rewards_train/margins": 17.613072872161865, "rewards_train/rejected": -18.512802124023438, "step": 4433 }, { "epoch": 2.18, "learning_rate": 1.9022929338947997e-07, "loss": 0.0001, "step": 4434 }, { "epoch": 2.18, "logps_train/chosen": -80.75896453857422, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -325.5174255371094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6295101642608643, "rewards_train/margins": 18.103825330734253, "rewards_train/rejected": -19.733335494995117, "step": 4434 }, { "epoch": 2.18, "learning_rate": 1.9001630887750642e-07, "loss": 0.0, "step": 4435 }, { "epoch": 2.18, "logps_train/chosen": -81.52061462402344, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -121.3125, "logps_train/rejected": -314.8609924316406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7580674886703491, "rewards_train/margins": 17.594292283058167, "rewards_train/rejected": -19.352359771728516, "step": 4435 }, { "epoch": 2.18, "learning_rate": 1.8980341568881976e-07, "loss": 0.0002, "step": 4436 }, { "epoch": 2.18, "logps_train/chosen": -84.60157775878906, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -328.3343505859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8385261297225952, "rewards_train/margins": 18.295791268348694, "rewards_train/rejected": -20.13431739807129, "step": 4436 }, { "epoch": 2.18, "learning_rate": 1.895906138861401e-07, "loss": 0.0, "step": 4437 }, { "epoch": 2.18, "logps_train/chosen": -78.96552276611328, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -314.940185546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.380927562713623, "rewards_train/margins": 17.653425693511963, "rewards_train/rejected": -19.034353256225586, "step": 4437 }, { "epoch": 2.19, "learning_rate": 1.893779035321602e-07, "loss": 0.0, "step": 4438 }, { "epoch": 2.19, "logps_train/chosen": -84.5653076171875, "logps_train/ref_chosen": -68.0, "logps_train/ref_rejected": -131.75, "logps_train/rejected": -334.27923583984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6563841104507446, "rewards_train/margins": 18.597026228904724, "rewards_train/rejected": -20.25341033935547, "step": 4438 }, { "epoch": 2.19, "learning_rate": 1.8916528468954595e-07, "loss": 0.0, "step": 4439 }, { "epoch": 2.19, "logps_train/chosen": -79.31742858886719, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -321.65283203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3705123662948608, "rewards_train/margins": 18.176215767860413, "rewards_train/rejected": -19.546728134155273, "step": 4439 }, { "epoch": 2.19, "learning_rate": 1.8895275742093607e-07, "loss": 0.0, "step": 4440 }, { "epoch": 2.19, "logps_train/chosen": -76.6982192993164, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -324.00274658203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1933084726333618, "rewards_train/margins": 18.51370370388031, "rewards_train/rejected": -19.707012176513672, "step": 4440 }, { "epoch": 2.19, "learning_rate": 1.8874032178894288e-07, "loss": 0.0, "step": 4441 }, { "epoch": 2.19, "logps_train/chosen": -83.34262084960938, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -329.980712890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6205897331237793, "rewards_train/margins": 18.44867181777954, "rewards_train/rejected": -20.06926155090332, "step": 4441 }, { "epoch": 2.19, "learning_rate": 1.8852797785615126e-07, "loss": 0.0, "step": 4442 }, { "epoch": 2.19, "logps_train/chosen": -84.15643310546875, "logps_train/ref_chosen": -68.1875, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -329.64569091796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5977720022201538, "rewards_train/margins": 18.51074469089508, "rewards_train/rejected": -20.108516693115234, "step": 4442 }, { "epoch": 2.19, "learning_rate": 1.8831572568511888e-07, "loss": 0.0002, "step": 4443 }, { "epoch": 2.19, "logps_train/chosen": -78.32809448242188, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -325.67987060546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2351534366607666, "rewards_train/margins": 18.48234534263611, "rewards_train/rejected": -19.717498779296875, "step": 4443 }, { "epoch": 2.19, "learning_rate": 1.8810356533837723e-07, "loss": 0.0, "step": 4444 }, { "epoch": 2.19, "logps_train/chosen": -69.49684143066406, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -308.347412109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.700417160987854, "rewards_train/margins": 17.975097060203552, "rewards_train/rejected": -18.675514221191406, "step": 4444 }, { "epoch": 2.19, "learning_rate": 1.8789149687842954e-07, "loss": 0.0, "step": 4445 }, { "epoch": 2.19, "logps_train/chosen": -75.71035766601562, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -321.6477355957031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0875393152236938, "rewards_train/margins": 18.309414267539978, "rewards_train/rejected": -19.396953582763672, "step": 4445 }, { "epoch": 2.19, "learning_rate": 1.876795203677528e-07, "loss": 0.0, "step": 4446 }, { "epoch": 2.19, "logps_train/chosen": -75.22360229492188, "logps_train/ref_chosen": -62.8125, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -321.87933349609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2422821521759033, "rewards_train/margins": 18.140185117721558, "rewards_train/rejected": -19.38246726989746, "step": 4446 }, { "epoch": 2.19, "learning_rate": 1.8746763586879727e-07, "loss": 0.0, "step": 4447 }, { "epoch": 2.19, "logps_train/chosen": -82.07684326171875, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -315.58428955078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7268738746643066, "rewards_train/margins": 17.46119451522827, "rewards_train/rejected": -19.188068389892578, "step": 4447 }, { "epoch": 2.19, "learning_rate": 1.872558434439847e-07, "loss": 0.0, "step": 4448 }, { "epoch": 2.19, "logps_train/chosen": -89.17710876464844, "logps_train/ref_chosen": -69.9375, "logps_train/ref_rejected": -132.875, "logps_train/rejected": -341.476806640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.921324610710144, "rewards_train/margins": 18.944031834602356, "rewards_train/rejected": -20.8653564453125, "step": 4448 }, { "epoch": 2.19, "learning_rate": 1.8704414315571116e-07, "loss": 0.0, "step": 4449 }, { "epoch": 2.19, "logps_train/chosen": -76.41287231445312, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -308.794189453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3660433292388916, "rewards_train/margins": 17.224117040634155, "rewards_train/rejected": -18.590160369873047, "step": 4449 }, { "epoch": 2.19, "learning_rate": 1.8683253506634483e-07, "loss": 0.0, "step": 4450 }, { "epoch": 2.19, "logps_train/chosen": -78.22599792480469, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -121.6875, "logps_train/rejected": -306.86260986328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4743092060089111, "rewards_train/margins": 17.0418860912323, "rewards_train/rejected": -18.51619529724121, "step": 4450 }, { "epoch": 2.19, "learning_rate": 1.8662101923822664e-07, "loss": 0.0, "step": 4451 }, { "epoch": 2.19, "logps_train/chosen": -82.42080688476562, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -328.17474365234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5985013246536255, "rewards_train/margins": 18.29211986064911, "rewards_train/rejected": -19.890621185302734, "step": 4451 }, { "epoch": 2.19, "learning_rate": 1.8640959573367098e-07, "loss": 0.0, "step": 4452 }, { "epoch": 2.19, "logps_train/chosen": -75.06612396240234, "logps_train/ref_chosen": -62.65625, "logps_train/ref_rejected": -120.9375, "logps_train/rejected": -306.0762939453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.240401268005371, "rewards_train/margins": 17.27079486846924, "rewards_train/rejected": -18.51119613647461, "step": 4452 }, { "epoch": 2.19, "learning_rate": 1.8619826461496446e-07, "loss": 0.0, "step": 4453 }, { "epoch": 2.19, "logps_train/chosen": -77.56309509277344, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -321.21417236328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4663679599761963, "rewards_train/margins": 18.07106375694275, "rewards_train/rejected": -19.537431716918945, "step": 4453 }, { "epoch": 2.19, "learning_rate": 1.859870259443666e-07, "loss": 0.0, "step": 4454 }, { "epoch": 2.19, "logps_train/chosen": -76.26129150390625, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -309.44195556640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.321441650390625, "rewards_train/margins": 17.38222885131836, "rewards_train/rejected": -18.703670501708984, "step": 4454 }, { "epoch": 2.19, "learning_rate": 1.8577587978410964e-07, "loss": 0.0, "step": 4455 }, { "epoch": 2.19, "logps_train/chosen": -75.92752075195312, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -314.843505859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1584250926971436, "rewards_train/margins": 17.777486085891724, "rewards_train/rejected": -18.935911178588867, "step": 4455 }, { "epoch": 2.19, "learning_rate": 1.8556482619639903e-07, "loss": 0.0, "step": 4456 }, { "epoch": 2.19, "logps_train/chosen": -77.6628189086914, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -319.30145263671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4195778369903564, "rewards_train/margins": 18.145725965499878, "rewards_train/rejected": -19.565303802490234, "step": 4456 }, { "epoch": 2.19, "learning_rate": 1.8535386524341222e-07, "loss": 0.0, "step": 4457 }, { "epoch": 2.19, "logps_train/chosen": -75.33000946044922, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -122.4375, "logps_train/rejected": -312.74774169921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.070940375328064, "rewards_train/margins": 17.95774233341217, "rewards_train/rejected": -19.028682708740234, "step": 4457 }, { "epoch": 2.19, "learning_rate": 1.851429969872998e-07, "loss": 0.0, "step": 4458 }, { "epoch": 2.19, "logps_train/chosen": -73.71031188964844, "logps_train/ref_chosen": -60.78125, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -312.5723876953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2933456897735596, "rewards_train/margins": 17.60124659538269, "rewards_train/rejected": -18.89459228515625, "step": 4458 }, { "epoch": 2.2, "learning_rate": 1.8493222149018523e-07, "loss": 0.0, "step": 4459 }, { "epoch": 2.2, "logps_train/chosen": -76.71412658691406, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -330.3712158203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2074484825134277, "rewards_train/margins": 18.974302768707275, "rewards_train/rejected": -20.181751251220703, "step": 4459 }, { "epoch": 2.2, "learning_rate": 1.8472153881416415e-07, "loss": 0.0, "step": 4460 }, { "epoch": 2.2, "logps_train/chosen": -79.52653503417969, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -316.36065673828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5627118349075317, "rewards_train/margins": 17.685903429985046, "rewards_train/rejected": -19.248615264892578, "step": 4460 }, { "epoch": 2.2, "learning_rate": 1.8451094902130505e-07, "loss": 0.0002, "step": 4461 }, { "epoch": 2.2, "logps_train/chosen": -76.83177947998047, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -317.310546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3596916198730469, "rewards_train/margins": 18.118284225463867, "rewards_train/rejected": -19.477975845336914, "step": 4461 }, { "epoch": 2.2, "learning_rate": 1.8430045217364948e-07, "loss": 0.0, "step": 4462 }, { "epoch": 2.2, "logps_train/chosen": -81.74024963378906, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -324.3451232910156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.623682975769043, "rewards_train/margins": 18.111123085021973, "rewards_train/rejected": -19.734806060791016, "step": 4462 }, { "epoch": 2.2, "learning_rate": 1.8409004833321068e-07, "loss": 0.0, "step": 4463 }, { "epoch": 2.2, "logps_train/chosen": -78.24166870117188, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -318.3099670410156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.522800087928772, "rewards_train/margins": 17.92445409297943, "rewards_train/rejected": -19.447254180908203, "step": 4463 }, { "epoch": 2.2, "learning_rate": 1.838797375619755e-07, "loss": 0.0, "step": 4464 }, { "epoch": 2.2, "logps_train/chosen": -82.70973205566406, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -332.4130859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.646949291229248, "rewards_train/margins": 18.61359453201294, "rewards_train/rejected": -20.260543823242188, "step": 4464 }, { "epoch": 2.2, "learning_rate": 1.8366951992190272e-07, "loss": 0.0, "step": 4465 }, { "epoch": 2.2, "logps_train/chosen": -75.5497817993164, "logps_train/ref_chosen": -61.46875, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -325.8866882324219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4087133407592773, "rewards_train/margins": 18.377366065979004, "rewards_train/rejected": -19.78607940673828, "step": 4465 }, { "epoch": 2.2, "learning_rate": 1.8345939547492368e-07, "loss": 0.0, "step": 4466 }, { "epoch": 2.2, "logps_train/chosen": -77.83805847167969, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -328.46124267578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3206713199615479, "rewards_train/margins": 18.632680654525757, "rewards_train/rejected": -19.953351974487305, "step": 4466 }, { "epoch": 2.2, "learning_rate": 1.832493642829429e-07, "loss": 0.0, "step": 4467 }, { "epoch": 2.2, "logps_train/chosen": -74.99362182617188, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -119.875, "logps_train/rejected": -307.15277099609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1861788034439087, "rewards_train/margins": 17.53939950466156, "rewards_train/rejected": -18.72557830810547, "step": 4467 }, { "epoch": 2.2, "learning_rate": 1.8303942640783677e-07, "loss": 0.0, "step": 4468 }, { "epoch": 2.2, "logps_train/chosen": -80.69619750976562, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -315.27337646484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6075587272644043, "rewards_train/margins": 17.575832843780518, "rewards_train/rejected": -19.183391571044922, "step": 4468 }, { "epoch": 2.2, "learning_rate": 1.8282958191145437e-07, "loss": 0.0, "step": 4469 }, { "epoch": 2.2, "logps_train/chosen": -74.1846923828125, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -314.6726989746094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.955871045589447, "rewards_train/margins": 17.98307853937149, "rewards_train/rejected": -18.938949584960938, "step": 4469 }, { "epoch": 2.2, "learning_rate": 1.8261983085561712e-07, "loss": 0.0, "step": 4470 }, { "epoch": 2.2, "logps_train/chosen": -78.72428894042969, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -327.2176513671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3749186992645264, "rewards_train/margins": 18.592747926712036, "rewards_train/rejected": -19.967666625976562, "step": 4470 }, { "epoch": 2.2, "learning_rate": 1.8241017330211955e-07, "loss": 0.0, "step": 4471 }, { "epoch": 2.2, "logps_train/chosen": -81.71733093261719, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -320.8634948730469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6964893341064453, "rewards_train/margins": 17.819841384887695, "rewards_train/rejected": -19.51633071899414, "step": 4471 }, { "epoch": 2.2, "learning_rate": 1.8220060931272792e-07, "loss": 0.0, "step": 4472 }, { "epoch": 2.2, "logps_train/chosen": -82.13835906982422, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -122.8125, "logps_train/rejected": -314.60650634765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9012136459350586, "rewards_train/margins": 17.27672290802002, "rewards_train/rejected": -19.177936553955078, "step": 4472 }, { "epoch": 2.2, "learning_rate": 1.81991138949181e-07, "loss": 0.0001, "step": 4473 }, { "epoch": 2.2, "logps_train/chosen": -76.5733642578125, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -328.35357666015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.255138874053955, "rewards_train/margins": 18.65190076828003, "rewards_train/rejected": -19.907039642333984, "step": 4473 }, { "epoch": 2.2, "learning_rate": 1.817817622731906e-07, "loss": 0.0, "step": 4474 }, { "epoch": 2.2, "logps_train/chosen": -80.12139892578125, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -321.6305847167969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6772758960723877, "rewards_train/margins": 17.862149000167847, "rewards_train/rejected": -19.539424896240234, "step": 4474 }, { "epoch": 2.2, "learning_rate": 1.815724793464403e-07, "loss": 0.0, "step": 4475 }, { "epoch": 2.2, "logps_train/chosen": -86.01777648925781, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -329.36328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9382516145706177, "rewards_train/margins": 18.31731402873993, "rewards_train/rejected": -20.255565643310547, "step": 4475 }, { "epoch": 2.2, "learning_rate": 1.8136329023058627e-07, "loss": 0.0, "step": 4476 }, { "epoch": 2.2, "logps_train/chosen": -74.8517837524414, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -318.16851806640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1629124879837036, "rewards_train/margins": 18.164976000785828, "rewards_train/rejected": -19.32788848876953, "step": 4476 }, { "epoch": 2.2, "learning_rate": 1.8115419498725681e-07, "loss": 0.0, "step": 4477 }, { "epoch": 2.2, "logps_train/chosen": -82.59552001953125, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -311.43804931640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.629815697669983, "rewards_train/margins": 17.08039700984955, "rewards_train/rejected": -18.71021270751953, "step": 4477 }, { "epoch": 2.2, "learning_rate": 1.8094519367805323e-07, "loss": 0.0, "step": 4478 }, { "epoch": 2.2, "logps_train/chosen": -79.19087219238281, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -310.029052734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.550385594367981, "rewards_train/margins": 17.157795786857605, "rewards_train/rejected": -18.708181381225586, "step": 4478 }, { "epoch": 2.21, "learning_rate": 1.8073628636454846e-07, "loss": 0.0, "step": 4479 }, { "epoch": 2.21, "logps_train/chosen": -81.54212951660156, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -325.61376953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.638441562652588, "rewards_train/margins": 18.356919765472412, "rewards_train/rejected": -19.995361328125, "step": 4479 }, { "epoch": 2.21, "learning_rate": 1.8052747310828793e-07, "loss": 0.0, "step": 4480 }, { "epoch": 2.21, "logps_train/chosen": -84.83577728271484, "logps_train/ref_chosen": -67.5, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -333.25958251953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.735872507095337, "rewards_train/margins": 18.48583960533142, "rewards_train/rejected": -20.221712112426758, "step": 4480 }, { "epoch": 2.21, "learning_rate": 1.8031875397078983e-07, "loss": 0.0, "step": 4481 }, { "epoch": 2.21, "logps_train/chosen": -79.94940185546875, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -121.1875, "logps_train/rejected": -314.81292724609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6204774379730225, "rewards_train/margins": 17.74055314064026, "rewards_train/rejected": -19.36103057861328, "step": 4481 }, { "epoch": 2.21, "learning_rate": 1.8011012901354361e-07, "loss": 0.0001, "step": 4482 }, { "epoch": 2.21, "logps_train/chosen": -77.06636047363281, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -322.697021484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1527788639068604, "rewards_train/margins": 18.225906133651733, "rewards_train/rejected": -19.378684997558594, "step": 4482 }, { "epoch": 2.21, "learning_rate": 1.7990159829801188e-07, "loss": 0.0, "step": 4483 }, { "epoch": 2.21, "logps_train/chosen": -82.84412384033203, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -326.5770263671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.785876989364624, "rewards_train/margins": 18.131345987319946, "rewards_train/rejected": -19.91722297668457, "step": 4483 }, { "epoch": 2.21, "learning_rate": 1.7969316188562967e-07, "loss": 0.0, "step": 4484 }, { "epoch": 2.21, "logps_train/chosen": -80.9067611694336, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -329.574462890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5176291465759277, "rewards_train/margins": 18.41823434829712, "rewards_train/rejected": -19.935863494873047, "step": 4484 }, { "epoch": 2.21, "learning_rate": 1.7948481983780288e-07, "loss": 0.0, "step": 4485 }, { "epoch": 2.21, "logps_train/chosen": -76.01776123046875, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -316.43603515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0998234748840332, "rewards_train/margins": 17.99602460861206, "rewards_train/rejected": -19.095848083496094, "step": 4485 }, { "epoch": 2.21, "learning_rate": 1.792765722159112e-07, "loss": 0.0, "step": 4486 }, { "epoch": 2.21, "logps_train/chosen": -80.06028747558594, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -121.375, "logps_train/rejected": -313.89398193359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6731677055358887, "rewards_train/margins": 17.58039140701294, "rewards_train/rejected": -19.253559112548828, "step": 4486 }, { "epoch": 2.21, "learning_rate": 1.7906841908130543e-07, "loss": 0.0, "step": 4487 }, { "epoch": 2.21, "logps_train/chosen": -78.12666320800781, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -322.03167724609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.436202049255371, "rewards_train/margins": 18.20544147491455, "rewards_train/rejected": -19.641643524169922, "step": 4487 }, { "epoch": 2.21, "learning_rate": 1.7886036049530883e-07, "loss": 0.0, "step": 4488 }, { "epoch": 2.21, "logps_train/chosen": -79.728271484375, "logps_train/ref_chosen": -67.375, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -320.8472900390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.236694097518921, "rewards_train/margins": 18.040708780288696, "rewards_train/rejected": -19.277402877807617, "step": 4488 }, { "epoch": 2.21, "learning_rate": 1.786523965192172e-07, "loss": 0.0, "step": 4489 }, { "epoch": 2.21, "logps_train/chosen": -78.89117431640625, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -319.5151672363281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4365293979644775, "rewards_train/margins": 18.049413442611694, "rewards_train/rejected": -19.485942840576172, "step": 4489 }, { "epoch": 2.21, "learning_rate": 1.7844452721429792e-07, "loss": 0.0004, "step": 4490 }, { "epoch": 2.21, "logps_train/chosen": -76.80747985839844, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -323.93341064453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0965678691864014, "rewards_train/margins": 18.862399339675903, "rewards_train/rejected": -19.958967208862305, "step": 4490 }, { "epoch": 2.21, "learning_rate": 1.7823675264179065e-07, "loss": 0.0, "step": 4491 }, { "epoch": 2.21, "logps_train/chosen": -79.68269348144531, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -333.57958984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3652411699295044, "rewards_train/margins": 18.962836146354675, "rewards_train/rejected": -20.32807731628418, "step": 4491 }, { "epoch": 2.21, "learning_rate": 1.7802907286290715e-07, "loss": 0.0, "step": 4492 }, { "epoch": 2.21, "logps_train/chosen": -80.45361328125, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -331.21148681640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5021969079971313, "rewards_train/margins": 18.643465399742126, "rewards_train/rejected": -20.145662307739258, "step": 4492 }, { "epoch": 2.21, "learning_rate": 1.7782148793883145e-07, "loss": 0.0, "step": 4493 }, { "epoch": 2.21, "logps_train/chosen": -79.95057678222656, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -123.1875, "logps_train/rejected": -318.4019470214844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6445691585540771, "rewards_train/margins": 17.88039231300354, "rewards_train/rejected": -19.524961471557617, "step": 4493 }, { "epoch": 2.21, "learning_rate": 1.7761399793071947e-07, "loss": 0.0, "step": 4494 }, { "epoch": 2.21, "logps_train/chosen": -81.20868682861328, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -325.00531005859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.722870111465454, "rewards_train/margins": 17.90226721763611, "rewards_train/rejected": -19.625137329101562, "step": 4494 }, { "epoch": 2.21, "learning_rate": 1.7740660289969884e-07, "loss": 0.0, "step": 4495 }, { "epoch": 2.21, "logps_train/chosen": -70.80035400390625, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -320.0316162109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7164615392684937, "rewards_train/margins": 18.910186409950256, "rewards_train/rejected": -19.62664794921875, "step": 4495 }, { "epoch": 2.21, "learning_rate": 1.771993029068702e-07, "loss": 0.0, "step": 4496 }, { "epoch": 2.21, "logps_train/chosen": -78.22012329101562, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -122.3125, "logps_train/rejected": -317.1029052734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4958407878875732, "rewards_train/margins": 17.983590364456177, "rewards_train/rejected": -19.47943115234375, "step": 4496 }, { "epoch": 2.21, "learning_rate": 1.7699209801330468e-07, "loss": 0.0, "step": 4497 }, { "epoch": 2.21, "logps_train/chosen": -80.46369171142578, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -315.8266906738281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4358222484588623, "rewards_train/margins": 17.778390169143677, "rewards_train/rejected": -19.21421241760254, "step": 4497 }, { "epoch": 2.21, "learning_rate": 1.7678498828004668e-07, "loss": 0.0, "step": 4498 }, { "epoch": 2.21, "logps_train/chosen": -82.36659240722656, "logps_train/ref_chosen": -67.5, "logps_train/ref_rejected": -131.875, "logps_train/rejected": -339.0944519042969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4845107793807983, "rewards_train/margins": 19.236165642738342, "rewards_train/rejected": -20.72067642211914, "step": 4498 }, { "epoch": 2.22, "learning_rate": 1.7657797376811252e-07, "loss": 0.0, "step": 4499 }, { "epoch": 2.22, "logps_train/chosen": -81.27532958984375, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -311.8614501953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7995784282684326, "rewards_train/margins": 17.24237561225891, "rewards_train/rejected": -19.041954040527344, "step": 4499 }, { "epoch": 2.22, "learning_rate": 1.7637105453848928e-07, "loss": 0.0, "step": 4500 }, { "epoch": 2.22, "logps_train/chosen": -80.91128540039062, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -120.1875, "logps_train/rejected": -306.1593017578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6087560653686523, "rewards_train/margins": 16.987935066223145, "rewards_train/rejected": -18.596691131591797, "step": 4500 }, { "epoch": 2.22, "learning_rate": 1.7616423065213726e-07, "loss": 0.0003, "step": 4501 }, { "epoch": 2.22, "logps_train/chosen": -78.23115539550781, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -122.125, "logps_train/rejected": -307.05450439453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2794147729873657, "rewards_train/margins": 17.211437582969666, "rewards_train/rejected": -18.49085235595703, "step": 4501 }, { "epoch": 2.22, "learning_rate": 1.7595750216998812e-07, "loss": 0.0001, "step": 4502 }, { "epoch": 2.22, "logps_train/chosen": -76.51907348632812, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -325.4959716796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2786407470703125, "rewards_train/margins": 18.564071655273438, "rewards_train/rejected": -19.84271240234375, "step": 4502 }, { "epoch": 2.22, "learning_rate": 1.7575086915294524e-07, "loss": 0.0, "step": 4503 }, { "epoch": 2.22, "logps_train/chosen": -80.64656066894531, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -321.57415771484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5101633071899414, "rewards_train/margins": 18.106532096862793, "rewards_train/rejected": -19.616695404052734, "step": 4503 }, { "epoch": 2.22, "learning_rate": 1.7554433166188437e-07, "loss": 0.0, "step": 4504 }, { "epoch": 2.22, "logps_train/chosen": -81.131591796875, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -320.2762756347656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6491936445236206, "rewards_train/margins": 17.95348083972931, "rewards_train/rejected": -19.60267448425293, "step": 4504 }, { "epoch": 2.22, "learning_rate": 1.7533788975765279e-07, "loss": 0.0003, "step": 4505 }, { "epoch": 2.22, "logps_train/chosen": -79.20642852783203, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -325.101806640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.504431962966919, "rewards_train/margins": 18.18323588371277, "rewards_train/rejected": -19.687667846679688, "step": 4505 }, { "epoch": 2.22, "learning_rate": 1.751315435010696e-07, "loss": 0.0, "step": 4506 }, { "epoch": 2.22, "logps_train/chosen": -74.59756469726562, "logps_train/ref_chosen": -62.84375, "logps_train/ref_rejected": -118.9375, "logps_train/rejected": -299.35406494140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.174551248550415, "rewards_train/margins": 16.867204427719116, "rewards_train/rejected": -18.04175567626953, "step": 4506 }, { "epoch": 2.22, "learning_rate": 1.7492529295292574e-07, "loss": 0.0, "step": 4507 }, { "epoch": 2.22, "logps_train/chosen": -74.79754638671875, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -121.5, "logps_train/rejected": -309.4914245605469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.031658411026001, "rewards_train/margins": 17.769534826278687, "rewards_train/rejected": -18.801193237304688, "step": 4507 }, { "epoch": 2.22, "learning_rate": 1.747191381739842e-07, "loss": 0.0, "step": 4508 }, { "epoch": 2.22, "logps_train/chosen": -82.79763793945312, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -334.6910400390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.640165090560913, "rewards_train/margins": 18.804672479629517, "rewards_train/rejected": -20.44483757019043, "step": 4508 }, { "epoch": 2.22, "learning_rate": 1.7451307922497949e-07, "loss": 0.0, "step": 4509 }, { "epoch": 2.22, "logps_train/chosen": -77.17912292480469, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -323.82403564453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1883713006973267, "rewards_train/margins": 18.384363532066345, "rewards_train/rejected": -19.572734832763672, "step": 4509 }, { "epoch": 2.22, "learning_rate": 1.7430711616661776e-07, "loss": 0.0, "step": 4510 }, { "epoch": 2.22, "logps_train/chosen": -77.44281768798828, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -318.387451171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1227978467941284, "rewards_train/margins": 18.25564682483673, "rewards_train/rejected": -19.37844467163086, "step": 4510 }, { "epoch": 2.22, "learning_rate": 1.7410124905957769e-07, "loss": 0.0, "step": 4511 }, { "epoch": 2.22, "logps_train/chosen": -82.99817657470703, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -315.9486083984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.653723955154419, "rewards_train/margins": 17.69318652153015, "rewards_train/rejected": -19.34691047668457, "step": 4511 }, { "epoch": 2.22, "learning_rate": 1.738954779645083e-07, "loss": 0.0, "step": 4512 }, { "epoch": 2.22, "logps_train/chosen": -79.99016571044922, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -325.8960876464844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6218193769454956, "rewards_train/margins": 18.58282768726349, "rewards_train/rejected": -20.204647064208984, "step": 4512 }, { "epoch": 2.22, "learning_rate": 1.7368980294203183e-07, "loss": 0.0, "step": 4513 }, { "epoch": 2.22, "logps_train/chosen": -81.81407165527344, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -318.53765869140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6025501489639282, "rewards_train/margins": 17.473873496055603, "rewards_train/rejected": -19.07642364501953, "step": 4513 }, { "epoch": 2.22, "learning_rate": 1.7348422405274126e-07, "loss": 0.0, "step": 4514 }, { "epoch": 2.22, "logps_train/chosen": -81.26602172851562, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -320.05010986328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5871493816375732, "rewards_train/margins": 17.95428967475891, "rewards_train/rejected": -19.541439056396484, "step": 4514 }, { "epoch": 2.22, "learning_rate": 1.7327874135720138e-07, "loss": 0.0, "step": 4515 }, { "epoch": 2.22, "logps_train/chosen": -82.88739776611328, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -324.7545166015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9121774435043335, "rewards_train/margins": 17.86927855014801, "rewards_train/rejected": -19.781455993652344, "step": 4515 }, { "epoch": 2.22, "learning_rate": 1.730733549159491e-07, "loss": 0.0001, "step": 4516 }, { "epoch": 2.22, "logps_train/chosen": -77.10137939453125, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -328.06622314453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4172909259796143, "rewards_train/margins": 18.49411654472351, "rewards_train/rejected": -19.911407470703125, "step": 4516 }, { "epoch": 2.22, "learning_rate": 1.7286806478949244e-07, "loss": 0.0, "step": 4517 }, { "epoch": 2.22, "logps_train/chosen": -80.12293243408203, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -315.5849914550781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3360722064971924, "rewards_train/margins": 17.458853483200073, "rewards_train/rejected": -18.794925689697266, "step": 4517 }, { "epoch": 2.22, "learning_rate": 1.7266287103831135e-07, "loss": 0.0, "step": 4518 }, { "epoch": 2.22, "logps_train/chosen": -88.59512329101562, "logps_train/ref_chosen": -67.375, "logps_train/ref_rejected": -131.75, "logps_train/rejected": -334.9085693359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.1214752197265625, "rewards_train/margins": 18.195064544677734, "rewards_train/rejected": -20.316539764404297, "step": 4518 }, { "epoch": 2.23, "learning_rate": 1.724577737228571e-07, "loss": 0.0, "step": 4519 }, { "epoch": 2.23, "logps_train/chosen": -76.78494262695312, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -316.4254150390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3365994691848755, "rewards_train/margins": 18.209850192070007, "rewards_train/rejected": -19.546449661254883, "step": 4519 }, { "epoch": 2.23, "learning_rate": 1.72252772903553e-07, "loss": 0.0001, "step": 4520 }, { "epoch": 2.23, "logps_train/chosen": -78.1234130859375, "logps_train/ref_chosen": -62.8125, "logps_train/ref_rejected": -121.5625, "logps_train/rejected": -306.44512939453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5315555334091187, "rewards_train/margins": 16.958123803138733, "rewards_train/rejected": -18.48967933654785, "step": 4520 }, { "epoch": 2.23, "learning_rate": 1.7204786864079356e-07, "loss": 0.0, "step": 4521 }, { "epoch": 2.23, "logps_train/chosen": -76.97477722167969, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -328.5364990234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3259934186935425, "rewards_train/margins": 18.717990040779114, "rewards_train/rejected": -20.043983459472656, "step": 4521 }, { "epoch": 2.23, "learning_rate": 1.7184306099494477e-07, "loss": 0.0, "step": 4522 }, { "epoch": 2.23, "logps_train/chosen": -77.614013671875, "logps_train/ref_chosen": -63.59375, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -319.4966125488281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4024162292480469, "rewards_train/margins": 18.03069305419922, "rewards_train/rejected": -19.433109283447266, "step": 4522 }, { "epoch": 2.23, "learning_rate": 1.716383500263448e-07, "loss": 0.0, "step": 4523 }, { "epoch": 2.23, "logps_train/chosen": -82.59173583984375, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -318.7243347167969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.786956548690796, "rewards_train/margins": 17.45335030555725, "rewards_train/rejected": -19.240306854248047, "step": 4523 }, { "epoch": 2.23, "learning_rate": 1.7143373579530252e-07, "loss": 0.0001, "step": 4524 }, { "epoch": 2.23, "logps_train/chosen": -82.85359191894531, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -331.8787841796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.749348521232605, "rewards_train/margins": 18.479643940925598, "rewards_train/rejected": -20.228992462158203, "step": 4524 }, { "epoch": 2.23, "learning_rate": 1.7122921836209864e-07, "loss": 0.0, "step": 4525 }, { "epoch": 2.23, "logps_train/chosen": -79.34095764160156, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -321.7022705078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5425926446914673, "rewards_train/margins": 18.257809042930603, "rewards_train/rejected": -19.80040168762207, "step": 4525 }, { "epoch": 2.23, "learning_rate": 1.7102479778698569e-07, "loss": 0.0, "step": 4526 }, { "epoch": 2.23, "logps_train/chosen": -81.40482330322266, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -317.0384216308594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6095739603042603, "rewards_train/margins": 17.67659318447113, "rewards_train/rejected": -19.28616714477539, "step": 4526 }, { "epoch": 2.23, "learning_rate": 1.7082047413018713e-07, "loss": 0.0, "step": 4527 }, { "epoch": 2.23, "logps_train/chosen": -79.5010757446289, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -318.9520568847656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.467441439628601, "rewards_train/margins": 17.843876004219055, "rewards_train/rejected": -19.311317443847656, "step": 4527 }, { "epoch": 2.23, "learning_rate": 1.7061624745189813e-07, "loss": 0.0002, "step": 4528 }, { "epoch": 2.23, "logps_train/chosen": -81.59070587158203, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -315.9950866699219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5428597927093506, "rewards_train/margins": 17.435604333877563, "rewards_train/rejected": -18.978464126586914, "step": 4528 }, { "epoch": 2.23, "learning_rate": 1.7041211781228504e-07, "loss": 0.0, "step": 4529 }, { "epoch": 2.23, "logps_train/chosen": -82.34362030029297, "logps_train/ref_chosen": -68.0625, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -329.9754333496094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4264514446258545, "rewards_train/margins": 18.635838747024536, "rewards_train/rejected": -20.06229019165039, "step": 4529 }, { "epoch": 2.23, "learning_rate": 1.7020808527148616e-07, "loss": 0.0, "step": 4530 }, { "epoch": 2.23, "logps_train/chosen": -76.92959594726562, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -310.91522216796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0913485288619995, "rewards_train/margins": 17.705642819404602, "rewards_train/rejected": -18.7969913482666, "step": 4530 }, { "epoch": 2.23, "learning_rate": 1.700041498896108e-07, "loss": 0.0, "step": 4531 }, { "epoch": 2.23, "logps_train/chosen": -77.97772216796875, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -318.37469482421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4973822832107544, "rewards_train/margins": 17.91303527355194, "rewards_train/rejected": -19.410417556762695, "step": 4531 }, { "epoch": 2.23, "learning_rate": 1.698003117267394e-07, "loss": 0.0, "step": 4532 }, { "epoch": 2.23, "logps_train/chosen": -76.83364868164062, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -321.3377685546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0925939083099365, "rewards_train/margins": 18.316574335098267, "rewards_train/rejected": -19.409168243408203, "step": 4532 }, { "epoch": 2.23, "learning_rate": 1.695965708429246e-07, "loss": 0.0, "step": 4533 }, { "epoch": 2.23, "logps_train/chosen": -79.22200012207031, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -328.1972961425781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.436068058013916, "rewards_train/margins": 18.486592769622803, "rewards_train/rejected": -19.92266082763672, "step": 4533 }, { "epoch": 2.23, "learning_rate": 1.6939292729818923e-07, "loss": 0.0, "step": 4534 }, { "epoch": 2.23, "logps_train/chosen": -80.52650451660156, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -319.9232177734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5870745182037354, "rewards_train/margins": 17.79777503013611, "rewards_train/rejected": -19.384849548339844, "step": 4534 }, { "epoch": 2.23, "learning_rate": 1.6918938115252845e-07, "loss": 0.0, "step": 4535 }, { "epoch": 2.23, "logps_train/chosen": -79.81342315673828, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -132.0, "logps_train/rejected": -342.7933349609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3740421533584595, "rewards_train/margins": 19.708513617515564, "rewards_train/rejected": -21.082555770874023, "step": 4535 }, { "epoch": 2.23, "learning_rate": 1.6898593246590824e-07, "loss": 0.0, "step": 4536 }, { "epoch": 2.23, "logps_train/chosen": -80.28218078613281, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -340.2828674316406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3603954315185547, "rewards_train/margins": 19.613691329956055, "rewards_train/rejected": -20.97408676147461, "step": 4536 }, { "epoch": 2.23, "learning_rate": 1.6878258129826574e-07, "loss": 0.0, "step": 4537 }, { "epoch": 2.23, "logps_train/chosen": -76.18096923828125, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -324.0285949707031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.062139868736267, "rewards_train/margins": 18.45185625553131, "rewards_train/rejected": -19.513996124267578, "step": 4537 }, { "epoch": 2.23, "learning_rate": 1.6857932770950993e-07, "loss": 0.0, "step": 4538 }, { "epoch": 2.23, "logps_train/chosen": -79.57135009765625, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -332.07525634765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4931211471557617, "rewards_train/margins": 18.93706226348877, "rewards_train/rejected": -20.43018341064453, "step": 4538 }, { "epoch": 2.23, "learning_rate": 1.6837617175952056e-07, "loss": 0.0, "step": 4539 }, { "epoch": 2.23, "logps_train/chosen": -77.04954528808594, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -324.105712890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.455857515335083, "rewards_train/margins": 18.27214503288269, "rewards_train/rejected": -19.728002548217773, "step": 4539 }, { "epoch": 2.24, "learning_rate": 1.6817311350814856e-07, "loss": 0.0, "step": 4540 }, { "epoch": 2.24, "logps_train/chosen": -81.49024200439453, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -305.2659912109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.702929973602295, "rewards_train/margins": 16.600523471832275, "rewards_train/rejected": -18.30345344543457, "step": 4540 }, { "epoch": 2.24, "learning_rate": 1.679701530152165e-07, "loss": 0.0002, "step": 4541 }, { "epoch": 2.24, "logps_train/chosen": -79.54930114746094, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -325.2052307128906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2968734502792358, "rewards_train/margins": 18.372378945350647, "rewards_train/rejected": -19.669252395629883, "step": 4541 }, { "epoch": 2.24, "learning_rate": 1.6776729034051785e-07, "loss": 0.0, "step": 4542 }, { "epoch": 2.24, "logps_train/chosen": -81.87861633300781, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -316.5621643066406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6586623191833496, "rewards_train/margins": 17.464449405670166, "rewards_train/rejected": -19.123111724853516, "step": 4542 }, { "epoch": 2.24, "learning_rate": 1.6756452554381734e-07, "loss": 0.0, "step": 4543 }, { "epoch": 2.24, "logps_train/chosen": -74.19452667236328, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -315.369384765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.015595555305481, "rewards_train/margins": 18.07607924938202, "rewards_train/rejected": -19.0916748046875, "step": 4543 }, { "epoch": 2.24, "learning_rate": 1.673618586848506e-07, "loss": 0.0, "step": 4544 }, { "epoch": 2.24, "logps_train/chosen": -74.91079711914062, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -319.96453857421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8992341160774231, "rewards_train/margins": 18.254837691783905, "rewards_train/rejected": -19.154071807861328, "step": 4544 }, { "epoch": 2.24, "learning_rate": 1.67159289823325e-07, "loss": 0.0, "step": 4545 }, { "epoch": 2.24, "logps_train/chosen": -69.78543090820312, "logps_train/ref_chosen": -60.90625, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -316.5586242675781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8867211937904358, "rewards_train/margins": 18.46240371465683, "rewards_train/rejected": -19.349124908447266, "step": 4545 }, { "epoch": 2.24, "learning_rate": 1.6695681901891857e-07, "loss": 0.0, "step": 4546 }, { "epoch": 2.24, "logps_train/chosen": -78.8878173828125, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -329.12139892578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5027469396591187, "rewards_train/margins": 18.489619851112366, "rewards_train/rejected": -19.992366790771484, "step": 4546 }, { "epoch": 2.24, "learning_rate": 1.667544463312804e-07, "loss": 0.0001, "step": 4547 }, { "epoch": 2.24, "logps_train/chosen": -77.8321762084961, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -330.807373046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3579250574111938, "rewards_train/margins": 18.61880910396576, "rewards_train/rejected": -19.976734161376953, "step": 4547 }, { "epoch": 2.24, "learning_rate": 1.665521718200313e-07, "loss": 0.0, "step": 4548 }, { "epoch": 2.24, "logps_train/chosen": -77.36186981201172, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -328.207763671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2399470806121826, "rewards_train/margins": 18.757588624954224, "rewards_train/rejected": -19.997535705566406, "step": 4548 }, { "epoch": 2.24, "learning_rate": 1.6634999554476208e-07, "loss": 0.0003, "step": 4549 }, { "epoch": 2.24, "logps_train/chosen": -80.87393188476562, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -329.74725341796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6344878673553467, "rewards_train/margins": 18.234330415725708, "rewards_train/rejected": -19.868818283081055, "step": 4549 }, { "epoch": 2.24, "learning_rate": 1.6614791756503572e-07, "loss": 0.0, "step": 4550 }, { "epoch": 2.24, "logps_train/chosen": -79.87481689453125, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -324.33807373046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.430938959121704, "rewards_train/margins": 18.336707830429077, "rewards_train/rejected": -19.76764678955078, "step": 4550 }, { "epoch": 2.24, "learning_rate": 1.6594593794038565e-07, "loss": 0.0, "step": 4551 }, { "epoch": 2.24, "logps_train/chosen": -82.71624755859375, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -133.625, "logps_train/rejected": -328.6691589355469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6264588832855225, "rewards_train/margins": 17.88118004798889, "rewards_train/rejected": -19.507638931274414, "step": 4551 }, { "epoch": 2.24, "learning_rate": 1.6574405673031617e-07, "loss": 0.0, "step": 4552 }, { "epoch": 2.24, "logps_train/chosen": -74.69718933105469, "logps_train/ref_chosen": -61.375, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -317.4650573730469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.331681728363037, "rewards_train/margins": 18.270342350006104, "rewards_train/rejected": -19.60202407836914, "step": 4552 }, { "epoch": 2.24, "learning_rate": 1.6554227399430326e-07, "loss": 0.0, "step": 4553 }, { "epoch": 2.24, "logps_train/chosen": -79.60447692871094, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -328.6568908691406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5555157661437988, "rewards_train/margins": 18.63048505783081, "rewards_train/rejected": -20.18600082397461, "step": 4553 }, { "epoch": 2.24, "learning_rate": 1.653405897917932e-07, "loss": 0.0001, "step": 4554 }, { "epoch": 2.24, "logps_train/chosen": -74.21916198730469, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -313.64752197265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1601489782333374, "rewards_train/margins": 17.801623940467834, "rewards_train/rejected": -18.961772918701172, "step": 4554 }, { "epoch": 2.24, "learning_rate": 1.6513900418220366e-07, "loss": 0.0001, "step": 4555 }, { "epoch": 2.24, "logps_train/chosen": -79.9195556640625, "logps_train/ref_chosen": -62.15625, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -324.07733154296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.774621605873108, "rewards_train/margins": 18.009870886802673, "rewards_train/rejected": -19.78449249267578, "step": 4555 }, { "epoch": 2.24, "learning_rate": 1.649375172249229e-07, "loss": 0.0, "step": 4556 }, { "epoch": 2.24, "logps_train/chosen": -79.14286041259766, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -327.9930419921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4093546867370605, "rewards_train/margins": 18.318077564239502, "rewards_train/rejected": -19.727432250976562, "step": 4556 }, { "epoch": 2.24, "learning_rate": 1.647361289793106e-07, "loss": 0.0, "step": 4557 }, { "epoch": 2.24, "logps_train/chosen": -77.65145111083984, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -317.8038330078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3989341259002686, "rewards_train/margins": 17.779200792312622, "rewards_train/rejected": -19.17813491821289, "step": 4557 }, { "epoch": 2.24, "learning_rate": 1.64534839504697e-07, "loss": 0.0, "step": 4558 }, { "epoch": 2.24, "logps_train/chosen": -79.55897521972656, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -327.17181396484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3559465408325195, "rewards_train/margins": 18.660258293151855, "rewards_train/rejected": -20.016204833984375, "step": 4558 }, { "epoch": 2.24, "learning_rate": 1.6433364886038315e-07, "loss": 0.0, "step": 4559 }, { "epoch": 2.24, "logps_train/chosen": -76.99016571044922, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -122.3125, "logps_train/rejected": -306.40020751953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3244073390960693, "rewards_train/margins": 17.085684061050415, "rewards_train/rejected": -18.410091400146484, "step": 4559 }, { "epoch": 2.25, "learning_rate": 1.641325571056415e-07, "loss": 0.0003, "step": 4560 }, { "epoch": 2.25, "logps_train/chosen": -77.50787353515625, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -119.9375, "logps_train/rejected": -306.66632080078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3017388582229614, "rewards_train/margins": 17.368995547294617, "rewards_train/rejected": -18.670734405517578, "step": 4560 }, { "epoch": 2.25, "learning_rate": 1.6393156429971487e-07, "loss": 0.0, "step": 4561 }, { "epoch": 2.25, "logps_train/chosen": -79.32087707519531, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -317.5577697753906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4956622123718262, "rewards_train/margins": 17.96704912185669, "rewards_train/rejected": -19.462711334228516, "step": 4561 }, { "epoch": 2.25, "learning_rate": 1.6373067050181699e-07, "loss": 0.0, "step": 4562 }, { "epoch": 2.25, "logps_train/chosen": -81.87919616699219, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -320.087158203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.54202139377594, "rewards_train/margins": 17.843160033226013, "rewards_train/rejected": -19.385181427001953, "step": 4562 }, { "epoch": 2.25, "learning_rate": 1.6352987577113292e-07, "loss": 0.0, "step": 4563 }, { "epoch": 2.25, "logps_train/chosen": -81.34193420410156, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -323.12225341796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.571742057800293, "rewards_train/margins": 17.96802043914795, "rewards_train/rejected": -19.539762496948242, "step": 4563 }, { "epoch": 2.25, "learning_rate": 1.6332918016681756e-07, "loss": 0.0, "step": 4564 }, { "epoch": 2.25, "logps_train/chosen": -74.1414794921875, "logps_train/ref_chosen": -61.84375, "logps_train/ref_rejected": -122.8125, "logps_train/rejected": -308.2677307128906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2286500930786133, "rewards_train/margins": 17.314234733581543, "rewards_train/rejected": -18.542884826660156, "step": 4564 }, { "epoch": 2.25, "learning_rate": 1.631285837479977e-07, "loss": 0.0, "step": 4565 }, { "epoch": 2.25, "logps_train/chosen": -75.79365539550781, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -313.28863525390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1560258865356445, "rewards_train/margins": 17.863558769226074, "rewards_train/rejected": -19.01958465576172, "step": 4565 }, { "epoch": 2.25, "learning_rate": 1.6292808657377017e-07, "loss": 0.0, "step": 4566 }, { "epoch": 2.25, "logps_train/chosen": -82.45098876953125, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -324.5844421386719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8434394598007202, "rewards_train/margins": 17.95875322818756, "rewards_train/rejected": -19.80219268798828, "step": 4566 }, { "epoch": 2.25, "learning_rate": 1.6272768870320263e-07, "loss": 0.0001, "step": 4567 }, { "epoch": 2.25, "logps_train/chosen": -78.27901458740234, "logps_train/ref_chosen": -67.4375, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -320.80615234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0829548835754395, "rewards_train/margins": 18.22822904586792, "rewards_train/rejected": -19.31118392944336, "step": 4567 }, { "epoch": 2.25, "learning_rate": 1.6252739019533403e-07, "loss": 0.0, "step": 4568 }, { "epoch": 2.25, "logps_train/chosen": -76.98146057128906, "logps_train/ref_chosen": -63.03125, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -316.9344177246094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.395948886871338, "rewards_train/margins": 17.9077467918396, "rewards_train/rejected": -19.303695678710938, "step": 4568 }, { "epoch": 2.25, "learning_rate": 1.6232719110917341e-07, "loss": 0.0, "step": 4569 }, { "epoch": 2.25, "logps_train/chosen": -76.91638946533203, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -319.02374267578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.280360221862793, "rewards_train/margins": 17.987248420715332, "rewards_train/rejected": -19.267608642578125, "step": 4569 }, { "epoch": 2.25, "learning_rate": 1.6212709150370092e-07, "loss": 0.0, "step": 4570 }, { "epoch": 2.25, "logps_train/chosen": -87.6068115234375, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -332.52685546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0599493980407715, "rewards_train/margins": 18.198397159576416, "rewards_train/rejected": -20.258346557617188, "step": 4570 }, { "epoch": 2.25, "learning_rate": 1.6192709143786694e-07, "loss": 0.0, "step": 4571 }, { "epoch": 2.25, "logps_train/chosen": -75.56084442138672, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -314.9776611328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2884573936462402, "rewards_train/margins": 17.736847400665283, "rewards_train/rejected": -19.025304794311523, "step": 4571 }, { "epoch": 2.25, "learning_rate": 1.6172719097059317e-07, "loss": 0.0001, "step": 4572 }, { "epoch": 2.25, "logps_train/chosen": -79.4710922241211, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -319.465087890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5201560258865356, "rewards_train/margins": 18.153504252433777, "rewards_train/rejected": -19.673660278320312, "step": 4572 }, { "epoch": 2.25, "learning_rate": 1.615273901607716e-07, "loss": 0.0, "step": 4573 }, { "epoch": 2.25, "logps_train/chosen": -76.3946762084961, "logps_train/ref_chosen": -61.59375, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -319.57550048828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4806785583496094, "rewards_train/margins": 18.19122886657715, "rewards_train/rejected": -19.671907424926758, "step": 4573 }, { "epoch": 2.25, "learning_rate": 1.6132768906726462e-07, "loss": 0.0, "step": 4574 }, { "epoch": 2.25, "logps_train/chosen": -74.62797546386719, "logps_train/ref_chosen": -62.0625, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -320.3576965332031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2563031911849976, "rewards_train/margins": 18.25417387485504, "rewards_train/rejected": -19.51047706604004, "step": 4574 }, { "epoch": 2.25, "learning_rate": 1.611280877489059e-07, "loss": 0.0, "step": 4575 }, { "epoch": 2.25, "logps_train/chosen": -81.89501953125, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -333.2813415527344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.827905535697937, "rewards_train/margins": 18.639877438545227, "rewards_train/rejected": -20.467782974243164, "step": 4575 }, { "epoch": 2.25, "learning_rate": 1.6092858626449912e-07, "loss": 0.0, "step": 4576 }, { "epoch": 2.25, "logps_train/chosen": -76.38243103027344, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -317.751708984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3055766820907593, "rewards_train/margins": 18.306949257850647, "rewards_train/rejected": -19.612525939941406, "step": 4576 }, { "epoch": 2.25, "learning_rate": 1.6072918467281872e-07, "loss": 0.0, "step": 4577 }, { "epoch": 2.25, "logps_train/chosen": -81.07396697998047, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -316.14337158203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7685295343399048, "rewards_train/margins": 17.405813574790955, "rewards_train/rejected": -19.17434310913086, "step": 4577 }, { "epoch": 2.25, "learning_rate": 1.6052988303260994e-07, "loss": 0.0002, "step": 4578 }, { "epoch": 2.25, "logps_train/chosen": -75.96160125732422, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -320.7650146484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1330256462097168, "rewards_train/margins": 18.492105960845947, "rewards_train/rejected": -19.625131607055664, "step": 4578 }, { "epoch": 2.25, "learning_rate": 1.6033068140258826e-07, "loss": 0.0, "step": 4579 }, { "epoch": 2.25, "logps_train/chosen": -74.70489501953125, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -310.38800048828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0770080089569092, "rewards_train/margins": 17.516186952590942, "rewards_train/rejected": -18.59319496154785, "step": 4579 }, { "epoch": 2.26, "learning_rate": 1.6013157984143989e-07, "loss": 0.0001, "step": 4580 }, { "epoch": 2.26, "logps_train/chosen": -72.469482421875, "logps_train/ref_chosen": -62.8125, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -312.91473388671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9655030369758606, "rewards_train/margins": 18.10028713941574, "rewards_train/rejected": -19.0657901763916, "step": 4580 }, { "epoch": 2.26, "learning_rate": 1.5993257840782126e-07, "loss": 0.0, "step": 4581 }, { "epoch": 2.26, "logps_train/chosen": -76.16342163085938, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -319.356689453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3266698122024536, "rewards_train/margins": 18.074037432670593, "rewards_train/rejected": -19.400707244873047, "step": 4581 }, { "epoch": 2.26, "learning_rate": 1.597336771603599e-07, "loss": 0.0, "step": 4582 }, { "epoch": 2.26, "logps_train/chosen": -79.53422546386719, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -324.2335205078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.464066743850708, "rewards_train/margins": 18.02178454399109, "rewards_train/rejected": -19.485851287841797, "step": 4582 }, { "epoch": 2.26, "learning_rate": 1.5953487615765327e-07, "loss": 0.0, "step": 4583 }, { "epoch": 2.26, "logps_train/chosen": -77.7315902709961, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -332.95928955078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4642233848571777, "rewards_train/margins": 19.2196946144104, "rewards_train/rejected": -20.683917999267578, "step": 4583 }, { "epoch": 2.26, "learning_rate": 1.5933617545826933e-07, "loss": 0.0, "step": 4584 }, { "epoch": 2.26, "logps_train/chosen": -83.2651596069336, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -326.933349609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7734638452529907, "rewards_train/margins": 18.077782034873962, "rewards_train/rejected": -19.851245880126953, "step": 4584 }, { "epoch": 2.26, "learning_rate": 1.5913757512074722e-07, "loss": 0.0, "step": 4585 }, { "epoch": 2.26, "logps_train/chosen": -81.75001525878906, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -312.78662109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.735402226448059, "rewards_train/margins": 17.289252161979675, "rewards_train/rejected": -19.024654388427734, "step": 4585 }, { "epoch": 2.26, "learning_rate": 1.5893907520359511e-07, "loss": 0.0, "step": 4586 }, { "epoch": 2.26, "logps_train/chosen": -74.52559661865234, "logps_train/ref_chosen": -61.0, "logps_train/ref_rejected": -117.4375, "logps_train/rejected": -299.5625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3535358905792236, "rewards_train/margins": 16.860674619674683, "rewards_train/rejected": -18.214210510253906, "step": 4586 }, { "epoch": 2.26, "learning_rate": 1.5874067576529304e-07, "loss": 0.0002, "step": 4587 }, { "epoch": 2.26, "logps_train/chosen": -78.7422866821289, "logps_train/ref_chosen": -60.96875, "logps_train/ref_rejected": -122.3125, "logps_train/rejected": -315.5852966308594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7757670879364014, "rewards_train/margins": 17.553271532058716, "rewards_train/rejected": -19.329038619995117, "step": 4587 }, { "epoch": 2.26, "learning_rate": 1.5854237686429067e-07, "loss": 0.0, "step": 4588 }, { "epoch": 2.26, "logps_train/chosen": -76.91112518310547, "logps_train/ref_chosen": -61.21875, "logps_train/ref_rejected": -122.125, "logps_train/rejected": -310.1878662109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5685782432556152, "rewards_train/margins": 17.23961114883423, "rewards_train/rejected": -18.808189392089844, "step": 4588 }, { "epoch": 2.26, "learning_rate": 1.5834417855900796e-07, "loss": 0.0002, "step": 4589 }, { "epoch": 2.26, "logps_train/chosen": -78.10234069824219, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -317.88916015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3731739521026611, "rewards_train/margins": 18.24201464653015, "rewards_train/rejected": -19.615188598632812, "step": 4589 }, { "epoch": 2.26, "learning_rate": 1.5814608090783582e-07, "loss": 0.0, "step": 4590 }, { "epoch": 2.26, "logps_train/chosen": -80.44562530517578, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -328.04302978515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.445930004119873, "rewards_train/margins": 18.458031177520752, "rewards_train/rejected": -19.903961181640625, "step": 4590 }, { "epoch": 2.26, "learning_rate": 1.57948083969135e-07, "loss": 0.0, "step": 4591 }, { "epoch": 2.26, "logps_train/chosen": -81.8447036743164, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -119.0625, "logps_train/rejected": -309.3064270019531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6785132884979248, "rewards_train/margins": 17.344220399856567, "rewards_train/rejected": -19.022733688354492, "step": 4591 }, { "epoch": 2.26, "learning_rate": 1.5775018780123672e-07, "loss": 0.0, "step": 4592 }, { "epoch": 2.26, "logps_train/chosen": -81.56475830078125, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -322.79168701171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5692694187164307, "rewards_train/margins": 17.995595693588257, "rewards_train/rejected": -19.564865112304688, "step": 4592 }, { "epoch": 2.26, "learning_rate": 1.5755239246244235e-07, "loss": 0.0, "step": 4593 }, { "epoch": 2.26, "logps_train/chosen": -76.6768569946289, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -323.2691955566406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2460057735443115, "rewards_train/margins": 18.25845170021057, "rewards_train/rejected": -19.504457473754883, "step": 4593 }, { "epoch": 2.26, "learning_rate": 1.5735469801102407e-07, "loss": 0.0, "step": 4594 }, { "epoch": 2.26, "logps_train/chosen": -74.6544189453125, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -315.7882995605469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.118224859237671, "rewards_train/margins": 18.111582040786743, "rewards_train/rejected": -19.229806900024414, "step": 4594 }, { "epoch": 2.26, "learning_rate": 1.571571045052239e-07, "loss": 0.0, "step": 4595 }, { "epoch": 2.26, "logps_train/chosen": -80.32757568359375, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -318.17413330078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4718691110610962, "rewards_train/margins": 17.892274260520935, "rewards_train/rejected": -19.36414337158203, "step": 4595 }, { "epoch": 2.26, "learning_rate": 1.5695961200325398e-07, "loss": 0.0, "step": 4596 }, { "epoch": 2.26, "logps_train/chosen": -78.95986938476562, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -323.5529479980469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.42850661277771, "rewards_train/margins": 18.151888132095337, "rewards_train/rejected": -19.580394744873047, "step": 4596 }, { "epoch": 2.26, "learning_rate": 1.567622205632974e-07, "loss": 0.0, "step": 4597 }, { "epoch": 2.26, "logps_train/chosen": -78.33753204345703, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -122.8125, "logps_train/rejected": -315.3997802734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2832157611846924, "rewards_train/margins": 17.974634885787964, "rewards_train/rejected": -19.257850646972656, "step": 4597 }, { "epoch": 2.26, "learning_rate": 1.5656493024350647e-07, "loss": 0.0, "step": 4598 }, { "epoch": 2.26, "logps_train/chosen": -85.74958801269531, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -332.6016845703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.907429814338684, "rewards_train/margins": 18.556742548942566, "rewards_train/rejected": -20.46417236328125, "step": 4598 }, { "epoch": 2.26, "learning_rate": 1.5636774110200445e-07, "loss": 0.0, "step": 4599 }, { "epoch": 2.26, "logps_train/chosen": -79.54910278320312, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -314.48809814453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6031525135040283, "rewards_train/margins": 17.49614405632019, "rewards_train/rejected": -19.09929656982422, "step": 4599 }, { "epoch": 2.26, "learning_rate": 1.5617065319688504e-07, "loss": 0.0, "step": 4600 }, { "epoch": 2.26, "logps_train/chosen": -80.23722076416016, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -310.65667724609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5244786739349365, "rewards_train/margins": 17.27771019935608, "rewards_train/rejected": -18.802188873291016, "step": 4600 }, { "epoch": 2.27, "learning_rate": 1.5617065319688504e-07, "loss": 0.0046, "step": 4601 }, { "epoch": 2.27, "logps_train/chosen": -81.8740234375, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -334.04864501953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6845703125, "rewards_train/margins": 18.739046096801758, "rewards_train/rejected": -20.423616409301758, "step": 4601 }, { "epoch": 2.27, "learning_rate": 1.559736665862109e-07, "loss": 0.0, "step": 4602 }, { "epoch": 2.27, "logps_train/chosen": -74.20801544189453, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -325.48431396484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.98525470495224, "rewards_train/margins": 18.800920069217682, "rewards_train/rejected": -19.786174774169922, "step": 4602 }, { "epoch": 2.27, "learning_rate": 1.557767813280162e-07, "loss": 0.0, "step": 4603 }, { "epoch": 2.27, "logps_train/chosen": -84.86432647705078, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -330.80255126953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.931256890296936, "rewards_train/margins": 18.35095250606537, "rewards_train/rejected": -20.282209396362305, "step": 4603 }, { "epoch": 2.27, "learning_rate": 1.5557999748030443e-07, "loss": 0.0, "step": 4604 }, { "epoch": 2.27, "logps_train/chosen": -73.74653625488281, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -325.39215087890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1387888193130493, "rewards_train/margins": 18.92454469203949, "rewards_train/rejected": -20.06333351135254, "step": 4604 }, { "epoch": 2.27, "learning_rate": 1.553833151010493e-07, "loss": 0.0, "step": 4605 }, { "epoch": 2.27, "logps_train/chosen": -82.97496032714844, "logps_train/ref_chosen": -67.6875, "logps_train/ref_rejected": -132.5, "logps_train/rejected": -333.093505859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5318710803985596, "rewards_train/margins": 18.530117750167847, "rewards_train/rejected": -20.061988830566406, "step": 4605 }, { "epoch": 2.27, "learning_rate": 1.5518673424819507e-07, "loss": 0.0001, "step": 4606 }, { "epoch": 2.27, "logps_train/chosen": -77.62582397460938, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -324.4969787597656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4694913625717163, "rewards_train/margins": 18.316389679908752, "rewards_train/rejected": -19.78588104248047, "step": 4606 }, { "epoch": 2.27, "learning_rate": 1.5499025497965568e-07, "loss": 0.0, "step": 4607 }, { "epoch": 2.27, "logps_train/chosen": -71.74838256835938, "logps_train/ref_chosen": -60.46875, "logps_train/ref_rejected": -120.9375, "logps_train/rejected": -311.2213439941406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.129037857055664, "rewards_train/margins": 17.897735595703125, "rewards_train/rejected": -19.02677345275879, "step": 4607 }, { "epoch": 2.27, "learning_rate": 1.5479387735331524e-07, "loss": 0.0, "step": 4608 }, { "epoch": 2.27, "logps_train/chosen": -82.03357696533203, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -327.0192565917969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6171761751174927, "rewards_train/margins": 18.160629153251648, "rewards_train/rejected": -19.77780532836914, "step": 4608 }, { "epoch": 2.27, "learning_rate": 1.5459760142702766e-07, "loss": 0.0004, "step": 4609 }, { "epoch": 2.27, "logps_train/chosen": -75.6933364868164, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -326.9720458984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.128806233406067, "rewards_train/margins": 18.567322611808777, "rewards_train/rejected": -19.696128845214844, "step": 4609 }, { "epoch": 2.27, "learning_rate": 1.544014272586176e-07, "loss": 0.0, "step": 4610 }, { "epoch": 2.27, "logps_train/chosen": -75.35107421875, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -321.0594482421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1966304779052734, "rewards_train/margins": 18.310293197631836, "rewards_train/rejected": -19.50692367553711, "step": 4610 }, { "epoch": 2.27, "learning_rate": 1.5420535490587912e-07, "loss": 0.0, "step": 4611 }, { "epoch": 2.27, "logps_train/chosen": -76.88398742675781, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -326.75323486328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.099776029586792, "rewards_train/margins": 18.6180260181427, "rewards_train/rejected": -19.717802047729492, "step": 4611 }, { "epoch": 2.27, "learning_rate": 1.5400938442657625e-07, "loss": 0.0002, "step": 4612 }, { "epoch": 2.27, "logps_train/chosen": -76.98450469970703, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -333.190185546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.347034215927124, "rewards_train/margins": 18.994055032730103, "rewards_train/rejected": -20.341089248657227, "step": 4612 }, { "epoch": 2.27, "learning_rate": 1.538135158784435e-07, "loss": 0.0, "step": 4613 }, { "epoch": 2.27, "logps_train/chosen": -81.31929016113281, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -326.05029296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6995069980621338, "rewards_train/margins": 18.554351568222046, "rewards_train/rejected": -20.25385856628418, "step": 4613 }, { "epoch": 2.27, "learning_rate": 1.5361774931918497e-07, "loss": 0.0, "step": 4614 }, { "epoch": 2.27, "logps_train/chosen": -78.48704528808594, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -122.1875, "logps_train/rejected": -314.6015930175781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4705305099487305, "rewards_train/margins": 17.771660804748535, "rewards_train/rejected": -19.242191314697266, "step": 4614 }, { "epoch": 2.27, "learning_rate": 1.5342208480647468e-07, "loss": 0.0, "step": 4615 }, { "epoch": 2.27, "logps_train/chosen": -79.87260437011719, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -321.12261962890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6324267387390137, "rewards_train/margins": 18.120556354522705, "rewards_train/rejected": -19.75298309326172, "step": 4615 }, { "epoch": 2.27, "learning_rate": 1.5322652239795713e-07, "loss": 0.0, "step": 4616 }, { "epoch": 2.27, "logps_train/chosen": -80.87980651855469, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -324.93316650390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6787035465240479, "rewards_train/margins": 18.176528692245483, "rewards_train/rejected": -19.85523223876953, "step": 4616 }, { "epoch": 2.27, "learning_rate": 1.5303106215124573e-07, "loss": 0.0, "step": 4617 }, { "epoch": 2.27, "logps_train/chosen": -80.57927703857422, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -327.858642578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5118343830108643, "rewards_train/margins": 18.587408781051636, "rewards_train/rejected": -20.0992431640625, "step": 4617 }, { "epoch": 2.27, "learning_rate": 1.5283570412392478e-07, "loss": 0.0, "step": 4618 }, { "epoch": 2.27, "logps_train/chosen": -76.92223358154297, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -323.29888916015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3638052940368652, "rewards_train/margins": 18.569990634918213, "rewards_train/rejected": -19.933795928955078, "step": 4618 }, { "epoch": 2.27, "learning_rate": 1.52640448373548e-07, "loss": 0.0, "step": 4619 }, { "epoch": 2.27, "logps_train/chosen": -78.61207580566406, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -315.1575622558594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4294202327728271, "rewards_train/margins": 17.712116479873657, "rewards_train/rejected": -19.141536712646484, "step": 4619 }, { "epoch": 2.27, "learning_rate": 1.5244529495763893e-07, "loss": 0.0, "step": 4620 }, { "epoch": 2.27, "logps_train/chosen": -79.48560333251953, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -311.78936767578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4914803504943848, "rewards_train/margins": 17.180963039398193, "rewards_train/rejected": -18.672443389892578, "step": 4620 }, { "epoch": 2.28, "learning_rate": 1.5225024393369128e-07, "loss": 0.0, "step": 4621 }, { "epoch": 2.28, "logps_train/chosen": -72.62074279785156, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -309.14678955078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9896625280380249, "rewards_train/margins": 17.65006411075592, "rewards_train/rejected": -18.639726638793945, "step": 4621 }, { "epoch": 2.28, "learning_rate": 1.5205529535916834e-07, "loss": 0.0, "step": 4622 }, { "epoch": 2.28, "logps_train/chosen": -74.17666625976562, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -313.98638916015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9715725183486938, "rewards_train/margins": 18.028290152549744, "rewards_train/rejected": -18.999862670898438, "step": 4622 }, { "epoch": 2.28, "learning_rate": 1.5186044929150316e-07, "loss": 0.0, "step": 4623 }, { "epoch": 2.28, "logps_train/chosen": -81.81793212890625, "logps_train/ref_chosen": -67.4375, "logps_train/ref_rejected": -132.25, "logps_train/rejected": -341.4329833984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4349186420440674, "rewards_train/margins": 19.477034330368042, "rewards_train/rejected": -20.91195297241211, "step": 4623 }, { "epoch": 2.28, "learning_rate": 1.5166570578809866e-07, "loss": 0.0, "step": 4624 }, { "epoch": 2.28, "logps_train/chosen": -82.39912414550781, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -332.1758728027344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7040729522705078, "rewards_train/margins": 18.850086212158203, "rewards_train/rejected": -20.55415916442871, "step": 4624 }, { "epoch": 2.28, "learning_rate": 1.5147106490632794e-07, "loss": 0.0, "step": 4625 }, { "epoch": 2.28, "logps_train/chosen": -71.15052795410156, "logps_train/ref_chosen": -61.40625, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -309.8201599121094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9732800722122192, "rewards_train/margins": 17.849607348442078, "rewards_train/rejected": -18.822887420654297, "step": 4625 }, { "epoch": 2.28, "learning_rate": 1.512765267035332e-07, "loss": 0.0001, "step": 4626 }, { "epoch": 2.28, "logps_train/chosen": -78.52171325683594, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -313.146240234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3409652709960938, "rewards_train/margins": 17.565166473388672, "rewards_train/rejected": -18.906131744384766, "step": 4626 }, { "epoch": 2.28, "learning_rate": 1.510820912370267e-07, "loss": 0.0, "step": 4627 }, { "epoch": 2.28, "logps_train/chosen": -82.9300308227539, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -310.5580749511719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6980321407318115, "rewards_train/margins": 17.097179651260376, "rewards_train/rejected": -18.795211791992188, "step": 4627 }, { "epoch": 2.28, "learning_rate": 1.5088775856409063e-07, "loss": 0.0, "step": 4628 }, { "epoch": 2.28, "logps_train/chosen": -78.94329071044922, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -315.54791259765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.603411316871643, "rewards_train/margins": 17.491711974143982, "rewards_train/rejected": -19.095123291015625, "step": 4628 }, { "epoch": 2.28, "learning_rate": 1.5069352874197666e-07, "loss": 0.0, "step": 4629 }, { "epoch": 2.28, "logps_train/chosen": -84.53639221191406, "logps_train/ref_chosen": -68.25, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -329.183837890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.62883460521698, "rewards_train/margins": 18.273730397224426, "rewards_train/rejected": -19.902565002441406, "step": 4629 }, { "epoch": 2.28, "learning_rate": 1.50499401827906e-07, "loss": 0.0, "step": 4630 }, { "epoch": 2.28, "logps_train/chosen": -79.61735534667969, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -313.5920715332031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6697916984558105, "rewards_train/margins": 17.347910404205322, "rewards_train/rejected": -19.017702102661133, "step": 4630 }, { "epoch": 2.28, "learning_rate": 1.5030537787907005e-07, "loss": 0.0, "step": 4631 }, { "epoch": 2.28, "logps_train/chosen": -74.1663589477539, "logps_train/ref_chosen": -62.0, "logps_train/ref_rejected": -119.4375, "logps_train/rejected": -301.9888610839844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2153663635253906, "rewards_train/margins": 17.038501739501953, "rewards_train/rejected": -18.253868103027344, "step": 4631 }, { "epoch": 2.28, "learning_rate": 1.5011145695262945e-07, "loss": 0.0, "step": 4632 }, { "epoch": 2.28, "logps_train/chosen": -75.86158752441406, "logps_train/ref_chosen": -61.8125, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -317.39581298828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4046154022216797, "rewards_train/margins": 18.13213539123535, "rewards_train/rejected": -19.53675079345703, "step": 4632 }, { "epoch": 2.28, "learning_rate": 1.4991763910571452e-07, "loss": 0.0, "step": 4633 }, { "epoch": 2.28, "logps_train/chosen": -77.39509582519531, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -317.99334716796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.302107810974121, "rewards_train/margins": 18.02413272857666, "rewards_train/rejected": -19.32624053955078, "step": 4633 }, { "epoch": 2.28, "learning_rate": 1.497239243954253e-07, "loss": 0.0001, "step": 4634 }, { "epoch": 2.28, "logps_train/chosen": -80.60813903808594, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -132.125, "logps_train/rejected": -329.02044677734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4336895942687988, "rewards_train/margins": 18.253608226776123, "rewards_train/rejected": -19.687297821044922, "step": 4634 }, { "epoch": 2.28, "learning_rate": 1.4953031287883184e-07, "loss": 0.0001, "step": 4635 }, { "epoch": 2.28, "logps_train/chosen": -86.69752502441406, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -334.0382385253906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0289807319641113, "rewards_train/margins": 18.449452877044678, "rewards_train/rejected": -20.47843360900879, "step": 4635 }, { "epoch": 2.28, "learning_rate": 1.4933680461297277e-07, "loss": 0.0, "step": 4636 }, { "epoch": 2.28, "logps_train/chosen": -72.20044708251953, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -303.40765380859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9843026399612427, "rewards_train/margins": 17.256214022636414, "rewards_train/rejected": -18.240516662597656, "step": 4636 }, { "epoch": 2.28, "learning_rate": 1.4914339965485728e-07, "loss": 0.0, "step": 4637 }, { "epoch": 2.28, "logps_train/chosen": -75.51487731933594, "logps_train/ref_chosen": -61.6875, "logps_train/ref_rejected": -121.1875, "logps_train/rejected": -308.89727783203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3809316158294678, "rewards_train/margins": 17.38770318031311, "rewards_train/rejected": -18.768634796142578, "step": 4637 }, { "epoch": 2.28, "learning_rate": 1.4895009806146402e-07, "loss": 0.0001, "step": 4638 }, { "epoch": 2.28, "logps_train/chosen": -79.18873596191406, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -306.4814453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3555917739868164, "rewards_train/margins": 16.945094108581543, "rewards_train/rejected": -18.30068588256836, "step": 4638 }, { "epoch": 2.28, "learning_rate": 1.487568998897404e-07, "loss": 0.0, "step": 4639 }, { "epoch": 2.28, "logps_train/chosen": -77.9343490600586, "logps_train/ref_chosen": -67.4375, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -323.3852233886719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0493923425674438, "rewards_train/margins": 18.280683875083923, "rewards_train/rejected": -19.330076217651367, "step": 4639 }, { "epoch": 2.28, "learning_rate": 1.4856380519660427e-07, "loss": 0.0, "step": 4640 }, { "epoch": 2.28, "logps_train/chosen": -76.34561157226562, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -132.375, "logps_train/rejected": -331.5816955566406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0408596992492676, "rewards_train/margins": 18.875415325164795, "rewards_train/rejected": -19.916275024414062, "step": 4640 }, { "epoch": 2.29, "learning_rate": 1.483708140389426e-07, "loss": 0.0001, "step": 4641 }, { "epoch": 2.29, "logps_train/chosen": -78.54255676269531, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -320.0661315917969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5836011171340942, "rewards_train/margins": 18.008214831352234, "rewards_train/rejected": -19.591815948486328, "step": 4641 }, { "epoch": 2.29, "learning_rate": 1.4817792647361166e-07, "loss": 0.0, "step": 4642 }, { "epoch": 2.29, "logps_train/chosen": -78.72105407714844, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -310.9339294433594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4017446041107178, "rewards_train/margins": 17.09213662147522, "rewards_train/rejected": -18.493881225585938, "step": 4642 }, { "epoch": 2.29, "learning_rate": 1.4798514255743773e-07, "loss": 0.0, "step": 4643 }, { "epoch": 2.29, "logps_train/chosen": -79.37071228027344, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -322.689453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2801613807678223, "rewards_train/margins": 18.03727102279663, "rewards_train/rejected": -19.317432403564453, "step": 4643 }, { "epoch": 2.29, "learning_rate": 1.477924623472161e-07, "loss": 0.0, "step": 4644 }, { "epoch": 2.29, "logps_train/chosen": -71.0456771850586, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -310.9341735839844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8093531727790833, "rewards_train/margins": 17.94773644208908, "rewards_train/rejected": -18.757089614868164, "step": 4644 }, { "epoch": 2.29, "learning_rate": 1.4759988589971168e-07, "loss": 0.0, "step": 4645 }, { "epoch": 2.29, "logps_train/chosen": -77.90060424804688, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -318.9143981933594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4525600671768188, "rewards_train/margins": 18.048351645469666, "rewards_train/rejected": -19.500911712646484, "step": 4645 }, { "epoch": 2.29, "learning_rate": 1.4740741327165867e-07, "loss": 0.0001, "step": 4646 }, { "epoch": 2.29, "logps_train/chosen": -78.3903579711914, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -327.57098388671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3290749788284302, "rewards_train/margins": 18.833297610282898, "rewards_train/rejected": -20.162372589111328, "step": 4646 }, { "epoch": 2.29, "learning_rate": 1.4721504451976103e-07, "loss": 0.0, "step": 4647 }, { "epoch": 2.29, "logps_train/chosen": -76.07550048828125, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -119.6875, "logps_train/rejected": -306.5372314453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.258819341659546, "rewards_train/margins": 17.425567865371704, "rewards_train/rejected": -18.68438720703125, "step": 4647 }, { "epoch": 2.29, "learning_rate": 1.4702277970069183e-07, "loss": 0.0, "step": 4648 }, { "epoch": 2.29, "logps_train/chosen": -77.67887115478516, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -322.8929443359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2750401496887207, "rewards_train/margins": 18.398484706878662, "rewards_train/rejected": -19.673524856567383, "step": 4648 }, { "epoch": 2.29, "learning_rate": 1.4683061887109333e-07, "loss": 0.0, "step": 4649 }, { "epoch": 2.29, "logps_train/chosen": -73.2938461303711, "logps_train/ref_chosen": -62.1875, "logps_train/ref_rejected": -120.75, "logps_train/rejected": -304.76055908203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1105856895446777, "rewards_train/margins": 17.292328357696533, "rewards_train/rejected": -18.40291404724121, "step": 4649 }, { "epoch": 2.29, "learning_rate": 1.4663856208757796e-07, "loss": 0.0001, "step": 4650 }, { "epoch": 2.29, "logps_train/chosen": -78.74638366699219, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -316.1415710449219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.439774751663208, "rewards_train/margins": 17.774088621139526, "rewards_train/rejected": -19.213863372802734, "step": 4650 }, { "epoch": 2.29, "learning_rate": 1.4644660940672627e-07, "loss": 0.0, "step": 4651 }, { "epoch": 2.29, "logps_train/chosen": -77.74458312988281, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -121.1875, "logps_train/rejected": -312.1642761230469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3935019969940186, "rewards_train/margins": 17.706713914871216, "rewards_train/rejected": -19.100215911865234, "step": 4651 }, { "epoch": 2.29, "learning_rate": 1.4625476088508914e-07, "loss": 0.0, "step": 4652 }, { "epoch": 2.29, "logps_train/chosen": -79.95269775390625, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -319.04345703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4408750534057617, "rewards_train/margins": 17.819281578063965, "rewards_train/rejected": -19.260156631469727, "step": 4652 }, { "epoch": 2.29, "learning_rate": 1.4606301657918674e-07, "loss": 0.0, "step": 4653 }, { "epoch": 2.29, "logps_train/chosen": -82.46865844726562, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -325.7606201171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7675443887710571, "rewards_train/margins": 18.145190596580505, "rewards_train/rejected": -19.912734985351562, "step": 4653 }, { "epoch": 2.29, "learning_rate": 1.4587137654550768e-07, "loss": 0.0, "step": 4654 }, { "epoch": 2.29, "logps_train/chosen": -82.14491271972656, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -316.08721923828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7289927005767822, "rewards_train/margins": 17.349311590194702, "rewards_train/rejected": -19.078304290771484, "step": 4654 }, { "epoch": 2.29, "learning_rate": 1.4567984084051076e-07, "loss": 0.0, "step": 4655 }, { "epoch": 2.29, "logps_train/chosen": -77.1261215209961, "logps_train/ref_chosen": -62.0625, "logps_train/ref_rejected": -121.25, "logps_train/rejected": -308.0016174316406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5063623189926147, "rewards_train/margins": 17.170754075050354, "rewards_train/rejected": -18.67711639404297, "step": 4655 }, { "epoch": 2.29, "learning_rate": 1.4548840952062363e-07, "loss": 0.0, "step": 4656 }, { "epoch": 2.29, "logps_train/chosen": -76.72673034667969, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -131.125, "logps_train/rejected": -338.8099670410156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1668626070022583, "rewards_train/margins": 19.608177065849304, "rewards_train/rejected": -20.775039672851562, "step": 4656 }, { "epoch": 2.29, "learning_rate": 1.4529708264224305e-07, "loss": 0.0, "step": 4657 }, { "epoch": 2.29, "logps_train/chosen": -76.46458435058594, "logps_train/ref_chosen": -61.5625, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -310.3472900390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4895732402801514, "rewards_train/margins": 17.29793667793274, "rewards_train/rejected": -18.78750991821289, "step": 4657 }, { "epoch": 2.29, "learning_rate": 1.4510586026173553e-07, "loss": 0.0, "step": 4658 }, { "epoch": 2.29, "logps_train/chosen": -83.00373840332031, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -330.6558837890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6474928855895996, "rewards_train/margins": 18.593437671661377, "rewards_train/rejected": -20.240930557250977, "step": 4658 }, { "epoch": 2.29, "learning_rate": 1.4491474243543633e-07, "loss": 0.0001, "step": 4659 }, { "epoch": 2.29, "logps_train/chosen": -71.78881072998047, "logps_train/ref_chosen": -61.28125, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -314.82159423828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0513414144515991, "rewards_train/margins": 17.91563093662262, "rewards_train/rejected": -18.96697235107422, "step": 4659 }, { "epoch": 2.29, "learning_rate": 1.4472372921965003e-07, "loss": 0.0, "step": 4660 }, { "epoch": 2.29, "logps_train/chosen": -78.86347961425781, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -315.694580078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5062694549560547, "rewards_train/margins": 17.658987045288086, "rewards_train/rejected": -19.16525650024414, "step": 4660 }, { "epoch": 2.29, "learning_rate": 1.4453282067065024e-07, "loss": 0.0, "step": 4661 }, { "epoch": 2.29, "logps_train/chosen": -79.00987243652344, "logps_train/ref_chosen": -67.8125, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -329.04083251953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.121787667274475, "rewards_train/margins": 18.90709912776947, "rewards_train/rejected": -20.028886795043945, "step": 4661 }, { "epoch": 2.3, "learning_rate": 1.4434201684468028e-07, "loss": 0.0, "step": 4662 }, { "epoch": 2.3, "logps_train/chosen": -78.82463073730469, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -325.29583740234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.490959644317627, "rewards_train/margins": 18.386964321136475, "rewards_train/rejected": -19.8779239654541, "step": 4662 }, { "epoch": 2.3, "learning_rate": 1.441513177979521e-07, "loss": 0.0, "step": 4663 }, { "epoch": 2.3, "logps_train/chosen": -77.05908203125, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -317.78131103515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2297855615615845, "rewards_train/margins": 17.887701392173767, "rewards_train/rejected": -19.11748695373535, "step": 4663 }, { "epoch": 2.3, "learning_rate": 1.4396072358664663e-07, "loss": 0.0, "step": 4664 }, { "epoch": 2.3, "logps_train/chosen": -85.09940338134766, "logps_train/ref_chosen": -68.0625, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -330.0334777832031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7020301818847656, "rewards_train/margins": 18.27328872680664, "rewards_train/rejected": -19.975318908691406, "step": 4664 }, { "epoch": 2.3, "learning_rate": 1.4377023426691482e-07, "loss": 0.0, "step": 4665 }, { "epoch": 2.3, "logps_train/chosen": -76.81071472167969, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -329.0109558105469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3198907375335693, "rewards_train/margins": 18.66079545021057, "rewards_train/rejected": -19.98068618774414, "step": 4665 }, { "epoch": 2.3, "learning_rate": 1.4357984989487543e-07, "loss": 0.0, "step": 4666 }, { "epoch": 2.3, "logps_train/chosen": -78.80540466308594, "logps_train/ref_chosen": -62.40625, "logps_train/ref_rejected": -118.8125, "logps_train/rejected": -305.04010009765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6403298377990723, "rewards_train/margins": 16.983163356781006, "rewards_train/rejected": -18.623493194580078, "step": 4666 }, { "epoch": 2.3, "learning_rate": 1.4338957052661726e-07, "loss": 0.0, "step": 4667 }, { "epoch": 2.3, "logps_train/chosen": -76.52000427246094, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -312.2055969238281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2820783853530884, "rewards_train/margins": 17.612555861473083, "rewards_train/rejected": -18.894634246826172, "step": 4667 }, { "epoch": 2.3, "learning_rate": 1.4319939621819833e-07, "loss": 0.0, "step": 4668 }, { "epoch": 2.3, "logps_train/chosen": -78.16971588134766, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -131.875, "logps_train/rejected": -331.489990234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.361283540725708, "rewards_train/margins": 18.6010959148407, "rewards_train/rejected": -19.962379455566406, "step": 4668 }, { "epoch": 2.3, "learning_rate": 1.430093270256445e-07, "loss": 0.0, "step": 4669 }, { "epoch": 2.3, "logps_train/chosen": -81.58619689941406, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -327.7318115234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5192155838012695, "rewards_train/margins": 18.295228004455566, "rewards_train/rejected": -19.814443588256836, "step": 4669 }, { "epoch": 2.3, "learning_rate": 1.4281936300495196e-07, "loss": 0.0, "step": 4670 }, { "epoch": 2.3, "logps_train/chosen": -82.63017272949219, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -132.0, "logps_train/rejected": -338.07452392578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.584355354309082, "rewards_train/margins": 19.023486137390137, "rewards_train/rejected": -20.60784149169922, "step": 4670 }, { "epoch": 2.3, "learning_rate": 1.426295042120853e-07, "loss": 0.0, "step": 4671 }, { "epoch": 2.3, "logps_train/chosen": -83.56846618652344, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -325.8621826171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7867779731750488, "rewards_train/margins": 18.241729259490967, "rewards_train/rejected": -20.028507232666016, "step": 4671 }, { "epoch": 2.3, "learning_rate": 1.4243975070297815e-07, "loss": 0.0, "step": 4672 }, { "epoch": 2.3, "logps_train/chosen": -84.13694763183594, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -320.2254638671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7560292482376099, "rewards_train/margins": 17.579801440238953, "rewards_train/rejected": -19.335830688476562, "step": 4672 }, { "epoch": 2.3, "learning_rate": 1.4225010253353297e-07, "loss": 0.0, "step": 4673 }, { "epoch": 2.3, "logps_train/chosen": -75.41004180908203, "logps_train/ref_chosen": -62.5625, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -320.9263916015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2846565246582031, "rewards_train/margins": 18.287185668945312, "rewards_train/rejected": -19.571842193603516, "step": 4673 }, { "epoch": 2.3, "learning_rate": 1.4206055975962177e-07, "loss": 0.0, "step": 4674 }, { "epoch": 2.3, "logps_train/chosen": -80.1279296875, "logps_train/ref_chosen": -68.25, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -327.6265869140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1849122047424316, "rewards_train/margins": 18.593663692474365, "rewards_train/rejected": -19.778575897216797, "step": 4674 }, { "epoch": 2.3, "learning_rate": 1.41871122437085e-07, "loss": 0.0, "step": 4675 }, { "epoch": 2.3, "logps_train/chosen": -72.5292739868164, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -315.13116455078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9548804759979248, "rewards_train/margins": 18.11243176460266, "rewards_train/rejected": -19.067312240600586, "step": 4675 }, { "epoch": 2.3, "learning_rate": 1.4168179062173191e-07, "loss": 0.0, "step": 4676 }, { "epoch": 2.3, "logps_train/chosen": -75.98448944091797, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -120.3125, "logps_train/rejected": -309.2566223144531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2550406455993652, "rewards_train/margins": 17.638638019561768, "rewards_train/rejected": -18.893678665161133, "step": 4676 }, { "epoch": 2.3, "learning_rate": 1.4149256436934132e-07, "loss": 0.0, "step": 4677 }, { "epoch": 2.3, "logps_train/chosen": -73.32612609863281, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -331.1497802734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9419876933097839, "rewards_train/margins": 19.35985404253006, "rewards_train/rejected": -20.301841735839844, "step": 4677 }, { "epoch": 2.3, "learning_rate": 1.413034437356604e-07, "loss": 0.0, "step": 4678 }, { "epoch": 2.3, "logps_train/chosen": -79.50886535644531, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -308.59515380859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.350203514099121, "rewards_train/margins": 17.207409858703613, "rewards_train/rejected": -18.557613372802734, "step": 4678 }, { "epoch": 2.3, "learning_rate": 1.4111442877640522e-07, "loss": 0.0001, "step": 4679 }, { "epoch": 2.3, "logps_train/chosen": -82.04939270019531, "logps_train/ref_chosen": -67.9375, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -327.01904296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.409529447555542, "rewards_train/margins": 18.210835695266724, "rewards_train/rejected": -19.620365142822266, "step": 4679 }, { "epoch": 2.3, "learning_rate": 1.409255195472611e-07, "loss": 0.0, "step": 4680 }, { "epoch": 2.3, "logps_train/chosen": -81.99434661865234, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -119.5625, "logps_train/rejected": -305.6976623535156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8748496770858765, "rewards_train/margins": 16.737739205360413, "rewards_train/rejected": -18.61258888244629, "step": 4680 }, { "epoch": 2.3, "learning_rate": 1.4073671610388195e-07, "loss": 0.0, "step": 4681 }, { "epoch": 2.3, "logps_train/chosen": -73.75099182128906, "logps_train/ref_chosen": -61.84375, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -313.27239990234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.191603422164917, "rewards_train/margins": 17.788127183914185, "rewards_train/rejected": -18.9797306060791, "step": 4681 }, { "epoch": 2.31, "learning_rate": 1.4054801850189035e-07, "loss": 0.0, "step": 4682 }, { "epoch": 2.31, "logps_train/chosen": -76.39883422851562, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -306.3443603515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1759679317474365, "rewards_train/margins": 17.20734429359436, "rewards_train/rejected": -18.383312225341797, "step": 4682 }, { "epoch": 2.31, "learning_rate": 1.403594267968779e-07, "loss": 0.0001, "step": 4683 }, { "epoch": 2.31, "logps_train/chosen": -75.99517822265625, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -318.7891845703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3885321617126465, "rewards_train/margins": 18.011040210723877, "rewards_train/rejected": -19.399572372436523, "step": 4683 }, { "epoch": 2.31, "learning_rate": 1.4017094104440524e-07, "loss": 0.0, "step": 4684 }, { "epoch": 2.31, "logps_train/chosen": -80.4326171875, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -311.1885070800781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5105952024459839, "rewards_train/margins": 17.19814884662628, "rewards_train/rejected": -18.708744049072266, "step": 4684 }, { "epoch": 2.31, "learning_rate": 1.399825613000014e-07, "loss": 0.0, "step": 4685 }, { "epoch": 2.31, "logps_train/chosen": -78.9376220703125, "logps_train/ref_chosen": -61.6875, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -323.8648681640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7258669137954712, "rewards_train/margins": 18.145827651023865, "rewards_train/rejected": -19.871694564819336, "step": 4685 }, { "epoch": 2.31, "learning_rate": 1.3979428761916417e-07, "loss": 0.0, "step": 4686 }, { "epoch": 2.31, "logps_train/chosen": -84.64959716796875, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -131.75, "logps_train/rejected": -339.49951171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7803889513015747, "rewards_train/margins": 18.989096522331238, "rewards_train/rejected": -20.769485473632812, "step": 4686 }, { "epoch": 2.31, "learning_rate": 1.3960612005736067e-07, "loss": 0.0, "step": 4687 }, { "epoch": 2.31, "logps_train/chosen": -81.40223693847656, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -329.4844055175781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4813861846923828, "rewards_train/margins": 18.641469955444336, "rewards_train/rejected": -20.12285614013672, "step": 4687 }, { "epoch": 2.31, "learning_rate": 1.3941805867002576e-07, "loss": 0.0, "step": 4688 }, { "epoch": 2.31, "logps_train/chosen": -75.2267837524414, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -329.49615478515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1515848636627197, "rewards_train/margins": 19.133968114852905, "rewards_train/rejected": -20.285552978515625, "step": 4688 }, { "epoch": 2.31, "learning_rate": 1.3923010351256403e-07, "loss": 0.0, "step": 4689 }, { "epoch": 2.31, "logps_train/chosen": -76.73081970214844, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -325.1831359863281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1666860580444336, "rewards_train/margins": 18.488884925842285, "rewards_train/rejected": -19.65557098388672, "step": 4689 }, { "epoch": 2.31, "learning_rate": 1.3904225464034818e-07, "loss": 0.0002, "step": 4690 }, { "epoch": 2.31, "logps_train/chosen": -80.6056900024414, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -132.625, "logps_train/rejected": -337.37652587890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5327367782592773, "rewards_train/margins": 18.94534397125244, "rewards_train/rejected": -20.47808074951172, "step": 4690 }, { "epoch": 2.31, "learning_rate": 1.3885451210871963e-07, "loss": 0.0, "step": 4691 }, { "epoch": 2.31, "logps_train/chosen": -82.10457611083984, "logps_train/ref_chosen": -62.4375, "logps_train/ref_rejected": -121.375, "logps_train/rejected": -319.679931640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.967635154724121, "rewards_train/margins": 17.86378574371338, "rewards_train/rejected": -19.8314208984375, "step": 4691 }, { "epoch": 2.31, "learning_rate": 1.3866687597298887e-07, "loss": 0.0, "step": 4692 }, { "epoch": 2.31, "logps_train/chosen": -78.20481872558594, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -319.33819580078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2713606357574463, "rewards_train/margins": 18.07105040550232, "rewards_train/rejected": -19.342411041259766, "step": 4692 }, { "epoch": 2.31, "learning_rate": 1.3847934628843467e-07, "loss": 0.0, "step": 4693 }, { "epoch": 2.31, "logps_train/chosen": -75.2962646484375, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -314.91302490234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0749390125274658, "rewards_train/margins": 17.844780683517456, "rewards_train/rejected": -18.919719696044922, "step": 4693 }, { "epoch": 2.31, "learning_rate": 1.3829192311030435e-07, "loss": 0.0, "step": 4694 }, { "epoch": 2.31, "logps_train/chosen": -76.92225646972656, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -122.8125, "logps_train/rejected": -309.463134765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4000872373580933, "rewards_train/margins": 17.265417218208313, "rewards_train/rejected": -18.665504455566406, "step": 4694 }, { "epoch": 2.31, "learning_rate": 1.3810460649381434e-07, "loss": 0.0, "step": 4695 }, { "epoch": 2.31, "logps_train/chosen": -80.66178894042969, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -319.67254638671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7737963199615479, "rewards_train/margins": 17.681983709335327, "rewards_train/rejected": -19.455780029296875, "step": 4695 }, { "epoch": 2.31, "learning_rate": 1.3791739649414924e-07, "loss": 0.0003, "step": 4696 }, { "epoch": 2.31, "logps_train/chosen": -83.54435729980469, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -317.90728759765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.873429298400879, "rewards_train/margins": 17.25294780731201, "rewards_train/rejected": -19.12637710571289, "step": 4696 }, { "epoch": 2.31, "learning_rate": 1.377302931664624e-07, "loss": 0.0, "step": 4697 }, { "epoch": 2.31, "logps_train/chosen": -79.46968078613281, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -323.53125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.450239658355713, "rewards_train/margins": 18.021636486053467, "rewards_train/rejected": -19.47187614440918, "step": 4697 }, { "epoch": 2.31, "learning_rate": 1.3754329656587555e-07, "loss": 0.0, "step": 4698 }, { "epoch": 2.31, "logps_train/chosen": -75.76434326171875, "logps_train/ref_chosen": -61.5625, "logps_train/ref_rejected": -121.8125, "logps_train/rejected": -318.0325927734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4195493459701538, "rewards_train/margins": 18.201042771339417, "rewards_train/rejected": -19.62059211730957, "step": 4698 }, { "epoch": 2.31, "learning_rate": 1.3735640674747944e-07, "loss": 0.0007, "step": 4699 }, { "epoch": 2.31, "logps_train/chosen": -77.47216796875, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -326.38433837890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.155517339706421, "rewards_train/margins": 18.545076608657837, "rewards_train/rejected": -19.700593948364258, "step": 4699 }, { "epoch": 2.31, "learning_rate": 1.3716962376633295e-07, "loss": 0.0, "step": 4700 }, { "epoch": 2.31, "logps_train/chosen": -84.41305541992188, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -329.4473876953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.022653102874756, "rewards_train/margins": 18.232143878936768, "rewards_train/rejected": -20.254796981811523, "step": 4700 }, { "epoch": 2.31, "learning_rate": 1.3698294767746343e-07, "loss": 0.0, "step": 4701 }, { "epoch": 2.31, "logps_train/chosen": -82.35404968261719, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -325.78216552734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5827196836471558, "rewards_train/margins": 18.301647305488586, "rewards_train/rejected": -19.884366989135742, "step": 4701 }, { "epoch": 2.32, "learning_rate": 1.367963785358674e-07, "loss": 0.0001, "step": 4702 }, { "epoch": 2.32, "logps_train/chosen": -79.75645446777344, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -120.125, "logps_train/rejected": -313.36553955078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5873156785964966, "rewards_train/margins": 17.738983988761902, "rewards_train/rejected": -19.3262996673584, "step": 4702 }, { "epoch": 2.32, "learning_rate": 1.366099163965087e-07, "loss": 0.0, "step": 4703 }, { "epoch": 2.32, "logps_train/chosen": -77.9075927734375, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -319.2406921386719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.308435082435608, "rewards_train/margins": 18.162996649742126, "rewards_train/rejected": -19.471431732177734, "step": 4703 }, { "epoch": 2.32, "learning_rate": 1.3642356131432075e-07, "loss": 0.0, "step": 4704 }, { "epoch": 2.32, "logps_train/chosen": -77.23887634277344, "logps_train/ref_chosen": -63.15625, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -318.029052734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4076275825500488, "rewards_train/margins": 17.83527135848999, "rewards_train/rejected": -19.24289894104004, "step": 4704 }, { "epoch": 2.32, "learning_rate": 1.3623731334420523e-07, "loss": 0.0, "step": 4705 }, { "epoch": 2.32, "logps_train/chosen": -78.99884033203125, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -322.904052734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3273253440856934, "rewards_train/margins": 18.405704975128174, "rewards_train/rejected": -19.733030319213867, "step": 4705 }, { "epoch": 2.32, "learning_rate": 1.3605117254103154e-07, "loss": 0.0004, "step": 4706 }, { "epoch": 2.32, "logps_train/chosen": -75.22361755371094, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -325.628173828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1551257371902466, "rewards_train/margins": 18.893141627311707, "rewards_train/rejected": -20.048267364501953, "step": 4706 }, { "epoch": 2.32, "learning_rate": 1.3586513895963836e-07, "loss": 0.0, "step": 4707 }, { "epoch": 2.32, "logps_train/chosen": -77.81254577636719, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -322.7771911621094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2664600610733032, "rewards_train/margins": 18.375515341758728, "rewards_train/rejected": -19.64197540283203, "step": 4707 }, { "epoch": 2.32, "learning_rate": 1.356792126548324e-07, "loss": 0.0, "step": 4708 }, { "epoch": 2.32, "logps_train/chosen": -79.72860717773438, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -322.8985595703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.682138204574585, "rewards_train/margins": 17.918461084365845, "rewards_train/rejected": -19.60059928894043, "step": 4708 }, { "epoch": 2.32, "learning_rate": 1.354933936813885e-07, "loss": 0.0, "step": 4709 }, { "epoch": 2.32, "logps_train/chosen": -74.27973937988281, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -313.92633056640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.191352367401123, "rewards_train/margins": 17.936684131622314, "rewards_train/rejected": -19.128036499023438, "step": 4709 }, { "epoch": 2.32, "learning_rate": 1.3530768209405064e-07, "loss": 0.0006, "step": 4710 }, { "epoch": 2.32, "logps_train/chosen": -74.9146728515625, "logps_train/ref_chosen": -61.3125, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -325.63922119140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3614873886108398, "rewards_train/margins": 18.62861156463623, "rewards_train/rejected": -19.99009895324707, "step": 4710 }, { "epoch": 2.32, "learning_rate": 1.351220779475305e-07, "loss": 0.0, "step": 4711 }, { "epoch": 2.32, "logps_train/chosen": -80.92977905273438, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -130.375, "logps_train/rejected": -335.73004150390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5897555351257324, "rewards_train/margins": 18.946922779083252, "rewards_train/rejected": -20.536678314208984, "step": 4711 }, { "epoch": 2.32, "learning_rate": 1.3493658129650825e-07, "loss": 0.0, "step": 4712 }, { "epoch": 2.32, "logps_train/chosen": -76.72686004638672, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -122.125, "logps_train/rejected": -316.2696533203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2702442407608032, "rewards_train/margins": 18.146904349327087, "rewards_train/rejected": -19.41714859008789, "step": 4712 }, { "epoch": 2.32, "learning_rate": 1.3475119219563242e-07, "loss": 0.0001, "step": 4713 }, { "epoch": 2.32, "logps_train/chosen": -83.19376373291016, "logps_train/ref_chosen": -66.9375, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -328.5247802734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.626554250717163, "rewards_train/margins": 18.14447808265686, "rewards_train/rejected": -19.771032333374023, "step": 4713 }, { "epoch": 2.32, "learning_rate": 1.3456591069952005e-07, "loss": 0.0, "step": 4714 }, { "epoch": 2.32, "logps_train/chosen": -74.74847412109375, "logps_train/ref_chosen": -62.0625, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -317.25860595703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2694761753082275, "rewards_train/margins": 17.876012563705444, "rewards_train/rejected": -19.145488739013672, "step": 4714 }, { "epoch": 2.32, "learning_rate": 1.3438073686275635e-07, "loss": 0.0, "step": 4715 }, { "epoch": 2.32, "logps_train/chosen": -76.63896942138672, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -319.9916687011719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2347471714019775, "rewards_train/margins": 18.10773253440857, "rewards_train/rejected": -19.342479705810547, "step": 4715 }, { "epoch": 2.32, "learning_rate": 1.3419567073989446e-07, "loss": 0.0, "step": 4716 }, { "epoch": 2.32, "logps_train/chosen": -89.26962280273438, "logps_train/ref_chosen": -67.9375, "logps_train/ref_rejected": -133.25, "logps_train/rejected": -346.4248352050781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.132479190826416, "rewards_train/margins": 19.19008207321167, "rewards_train/rejected": -21.322561264038086, "step": 4716 }, { "epoch": 2.32, "learning_rate": 1.340107123854567e-07, "loss": 0.0, "step": 4717 }, { "epoch": 2.32, "logps_train/chosen": -83.26605224609375, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -331.26641845703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8258237838745117, "rewards_train/margins": 18.299891471862793, "rewards_train/rejected": -20.125715255737305, "step": 4717 }, { "epoch": 2.32, "learning_rate": 1.338258618539323e-07, "loss": 0.0001, "step": 4718 }, { "epoch": 2.32, "logps_train/chosen": -77.18867492675781, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -315.481689453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4576369524002075, "rewards_train/margins": 17.832035899162292, "rewards_train/rejected": -19.2896728515625, "step": 4718 }, { "epoch": 2.32, "learning_rate": 1.3364111919978006e-07, "loss": 0.0, "step": 4719 }, { "epoch": 2.32, "logps_train/chosen": -81.17009735107422, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -329.41107177734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.697746992111206, "rewards_train/margins": 18.59746479988098, "rewards_train/rejected": -20.295211791992188, "step": 4719 }, { "epoch": 2.32, "learning_rate": 1.3345648447742619e-07, "loss": 0.0, "step": 4720 }, { "epoch": 2.32, "logps_train/chosen": -71.4466323852539, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -320.4873046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7585306167602539, "rewards_train/margins": 18.525160789489746, "rewards_train/rejected": -19.28369140625, "step": 4720 }, { "epoch": 2.32, "learning_rate": 1.3327195774126514e-07, "loss": 0.0, "step": 4721 }, { "epoch": 2.32, "logps_train/chosen": -76.95196533203125, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -327.61199951171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3568668365478516, "rewards_train/margins": 18.961462020874023, "rewards_train/rejected": -20.318328857421875, "step": 4721 }, { "epoch": 2.32, "learning_rate": 1.3308753904566016e-07, "loss": 0.0, "step": 4722 }, { "epoch": 2.32, "logps_train/chosen": -80.93116760253906, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -322.8283386230469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8197529315948486, "rewards_train/margins": 18.072848081588745, "rewards_train/rejected": -19.892601013183594, "step": 4722 }, { "epoch": 2.33, "learning_rate": 1.3290322844494195e-07, "loss": 0.0, "step": 4723 }, { "epoch": 2.33, "logps_train/chosen": -74.94174194335938, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -317.6812744140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.135703206062317, "rewards_train/margins": 18.16338050365448, "rewards_train/rejected": -19.299083709716797, "step": 4723 }, { "epoch": 2.33, "learning_rate": 1.327190259934098e-07, "loss": 0.0, "step": 4724 }, { "epoch": 2.33, "logps_train/chosen": -75.15654754638672, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -320.0565490722656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0908011198043823, "rewards_train/margins": 18.45645534992218, "rewards_train/rejected": -19.547256469726562, "step": 4724 }, { "epoch": 2.33, "learning_rate": 1.325349317453307e-07, "loss": 0.0, "step": 4725 }, { "epoch": 2.33, "logps_train/chosen": -80.35945892333984, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -317.32000732421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5721757411956787, "rewards_train/margins": 17.84883713722229, "rewards_train/rejected": -19.42101287841797, "step": 4725 }, { "epoch": 2.33, "learning_rate": 1.3235094575494043e-07, "loss": 0.0, "step": 4726 }, { "epoch": 2.33, "logps_train/chosen": -79.28990173339844, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -320.794189453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.504624605178833, "rewards_train/margins": 18.13729166984558, "rewards_train/rejected": -19.641916275024414, "step": 4726 }, { "epoch": 2.33, "learning_rate": 1.321670680764424e-07, "loss": 0.0, "step": 4727 }, { "epoch": 2.33, "logps_train/chosen": -78.6512451171875, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -322.5064392089844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3502322435379028, "rewards_train/margins": 18.390843272209167, "rewards_train/rejected": -19.74107551574707, "step": 4727 }, { "epoch": 2.33, "learning_rate": 1.3198329876400794e-07, "loss": 0.0, "step": 4728 }, { "epoch": 2.33, "logps_train/chosen": -78.22932434082031, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -120.0, "logps_train/rejected": -317.4764099121094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.562361717224121, "rewards_train/margins": 18.182202339172363, "rewards_train/rejected": -19.744564056396484, "step": 4728 }, { "epoch": 2.33, "learning_rate": 1.3179963787177717e-07, "loss": 0.0, "step": 4729 }, { "epoch": 2.33, "logps_train/chosen": -81.79930114746094, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -332.3939208984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7345688343048096, "rewards_train/margins": 18.524845838546753, "rewards_train/rejected": -20.259414672851562, "step": 4729 }, { "epoch": 2.33, "learning_rate": 1.3161608545385756e-07, "loss": 0.0, "step": 4730 }, { "epoch": 2.33, "logps_train/chosen": -73.29179382324219, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -317.07366943359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9678017497062683, "rewards_train/margins": 18.36012488603592, "rewards_train/rejected": -19.327926635742188, "step": 4730 }, { "epoch": 2.33, "learning_rate": 1.3143264156432483e-07, "loss": 0.0, "step": 4731 }, { "epoch": 2.33, "logps_train/chosen": -76.19171142578125, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -122.4375, "logps_train/rejected": -316.4966735839844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.299224853515625, "rewards_train/margins": 18.106496810913086, "rewards_train/rejected": -19.40572166442871, "step": 4731 }, { "epoch": 2.33, "learning_rate": 1.3124930625722303e-07, "loss": 0.0, "step": 4732 }, { "epoch": 2.33, "logps_train/chosen": -81.41045379638672, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -332.7396545410156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6088676452636719, "rewards_train/margins": 18.727596282958984, "rewards_train/rejected": -20.336463928222656, "step": 4732 }, { "epoch": 2.33, "learning_rate": 1.3106607958656384e-07, "loss": 0.0, "step": 4733 }, { "epoch": 2.33, "logps_train/chosen": -75.31263732910156, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -328.6018371582031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1788225173950195, "rewards_train/margins": 18.9754056930542, "rewards_train/rejected": -20.15422821044922, "step": 4733 }, { "epoch": 2.33, "learning_rate": 1.308829616063271e-07, "loss": 0.0, "step": 4734 }, { "epoch": 2.33, "logps_train/chosen": -78.92817687988281, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -317.2646179199219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5079059600830078, "rewards_train/margins": 17.669580459594727, "rewards_train/rejected": -19.177486419677734, "step": 4734 }, { "epoch": 2.33, "learning_rate": 1.3069995237046043e-07, "loss": 0.0, "step": 4735 }, { "epoch": 2.33, "logps_train/chosen": -77.42608642578125, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -324.48785400390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2683403491973877, "rewards_train/margins": 18.567845106124878, "rewards_train/rejected": -19.836185455322266, "step": 4735 }, { "epoch": 2.33, "learning_rate": 1.3051705193287992e-07, "loss": 0.0, "step": 4736 }, { "epoch": 2.33, "logps_train/chosen": -78.73338317871094, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -326.2913513183594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4379867315292358, "rewards_train/margins": 18.603355050086975, "rewards_train/rejected": -20.04134178161621, "step": 4736 }, { "epoch": 2.33, "learning_rate": 1.3033426034746903e-07, "loss": 0.0, "step": 4737 }, { "epoch": 2.33, "logps_train/chosen": -77.54388427734375, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -315.05450439453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3113713264465332, "rewards_train/margins": 17.909120082855225, "rewards_train/rejected": -19.220491409301758, "step": 4737 }, { "epoch": 2.33, "learning_rate": 1.3015157766807938e-07, "loss": 0.0, "step": 4738 }, { "epoch": 2.33, "logps_train/chosen": -79.75172424316406, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -318.65655517578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5850356817245483, "rewards_train/margins": 17.751275658607483, "rewards_train/rejected": -19.33631134033203, "step": 4738 }, { "epoch": 2.33, "learning_rate": 1.2996900394853084e-07, "loss": 0.0001, "step": 4739 }, { "epoch": 2.33, "logps_train/chosen": -86.20429992675781, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -331.9585876464844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0154497623443604, "rewards_train/margins": 18.520545721054077, "rewards_train/rejected": -20.535995483398438, "step": 4739 }, { "epoch": 2.33, "learning_rate": 1.2978653924261034e-07, "loss": 0.0, "step": 4740 }, { "epoch": 2.33, "logps_train/chosen": -73.69744873046875, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -319.202392578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0407414436340332, "rewards_train/margins": 18.408551692962646, "rewards_train/rejected": -19.44929313659668, "step": 4740 }, { "epoch": 2.33, "learning_rate": 1.2960418360407365e-07, "loss": 0.0, "step": 4741 }, { "epoch": 2.33, "logps_train/chosen": -81.98521423339844, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -325.4739990234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5647814273834229, "rewards_train/margins": 18.30752158164978, "rewards_train/rejected": -19.872303009033203, "step": 4741 }, { "epoch": 2.33, "learning_rate": 1.2942193708664378e-07, "loss": 0.0, "step": 4742 }, { "epoch": 2.33, "logps_train/chosen": -81.34383392333984, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -339.1268310546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4590413570404053, "rewards_train/margins": 19.381717920303345, "rewards_train/rejected": -20.84075927734375, "step": 4742 }, { "epoch": 2.34, "learning_rate": 1.2923979974401166e-07, "loss": 0.0, "step": 4743 }, { "epoch": 2.34, "logps_train/chosen": -81.29261016845703, "logps_train/ref_chosen": -66.9375, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -321.6623840332031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.434241533279419, "rewards_train/margins": 17.944397687911987, "rewards_train/rejected": -19.378639221191406, "step": 4743 }, { "epoch": 2.34, "learning_rate": 1.2905777162983655e-07, "loss": 0.0, "step": 4744 }, { "epoch": 2.34, "logps_train/chosen": -77.26029968261719, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -307.0316162109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1596486568450928, "rewards_train/margins": 17.446979761123657, "rewards_train/rejected": -18.60662841796875, "step": 4744 }, { "epoch": 2.34, "learning_rate": 1.2887585279774494e-07, "loss": 0.0, "step": 4745 }, { "epoch": 2.34, "logps_train/chosen": -79.05133056640625, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -131.75, "logps_train/rejected": -333.25518798828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3571350574493408, "rewards_train/margins": 18.787134885787964, "rewards_train/rejected": -20.144269943237305, "step": 4745 }, { "epoch": 2.34, "learning_rate": 1.2869404330133115e-07, "loss": 0.0, "step": 4746 }, { "epoch": 2.34, "logps_train/chosen": -81.74940490722656, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -323.5325927734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.751650094985962, "rewards_train/margins": 18.11801314353943, "rewards_train/rejected": -19.86966323852539, "step": 4746 }, { "epoch": 2.34, "learning_rate": 1.2851234319415788e-07, "loss": 0.0, "step": 4747 }, { "epoch": 2.34, "logps_train/chosen": -77.61001586914062, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -333.936279296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4533355236053467, "rewards_train/margins": 18.927497148513794, "rewards_train/rejected": -20.38083267211914, "step": 4747 }, { "epoch": 2.34, "learning_rate": 1.28330752529755e-07, "loss": 0.0, "step": 4748 }, { "epoch": 2.34, "logps_train/chosen": -84.73722839355469, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -325.3157958984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8859294652938843, "rewards_train/margins": 17.672065377235413, "rewards_train/rejected": -19.557994842529297, "step": 4748 }, { "epoch": 2.34, "learning_rate": 1.2814927136162036e-07, "loss": 0.0, "step": 4749 }, { "epoch": 2.34, "logps_train/chosen": -85.84707641601562, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -330.93292236328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.140469551086426, "rewards_train/margins": 18.05399227142334, "rewards_train/rejected": -20.194461822509766, "step": 4749 }, { "epoch": 2.34, "learning_rate": 1.2796789974321937e-07, "loss": 0.0, "step": 4750 }, { "epoch": 2.34, "logps_train/chosen": -77.6620864868164, "logps_train/ref_chosen": -61.3125, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -312.0325927734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6362285614013672, "rewards_train/margins": 17.469327926635742, "rewards_train/rejected": -19.10555648803711, "step": 4750 }, { "epoch": 2.34, "learning_rate": 1.2778663772798565e-07, "loss": 0.0001, "step": 4751 }, { "epoch": 2.34, "logps_train/chosen": -75.86724090576172, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -309.9718322753906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.157818078994751, "rewards_train/margins": 17.610021352767944, "rewards_train/rejected": -18.767839431762695, "step": 4751 }, { "epoch": 2.34, "learning_rate": 1.276054853693201e-07, "loss": 0.0, "step": 4752 }, { "epoch": 2.34, "logps_train/chosen": -82.87181091308594, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -336.9390869140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7181878089904785, "rewards_train/margins": 18.986560344696045, "rewards_train/rejected": -20.704748153686523, "step": 4752 }, { "epoch": 2.34, "learning_rate": 1.274244427205912e-07, "loss": 0.0, "step": 4753 }, { "epoch": 2.34, "logps_train/chosen": -85.50355529785156, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -328.76800537109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.019007921218872, "rewards_train/margins": 18.25437617301941, "rewards_train/rejected": -20.27338409423828, "step": 4753 }, { "epoch": 2.34, "learning_rate": 1.2724350983513583e-07, "loss": 0.0, "step": 4754 }, { "epoch": 2.34, "logps_train/chosen": -82.35848236083984, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -321.53033447265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8243247270584106, "rewards_train/margins": 17.925827860832214, "rewards_train/rejected": -19.750152587890625, "step": 4754 }, { "epoch": 2.34, "learning_rate": 1.2706268676625748e-07, "loss": 0.0, "step": 4755 }, { "epoch": 2.34, "logps_train/chosen": -82.42630004882812, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -321.1512756347656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6949251890182495, "rewards_train/margins": 17.83289921283722, "rewards_train/rejected": -19.52782440185547, "step": 4755 }, { "epoch": 2.34, "learning_rate": 1.2688197356722818e-07, "loss": 0.0, "step": 4756 }, { "epoch": 2.34, "logps_train/chosen": -75.93878173828125, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -323.6705017089844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2969061136245728, "rewards_train/margins": 18.479813933372498, "rewards_train/rejected": -19.77672004699707, "step": 4756 }, { "epoch": 2.34, "learning_rate": 1.2670137029128726e-07, "loss": 0.0001, "step": 4757 }, { "epoch": 2.34, "logps_train/chosen": -81.33338928222656, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -327.5404052734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.625209093093872, "rewards_train/margins": 18.59870409965515, "rewards_train/rejected": -20.223913192749023, "step": 4757 }, { "epoch": 2.34, "learning_rate": 1.2652087699164138e-07, "loss": 0.0, "step": 4758 }, { "epoch": 2.34, "logps_train/chosen": -72.9943618774414, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -120.8125, "logps_train/rejected": -309.2799987792969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0324196815490723, "rewards_train/margins": 17.81335210800171, "rewards_train/rejected": -18.84577178955078, "step": 4758 }, { "epoch": 2.34, "learning_rate": 1.2634049372146544e-07, "loss": 0.0, "step": 4759 }, { "epoch": 2.34, "logps_train/chosen": -84.16448211669922, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -331.6452331542969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9121510982513428, "rewards_train/margins": 18.459842920303345, "rewards_train/rejected": -20.371994018554688, "step": 4759 }, { "epoch": 2.34, "learning_rate": 1.261602205339014e-07, "loss": 0.0, "step": 4760 }, { "epoch": 2.34, "logps_train/chosen": -80.54911041259766, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -333.4203796386719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.431912899017334, "rewards_train/margins": 18.862959384918213, "rewards_train/rejected": -20.294872283935547, "step": 4760 }, { "epoch": 2.34, "learning_rate": 1.2598005748205904e-07, "loss": 0.0, "step": 4761 }, { "epoch": 2.34, "logps_train/chosen": -80.67929077148438, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -334.5126647949219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6515228748321533, "rewards_train/margins": 18.820738554000854, "rewards_train/rejected": -20.472261428833008, "step": 4761 }, { "epoch": 2.34, "learning_rate": 1.258000046190153e-07, "loss": 0.0, "step": 4762 }, { "epoch": 2.34, "logps_train/chosen": -85.91395568847656, "logps_train/ref_chosen": -67.25, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -325.30438232421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8671278953552246, "rewards_train/margins": 18.070536136627197, "rewards_train/rejected": -19.937664031982422, "step": 4762 }, { "epoch": 2.35, "learning_rate": 1.2562006199781544e-07, "loss": 0.0, "step": 4763 }, { "epoch": 2.35, "logps_train/chosen": -78.52101135253906, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -120.25, "logps_train/rejected": -313.60113525390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5130877494812012, "rewards_train/margins": 17.821388721466064, "rewards_train/rejected": -19.334476470947266, "step": 4763 }, { "epoch": 2.35, "learning_rate": 1.254402296714715e-07, "loss": 0.0, "step": 4764 }, { "epoch": 2.35, "logps_train/chosen": -74.74781799316406, "logps_train/ref_chosen": -61.75, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -326.222900390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2991712093353271, "rewards_train/margins": 18.592748880386353, "rewards_train/rejected": -19.89192008972168, "step": 4764 }, { "epoch": 2.35, "learning_rate": 1.252605076929632e-07, "loss": 0.0, "step": 4765 }, { "epoch": 2.35, "logps_train/chosen": -81.40165710449219, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -338.05340576171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.732475757598877, "rewards_train/margins": 19.35763120651245, "rewards_train/rejected": -21.090106964111328, "step": 4765 }, { "epoch": 2.35, "learning_rate": 1.2508089611523814e-07, "loss": 0.0, "step": 4766 }, { "epoch": 2.35, "logps_train/chosen": -75.15901184082031, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -324.38165283203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0368239879608154, "rewards_train/margins": 18.55709958076477, "rewards_train/rejected": -19.593923568725586, "step": 4766 }, { "epoch": 2.35, "learning_rate": 1.2490139499121094e-07, "loss": 0.0, "step": 4767 }, { "epoch": 2.35, "logps_train/chosen": -84.31442260742188, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -335.6343994140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8068814277648926, "rewards_train/margins": 19.040931224822998, "rewards_train/rejected": -20.84781265258789, "step": 4767 }, { "epoch": 2.35, "learning_rate": 1.2472200437376368e-07, "loss": 0.0, "step": 4768 }, { "epoch": 2.35, "logps_train/chosen": -82.50882720947266, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -326.8837585449219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.661136507987976, "rewards_train/margins": 18.239837765693665, "rewards_train/rejected": -19.90097427368164, "step": 4768 }, { "epoch": 2.35, "learning_rate": 1.2454272431574658e-07, "loss": 0.0, "step": 4769 }, { "epoch": 2.35, "logps_train/chosen": -80.29437255859375, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -130.625, "logps_train/rejected": -325.5904541015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6614196300506592, "rewards_train/margins": 17.84352421760559, "rewards_train/rejected": -19.50494384765625, "step": 4769 }, { "epoch": 2.35, "learning_rate": 1.24363554869976e-07, "loss": 0.0, "step": 4770 }, { "epoch": 2.35, "logps_train/chosen": -81.30187225341797, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -320.2129821777344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6750849485397339, "rewards_train/margins": 18.11960232257843, "rewards_train/rejected": -19.794687271118164, "step": 4770 }, { "epoch": 2.35, "learning_rate": 1.2418449608923709e-07, "loss": 0.0, "step": 4771 }, { "epoch": 2.35, "logps_train/chosen": -81.19307708740234, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -324.71636962890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.622969627380371, "rewards_train/margins": 18.429381370544434, "rewards_train/rejected": -20.052350997924805, "step": 4771 }, { "epoch": 2.35, "learning_rate": 1.2400554802628156e-07, "loss": 0.0, "step": 4772 }, { "epoch": 2.35, "logps_train/chosen": -80.61357879638672, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -319.2716369628906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4293756484985352, "rewards_train/margins": 17.729819297790527, "rewards_train/rejected": -19.159194946289062, "step": 4772 }, { "epoch": 2.35, "learning_rate": 1.238267107338285e-07, "loss": 0.0, "step": 4773 }, { "epoch": 2.35, "logps_train/chosen": -77.0999984741211, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -121.25, "logps_train/rejected": -319.088134765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3029193878173828, "rewards_train/margins": 18.477476119995117, "rewards_train/rejected": -19.7803955078125, "step": 4773 }, { "epoch": 2.35, "learning_rate": 1.2364798426456496e-07, "loss": 0.0, "step": 4774 }, { "epoch": 2.35, "logps_train/chosen": -85.49909973144531, "logps_train/ref_chosen": -67.875, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -323.99774169921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7596752643585205, "rewards_train/margins": 17.946202039718628, "rewards_train/rejected": -19.70587730407715, "step": 4774 }, { "epoch": 2.35, "learning_rate": 1.2346936867114477e-07, "loss": 0.0, "step": 4775 }, { "epoch": 2.35, "logps_train/chosen": -75.33440399169922, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -318.4019470214844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2248468399047852, "rewards_train/margins": 18.038687705993652, "rewards_train/rejected": -19.263534545898438, "step": 4775 }, { "epoch": 2.35, "learning_rate": 1.2329086400618932e-07, "loss": 0.0004, "step": 4776 }, { "epoch": 2.35, "logps_train/chosen": -82.24807739257812, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -326.7208251953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5142852067947388, "rewards_train/margins": 18.225720286369324, "rewards_train/rejected": -19.740005493164062, "step": 4776 }, { "epoch": 2.35, "learning_rate": 1.231124703222871e-07, "loss": 0.0, "step": 4777 }, { "epoch": 2.35, "logps_train/chosen": -75.36167907714844, "logps_train/ref_chosen": -62.09375, "logps_train/ref_rejected": -122.8125, "logps_train/rejected": -315.76806640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.327866792678833, "rewards_train/margins": 17.967446088790894, "rewards_train/rejected": -19.295312881469727, "step": 4777 }, { "epoch": 2.35, "learning_rate": 1.2293418767199449e-07, "loss": 0.0, "step": 4778 }, { "epoch": 2.35, "logps_train/chosen": -78.439453125, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -322.4530029296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.503296136856079, "rewards_train/margins": 18.09122061729431, "rewards_train/rejected": -19.59451675415039, "step": 4778 }, { "epoch": 2.35, "learning_rate": 1.227560161078345e-07, "loss": 0.0, "step": 4779 }, { "epoch": 2.35, "logps_train/chosen": -85.94623565673828, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -327.0921936035156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.909858226776123, "rewards_train/margins": 17.755024433135986, "rewards_train/rejected": -19.66488265991211, "step": 4779 }, { "epoch": 2.35, "learning_rate": 1.2257795568229756e-07, "loss": 0.0, "step": 4780 }, { "epoch": 2.35, "logps_train/chosen": -79.80009460449219, "logps_train/ref_chosen": -62.09375, "logps_train/ref_rejected": -118.4375, "logps_train/rejected": -306.5723876953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7702444791793823, "rewards_train/margins": 17.045639157295227, "rewards_train/rejected": -18.81588363647461, "step": 4780 }, { "epoch": 2.35, "learning_rate": 1.2240000644784176e-07, "loss": 0.0002, "step": 4781 }, { "epoch": 2.35, "logps_train/chosen": -79.87022399902344, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -322.3305969238281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6219346523284912, "rewards_train/margins": 18.229192972183228, "rewards_train/rejected": -19.85112762451172, "step": 4781 }, { "epoch": 2.35, "learning_rate": 1.2222216845689203e-07, "loss": 0.0, "step": 4782 }, { "epoch": 2.35, "logps_train/chosen": -78.55328369140625, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -324.05615234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3418030738830566, "rewards_train/margins": 18.601510524749756, "rewards_train/rejected": -19.943313598632812, "step": 4782 }, { "epoch": 2.35, "learning_rate": 1.2204444176184048e-07, "loss": 0.0, "step": 4783 }, { "epoch": 2.35, "logps_train/chosen": -80.69203186035156, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -325.15325927734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6784312725067139, "rewards_train/margins": 18.226250886917114, "rewards_train/rejected": -19.904682159423828, "step": 4783 }, { "epoch": 2.36, "learning_rate": 1.2186682641504693e-07, "loss": 0.0, "step": 4784 }, { "epoch": 2.36, "logps_train/chosen": -77.00997924804688, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -315.5302429199219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.323238730430603, "rewards_train/margins": 17.73798906803131, "rewards_train/rejected": -19.061227798461914, "step": 4784 }, { "epoch": 2.36, "learning_rate": 1.2168932246883785e-07, "loss": 0.0, "step": 4785 }, { "epoch": 2.36, "logps_train/chosen": -79.7773666381836, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -334.5639953613281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.327004313468933, "rewards_train/margins": 19.202149510383606, "rewards_train/rejected": -20.52915382385254, "step": 4785 }, { "epoch": 2.36, "learning_rate": 1.2151192997550708e-07, "loss": 0.0001, "step": 4786 }, { "epoch": 2.36, "logps_train/chosen": -78.66932678222656, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -325.3469543457031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.318056583404541, "rewards_train/margins": 18.359120845794678, "rewards_train/rejected": -19.67717742919922, "step": 4786 }, { "epoch": 2.36, "learning_rate": 1.213346489873156e-07, "loss": 0.0002, "step": 4787 }, { "epoch": 2.36, "logps_train/chosen": -75.45809173583984, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -320.55474853515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9822348356246948, "rewards_train/margins": 18.619674801826477, "rewards_train/rejected": -19.601909637451172, "step": 4787 }, { "epoch": 2.36, "learning_rate": 1.2115747955649175e-07, "loss": 0.0, "step": 4788 }, { "epoch": 2.36, "logps_train/chosen": -75.7249755859375, "logps_train/ref_chosen": -63.03125, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -328.0069885253906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2699586153030396, "rewards_train/margins": 18.599193453788757, "rewards_train/rejected": -19.869152069091797, "step": 4788 }, { "epoch": 2.36, "learning_rate": 1.2098042173523083e-07, "loss": 0.0, "step": 4789 }, { "epoch": 2.36, "logps_train/chosen": -82.03103637695312, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -329.65496826171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5881131887435913, "rewards_train/margins": 18.564493775367737, "rewards_train/rejected": -20.152606964111328, "step": 4789 }, { "epoch": 2.36, "learning_rate": 1.2080347557569497e-07, "loss": 0.0, "step": 4790 }, { "epoch": 2.36, "logps_train/chosen": -79.32865905761719, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -334.4797058105469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.263139009475708, "rewards_train/margins": 19.19430422782898, "rewards_train/rejected": -20.457443237304688, "step": 4790 }, { "epoch": 2.36, "learning_rate": 1.206266411300143e-07, "loss": 0.0, "step": 4791 }, { "epoch": 2.36, "logps_train/chosen": -82.04905700683594, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -321.6893310546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.742649793624878, "rewards_train/margins": 17.95353055000305, "rewards_train/rejected": -19.69618034362793, "step": 4791 }, { "epoch": 2.36, "learning_rate": 1.204499184502848e-07, "loss": 0.0, "step": 4792 }, { "epoch": 2.36, "logps_train/chosen": -80.80674743652344, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -132.625, "logps_train/rejected": -338.5312194824219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4865833520889282, "rewards_train/margins": 19.10794484615326, "rewards_train/rejected": -20.594528198242188, "step": 4792 }, { "epoch": 2.36, "learning_rate": 1.2027330758857051e-07, "loss": 0.0, "step": 4793 }, { "epoch": 2.36, "logps_train/chosen": -76.33136749267578, "logps_train/ref_chosen": -61.8125, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -314.47332763671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4530582427978516, "rewards_train/margins": 17.828649520874023, "rewards_train/rejected": -19.281707763671875, "step": 4793 }, { "epoch": 2.36, "learning_rate": 1.2009680859690214e-07, "loss": 0.0, "step": 4794 }, { "epoch": 2.36, "logps_train/chosen": -82.05116271972656, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -331.16552734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6546766757965088, "rewards_train/margins": 18.53916907310486, "rewards_train/rejected": -20.193845748901367, "step": 4794 }, { "epoch": 2.36, "learning_rate": 1.1992042152727732e-07, "loss": 0.0, "step": 4795 }, { "epoch": 2.36, "logps_train/chosen": -78.55890655517578, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -319.8342590332031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3733956813812256, "rewards_train/margins": 18.31676697731018, "rewards_train/rejected": -19.690162658691406, "step": 4795 }, { "epoch": 2.36, "learning_rate": 1.1974414643166114e-07, "loss": 0.0, "step": 4796 }, { "epoch": 2.36, "logps_train/chosen": -75.53459167480469, "logps_train/ref_chosen": -62.53125, "logps_train/ref_rejected": -120.0, "logps_train/rejected": -305.1312561035156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2991138696670532, "rewards_train/margins": 17.21191442012787, "rewards_train/rejected": -18.511028289794922, "step": 4796 }, { "epoch": 2.36, "learning_rate": 1.1956798336198532e-07, "loss": 0.0, "step": 4797 }, { "epoch": 2.36, "logps_train/chosen": -75.1078109741211, "logps_train/ref_chosen": -61.71875, "logps_train/ref_rejected": -121.125, "logps_train/rejected": -307.9970397949219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.339540719985962, "rewards_train/margins": 17.350736379623413, "rewards_train/rejected": -18.690277099609375, "step": 4797 }, { "epoch": 2.36, "learning_rate": 1.193919323701486e-07, "loss": 0.0, "step": 4798 }, { "epoch": 2.36, "logps_train/chosen": -77.28807067871094, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -318.5202941894531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4103013277053833, "rewards_train/margins": 18.106181979179382, "rewards_train/rejected": -19.516483306884766, "step": 4798 }, { "epoch": 2.36, "learning_rate": 1.192159935080168e-07, "loss": 0.0, "step": 4799 }, { "epoch": 2.36, "logps_train/chosen": -74.00979614257812, "logps_train/ref_chosen": -60.96875, "logps_train/ref_rejected": -120.3125, "logps_train/rejected": -312.8055419921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3032253980636597, "rewards_train/margins": 17.945199131965637, "rewards_train/rejected": -19.248424530029297, "step": 4799 }, { "epoch": 2.36, "learning_rate": 1.1904016682742284e-07, "loss": 0.0, "step": 4800 }, { "epoch": 2.36, "logps_train/chosen": -85.44141387939453, "logps_train/ref_chosen": -67.1875, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -320.6905822753906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.822852611541748, "rewards_train/margins": 17.72545289993286, "rewards_train/rejected": -19.54830551147461, "step": 4800 }, { "epoch": 2.36, "learning_rate": 1.188644523801664e-07, "loss": 0.0001, "step": 4801 }, { "epoch": 2.36, "logps_train/chosen": -75.0020751953125, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -323.02557373046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.050647497177124, "rewards_train/margins": 18.234331369400024, "rewards_train/rejected": -19.28497886657715, "step": 4801 }, { "epoch": 2.36, "learning_rate": 1.1868885021801389e-07, "loss": 0.0, "step": 4802 }, { "epoch": 2.36, "logps_train/chosen": -79.14775848388672, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -132.5, "logps_train/rejected": -332.6261291503906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3628225326538086, "rewards_train/margins": 18.64583683013916, "rewards_train/rejected": -20.00865936279297, "step": 4802 }, { "epoch": 2.36, "learning_rate": 1.1851336039269944e-07, "loss": 0.0, "step": 4803 }, { "epoch": 2.36, "logps_train/chosen": -80.4238510131836, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -322.1609191894531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3995623588562012, "rewards_train/margins": 18.103397846221924, "rewards_train/rejected": -19.502960205078125, "step": 4803 }, { "epoch": 2.37, "learning_rate": 1.183379829559229e-07, "loss": 0.0, "step": 4804 }, { "epoch": 2.37, "logps_train/chosen": -80.81764221191406, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -330.8449401855469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5379159450531006, "rewards_train/margins": 18.73021912574768, "rewards_train/rejected": -20.26813507080078, "step": 4804 }, { "epoch": 2.37, "learning_rate": 1.1816271795935195e-07, "loss": 0.0, "step": 4805 }, { "epoch": 2.37, "logps_train/chosen": -82.12692260742188, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -131.875, "logps_train/rejected": -341.4703369140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6746553182601929, "rewards_train/margins": 19.28341543674469, "rewards_train/rejected": -20.958070755004883, "step": 4805 }, { "epoch": 2.37, "learning_rate": 1.1798756545462113e-07, "loss": 0.0, "step": 4806 }, { "epoch": 2.37, "logps_train/chosen": -80.2027587890625, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -320.33380126953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7257933616638184, "rewards_train/margins": 18.044695377349854, "rewards_train/rejected": -19.770488739013672, "step": 4806 }, { "epoch": 2.37, "learning_rate": 1.1781252549333092e-07, "loss": 0.0, "step": 4807 }, { "epoch": 2.37, "logps_train/chosen": -77.07444763183594, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -334.3708190917969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1989980936050415, "rewards_train/margins": 19.375975966453552, "rewards_train/rejected": -20.574974060058594, "step": 4807 }, { "epoch": 2.37, "learning_rate": 1.1763759812704982e-07, "loss": 0.0, "step": 4808 }, { "epoch": 2.37, "logps_train/chosen": -80.45895385742188, "logps_train/ref_chosen": -67.25, "logps_train/ref_rejected": -127.5625, "logps_train/rejected": -323.12652587890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3239717483520508, "rewards_train/margins": 18.231989860534668, "rewards_train/rejected": -19.55596160888672, "step": 4808 }, { "epoch": 2.37, "learning_rate": 1.1746278340731235e-07, "loss": 0.0, "step": 4809 }, { "epoch": 2.37, "logps_train/chosen": -85.96217346191406, "logps_train/ref_chosen": -68.3125, "logps_train/ref_rejected": -131.375, "logps_train/rejected": -331.19140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7610116004943848, "rewards_train/margins": 18.225022792816162, "rewards_train/rejected": -19.986034393310547, "step": 4809 }, { "epoch": 2.37, "learning_rate": 1.1728808138562007e-07, "loss": 0.0, "step": 4810 }, { "epoch": 2.37, "logps_train/chosen": -76.26439666748047, "logps_train/ref_chosen": -61.40625, "logps_train/ref_rejected": -119.75, "logps_train/rejected": -318.0760498046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4843988418579102, "rewards_train/margins": 18.346156120300293, "rewards_train/rejected": -19.830554962158203, "step": 4810 }, { "epoch": 2.37, "learning_rate": 1.1711349211344157e-07, "loss": 0.0, "step": 4811 }, { "epoch": 2.37, "logps_train/chosen": -81.41765594482422, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -326.49639892578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6120779514312744, "rewards_train/margins": 18.361682653427124, "rewards_train/rejected": -19.9737606048584, "step": 4811 }, { "epoch": 2.37, "learning_rate": 1.1693901564221192e-07, "loss": 0.0, "step": 4812 }, { "epoch": 2.37, "logps_train/chosen": -77.53495788574219, "logps_train/ref_chosen": -67.1875, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -318.16943359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0352824926376343, "rewards_train/margins": 18.28459131717682, "rewards_train/rejected": -19.319873809814453, "step": 4812 }, { "epoch": 2.37, "learning_rate": 1.1676465202333308e-07, "loss": 0.0, "step": 4813 }, { "epoch": 2.37, "logps_train/chosen": -81.37593841552734, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -131.125, "logps_train/rejected": -337.75079345703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5087854862213135, "rewards_train/margins": 19.15199065208435, "rewards_train/rejected": -20.660776138305664, "step": 4813 }, { "epoch": 2.37, "learning_rate": 1.165904013081736e-07, "loss": 0.0, "step": 4814 }, { "epoch": 2.37, "logps_train/chosen": -74.82099914550781, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -316.25, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0940626859664917, "rewards_train/margins": 18.068437695503235, "rewards_train/rejected": -19.162500381469727, "step": 4814 }, { "epoch": 2.37, "learning_rate": 1.1641626354806916e-07, "loss": 0.0, "step": 4815 }, { "epoch": 2.37, "logps_train/chosen": -79.80835723876953, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -324.80364990234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.554663896560669, "rewards_train/margins": 18.39689326286316, "rewards_train/rejected": -19.951557159423828, "step": 4815 }, { "epoch": 2.37, "learning_rate": 1.1624223879432183e-07, "loss": 0.0001, "step": 4816 }, { "epoch": 2.37, "logps_train/chosen": -79.87840270996094, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -321.8857727050781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.454197883605957, "rewards_train/margins": 18.20498561859131, "rewards_train/rejected": -19.659183502197266, "step": 4816 }, { "epoch": 2.37, "learning_rate": 1.160683270982003e-07, "loss": 0.0, "step": 4817 }, { "epoch": 2.37, "logps_train/chosen": -74.69770812988281, "logps_train/ref_chosen": -60.375, "logps_train/ref_rejected": -120.6875, "logps_train/rejected": -316.4273986816406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4307574033737183, "rewards_train/margins": 18.143186688423157, "rewards_train/rejected": -19.573944091796875, "step": 4817 }, { "epoch": 2.37, "learning_rate": 1.1589452851094061e-07, "loss": 0.0, "step": 4818 }, { "epoch": 2.37, "logps_train/chosen": -75.91719055175781, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -323.0630187988281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2023154497146606, "rewards_train/margins": 18.51775634288788, "rewards_train/rejected": -19.72007179260254, "step": 4818 }, { "epoch": 2.37, "learning_rate": 1.1572084308374435e-07, "loss": 0.0, "step": 4819 }, { "epoch": 2.37, "logps_train/chosen": -83.96116638183594, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -332.398193359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.80109703540802, "rewards_train/margins": 18.593506693840027, "rewards_train/rejected": -20.394603729248047, "step": 4819 }, { "epoch": 2.37, "learning_rate": 1.1554727086778076e-07, "loss": 0.0, "step": 4820 }, { "epoch": 2.37, "logps_train/chosen": -75.66168212890625, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -321.21142578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1437559127807617, "rewards_train/margins": 18.48353672027588, "rewards_train/rejected": -19.62729263305664, "step": 4820 }, { "epoch": 2.37, "learning_rate": 1.1537381191418565e-07, "loss": 0.0, "step": 4821 }, { "epoch": 2.37, "logps_train/chosen": -76.63765716552734, "logps_train/ref_chosen": -62.5625, "logps_train/ref_rejected": -123.3125, "logps_train/rejected": -315.5787353515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4075647592544556, "rewards_train/margins": 17.82100999355316, "rewards_train/rejected": -19.228574752807617, "step": 4821 }, { "epoch": 2.37, "learning_rate": 1.152004662740606e-07, "loss": 0.0, "step": 4822 }, { "epoch": 2.37, "logps_train/chosen": -73.97328186035156, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -131.375, "logps_train/rejected": -332.42913818359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9120736718177795, "rewards_train/margins": 19.19451278448105, "rewards_train/rejected": -20.106586456298828, "step": 4822 }, { "epoch": 2.37, "learning_rate": 1.1502723399847492e-07, "loss": 0.0, "step": 4823 }, { "epoch": 2.37, "logps_train/chosen": -77.38365173339844, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -122.3125, "logps_train/rejected": -317.50335693359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3952991962432861, "rewards_train/margins": 18.12310290336609, "rewards_train/rejected": -19.518402099609375, "step": 4823 }, { "epoch": 2.38, "learning_rate": 1.1485411513846377e-07, "loss": 0.0001, "step": 4824 }, { "epoch": 2.38, "logps_train/chosen": -76.17500305175781, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -322.6134033203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2081252336502075, "rewards_train/margins": 18.439053416252136, "rewards_train/rejected": -19.647178649902344, "step": 4824 }, { "epoch": 2.38, "learning_rate": 1.1468110974502904e-07, "loss": 0.0, "step": 4825 }, { "epoch": 2.38, "logps_train/chosen": -69.37059020996094, "logps_train/ref_chosen": -60.71875, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -308.4190673828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8658673763275146, "rewards_train/margins": 17.877308130264282, "rewards_train/rejected": -18.743175506591797, "step": 4825 }, { "epoch": 2.38, "learning_rate": 1.1450821786913956e-07, "loss": 0.0001, "step": 4826 }, { "epoch": 2.38, "logps_train/chosen": -74.546875, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -319.7230224609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.135155439376831, "rewards_train/margins": 18.39163899421692, "rewards_train/rejected": -19.52679443359375, "step": 4826 }, { "epoch": 2.38, "learning_rate": 1.1433543956173031e-07, "loss": 0.0, "step": 4827 }, { "epoch": 2.38, "logps_train/chosen": -80.52198791503906, "logps_train/ref_chosen": -62.46875, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -315.9756164550781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8057141304016113, "rewards_train/margins": 17.349368572235107, "rewards_train/rejected": -19.15508270263672, "step": 4827 }, { "epoch": 2.38, "learning_rate": 1.141627748737029e-07, "loss": 0.0001, "step": 4828 }, { "epoch": 2.38, "logps_train/chosen": -80.71788787841797, "logps_train/ref_chosen": -61.4375, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -323.35577392578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9291369915008545, "rewards_train/margins": 17.921090364456177, "rewards_train/rejected": -19.85022735595703, "step": 4828 }, { "epoch": 2.38, "learning_rate": 1.1399022385592544e-07, "loss": 0.0001, "step": 4829 }, { "epoch": 2.38, "logps_train/chosen": -75.84225463867188, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -316.676025390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3141083717346191, "rewards_train/margins": 17.859357357025146, "rewards_train/rejected": -19.173465728759766, "step": 4829 }, { "epoch": 2.38, "learning_rate": 1.1381778655923291e-07, "loss": 0.0, "step": 4830 }, { "epoch": 2.38, "logps_train/chosen": -72.38685607910156, "logps_train/ref_chosen": -62.03125, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -313.1102294921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0367811918258667, "rewards_train/margins": 18.008957028388977, "rewards_train/rejected": -19.045738220214844, "step": 4830 }, { "epoch": 2.38, "learning_rate": 1.1364546303442623e-07, "loss": 0.0, "step": 4831 }, { "epoch": 2.38, "logps_train/chosen": -80.86499786376953, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -324.72808837890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5770031213760376, "rewards_train/margins": 18.19834554195404, "rewards_train/rejected": -19.775348663330078, "step": 4831 }, { "epoch": 2.38, "learning_rate": 1.1347325333227315e-07, "loss": 0.0, "step": 4832 }, { "epoch": 2.38, "logps_train/chosen": -80.70993041992188, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -321.6602478027344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5757782459259033, "rewards_train/margins": 18.10538363456726, "rewards_train/rejected": -19.681161880493164, "step": 4832 }, { "epoch": 2.38, "learning_rate": 1.1330115750350789e-07, "loss": 0.0001, "step": 4833 }, { "epoch": 2.38, "logps_train/chosen": -74.96855163574219, "logps_train/ref_chosen": -61.6875, "logps_train/ref_rejected": -121.9375, "logps_train/rejected": -316.73699951171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3296184539794922, "rewards_train/margins": 18.14769172668457, "rewards_train/rejected": -19.477310180664062, "step": 4833 }, { "epoch": 2.38, "learning_rate": 1.13129175598831e-07, "loss": 0.0, "step": 4834 }, { "epoch": 2.38, "logps_train/chosen": -77.75126647949219, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -326.42449951171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1959757804870605, "rewards_train/margins": 18.89872407913208, "rewards_train/rejected": -20.09469985961914, "step": 4834 }, { "epoch": 2.38, "learning_rate": 1.1295730766890949e-07, "loss": 0.0, "step": 4835 }, { "epoch": 2.38, "logps_train/chosen": -80.27525329589844, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -327.2896423339844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6495954990386963, "rewards_train/margins": 18.637277364730835, "rewards_train/rejected": -20.28687286376953, "step": 4835 }, { "epoch": 2.38, "learning_rate": 1.1278555376437665e-07, "loss": 0.0, "step": 4836 }, { "epoch": 2.38, "logps_train/chosen": -77.17401123046875, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -334.06170654296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2535829544067383, "rewards_train/margins": 19.29584789276123, "rewards_train/rejected": -20.54943084716797, "step": 4836 }, { "epoch": 2.38, "learning_rate": 1.1261391393583263e-07, "loss": 0.0, "step": 4837 }, { "epoch": 2.38, "logps_train/chosen": -80.98765563964844, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -325.23699951171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6694691181182861, "rewards_train/margins": 18.304327726364136, "rewards_train/rejected": -19.973796844482422, "step": 4837 }, { "epoch": 2.38, "learning_rate": 1.1244238823384361e-07, "loss": 0.0, "step": 4838 }, { "epoch": 2.38, "logps_train/chosen": -77.28112030029297, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -322.02728271484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3498408794403076, "rewards_train/margins": 18.430768251419067, "rewards_train/rejected": -19.780609130859375, "step": 4838 }, { "epoch": 2.38, "learning_rate": 1.1227097670894203e-07, "loss": 0.0, "step": 4839 }, { "epoch": 2.38, "logps_train/chosen": -74.27091979980469, "logps_train/ref_chosen": -61.0625, "logps_train/ref_rejected": -118.9375, "logps_train/rejected": -316.47174072265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3205984830856323, "rewards_train/margins": 18.43116581439972, "rewards_train/rejected": -19.75176429748535, "step": 4839 }, { "epoch": 2.38, "learning_rate": 1.1209967941162723e-07, "loss": 0.0002, "step": 4840 }, { "epoch": 2.38, "logps_train/chosen": -75.15487670898438, "logps_train/ref_chosen": -61.34375, "logps_train/ref_rejected": -120.25, "logps_train/rejected": -311.79437255859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3821382522583008, "rewards_train/margins": 17.7723970413208, "rewards_train/rejected": -19.1545352935791, "step": 4840 }, { "epoch": 2.38, "learning_rate": 1.1192849639236407e-07, "loss": 0.0, "step": 4841 }, { "epoch": 2.38, "logps_train/chosen": -82.9575424194336, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -132.25, "logps_train/rejected": -340.5157775878906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7329614162445068, "rewards_train/margins": 19.087852716445923, "rewards_train/rejected": -20.82081413269043, "step": 4841 }, { "epoch": 2.38, "learning_rate": 1.117574277015847e-07, "loss": 0.0, "step": 4842 }, { "epoch": 2.38, "logps_train/chosen": -78.78790283203125, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -328.00714111328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.412872076034546, "rewards_train/margins": 18.7349112033844, "rewards_train/rejected": -20.147783279418945, "step": 4842 }, { "epoch": 2.38, "learning_rate": 1.115864733896868e-07, "loss": 0.0, "step": 4843 }, { "epoch": 2.38, "logps_train/chosen": -86.04290771484375, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -327.94622802734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9449152946472168, "rewards_train/margins": 18.224316120147705, "rewards_train/rejected": -20.169231414794922, "step": 4843 }, { "epoch": 2.39, "learning_rate": 1.114156335070347e-07, "loss": 0.0, "step": 4844 }, { "epoch": 2.39, "logps_train/chosen": -85.9074478149414, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -322.58575439453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0515851974487305, "rewards_train/margins": 17.746493339538574, "rewards_train/rejected": -19.798078536987305, "step": 4844 }, { "epoch": 2.39, "learning_rate": 1.1124490810395914e-07, "loss": 0.0, "step": 4845 }, { "epoch": 2.39, "logps_train/chosen": -76.2542724609375, "logps_train/ref_chosen": -62.46875, "logps_train/ref_rejected": -117.25, "logps_train/rejected": -310.7882385253906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3782596588134766, "rewards_train/margins": 17.977222442626953, "rewards_train/rejected": -19.35548210144043, "step": 4845 }, { "epoch": 2.39, "learning_rate": 1.1107429723075684e-07, "loss": 0.0, "step": 4846 }, { "epoch": 2.39, "logps_train/chosen": -73.11626434326172, "logps_train/ref_chosen": -62.28125, "logps_train/ref_rejected": -121.375, "logps_train/rejected": -306.7344970703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0828423500061035, "rewards_train/margins": 17.451348781585693, "rewards_train/rejected": -18.534191131591797, "step": 4846 }, { "epoch": 2.39, "learning_rate": 1.1090380093769086e-07, "loss": 0.0001, "step": 4847 }, { "epoch": 2.39, "logps_train/chosen": -80.5297622680664, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -319.0688781738281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6787090301513672, "rewards_train/margins": 18.06304359436035, "rewards_train/rejected": -19.74175262451172, "step": 4847 }, { "epoch": 2.39, "learning_rate": 1.1073341927499081e-07, "loss": 0.0, "step": 4848 }, { "epoch": 2.39, "logps_train/chosen": -78.04603576660156, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -321.5889892578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.573133945465088, "rewards_train/margins": 18.22292470932007, "rewards_train/rejected": -19.796058654785156, "step": 4848 }, { "epoch": 2.39, "learning_rate": 1.1056315229285213e-07, "loss": 0.0, "step": 4849 }, { "epoch": 2.39, "logps_train/chosen": -77.0947036743164, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -117.6875, "logps_train/rejected": -310.07281494140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.498532772064209, "rewards_train/margins": 17.740435123443604, "rewards_train/rejected": -19.238967895507812, "step": 4849 }, { "epoch": 2.39, "learning_rate": 1.1039300004143654e-07, "loss": 0.0, "step": 4850 }, { "epoch": 2.39, "logps_train/chosen": -75.25592041015625, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -335.9974060058594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0933902263641357, "rewards_train/margins": 19.658889055252075, "rewards_train/rejected": -20.75227928161621, "step": 4850 }, { "epoch": 2.39, "learning_rate": 1.1022296257087204e-07, "loss": 0.0, "step": 4851 }, { "epoch": 2.39, "logps_train/chosen": -75.5153579711914, "logps_train/ref_chosen": -61.8125, "logps_train/ref_rejected": -121.0625, "logps_train/rejected": -314.5602111816406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3696019649505615, "rewards_train/margins": 17.979192972183228, "rewards_train/rejected": -19.34879493713379, "step": 4851 }, { "epoch": 2.39, "learning_rate": 1.1005303993125298e-07, "loss": 0.0, "step": 4852 }, { "epoch": 2.39, "logps_train/chosen": -75.74221801757812, "logps_train/ref_chosen": -62.65625, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -320.275390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3094274997711182, "rewards_train/margins": 18.46644949913025, "rewards_train/rejected": -19.775876998901367, "step": 4852 }, { "epoch": 2.39, "learning_rate": 1.0988323217263967e-07, "loss": 0.0, "step": 4853 }, { "epoch": 2.39, "logps_train/chosen": -85.68718719482422, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -330.93646240234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9249687194824219, "rewards_train/margins": 18.21555519104004, "rewards_train/rejected": -20.14052391052246, "step": 4853 }, { "epoch": 2.39, "learning_rate": 1.0971353934505839e-07, "loss": 0.0, "step": 4854 }, { "epoch": 2.39, "logps_train/chosen": -86.07489776611328, "logps_train/ref_chosen": -67.1875, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -318.62115478515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8908884525299072, "rewards_train/margins": 17.467612981796265, "rewards_train/rejected": -19.358501434326172, "step": 4854 }, { "epoch": 2.39, "learning_rate": 1.0954396149850231e-07, "loss": 0.0001, "step": 4855 }, { "epoch": 2.39, "logps_train/chosen": -80.6048583984375, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -324.801025390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.671618938446045, "rewards_train/margins": 18.19798707962036, "rewards_train/rejected": -19.869606018066406, "step": 4855 }, { "epoch": 2.39, "learning_rate": 1.0937449868292958e-07, "loss": 0.0, "step": 4856 }, { "epoch": 2.39, "logps_train/chosen": -81.1705551147461, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -328.7013244628906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4524073600769043, "rewards_train/margins": 18.36528444290161, "rewards_train/rejected": -19.817691802978516, "step": 4856 }, { "epoch": 2.39, "learning_rate": 1.0920515094826538e-07, "loss": 0.0, "step": 4857 }, { "epoch": 2.39, "logps_train/chosen": -75.19690704345703, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -331.4794921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.131019115447998, "rewards_train/margins": 19.559410572052002, "rewards_train/rejected": -20.6904296875, "step": 4857 }, { "epoch": 2.39, "learning_rate": 1.0903591834440095e-07, "loss": 0.0003, "step": 4858 }, { "epoch": 2.39, "logps_train/chosen": -83.43305969238281, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -329.95135498046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.910395860671997, "rewards_train/margins": 18.61789584159851, "rewards_train/rejected": -20.528291702270508, "step": 4858 }, { "epoch": 2.39, "learning_rate": 1.0886680092119289e-07, "loss": 0.0, "step": 4859 }, { "epoch": 2.39, "logps_train/chosen": -81.66316223144531, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -324.9729309082031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.576960802078247, "rewards_train/margins": 18.330782651901245, "rewards_train/rejected": -19.907743453979492, "step": 4859 }, { "epoch": 2.39, "learning_rate": 1.0869779872846463e-07, "loss": 0.0, "step": 4860 }, { "epoch": 2.39, "logps_train/chosen": -81.10022735595703, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -334.5828552246094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5658822059631348, "rewards_train/margins": 19.16848134994507, "rewards_train/rejected": -20.734363555908203, "step": 4860 }, { "epoch": 2.39, "learning_rate": 1.0852891181600532e-07, "loss": 0.0, "step": 4861 }, { "epoch": 2.39, "logps_train/chosen": -75.79491424560547, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -121.25, "logps_train/rejected": -314.13116455078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2564444541931152, "rewards_train/margins": 18.033236026763916, "rewards_train/rejected": -19.28968048095703, "step": 4861 }, { "epoch": 2.39, "learning_rate": 1.0836014023356999e-07, "loss": 0.0001, "step": 4862 }, { "epoch": 2.39, "logps_train/chosen": -74.79925537109375, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -319.7217102050781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0785582065582275, "rewards_train/margins": 18.428083181381226, "rewards_train/rejected": -19.506641387939453, "step": 4862 }, { "epoch": 2.39, "learning_rate": 1.0819148403088019e-07, "loss": 0.0, "step": 4863 }, { "epoch": 2.39, "logps_train/chosen": -81.214111328125, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -322.73004150390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6623046398162842, "rewards_train/margins": 17.99141240119934, "rewards_train/rejected": -19.653717041015625, "step": 4863 }, { "epoch": 2.39, "learning_rate": 1.0802294325762301e-07, "loss": 0.0, "step": 4864 }, { "epoch": 2.39, "logps_train/chosen": -78.603759765625, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -320.7685852050781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.198681116104126, "rewards_train/margins": 18.477298498153687, "rewards_train/rejected": -19.675979614257812, "step": 4864 }, { "epoch": 2.4, "learning_rate": 1.0785451796345174e-07, "loss": 0.0, "step": 4865 }, { "epoch": 2.4, "logps_train/chosen": -83.11006927490234, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -316.50457763671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8478227853775024, "rewards_train/margins": 17.278563857078552, "rewards_train/rejected": -19.126386642456055, "step": 4865 }, { "epoch": 2.4, "learning_rate": 1.076862081979854e-07, "loss": 0.0005, "step": 4866 }, { "epoch": 2.4, "logps_train/chosen": -83.84695434570312, "logps_train/ref_chosen": -67.375, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -333.1654968261719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6493922472000122, "rewards_train/margins": 18.743719935417175, "rewards_train/rejected": -20.393112182617188, "step": 4866 }, { "epoch": 2.4, "learning_rate": 1.0751801401080956e-07, "loss": 0.0, "step": 4867 }, { "epoch": 2.4, "logps_train/chosen": -78.94204711914062, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -325.4647216796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4894195795059204, "rewards_train/margins": 18.53380811214447, "rewards_train/rejected": -20.02322769165039, "step": 4867 }, { "epoch": 2.4, "learning_rate": 1.0734993545147513e-07, "loss": 0.0, "step": 4868 }, { "epoch": 2.4, "logps_train/chosen": -80.60243225097656, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -320.8970031738281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.661513328552246, "rewards_train/margins": 17.947230339050293, "rewards_train/rejected": -19.60874366760254, "step": 4868 }, { "epoch": 2.4, "learning_rate": 1.0718197256949907e-07, "loss": 0.0001, "step": 4869 }, { "epoch": 2.4, "logps_train/chosen": -81.32040405273438, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -317.1141357421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7030367851257324, "rewards_train/margins": 17.847588062286377, "rewards_train/rejected": -19.55062484741211, "step": 4869 }, { "epoch": 2.4, "learning_rate": 1.0701412541436484e-07, "loss": 0.0002, "step": 4870 }, { "epoch": 2.4, "logps_train/chosen": -86.02826690673828, "logps_train/ref_chosen": -66.9375, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -333.407470703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9092477560043335, "rewards_train/margins": 18.596638321876526, "rewards_train/rejected": -20.50588607788086, "step": 4870 }, { "epoch": 2.4, "learning_rate": 1.0684639403552077e-07, "loss": 0.0, "step": 4871 }, { "epoch": 2.4, "logps_train/chosen": -85.14944458007812, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -333.52349853515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0299837589263916, "rewards_train/margins": 18.33892035484314, "rewards_train/rejected": -20.36890411376953, "step": 4871 }, { "epoch": 2.4, "learning_rate": 1.0667877848238188e-07, "loss": 0.0, "step": 4872 }, { "epoch": 2.4, "logps_train/chosen": -80.44084167480469, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -321.9195556640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5085371732711792, "rewards_train/margins": 17.973358273506165, "rewards_train/rejected": -19.481895446777344, "step": 4872 }, { "epoch": 2.4, "learning_rate": 1.065112788043292e-07, "loss": 0.0002, "step": 4873 }, { "epoch": 2.4, "logps_train/chosen": -77.48426818847656, "logps_train/ref_chosen": -62.15625, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -324.3019714355469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5338268280029297, "rewards_train/margins": 18.484262466430664, "rewards_train/rejected": -20.018089294433594, "step": 4873 }, { "epoch": 2.4, "learning_rate": 1.0634389505070868e-07, "loss": 0.0, "step": 4874 }, { "epoch": 2.4, "logps_train/chosen": -71.65292358398438, "logps_train/ref_chosen": -60.75, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -315.30914306640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0889736413955688, "rewards_train/margins": 18.118696570396423, "rewards_train/rejected": -19.207670211791992, "step": 4874 }, { "epoch": 2.4, "learning_rate": 1.061766272708331e-07, "loss": 0.0, "step": 4875 }, { "epoch": 2.4, "logps_train/chosen": -83.40818786621094, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -325.4447937011719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.895237922668457, "rewards_train/margins": 18.13049030303955, "rewards_train/rejected": -20.025728225708008, "step": 4875 }, { "epoch": 2.4, "learning_rate": 1.0600947551398054e-07, "loss": 0.0005, "step": 4876 }, { "epoch": 2.4, "logps_train/chosen": -84.61457824707031, "logps_train/ref_chosen": -66.9375, "logps_train/ref_rejected": -131.375, "logps_train/rejected": -336.78558349609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7686116695404053, "rewards_train/margins": 18.77908730506897, "rewards_train/rejected": -20.547698974609375, "step": 4876 }, { "epoch": 2.4, "learning_rate": 1.0584243982939505e-07, "loss": 0.0, "step": 4877 }, { "epoch": 2.4, "logps_train/chosen": -80.59320068359375, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -130.625, "logps_train/rejected": -336.51519775390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.473041296005249, "rewards_train/margins": 19.112951517105103, "rewards_train/rejected": -20.58599281311035, "step": 4877 }, { "epoch": 2.4, "learning_rate": 1.0567552026628635e-07, "loss": 0.0, "step": 4878 }, { "epoch": 2.4, "logps_train/chosen": -75.44686889648438, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -317.4046630859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1586518287658691, "rewards_train/margins": 17.901200771331787, "rewards_train/rejected": -19.059852600097656, "step": 4878 }, { "epoch": 2.4, "learning_rate": 1.0550871687383029e-07, "loss": 0.0, "step": 4879 }, { "epoch": 2.4, "logps_train/chosen": -76.21864318847656, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -319.3946533203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2124403715133667, "rewards_train/margins": 18.41989529132843, "rewards_train/rejected": -19.632335662841797, "step": 4879 }, { "epoch": 2.4, "learning_rate": 1.0534202970116824e-07, "loss": 0.0, "step": 4880 }, { "epoch": 2.4, "logps_train/chosen": -82.87386322021484, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -131.5, "logps_train/rejected": -336.76220703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7651206254959106, "rewards_train/margins": 18.757927775382996, "rewards_train/rejected": -20.523048400878906, "step": 4880 }, { "epoch": 2.4, "learning_rate": 1.0517545879740708e-07, "loss": 0.0001, "step": 4881 }, { "epoch": 2.4, "logps_train/chosen": -79.57254028320312, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -326.064208984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6169712543487549, "rewards_train/margins": 18.17187190055847, "rewards_train/rejected": -19.788843154907227, "step": 4881 }, { "epoch": 2.4, "learning_rate": 1.0500900421162012e-07, "loss": 0.0, "step": 4882 }, { "epoch": 2.4, "logps_train/chosen": -75.32544708251953, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -320.9837646484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1850841045379639, "rewards_train/margins": 18.480382204055786, "rewards_train/rejected": -19.66546630859375, "step": 4882 }, { "epoch": 2.4, "learning_rate": 1.048426659928458e-07, "loss": 0.0, "step": 4883 }, { "epoch": 2.4, "logps_train/chosen": -79.5748291015625, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -312.8414611816406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6870728731155396, "rewards_train/margins": 17.22031581401825, "rewards_train/rejected": -18.90738868713379, "step": 4883 }, { "epoch": 2.4, "learning_rate": 1.0467644419008842e-07, "loss": 0.0001, "step": 4884 }, { "epoch": 2.4, "logps_train/chosen": -78.9155502319336, "logps_train/ref_chosen": -61.9375, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -326.4478759765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6969503164291382, "rewards_train/margins": 18.25076735019684, "rewards_train/rejected": -19.947717666625977, "step": 4884 }, { "epoch": 2.41, "learning_rate": 1.0451033885231819e-07, "loss": 0.0, "step": 4885 }, { "epoch": 2.41, "logps_train/chosen": -76.34622192382812, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -324.2546691894531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1840360164642334, "rewards_train/margins": 18.665502786636353, "rewards_train/rejected": -19.849538803100586, "step": 4885 }, { "epoch": 2.41, "learning_rate": 1.0434435002847087e-07, "loss": 0.0, "step": 4886 }, { "epoch": 2.41, "logps_train/chosen": -78.54849243164062, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -336.0177307128906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6430330276489258, "rewards_train/margins": 19.26039981842041, "rewards_train/rejected": -20.903432846069336, "step": 4886 }, { "epoch": 2.41, "learning_rate": 1.0417847776744781e-07, "loss": 0.0, "step": 4887 }, { "epoch": 2.41, "logps_train/chosen": -80.84263610839844, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -319.8912658691406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.594761848449707, "rewards_train/margins": 17.84753704071045, "rewards_train/rejected": -19.442298889160156, "step": 4887 }, { "epoch": 2.41, "learning_rate": 1.0401272211811596e-07, "loss": 0.0, "step": 4888 }, { "epoch": 2.41, "logps_train/chosen": -76.61564636230469, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -324.4165954589844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.170939326286316, "rewards_train/margins": 18.86530029773712, "rewards_train/rejected": -20.036239624023438, "step": 4888 }, { "epoch": 2.41, "learning_rate": 1.0384708312930834e-07, "loss": 0.0, "step": 4889 }, { "epoch": 2.41, "logps_train/chosen": -78.13261413574219, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -337.31524658203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2038373947143555, "rewards_train/margins": 19.65200138092041, "rewards_train/rejected": -20.855838775634766, "step": 4889 }, { "epoch": 2.41, "learning_rate": 1.0368156084982316e-07, "loss": 0.0, "step": 4890 }, { "epoch": 2.41, "logps_train/chosen": -79.01374816894531, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -316.4268493652344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4392166137695312, "rewards_train/margins": 17.790287017822266, "rewards_train/rejected": -19.229503631591797, "step": 4890 }, { "epoch": 2.41, "learning_rate": 1.0351615532842434e-07, "loss": 0.0, "step": 4891 }, { "epoch": 2.41, "logps_train/chosen": -81.59445190429688, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -331.9229431152344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5134490728378296, "rewards_train/margins": 18.705405831336975, "rewards_train/rejected": -20.218854904174805, "step": 4891 }, { "epoch": 2.41, "learning_rate": 1.0335086661384173e-07, "loss": 0.0, "step": 4892 }, { "epoch": 2.41, "logps_train/chosen": -83.33257293701172, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -324.33001708984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9159237146377563, "rewards_train/margins": 17.925867438316345, "rewards_train/rejected": -19.8417911529541, "step": 4892 }, { "epoch": 2.41, "learning_rate": 1.0318569475477007e-07, "loss": 0.0, "step": 4893 }, { "epoch": 2.41, "logps_train/chosen": -75.31835174560547, "logps_train/ref_chosen": -61.4375, "logps_train/ref_rejected": -117.875, "logps_train/rejected": -299.8963317871094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3868156671524048, "rewards_train/margins": 16.81273114681244, "rewards_train/rejected": -18.199546813964844, "step": 4893 }, { "epoch": 2.41, "learning_rate": 1.0302063979987052e-07, "loss": 0.0002, "step": 4894 }, { "epoch": 2.41, "logps_train/chosen": -79.37751007080078, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -314.2757873535156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.300788164138794, "rewards_train/margins": 17.81341052055359, "rewards_train/rejected": -19.114198684692383, "step": 4894 }, { "epoch": 2.41, "learning_rate": 1.0285570179776915e-07, "loss": 0.0, "step": 4895 }, { "epoch": 2.41, "logps_train/chosen": -76.87300109863281, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -323.8861083984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0982378721237183, "rewards_train/margins": 18.652532696723938, "rewards_train/rejected": -19.750770568847656, "step": 4895 }, { "epoch": 2.41, "learning_rate": 1.0269088079705773e-07, "loss": 0.0, "step": 4896 }, { "epoch": 2.41, "logps_train/chosen": -78.43809509277344, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -316.2418212890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.358335018157959, "rewards_train/margins": 17.804129123687744, "rewards_train/rejected": -19.162464141845703, "step": 4896 }, { "epoch": 2.41, "learning_rate": 1.0252617684629394e-07, "loss": 0.0, "step": 4897 }, { "epoch": 2.41, "logps_train/chosen": -72.19868469238281, "logps_train/ref_chosen": -60.96875, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -312.5618591308594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1231404542922974, "rewards_train/margins": 17.599841952323914, "rewards_train/rejected": -18.72298240661621, "step": 4897 }, { "epoch": 2.41, "learning_rate": 1.0236158999400051e-07, "loss": 0.0, "step": 4898 }, { "epoch": 2.41, "logps_train/chosen": -82.29261779785156, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -130.375, "logps_train/rejected": -332.3485412597656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6504039764404297, "rewards_train/margins": 18.549880981445312, "rewards_train/rejected": -20.200284957885742, "step": 4898 }, { "epoch": 2.41, "learning_rate": 1.0219712028866568e-07, "loss": 0.0, "step": 4899 }, { "epoch": 2.41, "logps_train/chosen": -80.01167297363281, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -320.513916015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6673296689987183, "rewards_train/margins": 17.92473804950714, "rewards_train/rejected": -19.59206771850586, "step": 4899 }, { "epoch": 2.41, "learning_rate": 1.0203276777874364e-07, "loss": 0.0, "step": 4900 }, { "epoch": 2.41, "logps_train/chosen": -78.78797912597656, "logps_train/ref_chosen": -61.34375, "logps_train/ref_rejected": -119.8125, "logps_train/rejected": -311.0633544921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7453508377075195, "rewards_train/margins": 17.378710746765137, "rewards_train/rejected": -19.124061584472656, "step": 4900 }, { "epoch": 2.41, "learning_rate": 1.018685325126536e-07, "loss": 0.0, "step": 4901 }, { "epoch": 2.41, "logps_train/chosen": -81.36375427246094, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -121.5, "logps_train/rejected": -314.10260009765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.746361255645752, "rewards_train/margins": 17.51326322555542, "rewards_train/rejected": -19.259624481201172, "step": 4901 }, { "epoch": 2.41, "learning_rate": 1.0170441453878037e-07, "loss": 0.0, "step": 4902 }, { "epoch": 2.41, "logps_train/chosen": -84.22901916503906, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -323.8326416015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.922023057937622, "rewards_train/margins": 17.765293836593628, "rewards_train/rejected": -19.68731689453125, "step": 4902 }, { "epoch": 2.41, "learning_rate": 1.0154041390547413e-07, "loss": 0.0, "step": 4903 }, { "epoch": 2.41, "logps_train/chosen": -83.0909194946289, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -330.9716796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6551368236541748, "rewards_train/margins": 18.77875065803528, "rewards_train/rejected": -20.433887481689453, "step": 4903 }, { "epoch": 2.41, "learning_rate": 1.0137653066105073e-07, "loss": 0.0, "step": 4904 }, { "epoch": 2.41, "logps_train/chosen": -78.92103576660156, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -323.7236022949219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.401576042175293, "rewards_train/margins": 18.34910297393799, "rewards_train/rejected": -19.75067901611328, "step": 4904 }, { "epoch": 2.42, "learning_rate": 1.0121276485379126e-07, "loss": 0.0001, "step": 4905 }, { "epoch": 2.42, "logps_train/chosen": -78.52629089355469, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -333.8779296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4332436323165894, "rewards_train/margins": 19.05836021900177, "rewards_train/rejected": -20.49160385131836, "step": 4905 }, { "epoch": 2.42, "learning_rate": 1.0104911653194204e-07, "loss": 0.0, "step": 4906 }, { "epoch": 2.42, "logps_train/chosen": -80.00953674316406, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -322.3985595703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4353289604187012, "rewards_train/margins": 18.275619983673096, "rewards_train/rejected": -19.710948944091797, "step": 4906 }, { "epoch": 2.42, "learning_rate": 1.0088558574371536e-07, "loss": 0.0, "step": 4907 }, { "epoch": 2.42, "logps_train/chosen": -79.38505554199219, "logps_train/ref_chosen": -61.4375, "logps_train/ref_rejected": -121.125, "logps_train/rejected": -315.6800537109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.793144702911377, "rewards_train/margins": 17.6625075340271, "rewards_train/rejected": -19.455652236938477, "step": 4907 }, { "epoch": 2.42, "learning_rate": 1.0072217253728804e-07, "loss": 0.0, "step": 4908 }, { "epoch": 2.42, "logps_train/chosen": -80.1121597290039, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -327.271728515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.606381893157959, "rewards_train/margins": 18.439932346343994, "rewards_train/rejected": -20.046314239501953, "step": 4908 }, { "epoch": 2.42, "learning_rate": 1.0055887696080285e-07, "loss": 0.0, "step": 4909 }, { "epoch": 2.42, "logps_train/chosen": -73.93186950683594, "logps_train/ref_chosen": -62.0625, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -321.2099609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1876211166381836, "rewards_train/margins": 18.548365592956543, "rewards_train/rejected": -19.735986709594727, "step": 4909 }, { "epoch": 2.42, "learning_rate": 1.0039569906236816e-07, "loss": 0.0001, "step": 4910 }, { "epoch": 2.42, "logps_train/chosen": -84.18218994140625, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -333.1025085449219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8010311126708984, "rewards_train/margins": 18.653945922851562, "rewards_train/rejected": -20.45497703552246, "step": 4910 }, { "epoch": 2.42, "learning_rate": 1.0023263889005668e-07, "loss": 0.0, "step": 4911 }, { "epoch": 2.42, "logps_train/chosen": -77.9923095703125, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -326.1090087890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4789679050445557, "rewards_train/margins": 18.364646673202515, "rewards_train/rejected": -19.84361457824707, "step": 4911 }, { "epoch": 2.42, "learning_rate": 1.0006969649190744e-07, "loss": 0.0, "step": 4912 }, { "epoch": 2.42, "logps_train/chosen": -85.45146179199219, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -325.6313171386719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.061601161956787, "rewards_train/margins": 17.886295795440674, "rewards_train/rejected": -19.94789695739746, "step": 4912 }, { "epoch": 2.42, "learning_rate": 9.990687191592423e-08, "loss": 0.0, "step": 4913 }, { "epoch": 2.42, "logps_train/chosen": -77.90135192871094, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -120.0625, "logps_train/rejected": -316.22283935546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.407029151916504, "rewards_train/margins": 18.209099769592285, "rewards_train/rejected": -19.61612892150879, "step": 4913 }, { "epoch": 2.42, "learning_rate": 9.974416521007633e-08, "loss": 0.0, "step": 4914 }, { "epoch": 2.42, "logps_train/chosen": -79.74173736572266, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -131.25, "logps_train/rejected": -328.957763671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2757364511489868, "rewards_train/margins": 18.49953019618988, "rewards_train/rejected": -19.775266647338867, "step": 4914 }, { "epoch": 2.42, "learning_rate": 9.958157642229803e-08, "loss": 0.0, "step": 4915 }, { "epoch": 2.42, "logps_train/chosen": -77.07569885253906, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -323.60479736328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4354989528656006, "rewards_train/margins": 18.21833634376526, "rewards_train/rejected": -19.65383529663086, "step": 4915 }, { "epoch": 2.42, "learning_rate": 9.941910560048938e-08, "loss": 0.0, "step": 4916 }, { "epoch": 2.42, "logps_train/chosen": -79.3700942993164, "logps_train/ref_chosen": -63.03125, "logps_train/ref_rejected": -122.8125, "logps_train/rejected": -320.5859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6330057382583618, "rewards_train/margins": 18.144383788108826, "rewards_train/rejected": -19.777389526367188, "step": 4916 }, { "epoch": 2.42, "learning_rate": 9.925675279251527e-08, "loss": 0.0, "step": 4917 }, { "epoch": 2.42, "logps_train/chosen": -80.35429382324219, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -327.99945068359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.735184907913208, "rewards_train/margins": 18.575793027877808, "rewards_train/rejected": -20.310977935791016, "step": 4917 }, { "epoch": 2.42, "learning_rate": 9.909451804620578e-08, "loss": 0.0, "step": 4918 }, { "epoch": 2.42, "logps_train/chosen": -77.62942504882812, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -323.17572021484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4505398273468018, "rewards_train/margins": 18.31683611869812, "rewards_train/rejected": -19.767375946044922, "step": 4918 }, { "epoch": 2.42, "learning_rate": 9.893240140935672e-08, "loss": 0.0, "step": 4919 }, { "epoch": 2.42, "logps_train/chosen": -82.55047607421875, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -325.94110107421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5962097644805908, "rewards_train/margins": 18.360939741134644, "rewards_train/rejected": -19.957149505615234, "step": 4919 }, { "epoch": 2.42, "learning_rate": 9.877040292972821e-08, "loss": 0.0, "step": 4920 }, { "epoch": 2.42, "logps_train/chosen": -80.91325378417969, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -332.28717041015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6142746210098267, "rewards_train/margins": 18.733532309532166, "rewards_train/rejected": -20.347806930541992, "step": 4920 }, { "epoch": 2.42, "learning_rate": 9.860852265504649e-08, "loss": 0.0, "step": 4921 }, { "epoch": 2.42, "logps_train/chosen": -76.88398742675781, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -324.5606384277344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1897164583206177, "rewards_train/margins": 18.676309466362, "rewards_train/rejected": -19.866025924682617, "step": 4921 }, { "epoch": 2.42, "learning_rate": 9.844676063300266e-08, "loss": 0.0, "step": 4922 }, { "epoch": 2.42, "logps_train/chosen": -73.57550811767578, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -321.20965576171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0578930377960205, "rewards_train/margins": 18.63191866874695, "rewards_train/rejected": -19.68981170654297, "step": 4922 }, { "epoch": 2.42, "learning_rate": 9.828511691125252e-08, "loss": 0.0, "step": 4923 }, { "epoch": 2.42, "logps_train/chosen": -76.92636108398438, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -119.25, "logps_train/rejected": -310.61114501953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4604581594467163, "rewards_train/margins": 17.675852417945862, "rewards_train/rejected": -19.136310577392578, "step": 4923 }, { "epoch": 2.42, "learning_rate": 9.812359153741779e-08, "loss": 0.0, "step": 4924 }, { "epoch": 2.42, "logps_train/chosen": -81.34310913085938, "logps_train/ref_chosen": -62.5625, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -325.6982421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8787933588027954, "rewards_train/margins": 18.03790509700775, "rewards_train/rejected": -19.916698455810547, "step": 4924 }, { "epoch": 2.42, "learning_rate": 9.796218455908467e-08, "loss": 0.0, "step": 4925 }, { "epoch": 2.42, "logps_train/chosen": -78.43966674804688, "logps_train/ref_chosen": -63.15625, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -332.6097717285156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5262415409088135, "rewards_train/margins": 19.019500494003296, "rewards_train/rejected": -20.54574203491211, "step": 4925 }, { "epoch": 2.43, "learning_rate": 9.780089602380476e-08, "loss": 0.0, "step": 4926 }, { "epoch": 2.43, "logps_train/chosen": -88.84037017822266, "logps_train/ref_chosen": -68.375, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -339.55914306640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.04951548576355, "rewards_train/margins": 18.878859758377075, "rewards_train/rejected": -20.928375244140625, "step": 4926 }, { "epoch": 2.43, "learning_rate": 9.763972597909493e-08, "loss": 0.0, "step": 4927 }, { "epoch": 2.43, "logps_train/chosen": -73.53701782226562, "logps_train/ref_chosen": -60.375, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -320.7866516113281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3163484334945679, "rewards_train/margins": 18.50230848789215, "rewards_train/rejected": -19.81865692138672, "step": 4927 }, { "epoch": 2.43, "learning_rate": 9.74786744724369e-08, "loss": 0.0, "step": 4928 }, { "epoch": 2.43, "logps_train/chosen": -82.91120910644531, "logps_train/ref_chosen": -68.625, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -320.3534240722656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4265698194503784, "rewards_train/margins": 18.138951420783997, "rewards_train/rejected": -19.565521240234375, "step": 4928 }, { "epoch": 2.43, "learning_rate": 9.731774155127742e-08, "loss": 0.0, "step": 4929 }, { "epoch": 2.43, "logps_train/chosen": -81.11707305908203, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -334.84954833984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7603890895843506, "rewards_train/margins": 18.774370431900024, "rewards_train/rejected": -20.534759521484375, "step": 4929 }, { "epoch": 2.43, "learning_rate": 9.715692726302843e-08, "loss": 0.0, "step": 4930 }, { "epoch": 2.43, "logps_train/chosen": -84.99418640136719, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -323.39019775390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9593305587768555, "rewards_train/margins": 17.738381385803223, "rewards_train/rejected": -19.697711944580078, "step": 4930 }, { "epoch": 2.43, "learning_rate": 9.699623165506705e-08, "loss": 0.0, "step": 4931 }, { "epoch": 2.43, "logps_train/chosen": -74.90962219238281, "logps_train/ref_chosen": -61.46875, "logps_train/ref_rejected": -122.3125, "logps_train/rejected": -323.3074645996094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3452595472335815, "rewards_train/margins": 18.753506779670715, "rewards_train/rejected": -20.098766326904297, "step": 4931 }, { "epoch": 2.43, "learning_rate": 9.683565477473515e-08, "loss": 0.0, "step": 4932 }, { "epoch": 2.43, "logps_train/chosen": -79.69541931152344, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -338.0181579589844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.423008918762207, "rewards_train/margins": 19.385056495666504, "rewards_train/rejected": -20.80806541442871, "step": 4932 }, { "epoch": 2.43, "learning_rate": 9.667519666933976e-08, "loss": 0.0001, "step": 4933 }, { "epoch": 2.43, "logps_train/chosen": -72.64630126953125, "logps_train/ref_chosen": -62.28125, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -321.2666931152344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0379698276519775, "rewards_train/margins": 18.562333822250366, "rewards_train/rejected": -19.600303649902344, "step": 4933 }, { "epoch": 2.43, "learning_rate": 9.651485738615305e-08, "loss": 0.0, "step": 4934 }, { "epoch": 2.43, "logps_train/chosen": -86.34898376464844, "logps_train/ref_chosen": -67.4375, "logps_train/ref_rejected": -133.375, "logps_train/rejected": -340.43695068359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8910510540008545, "rewards_train/margins": 18.819539308547974, "rewards_train/rejected": -20.710590362548828, "step": 4934 }, { "epoch": 2.43, "learning_rate": 9.635463697241203e-08, "loss": 0.0, "step": 4935 }, { "epoch": 2.43, "logps_train/chosen": -78.62899780273438, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -320.67425537109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3078460693359375, "rewards_train/margins": 18.33819580078125, "rewards_train/rejected": -19.646041870117188, "step": 4935 }, { "epoch": 2.43, "learning_rate": 9.619453547531847e-08, "loss": 0.0, "step": 4936 }, { "epoch": 2.43, "logps_train/chosen": -74.41995239257812, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -326.1346130371094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9043978452682495, "rewards_train/margins": 18.902179837226868, "rewards_train/rejected": -19.806577682495117, "step": 4936 }, { "epoch": 2.43, "learning_rate": 9.603455294203971e-08, "loss": 0.0, "step": 4937 }, { "epoch": 2.43, "logps_train/chosen": -80.21017456054688, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -332.02532958984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5701141357421875, "rewards_train/margins": 19.057613372802734, "rewards_train/rejected": -20.627727508544922, "step": 4937 }, { "epoch": 2.43, "learning_rate": 9.58746894197075e-08, "loss": 0.0, "step": 4938 }, { "epoch": 2.43, "logps_train/chosen": -78.79000854492188, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -338.65704345703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.430368423461914, "rewards_train/margins": 19.76014518737793, "rewards_train/rejected": -21.190513610839844, "step": 4938 }, { "epoch": 2.43, "learning_rate": 9.57149449554187e-08, "loss": 0.0, "step": 4939 }, { "epoch": 2.43, "logps_train/chosen": -71.26854705810547, "logps_train/ref_chosen": -62.59375, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -319.8358154296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8674311637878418, "rewards_train/margins": 18.969570636749268, "rewards_train/rejected": -19.83700180053711, "step": 4939 }, { "epoch": 2.43, "learning_rate": 9.555531959623503e-08, "loss": 0.0001, "step": 4940 }, { "epoch": 2.43, "logps_train/chosen": -75.90401458740234, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -319.5691223144531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2223351001739502, "rewards_train/margins": 18.298493146896362, "rewards_train/rejected": -19.520828247070312, "step": 4940 }, { "epoch": 2.43, "learning_rate": 9.539581338918345e-08, "loss": 0.0, "step": 4941 }, { "epoch": 2.43, "logps_train/chosen": -80.19108581542969, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -329.91351318359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3852219581604004, "rewards_train/margins": 18.691094875335693, "rewards_train/rejected": -20.076316833496094, "step": 4941 }, { "epoch": 2.43, "learning_rate": 9.52364263812554e-08, "loss": 0.0, "step": 4942 }, { "epoch": 2.43, "logps_train/chosen": -77.95603942871094, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -319.9856872558594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3283679485321045, "rewards_train/margins": 18.04061245918274, "rewards_train/rejected": -19.368980407714844, "step": 4942 }, { "epoch": 2.43, "learning_rate": 9.50771586194074e-08, "loss": 0.0, "step": 4943 }, { "epoch": 2.43, "logps_train/chosen": -88.52963256835938, "logps_train/ref_chosen": -68.25, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -336.47723388671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.027817487716675, "rewards_train/margins": 18.752227067947388, "rewards_train/rejected": -20.780044555664062, "step": 4943 }, { "epoch": 2.43, "learning_rate": 9.491801015056078e-08, "loss": 0.0, "step": 4944 }, { "epoch": 2.43, "logps_train/chosen": -79.00247192382812, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -315.37567138671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4506869316101074, "rewards_train/margins": 17.88922357559204, "rewards_train/rejected": -19.33991050720215, "step": 4944 }, { "epoch": 2.43, "learning_rate": 9.475898102160162e-08, "loss": 0.0, "step": 4945 }, { "epoch": 2.43, "logps_train/chosen": -80.53431701660156, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -314.8768310546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7034556865692139, "rewards_train/margins": 17.49712109565735, "rewards_train/rejected": -19.200576782226562, "step": 4945 }, { "epoch": 2.44, "learning_rate": 9.46000712793813e-08, "loss": 0.0001, "step": 4946 }, { "epoch": 2.44, "logps_train/chosen": -79.28776550292969, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -121.125, "logps_train/rejected": -312.0345458984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4317065477371216, "rewards_train/margins": 17.658469080924988, "rewards_train/rejected": -19.09017562866211, "step": 4946 }, { "epoch": 2.44, "learning_rate": 9.444128097071552e-08, "loss": 0.0, "step": 4947 }, { "epoch": 2.44, "logps_train/chosen": -78.43704223632812, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -327.3609619140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5022495985031128, "rewards_train/margins": 18.718125462532043, "rewards_train/rejected": -20.220375061035156, "step": 4947 }, { "epoch": 2.44, "learning_rate": 9.428261014238481e-08, "loss": 0.0, "step": 4948 }, { "epoch": 2.44, "logps_train/chosen": -81.29316711425781, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -324.78411865234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6626416444778442, "rewards_train/margins": 18.41381824016571, "rewards_train/rejected": -20.076459884643555, "step": 4948 }, { "epoch": 2.44, "learning_rate": 9.4124058841135e-08, "loss": 0.0, "step": 4949 }, { "epoch": 2.44, "logps_train/chosen": -74.77001190185547, "logps_train/ref_chosen": -62.5625, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -326.75274658203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2216787338256836, "rewards_train/margins": 18.81624126434326, "rewards_train/rejected": -20.037919998168945, "step": 4949 }, { "epoch": 2.44, "learning_rate": 9.396562711367617e-08, "loss": 0.0, "step": 4950 }, { "epoch": 2.44, "logps_train/chosen": -75.19786071777344, "logps_train/ref_chosen": -61.9375, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -314.3403625488281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3276467323303223, "rewards_train/margins": 17.584563732147217, "rewards_train/rejected": -18.91221046447754, "step": 4950 }, { "epoch": 2.44, "learning_rate": 9.380731500668332e-08, "loss": 0.0, "step": 4951 }, { "epoch": 2.44, "logps_train/chosen": -79.21115112304688, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -333.83837890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3951390981674194, "rewards_train/margins": 19.082790970802307, "rewards_train/rejected": -20.477930068969727, "step": 4951 }, { "epoch": 2.44, "learning_rate": 9.364912256679646e-08, "loss": 0.0, "step": 4952 }, { "epoch": 2.44, "logps_train/chosen": -84.22410583496094, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -329.42755126953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.764206886291504, "rewards_train/margins": 18.492758750915527, "rewards_train/rejected": -20.25696563720703, "step": 4952 }, { "epoch": 2.44, "learning_rate": 9.349104984062007e-08, "loss": 0.0, "step": 4953 }, { "epoch": 2.44, "logps_train/chosen": -78.39810180664062, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -326.3910217285156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.51920485496521, "rewards_train/margins": 18.546850442886353, "rewards_train/rejected": -20.066055297851562, "step": 4953 }, { "epoch": 2.44, "learning_rate": 9.33330968747234e-08, "loss": 0.0, "step": 4954 }, { "epoch": 2.44, "logps_train/chosen": -80.1512451171875, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -328.22015380859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4150264263153076, "rewards_train/margins": 18.375348329544067, "rewards_train/rejected": -19.790374755859375, "step": 4954 }, { "epoch": 2.44, "learning_rate": 9.317526371564039e-08, "loss": 0.0, "step": 4955 }, { "epoch": 2.44, "logps_train/chosen": -81.57958984375, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -131.75, "logps_train/rejected": -341.08514404296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7050787210464478, "rewards_train/margins": 19.227410674095154, "rewards_train/rejected": -20.9324893951416, "step": 4955 }, { "epoch": 2.44, "learning_rate": 9.301755040987009e-08, "loss": 0.0, "step": 4956 }, { "epoch": 2.44, "logps_train/chosen": -72.53308868408203, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -320.1264343261719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7846078872680664, "rewards_train/margins": 18.523737907409668, "rewards_train/rejected": -19.308345794677734, "step": 4956 }, { "epoch": 2.44, "learning_rate": 9.285995700387539e-08, "loss": 0.0, "step": 4957 }, { "epoch": 2.44, "logps_train/chosen": -80.45032501220703, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -340.8180847167969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5269662141799927, "rewards_train/margins": 19.539756655693054, "rewards_train/rejected": -21.066722869873047, "step": 4957 }, { "epoch": 2.44, "learning_rate": 9.270248354408466e-08, "loss": 0.0002, "step": 4958 }, { "epoch": 2.44, "logps_train/chosen": -82.07975769042969, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -332.0203857421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7619801759719849, "rewards_train/margins": 18.84767711162567, "rewards_train/rejected": -20.609657287597656, "step": 4958 }, { "epoch": 2.44, "learning_rate": 9.254513007689085e-08, "loss": 0.0, "step": 4959 }, { "epoch": 2.44, "logps_train/chosen": -77.7222900390625, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -331.44073486328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.182922124862671, "rewards_train/margins": 19.014864206314087, "rewards_train/rejected": -20.197786331176758, "step": 4959 }, { "epoch": 2.44, "learning_rate": 9.238789664865094e-08, "loss": 0.0, "step": 4960 }, { "epoch": 2.44, "logps_train/chosen": -78.30027770996094, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -334.87164306640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.326658010482788, "rewards_train/margins": 19.112850427627563, "rewards_train/rejected": -20.43950843811035, "step": 4960 }, { "epoch": 2.44, "learning_rate": 9.223078330568717e-08, "loss": 0.0, "step": 4961 }, { "epoch": 2.44, "logps_train/chosen": -79.52347564697266, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -120.625, "logps_train/rejected": -308.7664794921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4733437299728394, "rewards_train/margins": 17.341878056526184, "rewards_train/rejected": -18.815221786499023, "step": 4961 }, { "epoch": 2.44, "learning_rate": 9.207379009428623e-08, "loss": 0.0001, "step": 4962 }, { "epoch": 2.44, "logps_train/chosen": -80.06682586669922, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -309.6024475097656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5428153276443481, "rewards_train/margins": 17.24067199230194, "rewards_train/rejected": -18.78348731994629, "step": 4962 }, { "epoch": 2.44, "learning_rate": 9.191691706069921e-08, "loss": 0.0, "step": 4963 }, { "epoch": 2.44, "logps_train/chosen": -76.46146392822266, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -319.73773193359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3788608312606812, "rewards_train/margins": 18.234267830848694, "rewards_train/rejected": -19.613128662109375, "step": 4963 }, { "epoch": 2.44, "learning_rate": 9.17601642511422e-08, "loss": 0.0001, "step": 4964 }, { "epoch": 2.44, "logps_train/chosen": -71.99028015136719, "logps_train/ref_chosen": -60.25, "logps_train/ref_rejected": -123.1875, "logps_train/rejected": -312.837890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1738568544387817, "rewards_train/margins": 17.79220759868622, "rewards_train/rejected": -18.966064453125, "step": 4964 }, { "epoch": 2.44, "learning_rate": 9.160353171179547e-08, "loss": 0.0, "step": 4965 }, { "epoch": 2.44, "logps_train/chosen": -76.97544860839844, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -321.07611083984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1309430599212646, "rewards_train/margins": 18.16660714149475, "rewards_train/rejected": -19.297550201416016, "step": 4965 }, { "epoch": 2.45, "learning_rate": 9.144701948880407e-08, "loss": 0.0001, "step": 4966 }, { "epoch": 2.45, "logps_train/chosen": -79.97087097167969, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -329.3517150878906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4630539417266846, "rewards_train/margins": 18.76601481437683, "rewards_train/rejected": -20.229068756103516, "step": 4966 }, { "epoch": 2.45, "learning_rate": 9.129062762827739e-08, "loss": 0.0, "step": 4967 }, { "epoch": 2.45, "logps_train/chosen": -78.79451751708984, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -332.265869140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4135340452194214, "rewards_train/margins": 19.12809383869171, "rewards_train/rejected": -20.541627883911133, "step": 4967 }, { "epoch": 2.45, "learning_rate": 9.113435617628984e-08, "loss": 0.0, "step": 4968 }, { "epoch": 2.45, "logps_train/chosen": -77.81005096435547, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -318.53662109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5563955307006836, "rewards_train/margins": 17.806349754333496, "rewards_train/rejected": -19.36274528503418, "step": 4968 }, { "epoch": 2.45, "learning_rate": 9.097820517887988e-08, "loss": 0.0001, "step": 4969 }, { "epoch": 2.45, "logps_train/chosen": -78.3157958984375, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -133.125, "logps_train/rejected": -333.3409423828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2187374830245972, "rewards_train/margins": 18.79807198047638, "rewards_train/rejected": -20.016809463500977, "step": 4969 }, { "epoch": 2.45, "learning_rate": 9.082217468205056e-08, "loss": 0.0, "step": 4970 }, { "epoch": 2.45, "logps_train/chosen": -74.5340347290039, "logps_train/ref_chosen": -62.28125, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -320.9471435546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2247905731201172, "rewards_train/margins": 18.479494094848633, "rewards_train/rejected": -19.70428466796875, "step": 4970 }, { "epoch": 2.45, "learning_rate": 9.066626473176986e-08, "loss": 0.0, "step": 4971 }, { "epoch": 2.45, "logps_train/chosen": -79.157958984375, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -333.47735595703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.402515172958374, "rewards_train/margins": 19.046684503555298, "rewards_train/rejected": -20.449199676513672, "step": 4971 }, { "epoch": 2.45, "learning_rate": 9.051047537396938e-08, "loss": 0.0, "step": 4972 }, { "epoch": 2.45, "logps_train/chosen": -77.23689270019531, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -327.402587890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4721747636795044, "rewards_train/margins": 18.377751231193542, "rewards_train/rejected": -19.849925994873047, "step": 4972 }, { "epoch": 2.45, "learning_rate": 9.035480665454598e-08, "loss": 0.0, "step": 4973 }, { "epoch": 2.45, "logps_train/chosen": -82.62934875488281, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -328.32342529296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6355421543121338, "rewards_train/margins": 18.23039412498474, "rewards_train/rejected": -19.865936279296875, "step": 4973 }, { "epoch": 2.45, "learning_rate": 9.019925861936101e-08, "loss": 0.0, "step": 4974 }, { "epoch": 2.45, "logps_train/chosen": -78.93637084960938, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -314.994873046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.528159499168396, "rewards_train/margins": 17.602139115333557, "rewards_train/rejected": -19.130298614501953, "step": 4974 }, { "epoch": 2.45, "learning_rate": 9.004383131423942e-08, "loss": 0.0, "step": 4975 }, { "epoch": 2.45, "logps_train/chosen": -76.2772216796875, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -316.9549865722656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0979373455047607, "rewards_train/margins": 17.811429262161255, "rewards_train/rejected": -18.909366607666016, "step": 4975 }, { "epoch": 2.45, "learning_rate": 8.988852478497156e-08, "loss": 0.0, "step": 4976 }, { "epoch": 2.45, "logps_train/chosen": -71.05927276611328, "logps_train/ref_chosen": -59.125, "logps_train/ref_rejected": -118.8125, "logps_train/rejected": -306.7077331542969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1919376850128174, "rewards_train/margins": 17.599833250045776, "rewards_train/rejected": -18.791770935058594, "step": 4976 }, { "epoch": 2.45, "learning_rate": 8.973333907731162e-08, "loss": 0.0003, "step": 4977 }, { "epoch": 2.45, "logps_train/chosen": -84.59038543701172, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -324.50201416015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8589895963668823, "rewards_train/margins": 17.95263683795929, "rewards_train/rejected": -19.811626434326172, "step": 4977 }, { "epoch": 2.45, "learning_rate": 8.957827423697823e-08, "loss": 0.0002, "step": 4978 }, { "epoch": 2.45, "logps_train/chosen": -81.42322540283203, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -131.375, "logps_train/rejected": -332.8237609863281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4828499555587769, "rewards_train/margins": 18.65860664844513, "rewards_train/rejected": -20.141456604003906, "step": 4978 }, { "epoch": 2.45, "learning_rate": 8.942333030965482e-08, "loss": 0.0, "step": 4979 }, { "epoch": 2.45, "logps_train/chosen": -83.15254211425781, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -323.6455078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8131552934646606, "rewards_train/margins": 18.194268107414246, "rewards_train/rejected": -20.007423400878906, "step": 4979 }, { "epoch": 2.45, "learning_rate": 8.926850734098873e-08, "loss": 0.0, "step": 4980 }, { "epoch": 2.45, "logps_train/chosen": -86.72903442382812, "logps_train/ref_chosen": -68.375, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -326.8369140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8348665237426758, "rewards_train/margins": 17.977438926696777, "rewards_train/rejected": -19.812305450439453, "step": 4980 }, { "epoch": 2.45, "learning_rate": 8.911380537659197e-08, "loss": 0.0, "step": 4981 }, { "epoch": 2.45, "logps_train/chosen": -81.09004211425781, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -333.41693115234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4610061645507812, "rewards_train/margins": 19.074731826782227, "rewards_train/rejected": -20.535737991333008, "step": 4981 }, { "epoch": 2.45, "learning_rate": 8.895922446204051e-08, "loss": 0.0, "step": 4982 }, { "epoch": 2.45, "logps_train/chosen": -82.33670806884766, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -327.4820251464844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8387000560760498, "rewards_train/margins": 18.11726689338684, "rewards_train/rejected": -19.95596694946289, "step": 4982 }, { "epoch": 2.45, "learning_rate": 8.880476464287535e-08, "loss": 0.0002, "step": 4983 }, { "epoch": 2.45, "logps_train/chosen": -83.15667724609375, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -324.6523132324219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8956971168518066, "rewards_train/margins": 18.046878337860107, "rewards_train/rejected": -19.942575454711914, "step": 4983 }, { "epoch": 2.45, "learning_rate": 8.865042596460109e-08, "loss": 0.0, "step": 4984 }, { "epoch": 2.45, "logps_train/chosen": -78.54434967041016, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -324.8042297363281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4767980575561523, "rewards_train/margins": 18.56734561920166, "rewards_train/rejected": -20.044143676757812, "step": 4984 }, { "epoch": 2.45, "learning_rate": 8.849620847268691e-08, "loss": 0.0, "step": 4985 }, { "epoch": 2.45, "logps_train/chosen": -76.09410858154297, "logps_train/ref_chosen": -62.65625, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -319.91680908203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3430049419403076, "rewards_train/margins": 18.04135251045227, "rewards_train/rejected": -19.384357452392578, "step": 4985 }, { "epoch": 2.45, "learning_rate": 8.83421122125666e-08, "loss": 0.0001, "step": 4986 }, { "epoch": 2.45, "logps_train/chosen": -75.06465148925781, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -329.01727294921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0516798496246338, "rewards_train/margins": 18.835206747055054, "rewards_train/rejected": -19.886886596679688, "step": 4986 }, { "epoch": 2.46, "learning_rate": 8.818813722963781e-08, "loss": 0.0, "step": 4987 }, { "epoch": 2.46, "logps_train/chosen": -77.90184020996094, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -122.8125, "logps_train/rejected": -312.1574401855469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4215075969696045, "rewards_train/margins": 17.51191258430481, "rewards_train/rejected": -18.933420181274414, "step": 4987 }, { "epoch": 2.46, "learning_rate": 8.80342835692624e-08, "loss": 0.0, "step": 4988 }, { "epoch": 2.46, "logps_train/chosen": -77.83087921142578, "logps_train/ref_chosen": -63.15625, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -318.6722412109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4675116539001465, "rewards_train/margins": 17.93740701675415, "rewards_train/rejected": -19.404918670654297, "step": 4988 }, { "epoch": 2.46, "learning_rate": 8.788055127676708e-08, "loss": 0.0, "step": 4989 }, { "epoch": 2.46, "logps_train/chosen": -81.64483642578125, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -337.6790771484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.455524206161499, "rewards_train/margins": 19.37376046180725, "rewards_train/rejected": -20.82928466796875, "step": 4989 }, { "epoch": 2.46, "learning_rate": 8.772694039744227e-08, "loss": 0.0, "step": 4990 }, { "epoch": 2.46, "logps_train/chosen": -80.84980773925781, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -130.375, "logps_train/rejected": -339.0914001464844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4316117763519287, "rewards_train/margins": 19.435831308364868, "rewards_train/rejected": -20.867443084716797, "step": 4990 }, { "epoch": 2.46, "learning_rate": 8.757345097654267e-08, "loss": 0.0, "step": 4991 }, { "epoch": 2.46, "logps_train/chosen": -75.80657196044922, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -119.8125, "logps_train/rejected": -307.21405029296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3122003078460693, "rewards_train/margins": 17.429222345352173, "rewards_train/rejected": -18.741422653198242, "step": 4991 }, { "epoch": 2.46, "learning_rate": 8.742008305928727e-08, "loss": 0.0, "step": 4992 }, { "epoch": 2.46, "logps_train/chosen": -82.17677307128906, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -332.13507080078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7159199714660645, "rewards_train/margins": 18.947293758392334, "rewards_train/rejected": -20.6632137298584, "step": 4992 }, { "epoch": 2.46, "learning_rate": 8.726683669085966e-08, "loss": 0.0001, "step": 4993 }, { "epoch": 2.46, "logps_train/chosen": -77.60696411132812, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -319.9007873535156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3891146183013916, "rewards_train/margins": 18.033584356307983, "rewards_train/rejected": -19.422698974609375, "step": 4993 }, { "epoch": 2.46, "learning_rate": 8.711371191640676e-08, "loss": 0.0, "step": 4994 }, { "epoch": 2.46, "logps_train/chosen": -80.52130126953125, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -325.26629638671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7904598712921143, "rewards_train/margins": 18.42303729057312, "rewards_train/rejected": -20.213497161865234, "step": 4994 }, { "epoch": 2.46, "learning_rate": 8.696070878104044e-08, "loss": 0.0, "step": 4995 }, { "epoch": 2.46, "logps_train/chosen": -84.43942260742188, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -341.91387939453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7601042985916138, "rewards_train/margins": 19.526448130607605, "rewards_train/rejected": -21.28655242919922, "step": 4995 }, { "epoch": 2.46, "learning_rate": 8.680782732983643e-08, "loss": 0.0, "step": 4996 }, { "epoch": 2.46, "logps_train/chosen": -73.28141021728516, "logps_train/ref_chosen": -63.15625, "logps_train/ref_rejected": -120.8125, "logps_train/rejected": -312.5858154296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0136878490447998, "rewards_train/margins": 18.165499925613403, "rewards_train/rejected": -19.179187774658203, "step": 4996 }, { "epoch": 2.46, "learning_rate": 8.665506760783453e-08, "loss": 0.0, "step": 4997 }, { "epoch": 2.46, "logps_train/chosen": -79.75177001953125, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -315.0606689453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6643857955932617, "rewards_train/margins": 17.549593925476074, "rewards_train/rejected": -19.213979721069336, "step": 4997 }, { "epoch": 2.46, "learning_rate": 8.650242966003896e-08, "loss": 0.0, "step": 4998 }, { "epoch": 2.46, "logps_train/chosen": -77.07164764404297, "logps_train/ref_chosen": -67.5625, "logps_train/ref_rejected": -131.75, "logps_train/rejected": -331.25469970703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9519889950752258, "rewards_train/margins": 18.998484790325165, "rewards_train/rejected": -19.95047378540039, "step": 4998 }, { "epoch": 2.46, "learning_rate": 8.63499135314178e-08, "loss": 0.0, "step": 4999 }, { "epoch": 2.46, "logps_train/chosen": -77.64471435546875, "logps_train/ref_chosen": -62.21875, "logps_train/ref_rejected": -121.6875, "logps_train/rejected": -317.1957092285156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5444031953811646, "rewards_train/margins": 18.00680911540985, "rewards_train/rejected": -19.551212310791016, "step": 4999 }, { "epoch": 2.46, "learning_rate": 8.619751926690316e-08, "loss": 0.0001, "step": 5000 }, { "epoch": 2.46, "logps_train/chosen": -83.48698425292969, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -321.960205078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.746110439300537, "rewards_train/margins": 17.913191318511963, "rewards_train/rejected": -19.6593017578125, "step": 5000 }, { "epoch": 2.46, "learning_rate": 8.604524691139176e-08, "loss": 0.0001, "step": 5001 }, { "epoch": 2.46, "logps_train/chosen": -77.42741394042969, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -317.37451171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3272138833999634, "rewards_train/margins": 17.89646852016449, "rewards_train/rejected": -19.223682403564453, "step": 5001 }, { "epoch": 2.46, "learning_rate": 8.589309650974386e-08, "loss": 0.0, "step": 5002 }, { "epoch": 2.46, "logps_train/chosen": -79.39118957519531, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -327.35382080078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3774001598358154, "rewards_train/margins": 18.575461626052856, "rewards_train/rejected": -19.952861785888672, "step": 5002 }, { "epoch": 2.46, "learning_rate": 8.574106810678405e-08, "loss": 0.0, "step": 5003 }, { "epoch": 2.46, "logps_train/chosen": -78.25853729248047, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -319.736572265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4450922012329102, "rewards_train/margins": 18.037989616394043, "rewards_train/rejected": -19.483081817626953, "step": 5003 }, { "epoch": 2.46, "learning_rate": 8.558916174730075e-08, "loss": 0.0001, "step": 5004 }, { "epoch": 2.46, "logps_train/chosen": -78.40619659423828, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -323.7963562011719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2123485803604126, "rewards_train/margins": 18.498634696006775, "rewards_train/rejected": -19.710983276367188, "step": 5004 }, { "epoch": 2.46, "learning_rate": 8.543737747604684e-08, "loss": 0.0, "step": 5005 }, { "epoch": 2.46, "logps_train/chosen": -79.96145629882812, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -331.6260986328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4466828107833862, "rewards_train/margins": 18.844639897346497, "rewards_train/rejected": -20.291322708129883, "step": 5005 }, { "epoch": 2.46, "learning_rate": 8.528571533773893e-08, "loss": 0.0, "step": 5006 }, { "epoch": 2.46, "logps_train/chosen": -78.53555297851562, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -322.11114501953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.499404788017273, "rewards_train/margins": 18.031627774238586, "rewards_train/rejected": -19.53103256225586, "step": 5006 }, { "epoch": 2.47, "learning_rate": 8.513417537705758e-08, "loss": 0.0001, "step": 5007 }, { "epoch": 2.47, "logps_train/chosen": -74.61161041259766, "logps_train/ref_chosen": -61.78125, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -324.013671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.282938003540039, "rewards_train/margins": 18.566038131713867, "rewards_train/rejected": -19.848976135253906, "step": 5007 }, { "epoch": 2.47, "learning_rate": 8.498275763864781e-08, "loss": 0.0, "step": 5008 }, { "epoch": 2.47, "logps_train/chosen": -83.08305358886719, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -321.10498046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.737504482269287, "rewards_train/margins": 17.760395526885986, "rewards_train/rejected": -19.497900009155273, "step": 5008 }, { "epoch": 2.47, "learning_rate": 8.483146216711778e-08, "loss": 0.0, "step": 5009 }, { "epoch": 2.47, "logps_train/chosen": -79.986572265625, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -331.1077880859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6075927019119263, "rewards_train/margins": 18.824427485466003, "rewards_train/rejected": -20.43202018737793, "step": 5009 }, { "epoch": 2.47, "learning_rate": 8.468028900704049e-08, "loss": 0.0, "step": 5010 }, { "epoch": 2.47, "logps_train/chosen": -82.59107971191406, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -333.8710021972656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6567153930664062, "rewards_train/margins": 18.676136016845703, "rewards_train/rejected": -20.33285140991211, "step": 5010 }, { "epoch": 2.47, "learning_rate": 8.452923820295282e-08, "loss": 0.0, "step": 5011 }, { "epoch": 2.47, "logps_train/chosen": -74.77599334716797, "logps_train/ref_chosen": -61.875, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -319.61309814453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2891230583190918, "rewards_train/margins": 18.239372730255127, "rewards_train/rejected": -19.52849578857422, "step": 5011 }, { "epoch": 2.47, "learning_rate": 8.437830979935478e-08, "loss": 0.0002, "step": 5012 }, { "epoch": 2.47, "logps_train/chosen": -82.44096374511719, "logps_train/ref_chosen": -67.375, "logps_train/ref_rejected": -134.875, "logps_train/rejected": -344.50042724609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5075733661651611, "rewards_train/margins": 19.457804441452026, "rewards_train/rejected": -20.965377807617188, "step": 5012 }, { "epoch": 2.47, "learning_rate": 8.422750384071132e-08, "loss": 0.0, "step": 5013 }, { "epoch": 2.47, "logps_train/chosen": -84.88300323486328, "logps_train/ref_chosen": -67.625, "logps_train/ref_rejected": -131.25, "logps_train/rejected": -333.15631103515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7211616039276123, "rewards_train/margins": 18.46390652656555, "rewards_train/rejected": -20.185068130493164, "step": 5013 }, { "epoch": 2.47, "learning_rate": 8.407682037145075e-08, "loss": 0.0002, "step": 5014 }, { "epoch": 2.47, "logps_train/chosen": -80.06306457519531, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -320.2171630859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5958080291748047, "rewards_train/margins": 18.111501693725586, "rewards_train/rejected": -19.70730972290039, "step": 5014 }, { "epoch": 2.47, "learning_rate": 8.392625943596538e-08, "loss": 0.0, "step": 5015 }, { "epoch": 2.47, "logps_train/chosen": -80.13938903808594, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -320.76812744140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5121077299118042, "rewards_train/margins": 18.114412426948547, "rewards_train/rejected": -19.62652015686035, "step": 5015 }, { "epoch": 2.47, "learning_rate": 8.377582107861159e-08, "loss": 0.0003, "step": 5016 }, { "epoch": 2.47, "logps_train/chosen": -85.28181457519531, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -335.7640075683594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.839216947555542, "rewards_train/margins": 18.922486543655396, "rewards_train/rejected": -20.761703491210938, "step": 5016 }, { "epoch": 2.47, "learning_rate": 8.362550534370955e-08, "loss": 0.0, "step": 5017 }, { "epoch": 2.47, "logps_train/chosen": -86.50556945800781, "logps_train/ref_chosen": -67.5, "logps_train/ref_rejected": -130.625, "logps_train/rejected": -338.9557189941406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.898945927619934, "rewards_train/margins": 18.92895019054413, "rewards_train/rejected": -20.827896118164062, "step": 5017 }, { "epoch": 2.47, "learning_rate": 8.347531227554322e-08, "loss": 0.0, "step": 5018 }, { "epoch": 2.47, "logps_train/chosen": -79.447509765625, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -325.282470703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6176512241363525, "rewards_train/margins": 18.6020028591156, "rewards_train/rejected": -20.219654083251953, "step": 5018 }, { "epoch": 2.47, "learning_rate": 8.332524191836037e-08, "loss": 0.0, "step": 5019 }, { "epoch": 2.47, "logps_train/chosen": -83.13906860351562, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -325.217529296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7665436267852783, "rewards_train/margins": 17.94720149040222, "rewards_train/rejected": -19.7137451171875, "step": 5019 }, { "epoch": 2.47, "learning_rate": 8.3175294316373e-08, "loss": 0.0, "step": 5020 }, { "epoch": 2.47, "logps_train/chosen": -76.60879516601562, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -325.7260437011719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3052891492843628, "rewards_train/margins": 18.87893784046173, "rewards_train/rejected": -20.184226989746094, "step": 5020 }, { "epoch": 2.47, "learning_rate": 8.302546951375656e-08, "loss": 0.0, "step": 5021 }, { "epoch": 2.47, "logps_train/chosen": -79.42743682861328, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -326.68096923828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5038275718688965, "rewards_train/margins": 18.858654499053955, "rewards_train/rejected": -20.36248207092285, "step": 5021 }, { "epoch": 2.47, "learning_rate": 8.287576755465031e-08, "loss": 0.0, "step": 5022 }, { "epoch": 2.47, "logps_train/chosen": -81.62892150878906, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -340.0897216796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5940932035446167, "rewards_train/margins": 19.451011776924133, "rewards_train/rejected": -21.04510498046875, "step": 5022 }, { "epoch": 2.47, "learning_rate": 8.272618848315788e-08, "loss": 0.0, "step": 5023 }, { "epoch": 2.47, "logps_train/chosen": -81.67771911621094, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -332.09674072265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5599596500396729, "rewards_train/margins": 18.689117193222046, "rewards_train/rejected": -20.24907684326172, "step": 5023 }, { "epoch": 2.47, "learning_rate": 8.257673234334567e-08, "loss": 0.0, "step": 5024 }, { "epoch": 2.47, "logps_train/chosen": -85.23273468017578, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -331.91937255859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.066681385040283, "rewards_train/margins": 18.589223384857178, "rewards_train/rejected": -20.65590476989746, "step": 5024 }, { "epoch": 2.47, "learning_rate": 8.242739917924486e-08, "loss": 0.0, "step": 5025 }, { "epoch": 2.47, "logps_train/chosen": -78.1259765625, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -322.2864074707031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3026373386383057, "rewards_train/margins": 18.014772176742554, "rewards_train/rejected": -19.31740951538086, "step": 5025 }, { "epoch": 2.47, "learning_rate": 8.227818903485013e-08, "loss": 0.0, "step": 5026 }, { "epoch": 2.47, "logps_train/chosen": -74.21954345703125, "logps_train/ref_chosen": -62.59375, "logps_train/ref_rejected": -121.6875, "logps_train/rejected": -309.5377502441406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1638489961624146, "rewards_train/margins": 17.621078372001648, "rewards_train/rejected": -18.784927368164062, "step": 5026 }, { "epoch": 2.48, "learning_rate": 8.212910195411937e-08, "loss": 0.0, "step": 5027 }, { "epoch": 2.48, "logps_train/chosen": -79.458984375, "logps_train/ref_chosen": -62.78125, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -320.6954345703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.666698932647705, "rewards_train/margins": 17.89766836166382, "rewards_train/rejected": -19.564367294311523, "step": 5027 }, { "epoch": 2.48, "learning_rate": 8.198013798097497e-08, "loss": 0.0002, "step": 5028 }, { "epoch": 2.48, "logps_train/chosen": -75.27499389648438, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -321.1014404296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1789641380310059, "rewards_train/margins": 18.582647800445557, "rewards_train/rejected": -19.761611938476562, "step": 5028 }, { "epoch": 2.48, "learning_rate": 8.183129715930264e-08, "loss": 0.0, "step": 5029 }, { "epoch": 2.48, "logps_train/chosen": -74.74697875976562, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -329.4427795410156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2026028633117676, "rewards_train/margins": 19.074439525604248, "rewards_train/rejected": -20.277042388916016, "step": 5029 }, { "epoch": 2.48, "learning_rate": 8.168257953295177e-08, "loss": 0.0, "step": 5030 }, { "epoch": 2.48, "logps_train/chosen": -78.114013671875, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -132.5, "logps_train/rejected": -336.1745300292969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3253660202026367, "rewards_train/margins": 19.038474082946777, "rewards_train/rejected": -20.363840103149414, "step": 5030 }, { "epoch": 2.48, "learning_rate": 8.153398514573578e-08, "loss": 0.0, "step": 5031 }, { "epoch": 2.48, "logps_train/chosen": -80.1566162109375, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -323.67169189453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4888054132461548, "rewards_train/margins": 18.34716260433197, "rewards_train/rejected": -19.835968017578125, "step": 5031 }, { "epoch": 2.48, "learning_rate": 8.138551404143146e-08, "loss": 0.0, "step": 5032 }, { "epoch": 2.48, "logps_train/chosen": -76.237548828125, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -123.3125, "logps_train/rejected": -327.56036376953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2966070175170898, "rewards_train/margins": 19.129448890686035, "rewards_train/rejected": -20.426055908203125, "step": 5032 }, { "epoch": 2.48, "learning_rate": 8.123716626377941e-08, "loss": 0.0, "step": 5033 }, { "epoch": 2.48, "logps_train/chosen": -77.04287719726562, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -132.25, "logps_train/rejected": -330.9268798828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.293447732925415, "rewards_train/margins": 18.580297231674194, "rewards_train/rejected": -19.87374496459961, "step": 5033 }, { "epoch": 2.48, "learning_rate": 8.10889418564838e-08, "loss": 0.0, "step": 5034 }, { "epoch": 2.48, "logps_train/chosen": -84.1414566040039, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -327.62835693359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.826450228691101, "rewards_train/margins": 18.221447110176086, "rewards_train/rejected": -20.047897338867188, "step": 5034 }, { "epoch": 2.48, "learning_rate": 8.094084086321268e-08, "loss": 0.0, "step": 5035 }, { "epoch": 2.48, "logps_train/chosen": -73.20887756347656, "logps_train/ref_chosen": -60.375, "logps_train/ref_rejected": -119.5625, "logps_train/rejected": -310.30877685546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2826552391052246, "rewards_train/margins": 17.79236650466919, "rewards_train/rejected": -19.075021743774414, "step": 5035 }, { "epoch": 2.48, "learning_rate": 8.079286332759761e-08, "loss": 0.0, "step": 5036 }, { "epoch": 2.48, "logps_train/chosen": -83.6137466430664, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -326.684326171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9166969060897827, "rewards_train/margins": 18.40876543521881, "rewards_train/rejected": -20.325462341308594, "step": 5036 }, { "epoch": 2.48, "learning_rate": 8.064500929323359e-08, "loss": 0.0001, "step": 5037 }, { "epoch": 2.48, "logps_train/chosen": -77.2624740600586, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -121.5625, "logps_train/rejected": -317.0625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3323023319244385, "rewards_train/margins": 18.217065572738647, "rewards_train/rejected": -19.549367904663086, "step": 5037 }, { "epoch": 2.48, "learning_rate": 8.049727880367968e-08, "loss": 0.0001, "step": 5038 }, { "epoch": 2.48, "logps_train/chosen": -82.18708038330078, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -329.7679138183594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6172430515289307, "rewards_train/margins": 18.429664373397827, "rewards_train/rejected": -20.046907424926758, "step": 5038 }, { "epoch": 2.48, "learning_rate": 8.034967190245811e-08, "loss": 0.0, "step": 5039 }, { "epoch": 2.48, "logps_train/chosen": -78.9416732788086, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -321.07818603515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.425417423248291, "rewards_train/margins": 18.271758556365967, "rewards_train/rejected": -19.697175979614258, "step": 5039 }, { "epoch": 2.48, "learning_rate": 8.020218863305489e-08, "loss": 0.0, "step": 5040 }, { "epoch": 2.48, "logps_train/chosen": -76.30624389648438, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -132.0, "logps_train/rejected": -333.86480712890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2183196544647217, "rewards_train/margins": 18.96728253364563, "rewards_train/rejected": -20.18560218811035, "step": 5040 }, { "epoch": 2.48, "learning_rate": 8.005482903891952e-08, "loss": 0.0, "step": 5041 }, { "epoch": 2.48, "logps_train/chosen": -76.52059936523438, "logps_train/ref_chosen": -62.59375, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -325.8789367675781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3919034004211426, "rewards_train/margins": 18.78505277633667, "rewards_train/rejected": -20.176956176757812, "step": 5041 }, { "epoch": 2.48, "learning_rate": 7.99075931634653e-08, "loss": 0.0, "step": 5042 }, { "epoch": 2.48, "logps_train/chosen": -79.12652587890625, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -120.0, "logps_train/rejected": -316.34442138671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5412172079086304, "rewards_train/margins": 18.096447587013245, "rewards_train/rejected": -19.637664794921875, "step": 5042 }, { "epoch": 2.48, "learning_rate": 7.976048105006877e-08, "loss": 0.0, "step": 5043 }, { "epoch": 2.48, "logps_train/chosen": -76.43695068359375, "logps_train/ref_chosen": -61.65625, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -328.7840270996094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.47743558883667, "rewards_train/margins": 18.89881944656372, "rewards_train/rejected": -20.37625503540039, "step": 5043 }, { "epoch": 2.48, "learning_rate": 7.961349274207013e-08, "loss": 0.0, "step": 5044 }, { "epoch": 2.48, "logps_train/chosen": -77.86328125, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -327.1373291015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2835450172424316, "rewards_train/margins": 18.76680898666382, "rewards_train/rejected": -20.05035400390625, "step": 5044 }, { "epoch": 2.48, "learning_rate": 7.946662828277345e-08, "loss": 0.0, "step": 5045 }, { "epoch": 2.48, "logps_train/chosen": -80.44831848144531, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -318.221435546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6676839590072632, "rewards_train/margins": 17.608805298805237, "rewards_train/rejected": -19.2764892578125, "step": 5045 }, { "epoch": 2.48, "learning_rate": 7.931988771544545e-08, "loss": 0.0, "step": 5046 }, { "epoch": 2.48, "logps_train/chosen": -77.54331970214844, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -329.7957763671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.385094165802002, "rewards_train/margins": 19.147855281829834, "rewards_train/rejected": -20.532949447631836, "step": 5046 }, { "epoch": 2.48, "learning_rate": 7.917327108331734e-08, "loss": 0.0, "step": 5047 }, { "epoch": 2.48, "logps_train/chosen": -86.25861358642578, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -317.6141052246094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.070343494415283, "rewards_train/margins": 17.191409587860107, "rewards_train/rejected": -19.26175308227539, "step": 5047 }, { "epoch": 2.49, "learning_rate": 7.902677842958316e-08, "loss": 0.0, "step": 5048 }, { "epoch": 2.49, "logps_train/chosen": -77.62760925292969, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -318.5291748046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.440373182296753, "rewards_train/margins": 18.08647084236145, "rewards_train/rejected": -19.526844024658203, "step": 5048 }, { "epoch": 2.49, "learning_rate": 7.888040979740063e-08, "loss": 0.0, "step": 5049 }, { "epoch": 2.49, "logps_train/chosen": -81.507568359375, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -336.2452087402344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.727416753768921, "rewards_train/margins": 19.12024712562561, "rewards_train/rejected": -20.84766387939453, "step": 5049 }, { "epoch": 2.49, "learning_rate": 7.873416522989107e-08, "loss": 0.0, "step": 5050 }, { "epoch": 2.49, "logps_train/chosen": -81.21351623535156, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -323.4351806640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6961078643798828, "rewards_train/margins": 18.248680114746094, "rewards_train/rejected": -19.944787979125977, "step": 5050 }, { "epoch": 2.49, "learning_rate": 7.858804477013903e-08, "loss": 0.0, "step": 5051 }, { "epoch": 2.49, "logps_train/chosen": -78.84381103515625, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -328.5339660644531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5336968898773193, "rewards_train/margins": 18.591184854507446, "rewards_train/rejected": -20.124881744384766, "step": 5051 }, { "epoch": 2.49, "learning_rate": 7.844204846119246e-08, "loss": 0.0, "step": 5052 }, { "epoch": 2.49, "logps_train/chosen": -80.89237976074219, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -131.125, "logps_train/rejected": -341.21417236328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5353320837020874, "rewards_train/margins": 19.47202455997467, "rewards_train/rejected": -21.007356643676758, "step": 5052 }, { "epoch": 2.49, "learning_rate": 7.829617634606311e-08, "loss": 0.0, "step": 5053 }, { "epoch": 2.49, "logps_train/chosen": -80.5341567993164, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -317.8382568359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.371359944343567, "rewards_train/margins": 17.847329020500183, "rewards_train/rejected": -19.21868896484375, "step": 5053 }, { "epoch": 2.49, "learning_rate": 7.81504284677258e-08, "loss": 0.0, "step": 5054 }, { "epoch": 2.49, "logps_train/chosen": -78.48826599121094, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -314.8449401855469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4578595161437988, "rewards_train/margins": 17.443822383880615, "rewards_train/rejected": -18.901681900024414, "step": 5054 }, { "epoch": 2.49, "learning_rate": 7.800480486911876e-08, "loss": 0.0, "step": 5055 }, { "epoch": 2.49, "logps_train/chosen": -80.47050476074219, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -326.1395263671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7773728370666504, "rewards_train/margins": 18.19600248336792, "rewards_train/rejected": -19.97337532043457, "step": 5055 }, { "epoch": 2.49, "learning_rate": 7.785930559314363e-08, "loss": 0.0, "step": 5056 }, { "epoch": 2.49, "logps_train/chosen": -78.02241516113281, "logps_train/ref_chosen": -62.53125, "logps_train/ref_rejected": -121.5, "logps_train/rejected": -311.870361328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5510210990905762, "rewards_train/margins": 17.484062671661377, "rewards_train/rejected": -19.035083770751953, "step": 5056 }, { "epoch": 2.49, "learning_rate": 7.771393068266569e-08, "loss": 0.0, "step": 5057 }, { "epoch": 2.49, "logps_train/chosen": -83.4299545288086, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -331.9824523925781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.820534586906433, "rewards_train/margins": 18.629809498786926, "rewards_train/rejected": -20.45034408569336, "step": 5057 }, { "epoch": 2.49, "learning_rate": 7.756868018051322e-08, "loss": 0.0, "step": 5058 }, { "epoch": 2.49, "logps_train/chosen": -83.86781311035156, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -334.93170166015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8537249565124512, "rewards_train/margins": 18.847017765045166, "rewards_train/rejected": -20.700742721557617, "step": 5058 }, { "epoch": 2.49, "learning_rate": 7.742355412947799e-08, "loss": 0.0, "step": 5059 }, { "epoch": 2.49, "logps_train/chosen": -74.24129486083984, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -327.19854736328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0879967212677002, "rewards_train/margins": 18.820042371749878, "rewards_train/rejected": -19.908039093017578, "step": 5059 }, { "epoch": 2.49, "learning_rate": 7.727855257231536e-08, "loss": 0.0, "step": 5060 }, { "epoch": 2.49, "logps_train/chosen": -79.10382080078125, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -328.7973327636719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4435369968414307, "rewards_train/margins": 19.03687834739685, "rewards_train/rejected": -20.48041534423828, "step": 5060 }, { "epoch": 2.49, "learning_rate": 7.713367555174333e-08, "loss": 0.0, "step": 5061 }, { "epoch": 2.49, "logps_train/chosen": -80.55747985839844, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -317.529541015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6531606912612915, "rewards_train/margins": 17.898085951805115, "rewards_train/rejected": -19.551246643066406, "step": 5061 }, { "epoch": 2.49, "learning_rate": 7.698892311044386e-08, "loss": 0.0, "step": 5062 }, { "epoch": 2.49, "logps_train/chosen": -71.59999084472656, "logps_train/ref_chosen": -61.75, "logps_train/ref_rejected": -121.9375, "logps_train/rejected": -310.83514404296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9828752279281616, "rewards_train/margins": 17.907280564308167, "rewards_train/rejected": -18.890155792236328, "step": 5062 }, { "epoch": 2.49, "learning_rate": 7.684429529106229e-08, "loss": 0.0, "step": 5063 }, { "epoch": 2.49, "logps_train/chosen": -79.41316223144531, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -325.44061279296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3133376836776733, "rewards_train/margins": 18.650548577308655, "rewards_train/rejected": -19.963886260986328, "step": 5063 }, { "epoch": 2.49, "learning_rate": 7.669979213620642e-08, "loss": 0.0, "step": 5064 }, { "epoch": 2.49, "logps_train/chosen": -80.81173706054688, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -130.375, "logps_train/rejected": -326.0669860839844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6262422800064087, "rewards_train/margins": 17.941346526145935, "rewards_train/rejected": -19.567588806152344, "step": 5064 }, { "epoch": 2.49, "learning_rate": 7.655541368844825e-08, "loss": 0.0, "step": 5065 }, { "epoch": 2.49, "logps_train/chosen": -74.43777465820312, "logps_train/ref_chosen": -62.0, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -318.6423034667969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.242410659790039, "rewards_train/margins": 18.0854434967041, "rewards_train/rejected": -19.32785415649414, "step": 5065 }, { "epoch": 2.49, "learning_rate": 7.64111599903225e-08, "loss": 0.0001, "step": 5066 }, { "epoch": 2.49, "logps_train/chosen": -77.50468444824219, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -320.5941162109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1481730937957764, "rewards_train/margins": 18.28194546699524, "rewards_train/rejected": -19.430118560791016, "step": 5066 }, { "epoch": 2.49, "learning_rate": 7.62670310843272e-08, "loss": 0.0, "step": 5067 }, { "epoch": 2.49, "logps_train/chosen": -81.89848327636719, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -326.8465270996094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8034709692001343, "rewards_train/margins": 18.079909920692444, "rewards_train/rejected": -19.883380889892578, "step": 5067 }, { "epoch": 2.5, "learning_rate": 7.612302701292389e-08, "loss": 0.0, "step": 5068 }, { "epoch": 2.5, "logps_train/chosen": -80.82086181640625, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -321.94378662109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6002988815307617, "rewards_train/margins": 17.945204734802246, "rewards_train/rejected": -19.545503616333008, "step": 5068 }, { "epoch": 2.5, "learning_rate": 7.597914781853704e-08, "loss": 0.0, "step": 5069 }, { "epoch": 2.5, "logps_train/chosen": -73.95748901367188, "logps_train/ref_chosen": -62.28125, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -319.0020751953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.166330337524414, "rewards_train/margins": 18.074987411499023, "rewards_train/rejected": -19.241317749023438, "step": 5069 }, { "epoch": 2.5, "learning_rate": 7.583539354355445e-08, "loss": 0.0, "step": 5070 }, { "epoch": 2.5, "logps_train/chosen": -79.34196472167969, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -326.19842529296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5791668891906738, "rewards_train/margins": 18.228806972503662, "rewards_train/rejected": -19.807973861694336, "step": 5070 }, { "epoch": 2.5, "learning_rate": 7.569176423032691e-08, "loss": 0.0, "step": 5071 }, { "epoch": 2.5, "logps_train/chosen": -78.03265380859375, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -323.13128662109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.370867371559143, "rewards_train/margins": 18.379468321800232, "rewards_train/rejected": -19.750335693359375, "step": 5071 }, { "epoch": 2.5, "learning_rate": 7.554825992116897e-08, "loss": 0.0, "step": 5072 }, { "epoch": 2.5, "logps_train/chosen": -77.687744140625, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -328.1835632324219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.338085651397705, "rewards_train/margins": 19.000728130340576, "rewards_train/rejected": -20.33881378173828, "step": 5072 }, { "epoch": 2.5, "learning_rate": 7.54048806583577e-08, "loss": 0.0001, "step": 5073 }, { "epoch": 2.5, "logps_train/chosen": -86.2943115234375, "logps_train/ref_chosen": -67.375, "logps_train/ref_rejected": -132.5, "logps_train/rejected": -333.7584533691406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8935421705245972, "rewards_train/margins": 18.229079365730286, "rewards_train/rejected": -20.122621536254883, "step": 5073 }, { "epoch": 2.5, "learning_rate": 7.526162648413354e-08, "loss": 0.0, "step": 5074 }, { "epoch": 2.5, "logps_train/chosen": -84.84451293945312, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -326.8199157714844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9452418088912964, "rewards_train/margins": 18.001495480537415, "rewards_train/rejected": -19.94673728942871, "step": 5074 }, { "epoch": 2.5, "learning_rate": 7.511849744070048e-08, "loss": 0.0, "step": 5075 }, { "epoch": 2.5, "logps_train/chosen": -81.14876556396484, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -330.08428955078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5438804626464844, "rewards_train/margins": 18.83847427368164, "rewards_train/rejected": -20.382354736328125, "step": 5075 }, { "epoch": 2.5, "learning_rate": 7.497549357022487e-08, "loss": 0.0, "step": 5076 }, { "epoch": 2.5, "logps_train/chosen": -76.18283081054688, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -316.25030517578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2527556419372559, "rewards_train/margins": 18.066511631011963, "rewards_train/rejected": -19.31926727294922, "step": 5076 }, { "epoch": 2.5, "learning_rate": 7.483261491483688e-08, "loss": 0.0, "step": 5077 }, { "epoch": 2.5, "logps_train/chosen": -78.71654510498047, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -133.875, "logps_train/rejected": -340.95941162109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3255119323730469, "rewards_train/margins": 19.378536224365234, "rewards_train/rejected": -20.70404815673828, "step": 5077 }, { "epoch": 2.5, "learning_rate": 7.468986151662954e-08, "loss": 0.0, "step": 5078 }, { "epoch": 2.5, "logps_train/chosen": -84.60635375976562, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -325.66058349609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8367582559585571, "rewards_train/margins": 18.012309432029724, "rewards_train/rejected": -19.84906768798828, "step": 5078 }, { "epoch": 2.5, "learning_rate": 7.454723341765872e-08, "loss": 0.0002, "step": 5079 }, { "epoch": 2.5, "logps_train/chosen": -80.96487426757812, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -315.8953857421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7662142515182495, "rewards_train/margins": 17.5164395570755, "rewards_train/rejected": -19.28265380859375, "step": 5079 }, { "epoch": 2.5, "learning_rate": 7.44047306599439e-08, "loss": 0.0, "step": 5080 }, { "epoch": 2.5, "logps_train/chosen": -76.04585266113281, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -319.65167236328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3349075317382812, "rewards_train/margins": 18.39129638671875, "rewards_train/rejected": -19.72620391845703, "step": 5080 }, { "epoch": 2.5, "learning_rate": 7.426235328546726e-08, "loss": 0.0, "step": 5081 }, { "epoch": 2.5, "logps_train/chosen": -74.51858520507812, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -328.96795654296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1462429761886597, "rewards_train/margins": 19.23541748523712, "rewards_train/rejected": -20.38166046142578, "step": 5081 }, { "epoch": 2.5, "learning_rate": 7.412010133617413e-08, "loss": 0.0, "step": 5082 }, { "epoch": 2.5, "logps_train/chosen": -78.05668640136719, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -310.6929016113281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5576465129852295, "rewards_train/margins": 17.260083436965942, "rewards_train/rejected": -18.817729949951172, "step": 5082 }, { "epoch": 2.5, "learning_rate": 7.397797485397273e-08, "loss": 0.0, "step": 5083 }, { "epoch": 2.5, "logps_train/chosen": -81.86955261230469, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -321.95123291015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6404707431793213, "rewards_train/margins": 17.821840047836304, "rewards_train/rejected": -19.462310791015625, "step": 5083 }, { "epoch": 2.5, "learning_rate": 7.383597388073481e-08, "loss": 0.0, "step": 5084 }, { "epoch": 2.5, "logps_train/chosen": -79.86875915527344, "logps_train/ref_chosen": -67.375, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -332.61962890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.252207636833191, "rewards_train/margins": 18.98973572254181, "rewards_train/rejected": -20.241943359375, "step": 5084 }, { "epoch": 2.5, "learning_rate": 7.369409845829461e-08, "loss": 0.0, "step": 5085 }, { "epoch": 2.5, "logps_train/chosen": -79.74363708496094, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -318.0537414550781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6398906707763672, "rewards_train/margins": 17.62603187561035, "rewards_train/rejected": -19.26592254638672, "step": 5085 }, { "epoch": 2.5, "learning_rate": 7.355234862844944e-08, "loss": 0.0, "step": 5086 }, { "epoch": 2.5, "logps_train/chosen": -77.7841796875, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -121.375, "logps_train/rejected": -315.79620361328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4819822311401367, "rewards_train/margins": 17.959403038024902, "rewards_train/rejected": -19.44138526916504, "step": 5086 }, { "epoch": 2.5, "learning_rate": 7.34107244329601e-08, "loss": 0.0, "step": 5087 }, { "epoch": 2.5, "logps_train/chosen": -82.4303207397461, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -325.950927734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8223285675048828, "rewards_train/margins": 18.19434928894043, "rewards_train/rejected": -20.016677856445312, "step": 5087 }, { "epoch": 2.51, "learning_rate": 7.326922591354989e-08, "loss": 0.0, "step": 5088 }, { "epoch": 2.51, "logps_train/chosen": -80.17524719238281, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -328.9116516113281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.366011142730713, "rewards_train/margins": 18.98277235031128, "rewards_train/rejected": -20.348783493041992, "step": 5088 }, { "epoch": 2.51, "learning_rate": 7.312785311190506e-08, "loss": 0.0, "step": 5089 }, { "epoch": 2.51, "logps_train/chosen": -79.59392547607422, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -121.5625, "logps_train/rejected": -313.35601806640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3720147609710693, "rewards_train/margins": 17.80714249610901, "rewards_train/rejected": -19.179157257080078, "step": 5089 }, { "epoch": 2.51, "learning_rate": 7.298660606967521e-08, "loss": 0.0, "step": 5090 }, { "epoch": 2.51, "logps_train/chosen": -76.31437683105469, "logps_train/ref_chosen": -61.75, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -313.6940612792969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4561938047409058, "rewards_train/margins": 17.52834904193878, "rewards_train/rejected": -18.984542846679688, "step": 5090 }, { "epoch": 2.51, "learning_rate": 7.284548482847269e-08, "loss": 0.0, "step": 5091 }, { "epoch": 2.51, "logps_train/chosen": -74.66680145263672, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -120.75, "logps_train/rejected": -317.4039611816406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0344538688659668, "rewards_train/margins": 18.633774280548096, "rewards_train/rejected": -19.668228149414062, "step": 5091 }, { "epoch": 2.51, "learning_rate": 7.270448942987262e-08, "loss": 0.0, "step": 5092 }, { "epoch": 2.51, "logps_train/chosen": -80.80311584472656, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -329.58154296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4435436725616455, "rewards_train/margins": 18.587366819381714, "rewards_train/rejected": -20.03091049194336, "step": 5092 }, { "epoch": 2.51, "learning_rate": 7.25636199154131e-08, "loss": 0.0, "step": 5093 }, { "epoch": 2.51, "logps_train/chosen": -76.81096649169922, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -319.8591613769531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3897881507873535, "rewards_train/margins": 17.94691038131714, "rewards_train/rejected": -19.336698532104492, "step": 5093 }, { "epoch": 2.51, "learning_rate": 7.242287632659555e-08, "loss": 0.0002, "step": 5094 }, { "epoch": 2.51, "logps_train/chosen": -81.98521423339844, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -329.98590087890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.766880750656128, "rewards_train/margins": 18.505732774734497, "rewards_train/rejected": -20.272613525390625, "step": 5094 }, { "epoch": 2.51, "learning_rate": 7.228225870488386e-08, "loss": 0.0, "step": 5095 }, { "epoch": 2.51, "logps_train/chosen": -76.36908721923828, "logps_train/ref_chosen": -62.15625, "logps_train/ref_rejected": -120.875, "logps_train/rejected": -313.44384765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.420917272567749, "rewards_train/margins": 17.836605310440063, "rewards_train/rejected": -19.257522583007812, "step": 5095 }, { "epoch": 2.51, "learning_rate": 7.214176709170483e-08, "loss": 0.0, "step": 5096 }, { "epoch": 2.51, "logps_train/chosen": -84.73457336425781, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -131.375, "logps_train/rejected": -333.3088684082031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8019728660583496, "rewards_train/margins": 18.391560077667236, "rewards_train/rejected": -20.193532943725586, "step": 5096 }, { "epoch": 2.51, "learning_rate": 7.20014015284483e-08, "loss": 0.0, "step": 5097 }, { "epoch": 2.51, "logps_train/chosen": -78.64630126953125, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -332.44586181640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3619928359985352, "rewards_train/margins": 18.96189022064209, "rewards_train/rejected": -20.323883056640625, "step": 5097 }, { "epoch": 2.51, "learning_rate": 7.186116205646686e-08, "loss": 0.0, "step": 5098 }, { "epoch": 2.51, "logps_train/chosen": -77.86170959472656, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -327.5059814453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2790172100067139, "rewards_train/margins": 19.16640591621399, "rewards_train/rejected": -20.445423126220703, "step": 5098 }, { "epoch": 2.51, "learning_rate": 7.172104871707612e-08, "loss": 0.0, "step": 5099 }, { "epoch": 2.51, "logps_train/chosen": -80.96087646484375, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -328.1159362792969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6348084211349487, "rewards_train/margins": 18.425027012825012, "rewards_train/rejected": -20.05983543395996, "step": 5099 }, { "epoch": 2.51, "learning_rate": 7.158106155155435e-08, "loss": 0.0, "step": 5100 }, { "epoch": 2.51, "logps_train/chosen": -78.80160522460938, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -120.875, "logps_train/rejected": -311.23651123046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4959807395935059, "rewards_train/margins": 17.536850452423096, "rewards_train/rejected": -19.0328311920166, "step": 5100 }, { "epoch": 2.51, "learning_rate": 7.144120060114261e-08, "loss": 0.0003, "step": 5101 }, { "epoch": 2.51, "logps_train/chosen": -88.7508544921875, "logps_train/ref_chosen": -67.5, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -343.9310302734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.12288761138916, "rewards_train/margins": 19.253220558166504, "rewards_train/rejected": -21.376108169555664, "step": 5101 }, { "epoch": 2.51, "learning_rate": 7.130146590704512e-08, "loss": 0.0, "step": 5102 }, { "epoch": 2.51, "logps_train/chosen": -76.92839050292969, "logps_train/ref_chosen": -61.21875, "logps_train/ref_rejected": -117.875, "logps_train/rejected": -313.4393310546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5724282264709473, "rewards_train/margins": 17.986642360687256, "rewards_train/rejected": -19.559070587158203, "step": 5102 }, { "epoch": 2.51, "learning_rate": 7.116185751042852e-08, "loss": 0.0, "step": 5103 }, { "epoch": 2.51, "logps_train/chosen": -78.90415954589844, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -321.499267578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5720806121826172, "rewards_train/margins": 18.145963668823242, "rewards_train/rejected": -19.71804428100586, "step": 5103 }, { "epoch": 2.51, "learning_rate": 7.102237545242229e-08, "loss": 0.0, "step": 5104 }, { "epoch": 2.51, "logps_train/chosen": -83.26920318603516, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -329.2723388671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6274573802947998, "rewards_train/margins": 18.519503831863403, "rewards_train/rejected": -20.146961212158203, "step": 5104 }, { "epoch": 2.51, "learning_rate": 7.088301977411898e-08, "loss": 0.0, "step": 5105 }, { "epoch": 2.51, "logps_train/chosen": -77.1565170288086, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -322.703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.253786563873291, "rewards_train/margins": 18.133418560028076, "rewards_train/rejected": -19.387205123901367, "step": 5105 }, { "epoch": 2.51, "learning_rate": 7.074379051657364e-08, "loss": 0.0, "step": 5106 }, { "epoch": 2.51, "logps_train/chosen": -82.88992309570312, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -312.80621337890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7012968063354492, "rewards_train/margins": 17.26760768890381, "rewards_train/rejected": -18.968904495239258, "step": 5106 }, { "epoch": 2.51, "learning_rate": 7.060468772080408e-08, "loss": 0.0, "step": 5107 }, { "epoch": 2.51, "logps_train/chosen": -81.61752319335938, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -122.3125, "logps_train/rejected": -317.50604248046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7597506046295166, "rewards_train/margins": 17.75882315635681, "rewards_train/rejected": -19.518573760986328, "step": 5107 }, { "epoch": 2.52, "learning_rate": 7.046571142779095e-08, "loss": 0.0001, "step": 5108 }, { "epoch": 2.52, "logps_train/chosen": -80.98072814941406, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -317.47021484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6569106578826904, "rewards_train/margins": 17.694411516189575, "rewards_train/rejected": -19.351322174072266, "step": 5108 }, { "epoch": 2.52, "learning_rate": 7.032686167847768e-08, "loss": 0.0, "step": 5109 }, { "epoch": 2.52, "logps_train/chosen": -76.68245697021484, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -326.85052490234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1697595119476318, "rewards_train/margins": 18.868223428726196, "rewards_train/rejected": -20.037982940673828, "step": 5109 }, { "epoch": 2.52, "learning_rate": 7.018813851377031e-08, "loss": 0.0, "step": 5110 }, { "epoch": 2.52, "logps_train/chosen": -79.53117370605469, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -328.198974609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.515080213546753, "rewards_train/margins": 18.461215257644653, "rewards_train/rejected": -19.976295471191406, "step": 5110 }, { "epoch": 2.52, "learning_rate": 7.004954197453749e-08, "loss": 0.0, "step": 5111 }, { "epoch": 2.52, "logps_train/chosen": -78.59719848632812, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -329.8089599609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2145050764083862, "rewards_train/margins": 18.942072987556458, "rewards_train/rejected": -20.156578063964844, "step": 5111 }, { "epoch": 2.52, "learning_rate": 6.991107210161102e-08, "loss": 0.0, "step": 5112 }, { "epoch": 2.52, "logps_train/chosen": -81.26529693603516, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -326.5079345703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.666129469871521, "rewards_train/margins": 18.467135071754456, "rewards_train/rejected": -20.133264541625977, "step": 5112 }, { "epoch": 2.52, "learning_rate": 6.977272893578461e-08, "loss": 0.0, "step": 5113 }, { "epoch": 2.52, "logps_train/chosen": -74.77656555175781, "logps_train/ref_chosen": -62.84375, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -312.3671569824219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.193281888961792, "rewards_train/margins": 17.8788845539093, "rewards_train/rejected": -19.072166442871094, "step": 5113 }, { "epoch": 2.52, "learning_rate": 6.963451251781549e-08, "loss": 0.0, "step": 5114 }, { "epoch": 2.52, "logps_train/chosen": -79.72752380371094, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -326.7127685546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5614243745803833, "rewards_train/margins": 18.93250834941864, "rewards_train/rejected": -20.493932723999023, "step": 5114 }, { "epoch": 2.52, "learning_rate": 6.949642288842295e-08, "loss": 0.0, "step": 5115 }, { "epoch": 2.52, "logps_train/chosen": -80.75852966308594, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -328.87493896484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.432347297668457, "rewards_train/margins": 18.409979820251465, "rewards_train/rejected": -19.842327117919922, "step": 5115 }, { "epoch": 2.52, "learning_rate": 6.935846008828906e-08, "loss": 0.0, "step": 5116 }, { "epoch": 2.52, "logps_train/chosen": -76.47315979003906, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -314.9736328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3155779838562012, "rewards_train/margins": 17.836960315704346, "rewards_train/rejected": -19.152538299560547, "step": 5116 }, { "epoch": 2.52, "learning_rate": 6.922062415805879e-08, "loss": 0.0001, "step": 5117 }, { "epoch": 2.52, "logps_train/chosen": -77.22796630859375, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -317.8976745605469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2998473644256592, "rewards_train/margins": 18.318633317947388, "rewards_train/rejected": -19.618480682373047, "step": 5117 }, { "epoch": 2.52, "learning_rate": 6.908291513833947e-08, "loss": 0.0, "step": 5118 }, { "epoch": 2.52, "logps_train/chosen": -74.9649658203125, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -324.4061279296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.098888874053955, "rewards_train/margins": 18.70920705795288, "rewards_train/rejected": -19.808095932006836, "step": 5118 }, { "epoch": 2.52, "learning_rate": 6.894533306970107e-08, "loss": 0.0, "step": 5119 }, { "epoch": 2.52, "logps_train/chosen": -78.01974487304688, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -329.43231201171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3799043893814087, "rewards_train/margins": 19.080121397972107, "rewards_train/rejected": -20.460025787353516, "step": 5119 }, { "epoch": 2.52, "learning_rate": 6.880787799267606e-08, "loss": 0.0, "step": 5120 }, { "epoch": 2.52, "logps_train/chosen": -77.71794891357422, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -335.1864318847656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2420096397399902, "rewards_train/margins": 19.313107013702393, "rewards_train/rejected": -20.555116653442383, "step": 5120 }, { "epoch": 2.52, "learning_rate": 6.867054994775995e-08, "loss": 0.0, "step": 5121 }, { "epoch": 2.52, "logps_train/chosen": -79.93501281738281, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -331.2489318847656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4294394254684448, "rewards_train/margins": 18.879047513008118, "rewards_train/rejected": -20.308486938476562, "step": 5121 }, { "epoch": 2.52, "learning_rate": 6.85333489754103e-08, "loss": 0.0, "step": 5122 }, { "epoch": 2.52, "logps_train/chosen": -76.85102081298828, "logps_train/ref_chosen": -62.46875, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -320.2475891113281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4373486042022705, "rewards_train/margins": 18.144930601119995, "rewards_train/rejected": -19.582279205322266, "step": 5122 }, { "epoch": 2.52, "learning_rate": 6.839627511604734e-08, "loss": 0.0, "step": 5123 }, { "epoch": 2.52, "logps_train/chosen": -82.64189147949219, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -121.375, "logps_train/rejected": -324.0882263183594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8301068544387817, "rewards_train/margins": 18.43897044658661, "rewards_train/rejected": -20.26907730102539, "step": 5123 }, { "epoch": 2.52, "learning_rate": 6.825932841005433e-08, "loss": 0.0, "step": 5124 }, { "epoch": 2.52, "logps_train/chosen": -78.30412292480469, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -326.92572021484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2934983968734741, "rewards_train/margins": 18.609915375709534, "rewards_train/rejected": -19.903413772583008, "step": 5124 }, { "epoch": 2.52, "learning_rate": 6.812250889777621e-08, "loss": 0.0, "step": 5125 }, { "epoch": 2.52, "logps_train/chosen": -81.1708755493164, "logps_train/ref_chosen": -67.5625, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -324.4901123046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.357541799545288, "rewards_train/margins": 18.270670175552368, "rewards_train/rejected": -19.628211975097656, "step": 5125 }, { "epoch": 2.52, "learning_rate": 6.798581661952119e-08, "loss": 0.0, "step": 5126 }, { "epoch": 2.52, "logps_train/chosen": -83.9051284790039, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -334.27728271484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7816996574401855, "rewards_train/margins": 18.549938678741455, "rewards_train/rejected": -20.33163833618164, "step": 5126 }, { "epoch": 2.52, "learning_rate": 6.784925161555999e-08, "loss": 0.0, "step": 5127 }, { "epoch": 2.52, "logps_train/chosen": -84.16450500488281, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -325.33056640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8441851139068604, "rewards_train/margins": 18.094390630722046, "rewards_train/rejected": -19.938575744628906, "step": 5127 }, { "epoch": 2.52, "learning_rate": 6.771281392612505e-08, "loss": 0.0, "step": 5128 }, { "epoch": 2.52, "logps_train/chosen": -76.213134765625, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -320.1406555175781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2044684886932373, "rewards_train/margins": 18.329808473587036, "rewards_train/rejected": -19.534276962280273, "step": 5128 }, { "epoch": 2.53, "learning_rate": 6.75765035914122e-08, "loss": 0.0, "step": 5129 }, { "epoch": 2.53, "logps_train/chosen": -81.62220001220703, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -322.879638671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.708606481552124, "rewards_train/margins": 18.022716760635376, "rewards_train/rejected": -19.7313232421875, "step": 5129 }, { "epoch": 2.53, "learning_rate": 6.744032065157928e-08, "loss": 0.0, "step": 5130 }, { "epoch": 2.53, "logps_train/chosen": -77.63424682617188, "logps_train/ref_chosen": -67.0625, "logps_train/ref_rejected": -131.5, "logps_train/rejected": -333.58154296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0604461431503296, "rewards_train/margins": 19.149028420448303, "rewards_train/rejected": -20.209474563598633, "step": 5130 }, { "epoch": 2.53, "learning_rate": 6.730426514674659e-08, "loss": 0.0001, "step": 5131 }, { "epoch": 2.53, "logps_train/chosen": -83.27333068847656, "logps_train/ref_chosen": -68.125, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -329.0732421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5123918056488037, "rewards_train/margins": 18.64683747291565, "rewards_train/rejected": -20.159229278564453, "step": 5131 }, { "epoch": 2.53, "learning_rate": 6.716833711699726e-08, "loss": 0.0001, "step": 5132 }, { "epoch": 2.53, "logps_train/chosen": -74.95387268066406, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -317.68939208984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0867445468902588, "rewards_train/margins": 18.11075758934021, "rewards_train/rejected": -19.19750213623047, "step": 5132 }, { "epoch": 2.53, "learning_rate": 6.703253660237645e-08, "loss": 0.0009, "step": 5133 }, { "epoch": 2.53, "logps_train/chosen": -78.84053802490234, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -333.4298095703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2882046699523926, "rewards_train/margins": 19.068938732147217, "rewards_train/rejected": -20.35714340209961, "step": 5133 }, { "epoch": 2.53, "learning_rate": 6.689686364289193e-08, "loss": 0.0, "step": 5134 }, { "epoch": 2.53, "logps_train/chosen": -80.31959533691406, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -323.299072265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6235120296478271, "rewards_train/margins": 18.12451195716858, "rewards_train/rejected": -19.748023986816406, "step": 5134 }, { "epoch": 2.53, "learning_rate": 6.676131827851372e-08, "loss": 0.0, "step": 5135 }, { "epoch": 2.53, "logps_train/chosen": -85.03962707519531, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -329.6130065917969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9422438144683838, "rewards_train/margins": 18.41793417930603, "rewards_train/rejected": -20.360177993774414, "step": 5135 }, { "epoch": 2.53, "learning_rate": 6.662590054917466e-08, "loss": 0.0001, "step": 5136 }, { "epoch": 2.53, "logps_train/chosen": -77.81716918945312, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -325.13836669921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4744410514831543, "rewards_train/margins": 18.73558473587036, "rewards_train/rejected": -20.210025787353516, "step": 5136 }, { "epoch": 2.53, "learning_rate": 6.649061049476956e-08, "loss": 0.0, "step": 5137 }, { "epoch": 2.53, "logps_train/chosen": -79.57980346679688, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -328.98046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.462472915649414, "rewards_train/margins": 18.546510696411133, "rewards_train/rejected": -20.008983612060547, "step": 5137 }, { "epoch": 2.53, "learning_rate": 6.635544815515576e-08, "loss": 0.0, "step": 5138 }, { "epoch": 2.53, "logps_train/chosen": -80.57479095458984, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -328.4967041015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.475984811782837, "rewards_train/margins": 18.689603567123413, "rewards_train/rejected": -20.16558837890625, "step": 5138 }, { "epoch": 2.53, "learning_rate": 6.622041357015317e-08, "loss": 0.0, "step": 5139 }, { "epoch": 2.53, "logps_train/chosen": -78.05909729003906, "logps_train/ref_chosen": -62.53125, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -314.7280578613281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5540788173675537, "rewards_train/margins": 17.46784996986389, "rewards_train/rejected": -19.021928787231445, "step": 5139 }, { "epoch": 2.53, "learning_rate": 6.608550677954378e-08, "loss": 0.0, "step": 5140 }, { "epoch": 2.53, "logps_train/chosen": -90.49417114257812, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -330.403076171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.4660189151763916, "rewards_train/margins": 17.54538321495056, "rewards_train/rejected": -20.011402130126953, "step": 5140 }, { "epoch": 2.53, "learning_rate": 6.595072782307198e-08, "loss": 0.0, "step": 5141 }, { "epoch": 2.53, "logps_train/chosen": -81.045654296875, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -323.4822082519531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.581958293914795, "rewards_train/margins": 18.420851230621338, "rewards_train/rejected": -20.002809524536133, "step": 5141 }, { "epoch": 2.53, "learning_rate": 6.581607674044465e-08, "loss": 0.0, "step": 5142 }, { "epoch": 2.53, "logps_train/chosen": -80.2274169921875, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -332.26361083984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5354857444763184, "rewards_train/margins": 18.877302646636963, "rewards_train/rejected": -20.41278839111328, "step": 5142 }, { "epoch": 2.53, "learning_rate": 6.568155357133092e-08, "loss": 0.0003, "step": 5143 }, { "epoch": 2.53, "logps_train/chosen": -82.48393249511719, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -328.2962341308594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5352590084075928, "rewards_train/margins": 18.45286202430725, "rewards_train/rejected": -19.988121032714844, "step": 5143 }, { "epoch": 2.53, "learning_rate": 6.554715835536223e-08, "loss": 0.0, "step": 5144 }, { "epoch": 2.53, "logps_train/chosen": -75.85002136230469, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -118.875, "logps_train/rejected": -309.05908203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2431560754776, "rewards_train/margins": 17.77432668209076, "rewards_train/rejected": -19.01748275756836, "step": 5144 }, { "epoch": 2.53, "learning_rate": 6.54128911321321e-08, "loss": 0.0, "step": 5145 }, { "epoch": 2.53, "logps_train/chosen": -79.13008117675781, "logps_train/ref_chosen": -62.40625, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -325.9064636230469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6725289821624756, "rewards_train/margins": 18.47134041786194, "rewards_train/rejected": -20.143869400024414, "step": 5145 }, { "epoch": 2.53, "learning_rate": 6.527875194119687e-08, "loss": 0.0, "step": 5146 }, { "epoch": 2.53, "logps_train/chosen": -76.44751739501953, "logps_train/ref_chosen": -61.9375, "logps_train/ref_rejected": -119.8125, "logps_train/rejected": -313.0110778808594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4508552551269531, "rewards_train/margins": 17.86665916442871, "rewards_train/rejected": -19.317514419555664, "step": 5146 }, { "epoch": 2.53, "learning_rate": 6.514474082207471e-08, "loss": 0.0, "step": 5147 }, { "epoch": 2.53, "logps_train/chosen": -77.71305847167969, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -324.57647705078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3301925659179688, "rewards_train/margins": 18.43726921081543, "rewards_train/rejected": -19.7674617767334, "step": 5147 }, { "epoch": 2.53, "learning_rate": 6.50108578142462e-08, "loss": 0.0001, "step": 5148 }, { "epoch": 2.53, "logps_train/chosen": -77.20108032226562, "logps_train/ref_chosen": -62.21875, "logps_train/ref_rejected": -119.125, "logps_train/rejected": -309.103271484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4992103576660156, "rewards_train/margins": 17.496273040771484, "rewards_train/rejected": -18.9954833984375, "step": 5148 }, { "epoch": 2.54, "learning_rate": 6.487710295715421e-08, "loss": 0.0001, "step": 5149 }, { "epoch": 2.54, "logps_train/chosen": -80.34497833251953, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -119.625, "logps_train/rejected": -309.7236328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6060309410095215, "rewards_train/margins": 17.400221347808838, "rewards_train/rejected": -19.00625228881836, "step": 5149 }, { "epoch": 2.54, "learning_rate": 6.474347629020366e-08, "loss": 0.0, "step": 5150 }, { "epoch": 2.54, "logps_train/chosen": -77.04508972167969, "logps_train/ref_chosen": -62.4375, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -323.44769287109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4616862535476685, "rewards_train/margins": 18.341774821281433, "rewards_train/rejected": -19.8034610748291, "step": 5150 }, { "epoch": 2.54, "learning_rate": 6.460997785276207e-08, "loss": 0.0, "step": 5151 }, { "epoch": 2.54, "logps_train/chosen": -77.22772979736328, "logps_train/ref_chosen": -60.5625, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -316.02069091796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6683295965194702, "rewards_train/margins": 17.65986406803131, "rewards_train/rejected": -19.32819366455078, "step": 5151 }, { "epoch": 2.54, "learning_rate": 6.447660768415897e-08, "loss": 0.0, "step": 5152 }, { "epoch": 2.54, "logps_train/chosen": -84.60926818847656, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -335.3878173828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.787782907485962, "rewards_train/margins": 18.913938283920288, "rewards_train/rejected": -20.70172119140625, "step": 5152 }, { "epoch": 2.54, "learning_rate": 6.434336582368594e-08, "loss": 0.0, "step": 5153 }, { "epoch": 2.54, "logps_train/chosen": -82.09580993652344, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -324.9224548339844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6501573324203491, "rewards_train/margins": 18.089354157447815, "rewards_train/rejected": -19.739511489868164, "step": 5153 }, { "epoch": 2.54, "learning_rate": 6.421025231059713e-08, "loss": 0.0, "step": 5154 }, { "epoch": 2.54, "logps_train/chosen": -76.30860137939453, "logps_train/ref_chosen": -63.40625, "logps_train/ref_rejected": -120.3125, "logps_train/rejected": -321.6854248046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.288916826248169, "rewards_train/margins": 18.847302198410034, "rewards_train/rejected": -20.136219024658203, "step": 5154 }, { "epoch": 2.54, "learning_rate": 6.407726718410861e-08, "loss": 0.0, "step": 5155 }, { "epoch": 2.54, "logps_train/chosen": -80.28990173339844, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -322.0926208496094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6852397918701172, "rewards_train/margins": 18.219141006469727, "rewards_train/rejected": -19.904380798339844, "step": 5155 }, { "epoch": 2.54, "learning_rate": 6.394441048339866e-08, "loss": 0.0, "step": 5156 }, { "epoch": 2.54, "logps_train/chosen": -80.71217346191406, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -327.625732421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5141372680664062, "rewards_train/margins": 18.680465698242188, "rewards_train/rejected": -20.194602966308594, "step": 5156 }, { "epoch": 2.54, "learning_rate": 6.381168224760769e-08, "loss": 0.0, "step": 5157 }, { "epoch": 2.54, "logps_train/chosen": -77.05755615234375, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -314.7037658691406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.358489990234375, "rewards_train/margins": 17.705638885498047, "rewards_train/rejected": -19.064128875732422, "step": 5157 }, { "epoch": 2.54, "learning_rate": 6.367908251583853e-08, "loss": 0.0001, "step": 5158 }, { "epoch": 2.54, "logps_train/chosen": -76.02229309082031, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -323.44036865234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2238121032714844, "rewards_train/margins": 18.394201278686523, "rewards_train/rejected": -19.618013381958008, "step": 5158 }, { "epoch": 2.54, "learning_rate": 6.354661132715583e-08, "loss": 0.0, "step": 5159 }, { "epoch": 2.54, "logps_train/chosen": -77.04068756103516, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -328.0440673828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3255040645599365, "rewards_train/margins": 18.95874047279358, "rewards_train/rejected": -20.284244537353516, "step": 5159 }, { "epoch": 2.54, "learning_rate": 6.341426872058647e-08, "loss": 0.0, "step": 5160 }, { "epoch": 2.54, "logps_train/chosen": -86.91673278808594, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -133.0, "logps_train/rejected": -347.6346740722656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.093040943145752, "rewards_train/margins": 19.365054607391357, "rewards_train/rejected": -21.45809555053711, "step": 5160 }, { "epoch": 2.54, "learning_rate": 6.328205473511967e-08, "loss": 0.0, "step": 5161 }, { "epoch": 2.54, "logps_train/chosen": -79.80548095703125, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -330.86737060546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.412334680557251, "rewards_train/margins": 18.89593482017517, "rewards_train/rejected": -20.308269500732422, "step": 5161 }, { "epoch": 2.54, "learning_rate": 6.314996940970624e-08, "loss": 0.0, "step": 5162 }, { "epoch": 2.54, "logps_train/chosen": -78.67318725585938, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -328.00787353515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4367029666900635, "rewards_train/margins": 18.748071432113647, "rewards_train/rejected": -20.18477439880371, "step": 5162 }, { "epoch": 2.54, "learning_rate": 6.301801278325958e-08, "loss": 0.0001, "step": 5163 }, { "epoch": 2.54, "logps_train/chosen": -84.81398010253906, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -312.95013427734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.028273105621338, "rewards_train/margins": 17.03861379623413, "rewards_train/rejected": -19.06688690185547, "step": 5163 }, { "epoch": 2.54, "learning_rate": 6.28861848946552e-08, "loss": 0.0, "step": 5164 }, { "epoch": 2.54, "logps_train/chosen": -80.5468978881836, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -328.35150146484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4853541851043701, "rewards_train/margins": 18.587979555130005, "rewards_train/rejected": -20.073333740234375, "step": 5164 }, { "epoch": 2.54, "learning_rate": 6.275448578273008e-08, "loss": 0.0, "step": 5165 }, { "epoch": 2.54, "logps_train/chosen": -76.45053100585938, "logps_train/ref_chosen": -61.0625, "logps_train/ref_rejected": -118.25, "logps_train/rejected": -307.8790588378906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5368010997772217, "rewards_train/margins": 17.426690340042114, "rewards_train/rejected": -18.963491439819336, "step": 5165 }, { "epoch": 2.54, "learning_rate": 6.262291548628396e-08, "loss": 0.0002, "step": 5166 }, { "epoch": 2.54, "logps_train/chosen": -80.36038208007812, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -330.2297058105469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4349639415740967, "rewards_train/margins": 18.71647572517395, "rewards_train/rejected": -20.151439666748047, "step": 5166 }, { "epoch": 2.54, "learning_rate": 6.249147404407828e-08, "loss": 0.0003, "step": 5167 }, { "epoch": 2.54, "logps_train/chosen": -78.27945709228516, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -323.70733642578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3031413555145264, "rewards_train/margins": 18.63190197944641, "rewards_train/rejected": -19.935043334960938, "step": 5167 }, { "epoch": 2.54, "learning_rate": 6.236016149483647e-08, "loss": 0.0, "step": 5168 }, { "epoch": 2.54, "logps_train/chosen": -79.1373291015625, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -330.83203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3050906658172607, "rewards_train/margins": 19.06990933418274, "rewards_train/rejected": -20.375, "step": 5168 }, { "epoch": 2.55, "learning_rate": 6.222897787724423e-08, "loss": 0.0, "step": 5169 }, { "epoch": 2.55, "logps_train/chosen": -79.6631088256836, "logps_train/ref_chosen": -61.96875, "logps_train/ref_rejected": -121.5, "logps_train/rejected": -315.216796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.770998477935791, "rewards_train/margins": 17.60234022140503, "rewards_train/rejected": -19.37333869934082, "step": 5169 }, { "epoch": 2.55, "learning_rate": 6.20979232299491e-08, "loss": 0.0, "step": 5170 }, { "epoch": 2.55, "logps_train/chosen": -74.58042907714844, "logps_train/ref_chosen": -62.5625, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -325.82080078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2030141353607178, "rewards_train/margins": 18.99879288673401, "rewards_train/rejected": -20.201807022094727, "step": 5170 }, { "epoch": 2.55, "learning_rate": 6.196699759156065e-08, "loss": 0.0, "step": 5171 }, { "epoch": 2.55, "logps_train/chosen": -79.30450439453125, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -120.4375, "logps_train/rejected": -310.9139404296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5458319187164307, "rewards_train/margins": 17.502252340316772, "rewards_train/rejected": -19.048084259033203, "step": 5171 }, { "epoch": 2.55, "learning_rate": 6.183620100065035e-08, "loss": 0.0, "step": 5172 }, { "epoch": 2.55, "logps_train/chosen": -87.6695327758789, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -326.1662292480469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0563089847564697, "rewards_train/margins": 17.85797095298767, "rewards_train/rejected": -19.91427993774414, "step": 5172 }, { "epoch": 2.55, "learning_rate": 6.170553349575197e-08, "loss": 0.0, "step": 5173 }, { "epoch": 2.55, "logps_train/chosen": -75.87176513671875, "logps_train/ref_chosen": -62.34375, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -315.93603515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3520206212997437, "rewards_train/margins": 17.95418131351471, "rewards_train/rejected": -19.306201934814453, "step": 5173 }, { "epoch": 2.55, "learning_rate": 6.15749951153609e-08, "loss": 0.0001, "step": 5174 }, { "epoch": 2.55, "logps_train/chosen": -78.70234680175781, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -329.9023132324219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3734084367752075, "rewards_train/margins": 18.9079350233078, "rewards_train/rejected": -20.281343460083008, "step": 5174 }, { "epoch": 2.55, "learning_rate": 6.144458589793461e-08, "loss": 0.0001, "step": 5175 }, { "epoch": 2.55, "logps_train/chosen": -82.71556854248047, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -335.1292724609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6048088073730469, "rewards_train/margins": 19.17218017578125, "rewards_train/rejected": -20.776988983154297, "step": 5175 }, { "epoch": 2.55, "learning_rate": 6.131430588189273e-08, "loss": 0.0, "step": 5176 }, { "epoch": 2.55, "logps_train/chosen": -86.25237274169922, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -335.07073974609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.1144957542419434, "rewards_train/margins": 18.730177402496338, "rewards_train/rejected": -20.84467315673828, "step": 5176 }, { "epoch": 2.55, "learning_rate": 6.118415510561631e-08, "loss": 0.0, "step": 5177 }, { "epoch": 2.55, "logps_train/chosen": -77.67901611328125, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -328.40594482421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2695612907409668, "rewards_train/margins": 18.772400379180908, "rewards_train/rejected": -20.041961669921875, "step": 5177 }, { "epoch": 2.55, "learning_rate": 6.105413360744882e-08, "loss": 0.0, "step": 5178 }, { "epoch": 2.55, "logps_train/chosen": -74.85881042480469, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -314.6304931640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.13851797580719, "rewards_train/margins": 17.9553884267807, "rewards_train/rejected": -19.09390640258789, "step": 5178 }, { "epoch": 2.55, "learning_rate": 6.092424142569564e-08, "loss": 0.0, "step": 5179 }, { "epoch": 2.55, "logps_train/chosen": -77.67005920410156, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -325.4855041503906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.262244701385498, "rewards_train/margins": 18.682156085968018, "rewards_train/rejected": -19.944400787353516, "step": 5179 }, { "epoch": 2.55, "learning_rate": 6.079447859862352e-08, "loss": 0.0, "step": 5180 }, { "epoch": 2.55, "logps_train/chosen": -75.27610778808594, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -122.6875, "logps_train/rejected": -311.8114929199219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.189183235168457, "rewards_train/margins": 17.725607872009277, "rewards_train/rejected": -18.914791107177734, "step": 5180 }, { "epoch": 2.55, "learning_rate": 6.06648451644618e-08, "loss": 0.0, "step": 5181 }, { "epoch": 2.55, "logps_train/chosen": -85.29420471191406, "logps_train/ref_chosen": -67.75, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -335.7222900390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.754127860069275, "rewards_train/margins": 18.986215233802795, "rewards_train/rejected": -20.74034309387207, "step": 5181 }, { "epoch": 2.55, "learning_rate": 6.053534116140119e-08, "loss": 0.0, "step": 5182 }, { "epoch": 2.55, "logps_train/chosen": -80.39312744140625, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -121.9375, "logps_train/rejected": -315.5221252441406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7897040843963623, "rewards_train/margins": 17.568124055862427, "rewards_train/rejected": -19.35782814025879, "step": 5182 }, { "epoch": 2.55, "learning_rate": 6.04059666275944e-08, "loss": 0.0002, "step": 5183 }, { "epoch": 2.55, "logps_train/chosen": -78.55155181884766, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -322.9571533203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.367215633392334, "rewards_train/margins": 18.40984582901001, "rewards_train/rejected": -19.777061462402344, "step": 5183 }, { "epoch": 2.55, "learning_rate": 6.027672160115621e-08, "loss": 0.0, "step": 5184 }, { "epoch": 2.55, "logps_train/chosen": -80.79006958007812, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -326.1536865234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4141144752502441, "rewards_train/margins": 18.4011549949646, "rewards_train/rejected": -19.815269470214844, "step": 5184 }, { "epoch": 2.55, "learning_rate": 6.014760612016296e-08, "loss": 0.0002, "step": 5185 }, { "epoch": 2.55, "logps_train/chosen": -81.44013977050781, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -317.2323303222656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6929888725280762, "rewards_train/margins": 17.631222248077393, "rewards_train/rejected": -19.32421112060547, "step": 5185 }, { "epoch": 2.55, "learning_rate": 6.001862022265297e-08, "loss": 0.0, "step": 5186 }, { "epoch": 2.55, "logps_train/chosen": -82.8729248046875, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -325.31500244140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8047736883163452, "rewards_train/margins": 18.277120232582092, "rewards_train/rejected": -20.081893920898438, "step": 5186 }, { "epoch": 2.55, "learning_rate": 5.988976394662632e-08, "loss": 0.0002, "step": 5187 }, { "epoch": 2.55, "logps_train/chosen": -81.01891326904297, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -324.98388671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5868525505065918, "rewards_train/margins": 18.316223621368408, "rewards_train/rejected": -19.903076171875, "step": 5187 }, { "epoch": 2.55, "learning_rate": 5.9761037330045e-08, "loss": 0.0, "step": 5188 }, { "epoch": 2.55, "logps_train/chosen": -81.00084686279297, "logps_train/ref_chosen": -61.90625, "logps_train/ref_rejected": -121.25, "logps_train/rejected": -325.06671142578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9087278842926025, "rewards_train/margins": 18.476461172103882, "rewards_train/rejected": -20.385189056396484, "step": 5188 }, { "epoch": 2.55, "learning_rate": 5.963244041083271e-08, "loss": 0.0, "step": 5189 }, { "epoch": 2.55, "logps_train/chosen": -78.47129821777344, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -121.375, "logps_train/rejected": -309.96685791015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5028921365737915, "rewards_train/margins": 17.353511214256287, "rewards_train/rejected": -18.856403350830078, "step": 5189 }, { "epoch": 2.56, "learning_rate": 5.950397322687489e-08, "loss": 0.0, "step": 5190 }, { "epoch": 2.56, "logps_train/chosen": -78.74234008789062, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -329.33392333984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.575650691986084, "rewards_train/margins": 18.972778797149658, "rewards_train/rejected": -20.548429489135742, "step": 5190 }, { "epoch": 2.56, "learning_rate": 5.937563581601901e-08, "loss": 0.0002, "step": 5191 }, { "epoch": 2.56, "logps_train/chosen": -77.95502471923828, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -334.6644592285156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2460393905639648, "rewards_train/margins": 19.298630714416504, "rewards_train/rejected": -20.54467010498047, "step": 5191 }, { "epoch": 2.56, "learning_rate": 5.9247428216074034e-08, "loss": 0.0, "step": 5192 }, { "epoch": 2.56, "logps_train/chosen": -78.01838684082031, "logps_train/ref_chosen": -62.34375, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -317.4227294921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.56643807888031, "rewards_train/margins": 17.92734968662262, "rewards_train/rejected": -19.49378776550293, "step": 5192 }, { "epoch": 2.56, "learning_rate": 5.911935046481076e-08, "loss": 0.0, "step": 5193 }, { "epoch": 2.56, "logps_train/chosen": -85.899658203125, "logps_train/ref_chosen": -67.75, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -336.92852783203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8152096271514893, "rewards_train/margins": 19.02954936027527, "rewards_train/rejected": -20.844758987426758, "step": 5193 }, { "epoch": 2.56, "learning_rate": 5.8991402599961825e-08, "loss": 0.0, "step": 5194 }, { "epoch": 2.56, "logps_train/chosen": -82.59382629394531, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -324.9219665527344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.735994577407837, "rewards_train/margins": 18.0302255153656, "rewards_train/rejected": -19.766220092773438, "step": 5194 }, { "epoch": 2.56, "learning_rate": 5.886358465922153e-08, "loss": 0.0, "step": 5195 }, { "epoch": 2.56, "logps_train/chosen": -77.2127685546875, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -324.18310546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2821166515350342, "rewards_train/margins": 18.639220476150513, "rewards_train/rejected": -19.921337127685547, "step": 5195 }, { "epoch": 2.56, "learning_rate": 5.8735896680245924e-08, "loss": 0.0, "step": 5196 }, { "epoch": 2.56, "logps_train/chosen": -80.27061462402344, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -313.8284912109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5618271827697754, "rewards_train/margins": 17.421510219573975, "rewards_train/rejected": -18.98333740234375, "step": 5196 }, { "epoch": 2.56, "learning_rate": 5.8608338700652624e-08, "loss": 0.0001, "step": 5197 }, { "epoch": 2.56, "logps_train/chosen": -79.64305877685547, "logps_train/ref_chosen": -63.15625, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -319.3769226074219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6492185592651367, "rewards_train/margins": 17.836228370666504, "rewards_train/rejected": -19.48544692993164, "step": 5197 }, { "epoch": 2.56, "learning_rate": 5.84809107580212e-08, "loss": 0.0001, "step": 5198 }, { "epoch": 2.56, "logps_train/chosen": -77.44490814208984, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -325.7096862792969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5308191776275635, "rewards_train/margins": 18.58663535118103, "rewards_train/rejected": -20.117454528808594, "step": 5198 }, { "epoch": 2.56, "learning_rate": 5.83536128898926e-08, "loss": 0.0, "step": 5199 }, { "epoch": 2.56, "logps_train/chosen": -79.53105926513672, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -323.5087585449219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7210501432418823, "rewards_train/margins": 18.14169132709503, "rewards_train/rejected": -19.862741470336914, "step": 5199 }, { "epoch": 2.56, "learning_rate": 5.8226445133769896e-08, "loss": 0.0001, "step": 5200 }, { "epoch": 2.56, "logps_train/chosen": -82.44964599609375, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -131.5, "logps_train/rejected": -337.25146484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6340758800506592, "rewards_train/margins": 18.940293550491333, "rewards_train/rejected": -20.574369430541992, "step": 5200 }, { "epoch": 2.56, "learning_rate": 5.8099407527117395e-08, "loss": 0.0, "step": 5201 }, { "epoch": 2.56, "logps_train/chosen": -80.54981994628906, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -321.81134033203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5003435611724854, "rewards_train/margins": 18.03284239768982, "rewards_train/rejected": -19.533185958862305, "step": 5201 }, { "epoch": 2.56, "learning_rate": 5.797250010736121e-08, "loss": 0.0, "step": 5202 }, { "epoch": 2.56, "logps_train/chosen": -72.79006958007812, "logps_train/ref_chosen": -61.9375, "logps_train/ref_rejected": -121.3125, "logps_train/rejected": -312.3494873046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0849153995513916, "rewards_train/margins": 18.019906759262085, "rewards_train/rejected": -19.104822158813477, "step": 5202 }, { "epoch": 2.56, "learning_rate": 5.7845722911889325e-08, "loss": 0.0001, "step": 5203 }, { "epoch": 2.56, "logps_train/chosen": -80.05948638916016, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -132.625, "logps_train/rejected": -340.9627990722656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5950108766555786, "rewards_train/margins": 19.236081957817078, "rewards_train/rejected": -20.831092834472656, "step": 5203 }, { "epoch": 2.56, "learning_rate": 5.771907597805098e-08, "loss": 0.0, "step": 5204 }, { "epoch": 2.56, "logps_train/chosen": -82.02597045898438, "logps_train/ref_chosen": -62.15625, "logps_train/ref_rejected": -120.1875, "logps_train/rejected": -317.8241882324219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9862889051437378, "rewards_train/margins": 17.774548649787903, "rewards_train/rejected": -19.76083755493164, "step": 5204 }, { "epoch": 2.56, "learning_rate": 5.7592559343157154e-08, "loss": 0.0, "step": 5205 }, { "epoch": 2.56, "logps_train/chosen": -84.24420928955078, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -325.6629638671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9198553562164307, "rewards_train/margins": 18.317094564437866, "rewards_train/rejected": -20.236949920654297, "step": 5205 }, { "epoch": 2.56, "learning_rate": 5.74661730444807e-08, "loss": 0.0, "step": 5206 }, { "epoch": 2.56, "logps_train/chosen": -79.46394348144531, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -333.75042724609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3842365741729736, "rewards_train/margins": 19.106919050216675, "rewards_train/rejected": -20.49115562438965, "step": 5206 }, { "epoch": 2.56, "learning_rate": 5.7339917119255786e-08, "loss": 0.0001, "step": 5207 }, { "epoch": 2.56, "logps_train/chosen": -82.51834106445312, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -315.45855712890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6574004888534546, "rewards_train/margins": 17.447248101234436, "rewards_train/rejected": -19.10464859008789, "step": 5207 }, { "epoch": 2.56, "learning_rate": 5.721379160467826e-08, "loss": 0.0, "step": 5208 }, { "epoch": 2.56, "logps_train/chosen": -78.89192199707031, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -120.375, "logps_train/rejected": -313.6276550292969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6219074726104736, "rewards_train/margins": 17.70657992362976, "rewards_train/rejected": -19.328487396240234, "step": 5208 }, { "epoch": 2.56, "learning_rate": 5.708779653790546e-08, "loss": 0.0, "step": 5209 }, { "epoch": 2.56, "logps_train/chosen": -79.47521209716797, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -120.875, "logps_train/rejected": -315.1311340332031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6181268692016602, "rewards_train/margins": 17.805094718933105, "rewards_train/rejected": -19.423221588134766, "step": 5209 }, { "epoch": 2.57, "learning_rate": 5.696193195605653e-08, "loss": 0.0, "step": 5210 }, { "epoch": 2.57, "logps_train/chosen": -83.86019134521484, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -330.69073486328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8025720119476318, "rewards_train/margins": 18.667721033096313, "rewards_train/rejected": -20.470293045043945, "step": 5210 }, { "epoch": 2.57, "learning_rate": 5.6836197896211965e-08, "loss": 0.0002, "step": 5211 }, { "epoch": 2.57, "logps_train/chosen": -80.86898803710938, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -323.8793029785156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5196874141693115, "rewards_train/margins": 18.246418237686157, "rewards_train/rejected": -19.76610565185547, "step": 5211 }, { "epoch": 2.57, "learning_rate": 5.6710594395413813e-08, "loss": 0.0001, "step": 5212 }, { "epoch": 2.57, "logps_train/chosen": -85.32737731933594, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -335.3787841796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8329823017120361, "rewards_train/margins": 18.87389063835144, "rewards_train/rejected": -20.706872940063477, "step": 5212 }, { "epoch": 2.57, "learning_rate": 5.658512149066591e-08, "loss": 0.0, "step": 5213 }, { "epoch": 2.57, "logps_train/chosen": -80.27153778076172, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -324.33642578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3560600280761719, "rewards_train/margins": 18.287982940673828, "rewards_train/rejected": -19.64404296875, "step": 5213 }, { "epoch": 2.57, "learning_rate": 5.645977921893308e-08, "loss": 0.0, "step": 5214 }, { "epoch": 2.57, "logps_train/chosen": -78.68370056152344, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -322.42169189453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.563389539718628, "rewards_train/margins": 18.312472581863403, "rewards_train/rejected": -19.87586212158203, "step": 5214 }, { "epoch": 2.57, "learning_rate": 5.633456761714223e-08, "loss": 0.0, "step": 5215 }, { "epoch": 2.57, "logps_train/chosen": -81.9998550415039, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -323.49169921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6553072929382324, "rewards_train/margins": 17.94420289993286, "rewards_train/rejected": -19.599510192871094, "step": 5215 }, { "epoch": 2.57, "learning_rate": 5.6209486722181674e-08, "loss": 0.0, "step": 5216 }, { "epoch": 2.57, "logps_train/chosen": -85.5667495727539, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -337.22393798828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.960141897201538, "rewards_train/margins": 18.829588174819946, "rewards_train/rejected": -20.789730072021484, "step": 5216 }, { "epoch": 2.57, "learning_rate": 5.608453657090073e-08, "loss": 0.0001, "step": 5217 }, { "epoch": 2.57, "logps_train/chosen": -79.7217788696289, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -335.8778991699219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5882912874221802, "rewards_train/margins": 19.072545886039734, "rewards_train/rejected": -20.660837173461914, "step": 5217 }, { "epoch": 2.57, "learning_rate": 5.595971720011089e-08, "loss": 0.0, "step": 5218 }, { "epoch": 2.57, "logps_train/chosen": -79.69873046875, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -326.8307800292969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4776849746704102, "rewards_train/margins": 18.671996116638184, "rewards_train/rejected": -20.149681091308594, "step": 5218 }, { "epoch": 2.57, "learning_rate": 5.583502864658457e-08, "loss": 0.0, "step": 5219 }, { "epoch": 2.57, "logps_train/chosen": -78.73295593261719, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -130.625, "logps_train/rejected": -333.1487731933594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4647507667541504, "rewards_train/margins": 18.78479528427124, "rewards_train/rejected": -20.24954605102539, "step": 5219 }, { "epoch": 2.57, "learning_rate": 5.571047094705589e-08, "loss": 0.0, "step": 5220 }, { "epoch": 2.57, "logps_train/chosen": -82.55747985839844, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -324.85064697265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7543803453445435, "rewards_train/margins": 18.096896052360535, "rewards_train/rejected": -19.851276397705078, "step": 5220 }, { "epoch": 2.57, "learning_rate": 5.558604413822049e-08, "loss": 0.0, "step": 5221 }, { "epoch": 2.57, "logps_train/chosen": -84.22572326660156, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -326.0291748046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9730610847473145, "rewards_train/margins": 18.23132085800171, "rewards_train/rejected": -20.204381942749023, "step": 5221 }, { "epoch": 2.57, "learning_rate": 5.546174825673527e-08, "loss": 0.0, "step": 5222 }, { "epoch": 2.57, "logps_train/chosen": -77.82283020019531, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -323.76300048828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.417902946472168, "rewards_train/margins": 18.357321739196777, "rewards_train/rejected": -19.775224685668945, "step": 5222 }, { "epoch": 2.57, "learning_rate": 5.533758333921873e-08, "loss": 0.0, "step": 5223 }, { "epoch": 2.57, "logps_train/chosen": -72.33959197998047, "logps_train/ref_chosen": -59.46875, "logps_train/ref_rejected": -117.125, "logps_train/rejected": -311.3470153808594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.288109302520752, "rewards_train/margins": 18.136924266815186, "rewards_train/rejected": -19.425033569335938, "step": 5223 }, { "epoch": 2.57, "learning_rate": 5.521354942225043e-08, "loss": 0.0001, "step": 5224 }, { "epoch": 2.57, "logps_train/chosen": -77.37821960449219, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -322.9646911621094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.376347541809082, "rewards_train/margins": 18.340336799621582, "rewards_train/rejected": -19.716684341430664, "step": 5224 }, { "epoch": 2.57, "learning_rate": 5.508964654237192e-08, "loss": 0.0, "step": 5225 }, { "epoch": 2.57, "logps_train/chosen": -76.28075408935547, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -322.47149658203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.207421064376831, "rewards_train/margins": 18.459162950515747, "rewards_train/rejected": -19.666584014892578, "step": 5225 }, { "epoch": 2.57, "learning_rate": 5.4965874736085717e-08, "loss": 0.0, "step": 5226 }, { "epoch": 2.57, "logps_train/chosen": -84.69446563720703, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -132.125, "logps_train/rejected": -342.4219665527344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7990853786468506, "rewards_train/margins": 19.23403000831604, "rewards_train/rejected": -21.03311538696289, "step": 5226 }, { "epoch": 2.57, "learning_rate": 5.4842234039855664e-08, "loss": 0.0, "step": 5227 }, { "epoch": 2.57, "logps_train/chosen": -77.43878173828125, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -328.051025390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4065009355545044, "rewards_train/margins": 18.825945734977722, "rewards_train/rejected": -20.232446670532227, "step": 5227 }, { "epoch": 2.57, "learning_rate": 5.4718724490107504e-08, "loss": 0.0, "step": 5228 }, { "epoch": 2.57, "logps_train/chosen": -80.935302734375, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -331.84295654296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5666751861572266, "rewards_train/margins": 18.75707244873047, "rewards_train/rejected": -20.323747634887695, "step": 5228 }, { "epoch": 2.57, "learning_rate": 5.459534612322753e-08, "loss": 0.0, "step": 5229 }, { "epoch": 2.57, "logps_train/chosen": -78.86956024169922, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -329.783203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3602468967437744, "rewards_train/margins": 18.774030447006226, "rewards_train/rejected": -20.13427734375, "step": 5229 }, { "epoch": 2.58, "learning_rate": 5.4472098975564094e-08, "loss": 0.0, "step": 5230 }, { "epoch": 2.58, "logps_train/chosen": -78.42607116699219, "logps_train/ref_chosen": -62.1875, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -320.1239318847656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6225392818450928, "rewards_train/margins": 18.11710000038147, "rewards_train/rejected": -19.739639282226562, "step": 5230 }, { "epoch": 2.58, "learning_rate": 5.434898308342678e-08, "loss": 0.0, "step": 5231 }, { "epoch": 2.58, "logps_train/chosen": -82.82923126220703, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -324.31378173828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7385873794555664, "rewards_train/margins": 18.29293918609619, "rewards_train/rejected": -20.031526565551758, "step": 5231 }, { "epoch": 2.58, "learning_rate": 5.422599848308601e-08, "loss": 0.0001, "step": 5232 }, { "epoch": 2.58, "logps_train/chosen": -76.01193237304688, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -120.5625, "logps_train/rejected": -318.6463317871094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3763890266418457, "rewards_train/margins": 18.434239864349365, "rewards_train/rejected": -19.81062889099121, "step": 5232 }, { "epoch": 2.58, "learning_rate": 5.410314521077419e-08, "loss": 0.0, "step": 5233 }, { "epoch": 2.58, "logps_train/chosen": -81.31045532226562, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -316.4424133300781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6944736242294312, "rewards_train/margins": 17.691857933998108, "rewards_train/rejected": -19.38633155822754, "step": 5233 }, { "epoch": 2.58, "learning_rate": 5.39804233026846e-08, "loss": 0.0, "step": 5234 }, { "epoch": 2.58, "logps_train/chosen": -77.02376556396484, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -316.5279235839844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3664880990982056, "rewards_train/margins": 17.880055785179138, "rewards_train/rejected": -19.246543884277344, "step": 5234 }, { "epoch": 2.58, "learning_rate": 5.385783279497191e-08, "loss": 0.0, "step": 5235 }, { "epoch": 2.58, "logps_train/chosen": -81.09276580810547, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -332.9834899902344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7213859558105469, "rewards_train/margins": 18.84708023071289, "rewards_train/rejected": -20.568466186523438, "step": 5235 }, { "epoch": 2.58, "learning_rate": 5.373537372375209e-08, "loss": 0.0001, "step": 5236 }, { "epoch": 2.58, "logps_train/chosen": -77.0179214477539, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -313.9892272949219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3791844844818115, "rewards_train/margins": 17.85528588294983, "rewards_train/rejected": -19.23447036743164, "step": 5236 }, { "epoch": 2.58, "learning_rate": 5.361304612510248e-08, "loss": 0.0, "step": 5237 }, { "epoch": 2.58, "logps_train/chosen": -77.17784118652344, "logps_train/ref_chosen": -62.875, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -334.80023193359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.428379774093628, "rewards_train/margins": 19.152472734451294, "rewards_train/rejected": -20.580852508544922, "step": 5237 }, { "epoch": 2.58, "learning_rate": 5.349085003506165e-08, "loss": 0.0001, "step": 5238 }, { "epoch": 2.58, "logps_train/chosen": -85.3487548828125, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -318.34765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9704222679138184, "rewards_train/margins": 17.50536012649536, "rewards_train/rejected": -19.47578239440918, "step": 5238 }, { "epoch": 2.58, "learning_rate": 5.3368785489629255e-08, "loss": 0.0, "step": 5239 }, { "epoch": 2.58, "logps_train/chosen": -79.11971282958984, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -323.00152587890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4764735698699951, "rewards_train/margins": 18.235108613967896, "rewards_train/rejected": -19.71158218383789, "step": 5239 }, { "epoch": 2.58, "learning_rate": 5.3246852524766464e-08, "loss": 0.0, "step": 5240 }, { "epoch": 2.58, "logps_train/chosen": -82.28744506835938, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -326.72991943359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9667080640792847, "rewards_train/margins": 18.25677216053009, "rewards_train/rejected": -20.223480224609375, "step": 5240 }, { "epoch": 2.58, "learning_rate": 5.312505117639554e-08, "loss": 0.0001, "step": 5241 }, { "epoch": 2.58, "logps_train/chosen": -76.03874206542969, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -328.0352478027344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.276921272277832, "rewards_train/margins": 18.977381706237793, "rewards_train/rejected": -20.254302978515625, "step": 5241 }, { "epoch": 2.58, "learning_rate": 5.300338148039979e-08, "loss": 0.0, "step": 5242 }, { "epoch": 2.58, "logps_train/chosen": -72.0243911743164, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -324.25616455078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.758103609085083, "rewards_train/margins": 18.953158140182495, "rewards_train/rejected": -19.711261749267578, "step": 5242 }, { "epoch": 2.58, "learning_rate": 5.288184347262426e-08, "loss": 0.0, "step": 5243 }, { "epoch": 2.58, "logps_train/chosen": -72.49374389648438, "logps_train/ref_chosen": -61.0625, "logps_train/ref_rejected": -121.875, "logps_train/rejected": -313.11395263671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1445404291152954, "rewards_train/margins": 17.97979199886322, "rewards_train/rejected": -19.124332427978516, "step": 5243 }, { "epoch": 2.58, "learning_rate": 5.276043718887463e-08, "loss": 0.0, "step": 5244 }, { "epoch": 2.58, "logps_train/chosen": -76.0036392211914, "logps_train/ref_chosen": -62.40625, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -324.4459228515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.360227108001709, "rewards_train/margins": 18.570303440093994, "rewards_train/rejected": -19.930530548095703, "step": 5244 }, { "epoch": 2.58, "learning_rate": 5.263916266491808e-08, "loss": 0.0, "step": 5245 }, { "epoch": 2.58, "logps_train/chosen": -78.26791381835938, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -319.03570556640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.519613265991211, "rewards_train/margins": 17.905248641967773, "rewards_train/rejected": -19.424861907958984, "step": 5245 }, { "epoch": 2.58, "learning_rate": 5.2518019936482806e-08, "loss": 0.0, "step": 5246 }, { "epoch": 2.58, "logps_train/chosen": -77.17012023925781, "logps_train/ref_chosen": -61.90625, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -324.4105224609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5260941982269287, "rewards_train/margins": 18.226579904556274, "rewards_train/rejected": -19.752674102783203, "step": 5246 }, { "epoch": 2.58, "learning_rate": 5.239700903925848e-08, "loss": 0.0, "step": 5247 }, { "epoch": 2.58, "logps_train/chosen": -75.90048217773438, "logps_train/ref_chosen": -61.25, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -323.466552734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.466244101524353, "rewards_train/margins": 18.629146218299866, "rewards_train/rejected": -20.09539031982422, "step": 5247 }, { "epoch": 2.58, "learning_rate": 5.227613000889558e-08, "loss": 0.0002, "step": 5248 }, { "epoch": 2.58, "logps_train/chosen": -74.6282730102539, "logps_train/ref_chosen": -62.84375, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -325.63519287109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.178550124168396, "rewards_train/margins": 18.794294953346252, "rewards_train/rejected": -19.97284507751465, "step": 5248 }, { "epoch": 2.58, "learning_rate": 5.2155382881005906e-08, "loss": 0.0, "step": 5249 }, { "epoch": 2.58, "logps_train/chosen": -81.63278198242188, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -132.5, "logps_train/rejected": -344.56488037109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4858856201171875, "rewards_train/margins": 19.725780487060547, "rewards_train/rejected": -21.211666107177734, "step": 5249 }, { "epoch": 2.58, "learning_rate": 5.2034767691162385e-08, "loss": 0.0, "step": 5250 }, { "epoch": 2.58, "logps_train/chosen": -79.48976135253906, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -320.2362060546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.652174949645996, "rewards_train/margins": 17.89829921722412, "rewards_train/rejected": -19.550474166870117, "step": 5250 }, { "epoch": 2.59, "learning_rate": 5.191428447489898e-08, "loss": 0.0001, "step": 5251 }, { "epoch": 2.59, "logps_train/chosen": -74.7052001953125, "logps_train/ref_chosen": -61.5625, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -317.6259765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3129510879516602, "rewards_train/margins": 18.288758277893066, "rewards_train/rejected": -19.601709365844727, "step": 5251 }, { "epoch": 2.59, "learning_rate": 5.179393326771103e-08, "loss": 0.0, "step": 5252 }, { "epoch": 2.59, "logps_train/chosen": -73.34806823730469, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -326.07989501953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0661543607711792, "rewards_train/margins": 19.074453473091125, "rewards_train/rejected": -20.140607833862305, "step": 5252 }, { "epoch": 2.59, "learning_rate": 5.1673714105054775e-08, "loss": 0.0, "step": 5253 }, { "epoch": 2.59, "logps_train/chosen": -81.98033142089844, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -331.618896484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5686875581741333, "rewards_train/margins": 18.786853671073914, "rewards_train/rejected": -20.355541229248047, "step": 5253 }, { "epoch": 2.59, "learning_rate": 5.155362702234739e-08, "loss": 0.0, "step": 5254 }, { "epoch": 2.59, "logps_train/chosen": -87.08609008789062, "logps_train/ref_chosen": -67.5625, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -339.52783203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9521143436431885, "rewards_train/margins": 19.013316869735718, "rewards_train/rejected": -20.965431213378906, "step": 5254 }, { "epoch": 2.59, "learning_rate": 5.1433672054967625e-08, "loss": 0.0, "step": 5255 }, { "epoch": 2.59, "logps_train/chosen": -84.29174041748047, "logps_train/ref_chosen": -67.8125, "logps_train/ref_rejected": -130.875, "logps_train/rejected": -335.4623718261719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6479237079620361, "rewards_train/margins": 18.809643507003784, "rewards_train/rejected": -20.45756721496582, "step": 5255 }, { "epoch": 2.59, "learning_rate": 5.1313849238254884e-08, "loss": 0.0, "step": 5256 }, { "epoch": 2.59, "logps_train/chosen": -80.21770477294922, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -328.00628662109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6062431335449219, "rewards_train/margins": 18.624608993530273, "rewards_train/rejected": -20.230852127075195, "step": 5256 }, { "epoch": 2.59, "learning_rate": 5.11941586075097e-08, "loss": 0.0, "step": 5257 }, { "epoch": 2.59, "logps_train/chosen": -79.59416198730469, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -316.3121032714844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5693529844284058, "rewards_train/margins": 17.669134259223938, "rewards_train/rejected": -19.238487243652344, "step": 5257 }, { "epoch": 2.59, "learning_rate": 5.107460019799387e-08, "loss": 0.0, "step": 5258 }, { "epoch": 2.59, "logps_train/chosen": -83.64409637451172, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -132.5, "logps_train/rejected": -334.1402587890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.694267749786377, "rewards_train/margins": 18.475715160369873, "rewards_train/rejected": -20.16998291015625, "step": 5258 }, { "epoch": 2.59, "learning_rate": 5.0955174044930106e-08, "loss": 0.0001, "step": 5259 }, { "epoch": 2.59, "logps_train/chosen": -81.00139617919922, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -328.8873291015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6791436672210693, "rewards_train/margins": 18.624727487564087, "rewards_train/rejected": -20.303871154785156, "step": 5259 }, { "epoch": 2.59, "learning_rate": 5.08358801835021e-08, "loss": 0.0, "step": 5260 }, { "epoch": 2.59, "logps_train/chosen": -77.90998840332031, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -320.983642578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5002756118774414, "rewards_train/margins": 18.239691734313965, "rewards_train/rejected": -19.739967346191406, "step": 5260 }, { "epoch": 2.59, "learning_rate": 5.0716718648854574e-08, "loss": 0.0, "step": 5261 }, { "epoch": 2.59, "logps_train/chosen": -86.40502166748047, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -320.4969177246094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.3186519145965576, "rewards_train/margins": 17.532214403152466, "rewards_train/rejected": -19.850866317749023, "step": 5261 }, { "epoch": 2.59, "learning_rate": 5.059768947609344e-08, "loss": 0.0, "step": 5262 }, { "epoch": 2.59, "logps_train/chosen": -75.18812561035156, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -120.4375, "logps_train/rejected": -303.1274719238281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.225843906402588, "rewards_train/margins": 17.040860652923584, "rewards_train/rejected": -18.266704559326172, "step": 5262 }, { "epoch": 2.59, "learning_rate": 5.0478792700285487e-08, "loss": 0.0002, "step": 5263 }, { "epoch": 2.59, "logps_train/chosen": -73.90690612792969, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -320.75494384765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9515306949615479, "rewards_train/margins": 18.62010931968689, "rewards_train/rejected": -19.571640014648438, "step": 5263 }, { "epoch": 2.59, "learning_rate": 5.036002835645836e-08, "loss": 0.0001, "step": 5264 }, { "epoch": 2.59, "logps_train/chosen": -74.53904724121094, "logps_train/ref_chosen": -61.4375, "logps_train/ref_rejected": -120.25, "logps_train/rejected": -305.02386474609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3097645044326782, "rewards_train/margins": 17.16537892818451, "rewards_train/rejected": -18.475143432617188, "step": 5264 }, { "epoch": 2.59, "learning_rate": 5.0241396479601126e-08, "loss": 0.0, "step": 5265 }, { "epoch": 2.59, "logps_train/chosen": -80.39442443847656, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -337.91339111328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6182029247283936, "rewards_train/margins": 19.190226793289185, "rewards_train/rejected": -20.808429718017578, "step": 5265 }, { "epoch": 2.59, "learning_rate": 5.0122897104663165e-08, "loss": 0.0, "step": 5266 }, { "epoch": 2.59, "logps_train/chosen": -85.06646728515625, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -333.8893127441406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0085997581481934, "rewards_train/margins": 18.691561222076416, "rewards_train/rejected": -20.70016098022461, "step": 5266 }, { "epoch": 2.59, "learning_rate": 5.000453026655538e-08, "loss": 0.0, "step": 5267 }, { "epoch": 2.59, "logps_train/chosen": -77.87593841552734, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -330.78240966796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4234336614608765, "rewards_train/margins": 19.021508812904358, "rewards_train/rejected": -20.444942474365234, "step": 5267 }, { "epoch": 2.59, "learning_rate": 4.988629600014965e-08, "loss": 0.0, "step": 5268 }, { "epoch": 2.59, "logps_train/chosen": -81.23963928222656, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -318.8348693847656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7160537242889404, "rewards_train/margins": 17.770365953445435, "rewards_train/rejected": -19.486419677734375, "step": 5268 }, { "epoch": 2.59, "learning_rate": 4.9768194340278124e-08, "loss": 0.0, "step": 5269 }, { "epoch": 2.59, "logps_train/chosen": -83.17988586425781, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -132.375, "logps_train/rejected": -335.04522705078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.687617540359497, "rewards_train/margins": 18.58614420890808, "rewards_train/rejected": -20.273761749267578, "step": 5269 }, { "epoch": 2.59, "learning_rate": 4.96502253217348e-08, "loss": 0.0, "step": 5270 }, { "epoch": 2.59, "logps_train/chosen": -83.41061401367188, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -121.6875, "logps_train/rejected": -323.5163879394531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.923703670501709, "rewards_train/margins": 18.260502338409424, "rewards_train/rejected": -20.184206008911133, "step": 5270 }, { "epoch": 2.6, "learning_rate": 4.953238897927386e-08, "loss": 0.0001, "step": 5271 }, { "epoch": 2.6, "logps_train/chosen": -86.67987823486328, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -334.0379333496094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9648380279541016, "rewards_train/margins": 18.53299903869629, "rewards_train/rejected": -20.49783706665039, "step": 5271 }, { "epoch": 2.6, "learning_rate": 4.941468534761073e-08, "loss": 0.0, "step": 5272 }, { "epoch": 2.6, "logps_train/chosen": -73.76213073730469, "logps_train/ref_chosen": -60.78125, "logps_train/ref_rejected": -120.625, "logps_train/rejected": -311.7074890136719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2994797229766846, "rewards_train/margins": 17.80940318107605, "rewards_train/rejected": -19.108882904052734, "step": 5272 }, { "epoch": 2.6, "learning_rate": 4.929711446142182e-08, "loss": 0.0, "step": 5273 }, { "epoch": 2.6, "logps_train/chosen": -80.80686950683594, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -328.8076171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6383774280548096, "rewards_train/margins": 18.547953367233276, "rewards_train/rejected": -20.186330795288086, "step": 5273 }, { "epoch": 2.6, "learning_rate": 4.91796763553442e-08, "loss": 0.0, "step": 5274 }, { "epoch": 2.6, "logps_train/chosen": -76.83258056640625, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -320.5162353515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3609428405761719, "rewards_train/margins": 18.335067749023438, "rewards_train/rejected": -19.69601058959961, "step": 5274 }, { "epoch": 2.6, "learning_rate": 4.9062371063975984e-08, "loss": 0.0, "step": 5275 }, { "epoch": 2.6, "logps_train/chosen": -83.51687622070312, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -335.436767578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7388458251953125, "rewards_train/margins": 18.907127380371094, "rewards_train/rejected": -20.645973205566406, "step": 5275 }, { "epoch": 2.6, "learning_rate": 4.894519862187596e-08, "loss": 0.0002, "step": 5276 }, { "epoch": 2.6, "logps_train/chosen": -84.70233154296875, "logps_train/ref_chosen": -67.5, "logps_train/ref_rejected": -132.5, "logps_train/rejected": -340.950439453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7178895473480225, "rewards_train/margins": 19.132086038589478, "rewards_train/rejected": -20.8499755859375, "step": 5276 }, { "epoch": 2.6, "learning_rate": 4.8828159063564244e-08, "loss": 0.0001, "step": 5277 }, { "epoch": 2.6, "logps_train/chosen": -77.76454162597656, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -119.625, "logps_train/rejected": -312.34735107421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5055556297302246, "rewards_train/margins": 17.76702356338501, "rewards_train/rejected": -19.272579193115234, "step": 5277 }, { "epoch": 2.6, "learning_rate": 4.8711252423521085e-08, "loss": 0.0, "step": 5278 }, { "epoch": 2.6, "logps_train/chosen": -77.0279541015625, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -121.5625, "logps_train/rejected": -322.116943359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.431018590927124, "rewards_train/margins": 18.62628197669983, "rewards_train/rejected": -20.057300567626953, "step": 5278 }, { "epoch": 2.6, "learning_rate": 4.85944787361881e-08, "loss": 0.0, "step": 5279 }, { "epoch": 2.6, "logps_train/chosen": -73.28115844726562, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -122.0625, "logps_train/rejected": -305.8028564453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.002871036529541, "rewards_train/margins": 17.374484539031982, "rewards_train/rejected": -18.377355575561523, "step": 5279 }, { "epoch": 2.6, "learning_rate": 4.847783803596789e-08, "loss": 0.0, "step": 5280 }, { "epoch": 2.6, "logps_train/chosen": -80.81193542480469, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -127.5625, "logps_train/rejected": -328.3428955078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7307051420211792, "rewards_train/margins": 18.34762966632843, "rewards_train/rejected": -20.07833480834961, "step": 5280 }, { "epoch": 2.6, "learning_rate": 4.8361330357223173e-08, "loss": 0.0002, "step": 5281 }, { "epoch": 2.6, "logps_train/chosen": -80.15348815917969, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -122.6875, "logps_train/rejected": -311.683349609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.423943042755127, "rewards_train/margins": 17.475444316864014, "rewards_train/rejected": -18.89938735961914, "step": 5281 }, { "epoch": 2.6, "learning_rate": 4.824495573427817e-08, "loss": 0.0002, "step": 5282 }, { "epoch": 2.6, "logps_train/chosen": -82.72296142578125, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -327.17108154296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7649719715118408, "rewards_train/margins": 18.319422483444214, "rewards_train/rejected": -20.084394454956055, "step": 5282 }, { "epoch": 2.6, "learning_rate": 4.812871420141751e-08, "loss": 0.0, "step": 5283 }, { "epoch": 2.6, "logps_train/chosen": -81.06575775146484, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -324.7833251953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6368005275726318, "rewards_train/margins": 18.259695291519165, "rewards_train/rejected": -19.896495819091797, "step": 5283 }, { "epoch": 2.6, "learning_rate": 4.801260579288668e-08, "loss": 0.0, "step": 5284 }, { "epoch": 2.6, "logps_train/chosen": -86.1908187866211, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -330.787841796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0585837364196777, "rewards_train/margins": 18.25076723098755, "rewards_train/rejected": -20.309350967407227, "step": 5284 }, { "epoch": 2.6, "learning_rate": 4.7896630542892225e-08, "loss": 0.0001, "step": 5285 }, { "epoch": 2.6, "logps_train/chosen": -81.24414825439453, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -120.8125, "logps_train/rejected": -310.321044921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6989021301269531, "rewards_train/margins": 17.249364852905273, "rewards_train/rejected": -18.948266983032227, "step": 5285 }, { "epoch": 2.6, "learning_rate": 4.778078848560108e-08, "loss": 0.0001, "step": 5286 }, { "epoch": 2.6, "logps_train/chosen": -77.0909423828125, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -316.0045166015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2226197719573975, "rewards_train/margins": 17.964014291763306, "rewards_train/rejected": -19.186634063720703, "step": 5286 }, { "epoch": 2.6, "learning_rate": 4.7665079655141047e-08, "loss": 0.0, "step": 5287 }, { "epoch": 2.6, "logps_train/chosen": -77.82261657714844, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -324.12286376953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3901718854904175, "rewards_train/margins": 18.650534749031067, "rewards_train/rejected": -20.040706634521484, "step": 5287 }, { "epoch": 2.6, "learning_rate": 4.7549504085600766e-08, "loss": 0.0, "step": 5288 }, { "epoch": 2.6, "logps_train/chosen": -70.51339721679688, "logps_train/ref_chosen": -60.5, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -315.1009826660156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0028541088104248, "rewards_train/margins": 18.20245862007141, "rewards_train/rejected": -19.205312728881836, "step": 5288 }, { "epoch": 2.6, "learning_rate": 4.7434061811029715e-08, "loss": 0.0, "step": 5289 }, { "epoch": 2.6, "logps_train/chosen": -79.95698547363281, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -326.4855651855469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5865671634674072, "rewards_train/margins": 18.239774465560913, "rewards_train/rejected": -19.82634162902832, "step": 5289 }, { "epoch": 2.6, "learning_rate": 4.731875286543785e-08, "loss": 0.0, "step": 5290 }, { "epoch": 2.6, "logps_train/chosen": -81.97955322265625, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -335.0801696777344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.87398099899292, "rewards_train/margins": 19.09028673171997, "rewards_train/rejected": -20.96426773071289, "step": 5290 }, { "epoch": 2.61, "learning_rate": 4.720357728279589e-08, "loss": 0.0, "step": 5291 }, { "epoch": 2.61, "logps_train/chosen": -82.68055725097656, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -320.69219970703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8185932636260986, "rewards_train/margins": 17.708048582077026, "rewards_train/rejected": -19.526641845703125, "step": 5291 }, { "epoch": 2.61, "learning_rate": 4.708853509703548e-08, "loss": 0.0, "step": 5292 }, { "epoch": 2.61, "logps_train/chosen": -79.34671020507812, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -331.9881591796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.498489260673523, "rewards_train/margins": 19.017269253730774, "rewards_train/rejected": -20.515758514404297, "step": 5292 }, { "epoch": 2.61, "learning_rate": 4.697362634204882e-08, "loss": 0.0, "step": 5293 }, { "epoch": 2.61, "logps_train/chosen": -81.90071105957031, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -328.68304443359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7242023944854736, "rewards_train/margins": 18.18326449394226, "rewards_train/rejected": -19.907466888427734, "step": 5293 }, { "epoch": 2.61, "learning_rate": 4.685885105168863e-08, "loss": 0.0, "step": 5294 }, { "epoch": 2.61, "logps_train/chosen": -85.4826431274414, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -325.7890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9808324575424194, "rewards_train/margins": 17.912676453590393, "rewards_train/rejected": -19.893508911132812, "step": 5294 }, { "epoch": 2.61, "learning_rate": 4.674420925976869e-08, "loss": 0.0, "step": 5295 }, { "epoch": 2.61, "logps_train/chosen": -78.99058532714844, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -123.3125, "logps_train/rejected": -318.73199462890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4982774257659912, "rewards_train/margins": 18.041033029556274, "rewards_train/rejected": -19.539310455322266, "step": 5295 }, { "epoch": 2.61, "learning_rate": 4.662970100006319e-08, "loss": 0.0002, "step": 5296 }, { "epoch": 2.61, "logps_train/chosen": -80.35371398925781, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -320.2918701171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4632035493850708, "rewards_train/margins": 17.915935397148132, "rewards_train/rejected": -19.379138946533203, "step": 5296 }, { "epoch": 2.61, "learning_rate": 4.6515326306307e-08, "loss": 0.0001, "step": 5297 }, { "epoch": 2.61, "logps_train/chosen": -74.04447937011719, "logps_train/ref_chosen": -61.6875, "logps_train/ref_rejected": -120.375, "logps_train/rejected": -315.2156066894531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2369186878204346, "rewards_train/margins": 18.246994733810425, "rewards_train/rejected": -19.48391342163086, "step": 5297 }, { "epoch": 2.61, "learning_rate": 4.64010852121956e-08, "loss": 0.0, "step": 5298 }, { "epoch": 2.61, "logps_train/chosen": -85.06501770019531, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -337.986572265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9200273752212524, "rewards_train/margins": 19.153924345970154, "rewards_train/rejected": -21.073951721191406, "step": 5298 }, { "epoch": 2.61, "learning_rate": 4.628697775138535e-08, "loss": 0.0, "step": 5299 }, { "epoch": 2.61, "logps_train/chosen": -83.94400024414062, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -328.0009460449219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7925448417663574, "rewards_train/margins": 18.402763843536377, "rewards_train/rejected": -20.195308685302734, "step": 5299 }, { "epoch": 2.61, "learning_rate": 4.6173003957493015e-08, "loss": 0.0001, "step": 5300 }, { "epoch": 2.61, "logps_train/chosen": -82.13705444335938, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -339.4100646972656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7457857131958008, "rewards_train/margins": 19.286824226379395, "rewards_train/rejected": -21.032609939575195, "step": 5300 }, { "epoch": 2.61, "learning_rate": 4.605916386409603e-08, "loss": 0.0, "step": 5301 }, { "epoch": 2.61, "logps_train/chosen": -80.1837387084961, "logps_train/ref_chosen": -63.46875, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -322.85650634765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6730127334594727, "rewards_train/margins": 18.08969211578369, "rewards_train/rejected": -19.762704849243164, "step": 5301 }, { "epoch": 2.61, "learning_rate": 4.594545750473244e-08, "loss": 0.0, "step": 5302 }, { "epoch": 2.61, "logps_train/chosen": -81.92935180664062, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -330.02642822265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6418116092681885, "rewards_train/margins": 18.616103887557983, "rewards_train/rejected": -20.257915496826172, "step": 5302 }, { "epoch": 2.61, "learning_rate": 4.5831884912900855e-08, "loss": 0.0, "step": 5303 }, { "epoch": 2.61, "logps_train/chosen": -71.54435729980469, "logps_train/ref_chosen": -61.90625, "logps_train/ref_rejected": -120.3125, "logps_train/rejected": -309.8551330566406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9628582000732422, "rewards_train/margins": 17.9892578125, "rewards_train/rejected": -18.952116012573242, "step": 5303 }, { "epoch": 2.61, "learning_rate": 4.571844612206066e-08, "loss": 0.0, "step": 5304 }, { "epoch": 2.61, "logps_train/chosen": -79.94712829589844, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -334.4719543457031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.610484004020691, "rewards_train/margins": 19.047845005989075, "rewards_train/rejected": -20.658329010009766, "step": 5304 }, { "epoch": 2.61, "learning_rate": 4.5605141165631634e-08, "loss": 0.0, "step": 5305 }, { "epoch": 2.61, "logps_train/chosen": -81.09797668457031, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -327.7109069824219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7344073057174683, "rewards_train/margins": 18.41334354877472, "rewards_train/rejected": -20.147750854492188, "step": 5305 }, { "epoch": 2.61, "learning_rate": 4.549197007699407e-08, "loss": 0.0, "step": 5306 }, { "epoch": 2.61, "logps_train/chosen": -81.32487487792969, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -324.3742980957031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7725509405136108, "rewards_train/margins": 17.866832375526428, "rewards_train/rejected": -19.63938331604004, "step": 5306 }, { "epoch": 2.61, "learning_rate": 4.537893288948913e-08, "loss": 0.0002, "step": 5307 }, { "epoch": 2.61, "logps_train/chosen": -78.05908203125, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -325.6748352050781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4773684740066528, "rewards_train/margins": 18.685330271720886, "rewards_train/rejected": -20.16269874572754, "step": 5307 }, { "epoch": 2.61, "learning_rate": 4.5266029636418234e-08, "loss": 0.0, "step": 5308 }, { "epoch": 2.61, "logps_train/chosen": -76.06385803222656, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -316.63616943359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3421039581298828, "rewards_train/margins": 17.775859832763672, "rewards_train/rejected": -19.117963790893555, "step": 5308 }, { "epoch": 2.61, "learning_rate": 4.515326035104333e-08, "loss": 0.0, "step": 5309 }, { "epoch": 2.61, "logps_train/chosen": -82.41951751708984, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -336.09747314453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8505945205688477, "rewards_train/margins": 18.830246925354004, "rewards_train/rejected": -20.68084144592285, "step": 5309 }, { "epoch": 2.61, "learning_rate": 4.5040625066587234e-08, "loss": 0.0001, "step": 5310 }, { "epoch": 2.61, "logps_train/chosen": -77.15267944335938, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -326.32794189453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1677583456039429, "rewards_train/margins": 18.728607773780823, "rewards_train/rejected": -19.896366119384766, "step": 5310 }, { "epoch": 2.61, "learning_rate": 4.4928123816232956e-08, "loss": 0.0, "step": 5311 }, { "epoch": 2.61, "logps_train/chosen": -74.75362396240234, "logps_train/ref_chosen": -61.5625, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -326.80120849609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3196494579315186, "rewards_train/margins": 19.107787370681763, "rewards_train/rejected": -20.42743682861328, "step": 5311 }, { "epoch": 2.62, "learning_rate": 4.4815756633124144e-08, "loss": 0.0, "step": 5312 }, { "epoch": 2.62, "logps_train/chosen": -82.78491973876953, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -332.0521240234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6414803266525269, "rewards_train/margins": 18.884045720100403, "rewards_train/rejected": -20.52552604675293, "step": 5312 }, { "epoch": 2.62, "learning_rate": 4.470352355036478e-08, "loss": 0.0, "step": 5313 }, { "epoch": 2.62, "logps_train/chosen": -78.08747863769531, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -333.6397705078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1265703439712524, "rewards_train/margins": 19.572123885154724, "rewards_train/rejected": -20.698694229125977, "step": 5313 }, { "epoch": 2.62, "learning_rate": 4.459142460101967e-08, "loss": 0.0, "step": 5314 }, { "epoch": 2.62, "logps_train/chosen": -77.89598083496094, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -330.4395446777344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.434861421585083, "rewards_train/margins": 18.8310649394989, "rewards_train/rejected": -20.265926361083984, "step": 5314 }, { "epoch": 2.62, "learning_rate": 4.447945981811391e-08, "loss": 0.0, "step": 5315 }, { "epoch": 2.62, "logps_train/chosen": -83.24394226074219, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -332.1578063964844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6531046628952026, "rewards_train/margins": 18.766483426094055, "rewards_train/rejected": -20.419588088989258, "step": 5315 }, { "epoch": 2.62, "learning_rate": 4.436762923463294e-08, "loss": 0.0, "step": 5316 }, { "epoch": 2.62, "logps_train/chosen": -82.44184875488281, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -319.61920166015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.887495756149292, "rewards_train/margins": 17.73282551765442, "rewards_train/rejected": -19.62032127380371, "step": 5316 }, { "epoch": 2.62, "learning_rate": 4.425593288352308e-08, "loss": 0.0004, "step": 5317 }, { "epoch": 2.62, "logps_train/chosen": -82.052490234375, "logps_train/ref_chosen": -63.03125, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -328.4434814453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9014155864715576, "rewards_train/margins": 18.43903088569641, "rewards_train/rejected": -20.34044647216797, "step": 5317 }, { "epoch": 2.62, "learning_rate": 4.414437079769045e-08, "loss": 0.0, "step": 5318 }, { "epoch": 2.62, "logps_train/chosen": -82.21837615966797, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -122.8125, "logps_train/rejected": -322.2240905761719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9461541175842285, "rewards_train/margins": 17.997445583343506, "rewards_train/rejected": -19.943599700927734, "step": 5318 }, { "epoch": 2.62, "learning_rate": 4.403294301000227e-08, "loss": 0.0, "step": 5319 }, { "epoch": 2.62, "logps_train/chosen": -79.47309112548828, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -321.08697509765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4590036869049072, "rewards_train/margins": 18.100032567977905, "rewards_train/rejected": -19.559036254882812, "step": 5319 }, { "epoch": 2.62, "learning_rate": 4.392164955328581e-08, "loss": 0.0, "step": 5320 }, { "epoch": 2.62, "logps_train/chosen": -81.06974792480469, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -327.6156921386719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4723800420761108, "rewards_train/margins": 18.565507531166077, "rewards_train/rejected": -20.037887573242188, "step": 5320 }, { "epoch": 2.62, "learning_rate": 4.381049046032887e-08, "loss": 0.0, "step": 5321 }, { "epoch": 2.62, "logps_train/chosen": -82.4400634765625, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -121.9375, "logps_train/rejected": -318.34716796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9092408418655396, "rewards_train/margins": 17.732894778251648, "rewards_train/rejected": -19.642135620117188, "step": 5321 }, { "epoch": 2.62, "learning_rate": 4.369946576387978e-08, "loss": 0.0, "step": 5322 }, { "epoch": 2.62, "logps_train/chosen": -79.12788391113281, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -320.158447265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5742629766464233, "rewards_train/margins": 18.005353569984436, "rewards_train/rejected": -19.57961654663086, "step": 5322 }, { "epoch": 2.62, "learning_rate": 4.3588575496647016e-08, "loss": 0.0, "step": 5323 }, { "epoch": 2.62, "logps_train/chosen": -76.47373962402344, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -327.9559631347656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2238872051239014, "rewards_train/margins": 18.82483410835266, "rewards_train/rejected": -20.048721313476562, "step": 5323 }, { "epoch": 2.62, "learning_rate": 4.3477819691299765e-08, "loss": 0.0, "step": 5324 }, { "epoch": 2.62, "logps_train/chosen": -76.12295532226562, "logps_train/ref_chosen": -63.15625, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -317.27191162109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2974026203155518, "rewards_train/margins": 18.191797971725464, "rewards_train/rejected": -19.489200592041016, "step": 5324 }, { "epoch": 2.62, "learning_rate": 4.336719838046715e-08, "loss": 0.0, "step": 5325 }, { "epoch": 2.62, "logps_train/chosen": -76.14886474609375, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -321.15301513671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3414490222930908, "rewards_train/margins": 18.365158796310425, "rewards_train/rejected": -19.706607818603516, "step": 5325 }, { "epoch": 2.62, "learning_rate": 4.325671159673933e-08, "loss": 0.0, "step": 5326 }, { "epoch": 2.62, "logps_train/chosen": -80.27064514160156, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -333.16943359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.601380467414856, "rewards_train/margins": 18.70521056652069, "rewards_train/rejected": -20.306591033935547, "step": 5326 }, { "epoch": 2.62, "learning_rate": 4.3146359372666194e-08, "loss": 0.0, "step": 5327 }, { "epoch": 2.62, "logps_train/chosen": -76.32894897460938, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -333.16546630859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3703458309173584, "rewards_train/margins": 19.275985956192017, "rewards_train/rejected": -20.646331787109375, "step": 5327 }, { "epoch": 2.62, "learning_rate": 4.3036141740758256e-08, "loss": 0.0, "step": 5328 }, { "epoch": 2.62, "logps_train/chosen": -84.4432601928711, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -335.7442626953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9975485801696777, "rewards_train/margins": 18.66271448135376, "rewards_train/rejected": -20.660263061523438, "step": 5328 }, { "epoch": 2.62, "learning_rate": 4.292605873348665e-08, "loss": 0.0, "step": 5329 }, { "epoch": 2.62, "logps_train/chosen": -79.27620697021484, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -326.1436462402344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4409992694854736, "rewards_train/margins": 18.617213010787964, "rewards_train/rejected": -20.058212280273438, "step": 5329 }, { "epoch": 2.62, "learning_rate": 4.2816110383282144e-08, "loss": 0.0001, "step": 5330 }, { "epoch": 2.62, "logps_train/chosen": -75.24055480957031, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -321.48712158203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2750563621520996, "rewards_train/margins": 18.445875644683838, "rewards_train/rejected": -19.720932006835938, "step": 5330 }, { "epoch": 2.62, "learning_rate": 4.270629672253651e-08, "loss": 0.0, "step": 5331 }, { "epoch": 2.62, "logps_train/chosen": -83.26324462890625, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -120.375, "logps_train/rejected": -312.0339660644531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9406793117523193, "rewards_train/margins": 17.226194620132446, "rewards_train/rejected": -19.166873931884766, "step": 5331 }, { "epoch": 2.63, "learning_rate": 4.259661778360174e-08, "loss": 0.0, "step": 5332 }, { "epoch": 2.63, "logps_train/chosen": -75.15505981445312, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -315.1890869140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1229275465011597, "rewards_train/margins": 18.23064911365509, "rewards_train/rejected": -19.35357666015625, "step": 5332 }, { "epoch": 2.63, "learning_rate": 4.2487073598789615e-08, "loss": 0.0, "step": 5333 }, { "epoch": 2.63, "logps_train/chosen": -81.20254516601562, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -335.7290344238281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5352931022644043, "rewards_train/margins": 19.513243198394775, "rewards_train/rejected": -21.04853630065918, "step": 5333 }, { "epoch": 2.63, "learning_rate": 4.237766420037292e-08, "loss": 0.0, "step": 5334 }, { "epoch": 2.63, "logps_train/chosen": -81.08377075195312, "logps_train/ref_chosen": -67.8125, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -338.34954833984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3279087543487549, "rewards_train/margins": 19.639124631881714, "rewards_train/rejected": -20.96703338623047, "step": 5334 }, { "epoch": 2.63, "learning_rate": 4.226838962058432e-08, "loss": 0.0, "step": 5335 }, { "epoch": 2.63, "logps_train/chosen": -79.82206726074219, "logps_train/ref_chosen": -62.5625, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -323.96844482421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.725175380706787, "rewards_train/margins": 18.16746950149536, "rewards_train/rejected": -19.89264488220215, "step": 5335 }, { "epoch": 2.63, "learning_rate": 4.215924989161662e-08, "loss": 0.0, "step": 5336 }, { "epoch": 2.63, "logps_train/chosen": -69.07379913330078, "logps_train/ref_chosen": -60.09375, "logps_train/ref_rejected": -118.0625, "logps_train/rejected": -298.16357421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8972727060317993, "rewards_train/margins": 17.112346053123474, "rewards_train/rejected": -18.009618759155273, "step": 5336 }, { "epoch": 2.63, "learning_rate": 4.205024504562349e-08, "loss": 0.0, "step": 5337 }, { "epoch": 2.63, "logps_train/chosen": -82.270263671875, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -318.7366943359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6986079216003418, "rewards_train/margins": 17.664804935455322, "rewards_train/rejected": -19.363412857055664, "step": 5337 }, { "epoch": 2.63, "learning_rate": 4.1941375114718233e-08, "loss": 0.0, "step": 5338 }, { "epoch": 2.63, "logps_train/chosen": -85.2786636352539, "logps_train/ref_chosen": -66.9375, "logps_train/ref_rejected": -131.25, "logps_train/rejected": -342.088134765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8366068601608276, "rewards_train/margins": 19.243887066841125, "rewards_train/rejected": -21.080493927001953, "step": 5338 }, { "epoch": 2.63, "learning_rate": 4.183264013097476e-08, "loss": 0.0, "step": 5339 }, { "epoch": 2.63, "logps_train/chosen": -84.06055450439453, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -327.61114501953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.110401153564453, "rewards_train/margins": 18.416873931884766, "rewards_train/rejected": -20.52727508544922, "step": 5339 }, { "epoch": 2.63, "learning_rate": 4.1724040126426996e-08, "loss": 0.0, "step": 5340 }, { "epoch": 2.63, "logps_train/chosen": -83.21903991699219, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -334.97613525390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8659467697143555, "rewards_train/margins": 18.87810230255127, "rewards_train/rejected": -20.744049072265625, "step": 5340 }, { "epoch": 2.63, "learning_rate": 4.161557513306946e-08, "loss": 0.0, "step": 5341 }, { "epoch": 2.63, "logps_train/chosen": -71.464599609375, "logps_train/ref_chosen": -61.0625, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -315.6622619628906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0414791107177734, "rewards_train/margins": 18.237150192260742, "rewards_train/rejected": -19.278629302978516, "step": 5341 }, { "epoch": 2.63, "learning_rate": 4.150724518285659e-08, "loss": 0.0, "step": 5342 }, { "epoch": 2.63, "logps_train/chosen": -76.6818618774414, "logps_train/ref_chosen": -62.875, "logps_train/ref_rejected": -120.125, "logps_train/rejected": -317.318359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3787826299667358, "rewards_train/margins": 18.34279978275299, "rewards_train/rejected": -19.721582412719727, "step": 5342 }, { "epoch": 2.63, "learning_rate": 4.1399050307702964e-08, "loss": 0.0, "step": 5343 }, { "epoch": 2.63, "logps_train/chosen": -83.9497299194336, "logps_train/ref_chosen": -67.25, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -335.32562255859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6699237823486328, "rewards_train/margins": 18.863615036010742, "rewards_train/rejected": -20.533538818359375, "step": 5343 }, { "epoch": 2.63, "learning_rate": 4.129099053948376e-08, "loss": 0.0, "step": 5344 }, { "epoch": 2.63, "logps_train/chosen": -80.18209838867188, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -320.43450927734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6193815469741821, "rewards_train/margins": 17.89712083339691, "rewards_train/rejected": -19.516502380371094, "step": 5344 }, { "epoch": 2.63, "learning_rate": 4.118306591003395e-08, "loss": 0.0, "step": 5345 }, { "epoch": 2.63, "logps_train/chosen": -79.16145324707031, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -328.2376403808594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4722249507904053, "rewards_train/margins": 18.654762506484985, "rewards_train/rejected": -20.12698745727539, "step": 5345 }, { "epoch": 2.63, "learning_rate": 4.107527645114889e-08, "loss": 0.0, "step": 5346 }, { "epoch": 2.63, "logps_train/chosen": -77.15699768066406, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -326.2675476074219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4216080904006958, "rewards_train/margins": 18.678340792655945, "rewards_train/rejected": -20.09994888305664, "step": 5346 }, { "epoch": 2.63, "learning_rate": 4.0967622194584106e-08, "loss": 0.0, "step": 5347 }, { "epoch": 2.63, "logps_train/chosen": -84.044677734375, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -329.4211730957031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.836597204208374, "rewards_train/margins": 18.644582986831665, "rewards_train/rejected": -20.48118019104004, "step": 5347 }, { "epoch": 2.63, "learning_rate": 4.086010317205535e-08, "loss": 0.0, "step": 5348 }, { "epoch": 2.63, "logps_train/chosen": -79.22708892822266, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -321.38677978515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.508011817932129, "rewards_train/margins": 17.615090370178223, "rewards_train/rejected": -19.12310218811035, "step": 5348 }, { "epoch": 2.63, "learning_rate": 4.0752719415238335e-08, "loss": 0.0001, "step": 5349 }, { "epoch": 2.63, "logps_train/chosen": -80.33731842041016, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -336.02093505859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5429611206054688, "rewards_train/margins": 19.090824127197266, "rewards_train/rejected": -20.633785247802734, "step": 5349 }, { "epoch": 2.63, "learning_rate": 4.064547095576904e-08, "loss": 0.0001, "step": 5350 }, { "epoch": 2.63, "logps_train/chosen": -83.08122253417969, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -320.769287109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.942448377609253, "rewards_train/margins": 17.61133122444153, "rewards_train/rejected": -19.55377960205078, "step": 5350 }, { "epoch": 2.63, "learning_rate": 4.053835782524356e-08, "loss": 0.0, "step": 5351 }, { "epoch": 2.63, "logps_train/chosen": -80.01181030273438, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -328.6425476074219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4084076881408691, "rewards_train/margins": 19.117372035980225, "rewards_train/rejected": -20.525779724121094, "step": 5351 }, { "epoch": 2.64, "learning_rate": 4.043138005521829e-08, "loss": 0.0, "step": 5352 }, { "epoch": 2.64, "logps_train/chosen": -84.63640594482422, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -328.56219482421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8236992359161377, "rewards_train/margins": 18.218605756759644, "rewards_train/rejected": -20.04230499267578, "step": 5352 }, { "epoch": 2.64, "learning_rate": 4.0324537677209505e-08, "loss": 0.0, "step": 5353 }, { "epoch": 2.64, "logps_train/chosen": -76.93189239501953, "logps_train/ref_chosen": -60.625, "logps_train/ref_rejected": -119.875, "logps_train/rejected": -311.80157470703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.629615306854248, "rewards_train/margins": 17.561771869659424, "rewards_train/rejected": -19.191387176513672, "step": 5353 }, { "epoch": 2.64, "learning_rate": 4.0217830722693694e-08, "loss": 0.0, "step": 5354 }, { "epoch": 2.64, "logps_train/chosen": -78.49851989746094, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -329.1383056640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.41694176197052, "rewards_train/margins": 18.797036051750183, "rewards_train/rejected": -20.213977813720703, "step": 5354 }, { "epoch": 2.64, "learning_rate": 4.0111259223107374e-08, "loss": 0.0, "step": 5355 }, { "epoch": 2.64, "logps_train/chosen": -79.37950897216797, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -330.20745849609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4992300271987915, "rewards_train/margins": 19.06643807888031, "rewards_train/rejected": -20.5656681060791, "step": 5355 }, { "epoch": 2.64, "learning_rate": 4.000482320984738e-08, "loss": 0.0, "step": 5356 }, { "epoch": 2.64, "logps_train/chosen": -78.77262878417969, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -323.69281005859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3838543891906738, "rewards_train/margins": 18.637868404388428, "rewards_train/rejected": -20.0217227935791, "step": 5356 }, { "epoch": 2.64, "learning_rate": 3.9898522714270464e-08, "loss": 0.0, "step": 5357 }, { "epoch": 2.64, "logps_train/chosen": -81.36921691894531, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -337.0928955078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5044515132904053, "rewards_train/margins": 19.24712586402893, "rewards_train/rejected": -20.751577377319336, "step": 5357 }, { "epoch": 2.64, "learning_rate": 3.979235776769324e-08, "loss": 0.0, "step": 5358 }, { "epoch": 2.64, "logps_train/chosen": -82.10285949707031, "logps_train/ref_chosen": -68.375, "logps_train/ref_rejected": -132.875, "logps_train/rejected": -344.42816162109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3749347925186157, "rewards_train/margins": 19.782236456871033, "rewards_train/rejected": -21.15717124938965, "step": 5358 }, { "epoch": 2.64, "learning_rate": 3.9686328401392966e-08, "loss": 0.0, "step": 5359 }, { "epoch": 2.64, "logps_train/chosen": -73.36634063720703, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -315.1876220703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9883434772491455, "rewards_train/margins": 18.08910822868347, "rewards_train/rejected": -19.077451705932617, "step": 5359 }, { "epoch": 2.64, "learning_rate": 3.958043464660638e-08, "loss": 0.0001, "step": 5360 }, { "epoch": 2.64, "logps_train/chosen": -83.31259155273438, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -322.1181335449219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8814060688018799, "rewards_train/margins": 17.747498273849487, "rewards_train/rejected": -19.628904342651367, "step": 5360 }, { "epoch": 2.64, "learning_rate": 3.9474676534530515e-08, "loss": 0.0, "step": 5361 }, { "epoch": 2.64, "logps_train/chosen": -76.45984649658203, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -333.541015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2133185863494873, "rewards_train/margins": 19.29996418952942, "rewards_train/rejected": -20.513282775878906, "step": 5361 }, { "epoch": 2.64, "learning_rate": 3.936905409632241e-08, "loss": 0.0, "step": 5362 }, { "epoch": 2.64, "logps_train/chosen": -77.3185043334961, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -121.125, "logps_train/rejected": -322.29754638671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.374086856842041, "rewards_train/margins": 18.745023250579834, "rewards_train/rejected": -20.119110107421875, "step": 5362 }, { "epoch": 2.64, "learning_rate": 3.9263567363099325e-08, "loss": 0.0, "step": 5363 }, { "epoch": 2.64, "logps_train/chosen": -83.34675598144531, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -122.1875, "logps_train/rejected": -316.8834533691406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6728599071502686, "rewards_train/margins": 17.79361081123352, "rewards_train/rejected": -19.46647071838379, "step": 5363 }, { "epoch": 2.64, "learning_rate": 3.915821636593819e-08, "loss": 0.0, "step": 5364 }, { "epoch": 2.64, "logps_train/chosen": -79.23674774169922, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -323.2351379394531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5907893180847168, "rewards_train/margins": 18.270126819610596, "rewards_train/rejected": -19.860916137695312, "step": 5364 }, { "epoch": 2.64, "learning_rate": 3.9053001135876065e-08, "loss": 0.0, "step": 5365 }, { "epoch": 2.64, "logps_train/chosen": -81.37406158447266, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -332.9884033203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.752933382987976, "rewards_train/margins": 18.74131977558136, "rewards_train/rejected": -20.494253158569336, "step": 5365 }, { "epoch": 2.64, "learning_rate": 3.894792170391037e-08, "loss": 0.0, "step": 5366 }, { "epoch": 2.64, "logps_train/chosen": -77.56383514404297, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -327.0555419921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3697378635406494, "rewards_train/margins": 18.705201864242554, "rewards_train/rejected": -20.074939727783203, "step": 5366 }, { "epoch": 2.64, "learning_rate": 3.884297810099779e-08, "loss": 0.0, "step": 5367 }, { "epoch": 2.64, "logps_train/chosen": -85.8167953491211, "logps_train/ref_chosen": -68.8125, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -340.43817138671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7022120952606201, "rewards_train/margins": 19.318753480911255, "rewards_train/rejected": -21.020965576171875, "step": 5367 }, { "epoch": 2.64, "learning_rate": 3.873817035805571e-08, "loss": 0.0, "step": 5368 }, { "epoch": 2.64, "logps_train/chosen": -84.61032104492188, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -334.8514404296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8577120304107666, "rewards_train/margins": 18.745792150497437, "rewards_train/rejected": -20.603504180908203, "step": 5368 }, { "epoch": 2.64, "learning_rate": 3.863349850596126e-08, "loss": 0.0, "step": 5369 }, { "epoch": 2.64, "logps_train/chosen": -84.03885650634766, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -122.4375, "logps_train/rejected": -323.0723876953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.072049617767334, "rewards_train/margins": 17.98978090286255, "rewards_train/rejected": -20.061830520629883, "step": 5369 }, { "epoch": 2.64, "learning_rate": 3.852896257555116e-08, "loss": 0.0001, "step": 5370 }, { "epoch": 2.64, "logps_train/chosen": -76.19720458984375, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -322.7977294921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2987494468688965, "rewards_train/margins": 18.16583490371704, "rewards_train/rejected": -19.464584350585938, "step": 5370 }, { "epoch": 2.64, "learning_rate": 3.842456259762267e-08, "loss": 0.0, "step": 5371 }, { "epoch": 2.64, "logps_train/chosen": -80.08660888671875, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -332.1156005859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.55470609664917, "rewards_train/margins": 18.694992542266846, "rewards_train/rejected": -20.249698638916016, "step": 5371 }, { "epoch": 2.65, "learning_rate": 3.832029860293262e-08, "loss": 0.0, "step": 5372 }, { "epoch": 2.65, "logps_train/chosen": -82.29643249511719, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -336.04742431640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.546000361442566, "rewards_train/margins": 19.050929188728333, "rewards_train/rejected": -20.5969295501709, "step": 5372 }, { "epoch": 2.65, "learning_rate": 3.821617062219784e-08, "loss": 0.0, "step": 5373 }, { "epoch": 2.65, "logps_train/chosen": -77.01301574707031, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -332.1864318847656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2991044521331787, "rewards_train/margins": 19.08477234840393, "rewards_train/rejected": -20.38387680053711, "step": 5373 }, { "epoch": 2.65, "learning_rate": 3.8112178686095345e-08, "loss": 0.0, "step": 5374 }, { "epoch": 2.65, "logps_train/chosen": -79.59048461914062, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -337.79901123046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6092928647994995, "rewards_train/margins": 19.212013363838196, "rewards_train/rejected": -20.821306228637695, "step": 5374 }, { "epoch": 2.65, "learning_rate": 3.8008322825261615e-08, "loss": 0.0, "step": 5375 }, { "epoch": 2.65, "logps_train/chosen": -83.14165496826172, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -326.9947814941406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7634334564208984, "rewards_train/margins": 18.39034080505371, "rewards_train/rejected": -20.15377426147461, "step": 5375 }, { "epoch": 2.65, "learning_rate": 3.7904603070293474e-08, "loss": 0.0, "step": 5376 }, { "epoch": 2.65, "logps_train/chosen": -75.591064453125, "logps_train/ref_chosen": -62.65625, "logps_train/ref_rejected": -123.3125, "logps_train/rejected": -318.5113525390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2916259765625, "rewards_train/margins": 18.227577209472656, "rewards_train/rejected": -19.519203186035156, "step": 5376 }, { "epoch": 2.65, "learning_rate": 3.780101945174724e-08, "loss": 0.0, "step": 5377 }, { "epoch": 2.65, "logps_train/chosen": -80.4289321899414, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -331.209228515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5014381408691406, "rewards_train/margins": 18.74848747253418, "rewards_train/rejected": -20.24992561340332, "step": 5377 }, { "epoch": 2.65, "learning_rate": 3.769757200013962e-08, "loss": 0.0, "step": 5378 }, { "epoch": 2.65, "logps_train/chosen": -81.34619903564453, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -122.6875, "logps_train/rejected": -326.8304748535156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6212899684906006, "rewards_train/margins": 18.797109842300415, "rewards_train/rejected": -20.418399810791016, "step": 5378 }, { "epoch": 2.65, "learning_rate": 3.759426074594674e-08, "loss": 0.0, "step": 5379 }, { "epoch": 2.65, "logps_train/chosen": -82.3970947265625, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -328.9134521484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7272342443466187, "rewards_train/margins": 18.36264955997467, "rewards_train/rejected": -20.08988380432129, "step": 5379 }, { "epoch": 2.65, "learning_rate": 3.74910857196048e-08, "loss": 0.0, "step": 5380 }, { "epoch": 2.65, "logps_train/chosen": -85.35475158691406, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -338.471435546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9673601388931274, "rewards_train/margins": 19.132808089256287, "rewards_train/rejected": -21.100168228149414, "step": 5380 }, { "epoch": 2.65, "learning_rate": 3.7388046951510046e-08, "loss": 0.0, "step": 5381 }, { "epoch": 2.65, "logps_train/chosen": -78.67127227783203, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -317.6101989746094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4766732454299927, "rewards_train/margins": 17.881616473197937, "rewards_train/rejected": -19.35828971862793, "step": 5381 }, { "epoch": 2.65, "learning_rate": 3.7285144472018135e-08, "loss": 0.0001, "step": 5382 }, { "epoch": 2.65, "logps_train/chosen": -79.00191497802734, "logps_train/ref_chosen": -62.21875, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -326.5637512207031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6780720949172974, "rewards_train/margins": 18.56824481487274, "rewards_train/rejected": -20.24631690979004, "step": 5382 }, { "epoch": 2.65, "learning_rate": 3.718237831144494e-08, "loss": 0.0, "step": 5383 }, { "epoch": 2.65, "logps_train/chosen": -78.60710144042969, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -323.98236083984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5669605731964111, "rewards_train/margins": 18.36882472038269, "rewards_train/rejected": -19.9357852935791, "step": 5383 }, { "epoch": 2.65, "learning_rate": 3.7079748500066233e-08, "loss": 0.0, "step": 5384 }, { "epoch": 2.65, "logps_train/chosen": -86.4675064086914, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -333.63226318359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.013620376586914, "rewards_train/margins": 18.425386428833008, "rewards_train/rejected": -20.439006805419922, "step": 5384 }, { "epoch": 2.65, "learning_rate": 3.6977255068117105e-08, "loss": 0.0, "step": 5385 }, { "epoch": 2.65, "logps_train/chosen": -85.0050048828125, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -332.4443359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7851685285568237, "rewards_train/margins": 18.775769352912903, "rewards_train/rejected": -20.560937881469727, "step": 5385 }, { "epoch": 2.65, "learning_rate": 3.687489804579308e-08, "loss": 0.0002, "step": 5386 }, { "epoch": 2.65, "logps_train/chosen": -83.42352294921875, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -332.2854919433594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.989764928817749, "rewards_train/margins": 18.42306160926819, "rewards_train/rejected": -20.412826538085938, "step": 5386 }, { "epoch": 2.65, "learning_rate": 3.677267746324919e-08, "loss": 0.0001, "step": 5387 }, { "epoch": 2.65, "logps_train/chosen": -80.73249053955078, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -318.94097900390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7366034984588623, "rewards_train/margins": 17.59445881843567, "rewards_train/rejected": -19.33106231689453, "step": 5387 }, { "epoch": 2.65, "learning_rate": 3.667059335060013e-08, "loss": 0.0, "step": 5388 }, { "epoch": 2.65, "logps_train/chosen": -84.22457885742188, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -325.61962890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8525359630584717, "rewards_train/margins": 18.0765163898468, "rewards_train/rejected": -19.929052352905273, "step": 5388 }, { "epoch": 2.65, "learning_rate": 3.656864573792085e-08, "loss": 0.0, "step": 5389 }, { "epoch": 2.65, "logps_train/chosen": -76.72675323486328, "logps_train/ref_chosen": -62.46875, "logps_train/ref_rejected": -121.3125, "logps_train/rejected": -315.18072509765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4254584312438965, "rewards_train/margins": 17.963510990142822, "rewards_train/rejected": -19.38896942138672, "step": 5389 }, { "epoch": 2.65, "learning_rate": 3.6466834655245635e-08, "loss": 0.0001, "step": 5390 }, { "epoch": 2.65, "logps_train/chosen": -81.82551574707031, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -328.8832092285156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.529524326324463, "rewards_train/margins": 18.430280208587646, "rewards_train/rejected": -19.95980453491211, "step": 5390 }, { "epoch": 2.65, "learning_rate": 3.636516013256874e-08, "loss": 0.0, "step": 5391 }, { "epoch": 2.65, "logps_train/chosen": -80.96870422363281, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -327.87908935546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5634721517562866, "rewards_train/margins": 18.83717978000641, "rewards_train/rejected": -20.400651931762695, "step": 5391 }, { "epoch": 2.65, "learning_rate": 3.626362219984408e-08, "loss": 0.0, "step": 5392 }, { "epoch": 2.65, "logps_train/chosen": -81.14778137207031, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -333.8669738769531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6464675664901733, "rewards_train/margins": 18.93905794620514, "rewards_train/rejected": -20.585525512695312, "step": 5392 }, { "epoch": 2.66, "learning_rate": 3.616222088698556e-08, "loss": 0.0, "step": 5393 }, { "epoch": 2.66, "logps_train/chosen": -79.6625747680664, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -316.9449462890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5680150985717773, "rewards_train/margins": 17.659193992614746, "rewards_train/rejected": -19.227209091186523, "step": 5393 }, { "epoch": 2.66, "learning_rate": 3.606095622386668e-08, "loss": 0.0, "step": 5394 }, { "epoch": 2.66, "logps_train/chosen": -78.29039001464844, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -322.7910461425781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5110214948654175, "rewards_train/margins": 18.571452260017395, "rewards_train/rejected": -20.082473754882812, "step": 5394 }, { "epoch": 2.66, "learning_rate": 3.595982824032057e-08, "loss": 0.0, "step": 5395 }, { "epoch": 2.66, "logps_train/chosen": -81.71790313720703, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -331.95697021484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5187630653381348, "rewards_train/margins": 18.7449049949646, "rewards_train/rejected": -20.263668060302734, "step": 5395 }, { "epoch": 2.66, "learning_rate": 3.5858836966140336e-08, "loss": 0.0, "step": 5396 }, { "epoch": 2.66, "logps_train/chosen": -86.67891693115234, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -330.27972412109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.1464076042175293, "rewards_train/margins": 18.305588245391846, "rewards_train/rejected": -20.451995849609375, "step": 5396 }, { "epoch": 2.66, "learning_rate": 3.5757982431078634e-08, "loss": 0.0, "step": 5397 }, { "epoch": 2.66, "logps_train/chosen": -75.45210266113281, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -327.8785400390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1048784255981445, "rewards_train/margins": 18.857096672058105, "rewards_train/rejected": -19.96197509765625, "step": 5397 }, { "epoch": 2.66, "learning_rate": 3.565726466484798e-08, "loss": 0.0, "step": 5398 }, { "epoch": 2.66, "logps_train/chosen": -73.20414733886719, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -325.61944580078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9682421684265137, "rewards_train/margins": 18.882568836212158, "rewards_train/rejected": -19.850811004638672, "step": 5398 }, { "epoch": 2.66, "learning_rate": 3.5556683697120285e-08, "loss": 0.0, "step": 5399 }, { "epoch": 2.66, "logps_train/chosen": -76.19642639160156, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -312.6890869140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1966936588287354, "rewards_train/margins": 17.775245428085327, "rewards_train/rejected": -18.971939086914062, "step": 5399 }, { "epoch": 2.66, "learning_rate": 3.545623955752758e-08, "loss": 0.0001, "step": 5400 }, { "epoch": 2.66, "logps_train/chosen": -80.85757446289062, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -324.95526123046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5293617248535156, "rewards_train/margins": 18.296241760253906, "rewards_train/rejected": -19.825603485107422, "step": 5400 }, { "epoch": 2.66, "learning_rate": 3.5355932275661414e-08, "loss": 0.0, "step": 5401 }, { "epoch": 2.66, "logps_train/chosen": -90.83403015136719, "logps_train/ref_chosen": -67.375, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -338.63372802734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.348344564437866, "rewards_train/margins": 18.527624368667603, "rewards_train/rejected": -20.87596893310547, "step": 5401 }, { "epoch": 2.66, "learning_rate": 3.525576188107282e-08, "loss": 0.0, "step": 5402 }, { "epoch": 2.66, "logps_train/chosen": -75.26070404052734, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -321.35174560546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2132774591445923, "rewards_train/margins": 18.461058020591736, "rewards_train/rejected": -19.674335479736328, "step": 5402 }, { "epoch": 2.66, "learning_rate": 3.5155728403272796e-08, "loss": 0.0, "step": 5403 }, { "epoch": 2.66, "logps_train/chosen": -76.27362060546875, "logps_train/ref_chosen": -62.03125, "logps_train/ref_rejected": -122.3125, "logps_train/rejected": -315.41650390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4240906238555908, "rewards_train/margins": 17.888947248458862, "rewards_train/rejected": -19.313037872314453, "step": 5403 }, { "epoch": 2.66, "learning_rate": 3.5055831871731775e-08, "loss": 0.0, "step": 5404 }, { "epoch": 2.66, "logps_train/chosen": -87.18614196777344, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -335.5435791015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0478618144989014, "rewards_train/margins": 18.580323457717896, "rewards_train/rejected": -20.628185272216797, "step": 5404 }, { "epoch": 2.66, "learning_rate": 3.49560723158801e-08, "loss": 0.0, "step": 5405 }, { "epoch": 2.66, "logps_train/chosen": -74.0438232421875, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -124.0625, "logps_train/rejected": -316.22625732421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1809449195861816, "rewards_train/margins": 18.0354323387146, "rewards_train/rejected": -19.21637725830078, "step": 5405 }, { "epoch": 2.66, "learning_rate": 3.485644976510754e-08, "loss": 0.0, "step": 5406 }, { "epoch": 2.66, "logps_train/chosen": -82.21406555175781, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -323.8441162109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7393258810043335, "rewards_train/margins": 18.06119978427887, "rewards_train/rejected": -19.800525665283203, "step": 5406 }, { "epoch": 2.66, "learning_rate": 3.475696424876345e-08, "loss": 0.0, "step": 5407 }, { "epoch": 2.66, "logps_train/chosen": -76.81277465820312, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -331.9468994140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1278595924377441, "rewards_train/margins": 19.11639165878296, "rewards_train/rejected": -20.244251251220703, "step": 5407 }, { "epoch": 2.66, "learning_rate": 3.4657615796157116e-08, "loss": 0.0, "step": 5408 }, { "epoch": 2.66, "logps_train/chosen": -74.05351257324219, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -325.1996154785156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1554979085922241, "rewards_train/margins": 19.088635087013245, "rewards_train/rejected": -20.24413299560547, "step": 5408 }, { "epoch": 2.66, "learning_rate": 3.455840443655722e-08, "loss": 0.0, "step": 5409 }, { "epoch": 2.66, "logps_train/chosen": -78.1685791015625, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -120.75, "logps_train/rejected": -317.46441650390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5546996593475342, "rewards_train/margins": 18.11464238166809, "rewards_train/rejected": -19.669342041015625, "step": 5409 }, { "epoch": 2.66, "learning_rate": 3.445933019919195e-08, "loss": 0.0001, "step": 5410 }, { "epoch": 2.66, "logps_train/chosen": -86.46849822998047, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -317.9639587402344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.1533684730529785, "rewards_train/margins": 17.383212566375732, "rewards_train/rejected": -19.53658103942871, "step": 5410 }, { "epoch": 2.66, "learning_rate": 3.436039311324945e-08, "loss": 0.0001, "step": 5411 }, { "epoch": 2.66, "logps_train/chosen": -78.26371765136719, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -121.25, "logps_train/rejected": -320.34747314453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4936572313308716, "rewards_train/margins": 18.416775584220886, "rewards_train/rejected": -19.910432815551758, "step": 5411 }, { "epoch": 2.66, "learning_rate": 3.426159320787719e-08, "loss": 0.0, "step": 5412 }, { "epoch": 2.66, "logps_train/chosen": -77.10221862792969, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -324.7337646484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2676435708999634, "rewards_train/margins": 18.628878951072693, "rewards_train/rejected": -19.896522521972656, "step": 5412 }, { "epoch": 2.67, "learning_rate": 3.416293051218222e-08, "loss": 0.0, "step": 5413 }, { "epoch": 2.67, "logps_train/chosen": -78.15018463134766, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -322.36981201171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.201810359954834, "rewards_train/margins": 18.465542316436768, "rewards_train/rejected": -19.6673526763916, "step": 5413 }, { "epoch": 2.67, "learning_rate": 3.4064405055231227e-08, "loss": 0.0, "step": 5414 }, { "epoch": 2.67, "logps_train/chosen": -82.87256622314453, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -340.4400939941406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6801763772964478, "rewards_train/margins": 19.552993178367615, "rewards_train/rejected": -21.233169555664062, "step": 5414 }, { "epoch": 2.67, "learning_rate": 3.3966016866050603e-08, "loss": 0.0, "step": 5415 }, { "epoch": 2.67, "logps_train/chosen": -75.16877746582031, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -325.445068359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.218928575515747, "rewards_train/margins": 18.838663816452026, "rewards_train/rejected": -20.057592391967773, "step": 5415 }, { "epoch": 2.67, "learning_rate": 3.3867765973626115e-08, "loss": 0.0, "step": 5416 }, { "epoch": 2.67, "logps_train/chosen": -82.34815979003906, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -319.96875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7654552459716797, "rewards_train/margins": 17.930686950683594, "rewards_train/rejected": -19.696142196655273, "step": 5416 }, { "epoch": 2.67, "learning_rate": 3.376965240690299e-08, "loss": 0.0, "step": 5417 }, { "epoch": 2.67, "logps_train/chosen": -81.8028335571289, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -119.5, "logps_train/rejected": -312.6504211425781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7925386428833008, "rewards_train/margins": 17.523385047912598, "rewards_train/rejected": -19.3159236907959, "step": 5417 }, { "epoch": 2.67, "learning_rate": 3.3671676194786504e-08, "loss": 0.0001, "step": 5418 }, { "epoch": 2.67, "logps_train/chosen": -76.73797607421875, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -121.9375, "logps_train/rejected": -315.37615966796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.403387188911438, "rewards_train/margins": 17.93989360332489, "rewards_train/rejected": -19.343280792236328, "step": 5418 }, { "epoch": 2.67, "learning_rate": 3.357383736614067e-08, "loss": 0.0001, "step": 5419 }, { "epoch": 2.67, "logps_train/chosen": -79.3996353149414, "logps_train/ref_chosen": -67.1875, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -329.5752258300781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2177228927612305, "rewards_train/margins": 18.838189125061035, "rewards_train/rejected": -20.055912017822266, "step": 5419 }, { "epoch": 2.67, "learning_rate": 3.3476135949789705e-08, "loss": 0.0, "step": 5420 }, { "epoch": 2.67, "logps_train/chosen": -80.74525451660156, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -118.5625, "logps_train/rejected": -309.96966552734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6262832880020142, "rewards_train/margins": 17.512089133262634, "rewards_train/rejected": -19.13837242126465, "step": 5420 }, { "epoch": 2.67, "learning_rate": 3.337857197451721e-08, "loss": 0.0, "step": 5421 }, { "epoch": 2.67, "logps_train/chosen": -77.06903839111328, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -328.73779296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3051458597183228, "rewards_train/margins": 18.678889632225037, "rewards_train/rejected": -19.98403549194336, "step": 5421 }, { "epoch": 2.67, "learning_rate": 3.328114546906591e-08, "loss": 0.0, "step": 5422 }, { "epoch": 2.67, "logps_train/chosen": -77.87681579589844, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -120.5625, "logps_train/rejected": -313.4940490722656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5121440887451172, "rewards_train/margins": 17.782184600830078, "rewards_train/rejected": -19.294328689575195, "step": 5422 }, { "epoch": 2.67, "learning_rate": 3.318385646213856e-08, "loss": 0.0007, "step": 5423 }, { "epoch": 2.67, "logps_train/chosen": -88.6934814453125, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -323.393310546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.278430461883545, "rewards_train/margins": 17.573986530303955, "rewards_train/rejected": -19.8524169921875, "step": 5423 }, { "epoch": 2.67, "learning_rate": 3.308670498239707e-08, "loss": 0.0, "step": 5424 }, { "epoch": 2.67, "logps_train/chosen": -88.11551666259766, "logps_train/ref_chosen": -70.3125, "logps_train/ref_rejected": -136.75, "logps_train/rejected": -352.824462890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.780789852142334, "rewards_train/margins": 19.820602893829346, "rewards_train/rejected": -21.60139274597168, "step": 5424 }, { "epoch": 2.67, "learning_rate": 3.298969105846283e-08, "loss": 0.0, "step": 5425 }, { "epoch": 2.67, "logps_train/chosen": -83.70845031738281, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -323.6881103515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9015328884124756, "rewards_train/margins": 18.141985177993774, "rewards_train/rejected": -20.04351806640625, "step": 5425 }, { "epoch": 2.67, "learning_rate": 3.289281471891692e-08, "loss": 0.0, "step": 5426 }, { "epoch": 2.67, "logps_train/chosen": -73.47366333007812, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -324.9127197265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8180691003799438, "rewards_train/margins": 18.92300832271576, "rewards_train/rejected": -19.741077423095703, "step": 5426 }, { "epoch": 2.67, "learning_rate": 3.279607599229978e-08, "loss": 0.0, "step": 5427 }, { "epoch": 2.67, "logps_train/chosen": -74.73025512695312, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -317.18853759765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1616235971450806, "rewards_train/margins": 18.11279809474945, "rewards_train/rejected": -19.27442169189453, "step": 5427 }, { "epoch": 2.67, "learning_rate": 3.269947490711117e-08, "loss": 0.0, "step": 5428 }, { "epoch": 2.67, "logps_train/chosen": -78.76570129394531, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -121.875, "logps_train/rejected": -322.95574951171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4497144222259521, "rewards_train/margins": 18.660362482070923, "rewards_train/rejected": -20.110076904296875, "step": 5428 }, { "epoch": 2.67, "learning_rate": 3.260301149181044e-08, "loss": 0.0, "step": 5429 }, { "epoch": 2.67, "logps_train/chosen": -85.02767944335938, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -120.3125, "logps_train/rejected": -311.0506286621094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0556490421295166, "rewards_train/margins": 17.020118474960327, "rewards_train/rejected": -19.075767517089844, "step": 5429 }, { "epoch": 2.67, "learning_rate": 3.250668577481652e-08, "loss": 0.0, "step": 5430 }, { "epoch": 2.67, "logps_train/chosen": -86.21229553222656, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -331.0252685546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.020644187927246, "rewards_train/margins": 18.37836742401123, "rewards_train/rejected": -20.399011611938477, "step": 5430 }, { "epoch": 2.67, "learning_rate": 3.2410497784507443e-08, "loss": 0.0, "step": 5431 }, { "epoch": 2.67, "logps_train/chosen": -81.07405090332031, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -121.625, "logps_train/rejected": -320.9718017578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8153151273727417, "rewards_train/margins": 18.11980450153351, "rewards_train/rejected": -19.93511962890625, "step": 5431 }, { "epoch": 2.67, "learning_rate": 3.2314447549220856e-08, "loss": 0.0, "step": 5432 }, { "epoch": 2.67, "logps_train/chosen": -81.57325744628906, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -320.2149658203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.824122428894043, "rewards_train/margins": 17.99962329864502, "rewards_train/rejected": -19.823745727539062, "step": 5432 }, { "epoch": 2.68, "learning_rate": 3.2218535097253973e-08, "loss": 0.0, "step": 5433 }, { "epoch": 2.68, "logps_train/chosen": -75.37841796875, "logps_train/ref_chosen": -61.65625, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -319.8096618652344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3724116086959839, "rewards_train/margins": 18.315929532051086, "rewards_train/rejected": -19.68834114074707, "step": 5433 }, { "epoch": 2.68, "learning_rate": 3.212276045686302e-08, "loss": 0.0, "step": 5434 }, { "epoch": 2.68, "logps_train/chosen": -79.11241149902344, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -314.97998046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4889756441116333, "rewards_train/margins": 17.584998965263367, "rewards_train/rejected": -19.073974609375, "step": 5434 }, { "epoch": 2.68, "learning_rate": 3.202712365626403e-08, "loss": 0.0, "step": 5435 }, { "epoch": 2.68, "logps_train/chosen": -78.8639907836914, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -327.70465087890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3228247165679932, "rewards_train/margins": 18.685140371322632, "rewards_train/rejected": -20.007965087890625, "step": 5435 }, { "epoch": 2.68, "learning_rate": 3.193162472363214e-08, "loss": 0.0, "step": 5436 }, { "epoch": 2.68, "logps_train/chosen": -75.35960388183594, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -325.43798828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9824445247650146, "rewards_train/margins": 19.005199670791626, "rewards_train/rejected": -19.98764419555664, "step": 5436 }, { "epoch": 2.68, "learning_rate": 3.183626368710202e-08, "loss": 0.0, "step": 5437 }, { "epoch": 2.68, "logps_train/chosen": -78.51404571533203, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -330.0134582519531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4854624271392822, "rewards_train/margins": 19.038589239120483, "rewards_train/rejected": -20.524051666259766, "step": 5437 }, { "epoch": 2.68, "learning_rate": 3.174104057476767e-08, "loss": 0.0, "step": 5438 }, { "epoch": 2.68, "logps_train/chosen": -71.98457336425781, "logps_train/ref_chosen": -60.46875, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -329.29412841796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.152022123336792, "rewards_train/margins": 19.42910122871399, "rewards_train/rejected": -20.58112335205078, "step": 5438 }, { "epoch": 2.68, "learning_rate": 3.1645955414682515e-08, "loss": 0.0001, "step": 5439 }, { "epoch": 2.68, "logps_train/chosen": -82.16448974609375, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -325.93353271484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9474060535430908, "rewards_train/margins": 18.184033155441284, "rewards_train/rejected": -20.131439208984375, "step": 5439 }, { "epoch": 2.68, "learning_rate": 3.155100823485923e-08, "loss": 0.0, "step": 5440 }, { "epoch": 2.68, "logps_train/chosen": -80.9366683959961, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -335.4999694824219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5171537399291992, "rewards_train/margins": 19.05442523956299, "rewards_train/rejected": -20.571578979492188, "step": 5440 }, { "epoch": 2.68, "learning_rate": 3.1456199063269795e-08, "loss": 0.0, "step": 5441 }, { "epoch": 2.68, "logps_train/chosen": -81.72949981689453, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -121.6875, "logps_train/rejected": -319.25555419921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5708993673324585, "rewards_train/margins": 18.183563828468323, "rewards_train/rejected": -19.75446319580078, "step": 5441 }, { "epoch": 2.68, "learning_rate": 3.136152792784586e-08, "loss": 0.0001, "step": 5442 }, { "epoch": 2.68, "logps_train/chosen": -72.88691711425781, "logps_train/ref_chosen": -61.5, "logps_train/ref_rejected": -118.0, "logps_train/rejected": -305.74066162109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.138228178024292, "rewards_train/margins": 17.634814500808716, "rewards_train/rejected": -18.773042678833008, "step": 5442 }, { "epoch": 2.68, "learning_rate": 3.126699485647799e-08, "loss": 0.0, "step": 5443 }, { "epoch": 2.68, "logps_train/chosen": -80.39751434326172, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -339.92431640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5251030921936035, "rewards_train/margins": 19.501119136810303, "rewards_train/rejected": -21.026222229003906, "step": 5443 }, { "epoch": 2.68, "learning_rate": 3.117259987701632e-08, "loss": 0.0, "step": 5444 }, { "epoch": 2.68, "logps_train/chosen": -84.29437255859375, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -339.26031494140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9717706441879272, "rewards_train/margins": 18.99849760532379, "rewards_train/rejected": -20.97026824951172, "step": 5444 }, { "epoch": 2.68, "learning_rate": 3.10783430172703e-08, "loss": 0.0, "step": 5445 }, { "epoch": 2.68, "logps_train/chosen": -78.57963562011719, "logps_train/ref_chosen": -62.34375, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -326.3473815917969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6251022815704346, "rewards_train/margins": 18.4402015209198, "rewards_train/rejected": -20.065303802490234, "step": 5445 }, { "epoch": 2.68, "learning_rate": 3.098422430500863e-08, "loss": 0.0, "step": 5446 }, { "epoch": 2.68, "logps_train/chosen": -81.27931213378906, "logps_train/ref_chosen": -68.4375, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -319.65411376953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2838143110275269, "rewards_train/margins": 18.00415527820587, "rewards_train/rejected": -19.2879695892334, "step": 5446 }, { "epoch": 2.68, "learning_rate": 3.089024376795918e-08, "loss": 0.0, "step": 5447 }, { "epoch": 2.68, "logps_train/chosen": -76.53544616699219, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -333.3252868652344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4229786396026611, "rewards_train/margins": 19.36550784111023, "rewards_train/rejected": -20.78848648071289, "step": 5447 }, { "epoch": 2.68, "learning_rate": 3.079640143380946e-08, "loss": 0.0, "step": 5448 }, { "epoch": 2.68, "logps_train/chosen": -86.27865600585938, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -330.4631042480469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9127777814865112, "rewards_train/margins": 18.297450184822083, "rewards_train/rejected": -20.210227966308594, "step": 5448 }, { "epoch": 2.68, "learning_rate": 3.0702697330205974e-08, "loss": 0.0001, "step": 5449 }, { "epoch": 2.68, "logps_train/chosen": -84.27540588378906, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -331.959716796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.020606517791748, "rewards_train/margins": 18.37805414199829, "rewards_train/rejected": -20.39866065979004, "step": 5449 }, { "epoch": 2.68, "learning_rate": 3.0609131484754526e-08, "loss": 0.0, "step": 5450 }, { "epoch": 2.68, "logps_train/chosen": -84.07308197021484, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -327.7344055175781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8811118602752686, "rewards_train/margins": 18.31527829170227, "rewards_train/rejected": -20.19639015197754, "step": 5450 }, { "epoch": 2.68, "learning_rate": 3.0515703925020285e-08, "loss": 0.0001, "step": 5451 }, { "epoch": 2.68, "logps_train/chosen": -81.69334411621094, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -327.7300109863281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6722640991210938, "rewards_train/margins": 18.35527992248535, "rewards_train/rejected": -20.027544021606445, "step": 5451 }, { "epoch": 2.68, "learning_rate": 3.042241467852752e-08, "loss": 0.0001, "step": 5452 }, { "epoch": 2.68, "logps_train/chosen": -82.70113372802734, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -328.43798828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6607879400253296, "rewards_train/margins": 18.291799187660217, "rewards_train/rejected": -19.952587127685547, "step": 5452 }, { "epoch": 2.68, "learning_rate": 3.032926377276013e-08, "loss": 0.0, "step": 5453 }, { "epoch": 2.68, "logps_train/chosen": -79.69458770751953, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -333.87945556640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4728765487670898, "rewards_train/margins": 19.28157138824463, "rewards_train/rejected": -20.75444793701172, "step": 5453 }, { "epoch": 2.69, "learning_rate": 3.023625123516083e-08, "loss": 0.0, "step": 5454 }, { "epoch": 2.69, "logps_train/chosen": -78.52135467529297, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -324.3382873535156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5247914791107178, "rewards_train/margins": 18.124664545059204, "rewards_train/rejected": -19.649456024169922, "step": 5454 }, { "epoch": 2.69, "learning_rate": 3.014337709313175e-08, "loss": 0.0001, "step": 5455 }, { "epoch": 2.69, "logps_train/chosen": -80.96441650390625, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -327.65545654296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5778379440307617, "rewards_train/margins": 18.778286933898926, "rewards_train/rejected": -20.356124877929688, "step": 5455 }, { "epoch": 2.69, "learning_rate": 3.0050641374034235e-08, "loss": 0.0, "step": 5456 }, { "epoch": 2.69, "logps_train/chosen": -80.00592803955078, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -340.7633056640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3214421272277832, "rewards_train/margins": 19.679254055023193, "rewards_train/rejected": -21.000696182250977, "step": 5456 }, { "epoch": 2.69, "learning_rate": 2.995804410518893e-08, "loss": 0.0, "step": 5457 }, { "epoch": 2.69, "logps_train/chosen": -80.18843078613281, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -319.98797607421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5411577224731445, "rewards_train/margins": 18.28361415863037, "rewards_train/rejected": -19.824771881103516, "step": 5457 }, { "epoch": 2.69, "learning_rate": 2.9865585313875564e-08, "loss": 0.0, "step": 5458 }, { "epoch": 2.69, "logps_train/chosen": -77.86842346191406, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -328.7764892578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3486104011535645, "rewards_train/margins": 18.694859981536865, "rewards_train/rejected": -20.04347038269043, "step": 5458 }, { "epoch": 2.69, "learning_rate": 2.9773265027333015e-08, "loss": 0.0, "step": 5459 }, { "epoch": 2.69, "logps_train/chosen": -76.29446411132812, "logps_train/ref_chosen": -62.53125, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -324.7230224609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3764183521270752, "rewards_train/margins": 18.663172483444214, "rewards_train/rejected": -20.03959083557129, "step": 5459 }, { "epoch": 2.69, "learning_rate": 2.9681083272759643e-08, "loss": 0.0, "step": 5460 }, { "epoch": 2.69, "logps_train/chosen": -74.7269515991211, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -120.75, "logps_train/rejected": -320.6348876953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2614648342132568, "rewards_train/margins": 18.725557565689087, "rewards_train/rejected": -19.987022399902344, "step": 5460 }, { "epoch": 2.69, "learning_rate": 2.9589040077312777e-08, "loss": 0.0, "step": 5461 }, { "epoch": 2.69, "logps_train/chosen": -79.23786926269531, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -334.3830871582031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.508406400680542, "rewards_train/margins": 19.182198762893677, "rewards_train/rejected": -20.69060516357422, "step": 5461 }, { "epoch": 2.69, "learning_rate": 2.9497135468108837e-08, "loss": 0.0, "step": 5462 }, { "epoch": 2.69, "logps_train/chosen": -82.37770080566406, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -328.9923095703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.718775987625122, "rewards_train/margins": 18.798134565353394, "rewards_train/rejected": -20.516910552978516, "step": 5462 }, { "epoch": 2.69, "learning_rate": 2.9405369472223663e-08, "loss": 0.0, "step": 5463 }, { "epoch": 2.69, "logps_train/chosen": -78.34609985351562, "logps_train/ref_chosen": -61.875, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -324.2503662109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6482820510864258, "rewards_train/margins": 18.123093605041504, "rewards_train/rejected": -19.77137565612793, "step": 5463 }, { "epoch": 2.69, "learning_rate": 2.9313742116692186e-08, "loss": 0.0, "step": 5464 }, { "epoch": 2.69, "logps_train/chosen": -77.79719543457031, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -321.9679870605469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3879218101501465, "rewards_train/margins": 18.552088260650635, "rewards_train/rejected": -19.94001007080078, "step": 5464 }, { "epoch": 2.69, "learning_rate": 2.9222253428508313e-08, "loss": 0.0, "step": 5465 }, { "epoch": 2.69, "logps_train/chosen": -80.40614318847656, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -333.6129150390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.578993320465088, "rewards_train/margins": 18.792062282562256, "rewards_train/rejected": -20.371055603027344, "step": 5465 }, { "epoch": 2.69, "learning_rate": 2.9130903434625152e-08, "loss": 0.0001, "step": 5466 }, { "epoch": 2.69, "logps_train/chosen": -80.39418029785156, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -343.2986145019531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5297499895095825, "rewards_train/margins": 19.728333353996277, "rewards_train/rejected": -21.25808334350586, "step": 5466 }, { "epoch": 2.69, "learning_rate": 2.903969216195529e-08, "loss": 0.0, "step": 5467 }, { "epoch": 2.69, "logps_train/chosen": -82.09400177001953, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -330.96258544921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7159919738769531, "rewards_train/margins": 18.630508422851562, "rewards_train/rejected": -20.346500396728516, "step": 5467 }, { "epoch": 2.69, "learning_rate": 2.8948619637370053e-08, "loss": 0.0, "step": 5468 }, { "epoch": 2.69, "logps_train/chosen": -80.03588104248047, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -331.61444091796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5389883518218994, "rewards_train/margins": 18.92997717857361, "rewards_train/rejected": -20.468965530395508, "step": 5468 }, { "epoch": 2.69, "learning_rate": 2.8857685887699877e-08, "loss": 0.0, "step": 5469 }, { "epoch": 2.69, "logps_train/chosen": -72.54853820800781, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -120.5, "logps_train/rejected": -314.58587646484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0045855045318604, "rewards_train/margins": 18.40688681602478, "rewards_train/rejected": -19.41147232055664, "step": 5469 }, { "epoch": 2.69, "learning_rate": 2.8766890939734834e-08, "loss": 0.0, "step": 5470 }, { "epoch": 2.69, "logps_train/chosen": -80.73236846923828, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -319.43603515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5225529670715332, "rewards_train/margins": 18.1102614402771, "rewards_train/rejected": -19.632814407348633, "step": 5470 }, { "epoch": 2.69, "learning_rate": 2.8676234820223243e-08, "loss": 0.0, "step": 5471 }, { "epoch": 2.69, "logps_train/chosen": -78.5079345703125, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -327.46142578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4902459383010864, "rewards_train/margins": 18.63729465007782, "rewards_train/rejected": -20.127540588378906, "step": 5471 }, { "epoch": 2.69, "learning_rate": 2.85857175558733e-08, "loss": 0.0, "step": 5472 }, { "epoch": 2.69, "logps_train/chosen": -78.129150390625, "logps_train/ref_chosen": -62.53125, "logps_train/ref_rejected": -121.3125, "logps_train/rejected": -316.5304260253906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5611572265625, "rewards_train/margins": 17.961612701416016, "rewards_train/rejected": -19.522769927978516, "step": 5472 }, { "epoch": 2.69, "learning_rate": 2.849533917335212e-08, "loss": 0.0, "step": 5473 }, { "epoch": 2.69, "logps_train/chosen": -74.65519714355469, "logps_train/ref_chosen": -60.71875, "logps_train/ref_rejected": -120.75, "logps_train/rejected": -314.4411926269531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.394620656967163, "rewards_train/margins": 17.97161889076233, "rewards_train/rejected": -19.366239547729492, "step": 5473 }, { "epoch": 2.7, "learning_rate": 2.840509969928545e-08, "loss": 0.0, "step": 5474 }, { "epoch": 2.7, "logps_train/chosen": -81.37255096435547, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -321.6349182128906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5369133949279785, "rewards_train/margins": 18.18321943283081, "rewards_train/rejected": -19.72013282775879, "step": 5474 }, { "epoch": 2.7, "learning_rate": 2.8314999160258702e-08, "loss": 0.0, "step": 5475 }, { "epoch": 2.7, "logps_train/chosen": -75.93971252441406, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -332.0014343261719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0704365968704224, "rewards_train/margins": 19.11056697368622, "rewards_train/rejected": -20.18100357055664, "step": 5475 }, { "epoch": 2.7, "learning_rate": 2.8225037582816026e-08, "loss": 0.0, "step": 5476 }, { "epoch": 2.7, "logps_train/chosen": -83.75883483886719, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -333.2588195800781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8310595750808716, "rewards_train/margins": 18.92988097667694, "rewards_train/rejected": -20.760940551757812, "step": 5476 }, { "epoch": 2.7, "learning_rate": 2.813521499346072e-08, "loss": 0.0, "step": 5477 }, { "epoch": 2.7, "logps_train/chosen": -78.36732482910156, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -322.6181640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3122206926345825, "rewards_train/margins": 18.42537772655487, "rewards_train/rejected": -19.737598419189453, "step": 5477 }, { "epoch": 2.7, "learning_rate": 2.8045531418654956e-08, "loss": 0.0, "step": 5478 }, { "epoch": 2.7, "logps_train/chosen": -90.60124206542969, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -333.7872009277344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.3776051998138428, "rewards_train/margins": 17.9502375125885, "rewards_train/rejected": -20.327842712402344, "step": 5478 }, { "epoch": 2.7, "learning_rate": 2.7955986884820425e-08, "loss": 0.0001, "step": 5479 }, { "epoch": 2.7, "logps_train/chosen": -76.88023376464844, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -323.56207275390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3978863954544067, "rewards_train/margins": 18.18996226787567, "rewards_train/rejected": -19.587848663330078, "step": 5479 }, { "epoch": 2.7, "learning_rate": 2.7866581418337366e-08, "loss": 0.0, "step": 5480 }, { "epoch": 2.7, "logps_train/chosen": -85.30007934570312, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -333.90863037109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8866491317749023, "rewards_train/margins": 18.874329566955566, "rewards_train/rejected": -20.76097869873047, "step": 5480 }, { "epoch": 2.7, "learning_rate": 2.7777315045545202e-08, "loss": 0.0, "step": 5481 }, { "epoch": 2.7, "logps_train/chosen": -82.5167236328125, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -338.9704895019531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7545039653778076, "rewards_train/margins": 19.1517231464386, "rewards_train/rejected": -20.906227111816406, "step": 5481 }, { "epoch": 2.7, "learning_rate": 2.7688187792742625e-08, "loss": 0.0, "step": 5482 }, { "epoch": 2.7, "logps_train/chosen": -79.36576843261719, "logps_train/ref_chosen": -61.625, "logps_train/ref_rejected": -120.25, "logps_train/rejected": -311.163330078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7750047445297241, "rewards_train/margins": 17.31632673740387, "rewards_train/rejected": -19.091331481933594, "step": 5482 }, { "epoch": 2.7, "learning_rate": 2.7599199686186792e-08, "loss": 0.0, "step": 5483 }, { "epoch": 2.7, "logps_train/chosen": -75.48881530761719, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -314.54449462890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1102588176727295, "rewards_train/margins": 18.042479753494263, "rewards_train/rejected": -19.152738571166992, "step": 5483 }, { "epoch": 2.7, "learning_rate": 2.7510350752094403e-08, "loss": 0.0, "step": 5484 }, { "epoch": 2.7, "logps_train/chosen": -74.9603271484375, "logps_train/ref_chosen": -62.0, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -315.189208984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2956414222717285, "rewards_train/margins": 17.752870082855225, "rewards_train/rejected": -19.048511505126953, "step": 5484 }, { "epoch": 2.7, "learning_rate": 2.7421641016641074e-08, "loss": 0.0, "step": 5485 }, { "epoch": 2.7, "logps_train/chosen": -77.37139892578125, "logps_train/ref_chosen": -62.84375, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -320.554931640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4520810842514038, "rewards_train/margins": 18.460930466651917, "rewards_train/rejected": -19.91301155090332, "step": 5485 }, { "epoch": 2.7, "learning_rate": 2.733307050596101e-08, "loss": 0.0, "step": 5486 }, { "epoch": 2.7, "logps_train/chosen": -91.84526062011719, "logps_train/ref_chosen": -70.125, "logps_train/ref_rejected": -134.375, "logps_train/rejected": -346.896484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.171684741973877, "rewards_train/margins": 19.080756664276123, "rewards_train/rejected": -21.25244140625, "step": 5486 }, { "epoch": 2.7, "learning_rate": 2.7244639246147894e-08, "loss": 0.0, "step": 5487 }, { "epoch": 2.7, "logps_train/chosen": -86.32771301269531, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -327.6666564941406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.1108477115631104, "rewards_train/margins": 18.0121648311615, "rewards_train/rejected": -20.12301254272461, "step": 5487 }, { "epoch": 2.7, "learning_rate": 2.7156347263254055e-08, "loss": 0.0, "step": 5488 }, { "epoch": 2.7, "logps_train/chosen": -81.02168273925781, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -324.0878601074219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.737006664276123, "rewards_train/margins": 18.280619144439697, "rewards_train/rejected": -20.01762580871582, "step": 5488 }, { "epoch": 2.7, "learning_rate": 2.7068194583290906e-08, "loss": 0.0, "step": 5489 }, { "epoch": 2.7, "logps_train/chosen": -76.05458068847656, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -323.84454345703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1699597835540771, "rewards_train/margins": 18.770548105239868, "rewards_train/rejected": -19.940507888793945, "step": 5489 }, { "epoch": 2.7, "learning_rate": 2.6980181232228947e-08, "loss": 0.0, "step": 5490 }, { "epoch": 2.7, "logps_train/chosen": -75.2542724609375, "logps_train/ref_chosen": -61.0, "logps_train/ref_rejected": -123.5625, "logps_train/rejected": -319.97320556640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4242072105407715, "rewards_train/margins": 18.215301990509033, "rewards_train/rejected": -19.639509201049805, "step": 5490 }, { "epoch": 2.7, "learning_rate": 2.6892307235997447e-08, "loss": 0.0, "step": 5491 }, { "epoch": 2.7, "logps_train/chosen": -74.18179321289062, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -328.96881103515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0749905109405518, "rewards_train/margins": 19.285658597946167, "rewards_train/rejected": -20.36064910888672, "step": 5491 }, { "epoch": 2.7, "learning_rate": 2.680457262048458e-08, "loss": 0.0, "step": 5492 }, { "epoch": 2.7, "logps_train/chosen": -74.97736358642578, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -331.6535339355469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1866521835327148, "rewards_train/margins": 19.1593656539917, "rewards_train/rejected": -20.346017837524414, "step": 5492 }, { "epoch": 2.7, "learning_rate": 2.671697741153761e-08, "loss": 0.0, "step": 5493 }, { "epoch": 2.7, "logps_train/chosen": -77.75675964355469, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -121.8125, "logps_train/rejected": -310.9610595703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.499406337738037, "rewards_train/margins": 17.414717197418213, "rewards_train/rejected": -18.91412353515625, "step": 5493 }, { "epoch": 2.71, "learning_rate": 2.6629521634962738e-08, "loss": 0.0, "step": 5494 }, { "epoch": 2.71, "logps_train/chosen": -80.67913055419922, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -316.70953369140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6815605163574219, "rewards_train/margins": 17.751110076904297, "rewards_train/rejected": -19.43267059326172, "step": 5494 }, { "epoch": 2.71, "learning_rate": 2.6542205316525012e-08, "loss": 0.0, "step": 5495 }, { "epoch": 2.71, "logps_train/chosen": -76.30519104003906, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -322.38092041015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2455096244812012, "rewards_train/margins": 18.46836519241333, "rewards_train/rejected": -19.71387481689453, "step": 5495 }, { "epoch": 2.71, "learning_rate": 2.6455028481948304e-08, "loss": 0.0, "step": 5496 }, { "epoch": 2.71, "logps_train/chosen": -86.23184204101562, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -324.11773681640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.078359842300415, "rewards_train/margins": 17.916127920150757, "rewards_train/rejected": -19.994487762451172, "step": 5496 }, { "epoch": 2.71, "learning_rate": 2.6367991156915625e-08, "loss": 0.0001, "step": 5497 }, { "epoch": 2.71, "logps_train/chosen": -80.66282653808594, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -322.4487609863281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7461652755737305, "rewards_train/margins": 18.01834201812744, "rewards_train/rejected": -19.764507293701172, "step": 5497 }, { "epoch": 2.71, "learning_rate": 2.628109336706874e-08, "loss": 0.0, "step": 5498 }, { "epoch": 2.71, "logps_train/chosen": -76.6925048828125, "logps_train/ref_chosen": -60.9375, "logps_train/ref_rejected": -120.8125, "logps_train/rejected": -317.1700439453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.576770544052124, "rewards_train/margins": 18.05649209022522, "rewards_train/rejected": -19.633262634277344, "step": 5498 }, { "epoch": 2.71, "learning_rate": 2.6194335138008174e-08, "loss": 0.0001, "step": 5499 }, { "epoch": 2.71, "logps_train/chosen": -79.81575012207031, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -319.6759033203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5361647605895996, "rewards_train/margins": 17.99890375137329, "rewards_train/rejected": -19.53506851196289, "step": 5499 }, { "epoch": 2.71, "learning_rate": 2.6107716495293698e-08, "loss": 0.0, "step": 5500 }, { "epoch": 2.71, "logps_train/chosen": -85.3779296875, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -131.5, "logps_train/rejected": -335.40679931640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.907910704612732, "rewards_train/margins": 18.480373978614807, "rewards_train/rejected": -20.38828468322754, "step": 5500 }, { "epoch": 2.71, "learning_rate": 2.602123746444368e-08, "loss": 0.0001, "step": 5501 }, { "epoch": 2.71, "logps_train/chosen": -77.46723937988281, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -328.38348388671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4542924165725708, "rewards_train/margins": 18.808372378349304, "rewards_train/rejected": -20.262664794921875, "step": 5501 }, { "epoch": 2.71, "learning_rate": 2.593489807093535e-08, "loss": 0.0, "step": 5502 }, { "epoch": 2.71, "logps_train/chosen": -80.10321807861328, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -330.0191650390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4896190166473389, "rewards_train/margins": 18.997601747512817, "rewards_train/rejected": -20.487220764160156, "step": 5502 }, { "epoch": 2.71, "learning_rate": 2.5848698340204978e-08, "loss": 0.0, "step": 5503 }, { "epoch": 2.71, "logps_train/chosen": -81.07218933105469, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -123.1875, "logps_train/rejected": -323.8763732910156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.810978651046753, "rewards_train/margins": 18.259276628494263, "rewards_train/rejected": -20.070255279541016, "step": 5503 }, { "epoch": 2.71, "learning_rate": 2.5762638297647408e-08, "loss": 0.0, "step": 5504 }, { "epoch": 2.71, "logps_train/chosen": -83.60992431640625, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -121.3125, "logps_train/rejected": -314.36175537109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.88511323928833, "rewards_train/margins": 17.416001796722412, "rewards_train/rejected": -19.301115036010742, "step": 5504 }, { "epoch": 2.71, "learning_rate": 2.5676717968616756e-08, "loss": 0.0, "step": 5505 }, { "epoch": 2.71, "logps_train/chosen": -75.5346450805664, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -122.4375, "logps_train/rejected": -321.6708068847656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3290016651153564, "rewards_train/margins": 18.591156721115112, "rewards_train/rejected": -19.92015838623047, "step": 5505 }, { "epoch": 2.71, "learning_rate": 2.5590937378425605e-08, "loss": 0.0, "step": 5506 }, { "epoch": 2.71, "logps_train/chosen": -78.11445617675781, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -330.8514099121094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2211138010025024, "rewards_train/margins": 19.085950255393982, "rewards_train/rejected": -20.307064056396484, "step": 5506 }, { "epoch": 2.71, "learning_rate": 2.5505296552345458e-08, "loss": 0.0, "step": 5507 }, { "epoch": 2.71, "logps_train/chosen": -82.7910385131836, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -330.3499755859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7980493307113647, "rewards_train/margins": 18.403452515602112, "rewards_train/rejected": -20.201501846313477, "step": 5507 }, { "epoch": 2.71, "learning_rate": 2.541979551560669e-08, "loss": 0.0, "step": 5508 }, { "epoch": 2.71, "logps_train/chosen": -76.1128158569336, "logps_train/ref_chosen": -61.96875, "logps_train/ref_rejected": -120.0625, "logps_train/rejected": -313.0173645019531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4131371974945068, "rewards_train/margins": 17.881860971450806, "rewards_train/rejected": -19.294998168945312, "step": 5508 }, { "epoch": 2.71, "learning_rate": 2.5334434293398597e-08, "loss": 0.0, "step": 5509 }, { "epoch": 2.71, "logps_train/chosen": -79.516845703125, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -333.2682189941406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5125243663787842, "rewards_train/margins": 19.0705463886261, "rewards_train/rejected": -20.583070755004883, "step": 5509 }, { "epoch": 2.71, "learning_rate": 2.5249212910869067e-08, "loss": 0.0, "step": 5510 }, { "epoch": 2.71, "logps_train/chosen": -81.4887924194336, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -336.40045166015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.709475040435791, "rewards_train/margins": 18.961480617523193, "rewards_train/rejected": -20.670955657958984, "step": 5510 }, { "epoch": 2.71, "learning_rate": 2.5164131393124844e-08, "loss": 0.0, "step": 5511 }, { "epoch": 2.71, "logps_train/chosen": -76.24946594238281, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -317.84735107421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1268513202667236, "rewards_train/margins": 18.095234632492065, "rewards_train/rejected": -19.22208595275879, "step": 5511 }, { "epoch": 2.71, "learning_rate": 2.5079189765231713e-08, "loss": 0.0, "step": 5512 }, { "epoch": 2.71, "logps_train/chosen": -84.57291412353516, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -329.33868408203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7998697757720947, "rewards_train/margins": 18.30782675743103, "rewards_train/rejected": -20.107696533203125, "step": 5512 }, { "epoch": 2.71, "learning_rate": 2.4994388052213877e-08, "loss": 0.0, "step": 5513 }, { "epoch": 2.71, "logps_train/chosen": -81.21408081054688, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -332.4784851074219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5375207662582397, "rewards_train/margins": 18.83962309360504, "rewards_train/rejected": -20.37714385986328, "step": 5513 }, { "epoch": 2.71, "learning_rate": 2.4909726279054523e-08, "loss": 0.0, "step": 5514 }, { "epoch": 2.71, "logps_train/chosen": -83.25890350341797, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -328.7420349121094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9487416744232178, "rewards_train/margins": 18.59924340248108, "rewards_train/rejected": -20.547985076904297, "step": 5514 }, { "epoch": 2.72, "learning_rate": 2.4825204470695637e-08, "loss": 0.0, "step": 5515 }, { "epoch": 2.72, "logps_train/chosen": -79.17922973632812, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -331.5926513671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4050328731536865, "rewards_train/margins": 18.865022897720337, "rewards_train/rejected": -20.270055770874023, "step": 5515 }, { "epoch": 2.72, "learning_rate": 2.4740822652037862e-08, "loss": 0.0, "step": 5516 }, { "epoch": 2.72, "logps_train/chosen": -78.07406616210938, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -316.47613525390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3514988422393799, "rewards_train/margins": 17.73932909965515, "rewards_train/rejected": -19.09082794189453, "step": 5516 }, { "epoch": 2.72, "learning_rate": 2.46565808479407e-08, "loss": 0.0, "step": 5517 }, { "epoch": 2.72, "logps_train/chosen": -81.76405334472656, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -335.33355712890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6541402339935303, "rewards_train/margins": 18.874431848526, "rewards_train/rejected": -20.52857208251953, "step": 5517 }, { "epoch": 2.72, "learning_rate": 2.457247908322224e-08, "loss": 0.0, "step": 5518 }, { "epoch": 2.72, "logps_train/chosen": -82.21601104736328, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -121.9375, "logps_train/rejected": -322.52227783203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9081733226776123, "rewards_train/margins": 18.15215802192688, "rewards_train/rejected": -20.060331344604492, "step": 5518 }, { "epoch": 2.72, "learning_rate": 2.4488517382659667e-08, "loss": 0.0, "step": 5519 }, { "epoch": 2.72, "logps_train/chosen": -78.35957336425781, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -328.9168701171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4130083322525024, "rewards_train/margins": 18.621795058250427, "rewards_train/rejected": -20.03480339050293, "step": 5519 }, { "epoch": 2.72, "learning_rate": 2.440469577098836e-08, "loss": 0.0001, "step": 5520 }, { "epoch": 2.72, "logps_train/chosen": -78.44926452636719, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -328.9976806640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4337446689605713, "rewards_train/margins": 18.74551749229431, "rewards_train/rejected": -20.179262161254883, "step": 5520 }, { "epoch": 2.72, "learning_rate": 2.4321014272902896e-08, "loss": 0.0, "step": 5521 }, { "epoch": 2.72, "logps_train/chosen": -82.73876953125, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -328.645263671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7293944358825684, "rewards_train/margins": 18.340406894683838, "rewards_train/rejected": -20.069801330566406, "step": 5521 }, { "epoch": 2.72, "learning_rate": 2.42374729130565e-08, "loss": 0.0, "step": 5522 }, { "epoch": 2.72, "logps_train/chosen": -77.29359436035156, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -321.14971923828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3566536903381348, "rewards_train/margins": 18.25558614730835, "rewards_train/rejected": -19.612239837646484, "step": 5522 }, { "epoch": 2.72, "learning_rate": 2.415407171606082e-08, "loss": 0.0, "step": 5523 }, { "epoch": 2.72, "logps_train/chosen": -74.63613891601562, "logps_train/ref_chosen": -62.78125, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -322.252197265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1849517822265625, "rewards_train/margins": 18.44778823852539, "rewards_train/rejected": -19.632740020751953, "step": 5523 }, { "epoch": 2.72, "learning_rate": 2.4070810706486534e-08, "loss": 0.0, "step": 5524 }, { "epoch": 2.72, "logps_train/chosen": -83.16110229492188, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -322.1747131347656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.886373519897461, "rewards_train/margins": 17.78168296813965, "rewards_train/rejected": -19.66805648803711, "step": 5524 }, { "epoch": 2.72, "learning_rate": 2.3987689908862908e-08, "loss": 0.0, "step": 5525 }, { "epoch": 2.72, "logps_train/chosen": -77.72959899902344, "logps_train/ref_chosen": -63.03125, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -325.6197509765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4688093662261963, "rewards_train/margins": 18.7079598903656, "rewards_train/rejected": -20.176769256591797, "step": 5525 }, { "epoch": 2.72, "learning_rate": 2.3904709347677797e-08, "loss": 0.0, "step": 5526 }, { "epoch": 2.72, "logps_train/chosen": -82.52374267578125, "logps_train/ref_chosen": -68.25, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -334.8360900878906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4263980388641357, "rewards_train/margins": 19.152133226394653, "rewards_train/rejected": -20.57853126525879, "step": 5526 }, { "epoch": 2.72, "learning_rate": 2.382186904737793e-08, "loss": 0.0, "step": 5527 }, { "epoch": 2.72, "logps_train/chosen": -81.73908233642578, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -333.9130554199219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5304999351501465, "rewards_train/margins": 18.85182237625122, "rewards_train/rejected": -20.382322311401367, "step": 5527 }, { "epoch": 2.72, "learning_rate": 2.3739169032368556e-08, "loss": 0.0, "step": 5528 }, { "epoch": 2.72, "logps_train/chosen": -78.87772369384766, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -325.73223876953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4305944442749023, "rewards_train/margins": 18.661381721496582, "rewards_train/rejected": -20.091976165771484, "step": 5528 }, { "epoch": 2.72, "learning_rate": 2.3656609327013744e-08, "loss": 0.0, "step": 5529 }, { "epoch": 2.72, "logps_train/chosen": -87.29174041748047, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -338.90087890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0755605697631836, "rewards_train/margins": 18.822339057922363, "rewards_train/rejected": -20.897899627685547, "step": 5529 }, { "epoch": 2.72, "learning_rate": 2.3574189955635926e-08, "loss": 0.0, "step": 5530 }, { "epoch": 2.72, "logps_train/chosen": -84.350830078125, "logps_train/ref_chosen": -67.6875, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -341.415283203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6695060729980469, "rewards_train/margins": 19.372119903564453, "rewards_train/rejected": -21.0416259765625, "step": 5530 }, { "epoch": 2.72, "learning_rate": 2.349191094251668e-08, "loss": 0.0, "step": 5531 }, { "epoch": 2.72, "logps_train/chosen": -78.11502075195312, "logps_train/ref_chosen": -60.59375, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -330.156982421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7536406517028809, "rewards_train/margins": 18.792086124420166, "rewards_train/rejected": -20.545726776123047, "step": 5531 }, { "epoch": 2.72, "learning_rate": 2.3409772311895838e-08, "loss": 0.0, "step": 5532 }, { "epoch": 2.72, "logps_train/chosen": -76.5791015625, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -120.4375, "logps_train/rejected": -315.54156494140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.334717035293579, "rewards_train/margins": 18.1779363155365, "rewards_train/rejected": -19.512653350830078, "step": 5532 }, { "epoch": 2.72, "learning_rate": 2.3327774087971875e-08, "loss": 0.0, "step": 5533 }, { "epoch": 2.72, "logps_train/chosen": -85.24472045898438, "logps_train/ref_chosen": -68.0625, "logps_train/ref_rejected": -133.5, "logps_train/rejected": -341.5153503417969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.719394326210022, "rewards_train/margins": 19.08697736263275, "rewards_train/rejected": -20.806371688842773, "step": 5533 }, { "epoch": 2.72, "learning_rate": 2.32459162949023e-08, "loss": 0.0001, "step": 5534 }, { "epoch": 2.72, "logps_train/chosen": -80.1706771850586, "logps_train/ref_chosen": -66.9375, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -337.12353515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3203885555267334, "rewards_train/margins": 19.343334436416626, "rewards_train/rejected": -20.66372299194336, "step": 5534 }, { "epoch": 2.73, "learning_rate": 2.3164198956802715e-08, "loss": 0.0, "step": 5535 }, { "epoch": 2.73, "logps_train/chosen": -82.52285766601562, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -342.8698425292969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.762783408164978, "rewards_train/margins": 19.500762581825256, "rewards_train/rejected": -21.263545989990234, "step": 5535 }, { "epoch": 2.73, "learning_rate": 2.308262209774764e-08, "loss": 0.0, "step": 5536 }, { "epoch": 2.73, "logps_train/chosen": -77.15038299560547, "logps_train/ref_chosen": -61.5, "logps_train/ref_rejected": -119.75, "logps_train/rejected": -316.018798828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.564696192741394, "rewards_train/margins": 18.063746571540833, "rewards_train/rejected": -19.628442764282227, "step": 5536 }, { "epoch": 2.73, "learning_rate": 2.3001185741770344e-08, "loss": 0.0, "step": 5537 }, { "epoch": 2.73, "logps_train/chosen": -86.08699798583984, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -330.9739685058594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0127527713775635, "rewards_train/margins": 18.334352254867554, "rewards_train/rejected": -20.347105026245117, "step": 5537 }, { "epoch": 2.73, "learning_rate": 2.291988991286231e-08, "loss": 0.0, "step": 5538 }, { "epoch": 2.73, "logps_train/chosen": -85.3373031616211, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -335.44097900390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9221092462539673, "rewards_train/margins": 18.663784384727478, "rewards_train/rejected": -20.585893630981445, "step": 5538 }, { "epoch": 2.73, "learning_rate": 2.2838734634973932e-08, "loss": 0.0, "step": 5539 }, { "epoch": 2.73, "logps_train/chosen": -83.64045715332031, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -332.9119873046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7728346586227417, "rewards_train/margins": 19.06792461872101, "rewards_train/rejected": -20.84075927734375, "step": 5539 }, { "epoch": 2.73, "learning_rate": 2.2757719932014195e-08, "loss": 0.0, "step": 5540 }, { "epoch": 2.73, "logps_train/chosen": -82.68052673339844, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -330.398681640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6740095615386963, "rewards_train/margins": 18.737638235092163, "rewards_train/rejected": -20.41164779663086, "step": 5540 }, { "epoch": 2.73, "learning_rate": 2.2676845827850343e-08, "loss": 0.0, "step": 5541 }, { "epoch": 2.73, "logps_train/chosen": -78.46867370605469, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -313.8614501953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5030685663223267, "rewards_train/margins": 17.440889716148376, "rewards_train/rejected": -18.943958282470703, "step": 5541 }, { "epoch": 2.73, "learning_rate": 2.2596112346308648e-08, "loss": 0.0001, "step": 5542 }, { "epoch": 2.73, "logps_train/chosen": -78.99586486816406, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -324.7901916503906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5547137260437012, "rewards_train/margins": 18.531729221343994, "rewards_train/rejected": -20.086442947387695, "step": 5542 }, { "epoch": 2.73, "learning_rate": 2.2515519511173697e-08, "loss": 0.0, "step": 5543 }, { "epoch": 2.73, "logps_train/chosen": -83.34686279296875, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -322.8536071777344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.755975365638733, "rewards_train/margins": 18.003261923789978, "rewards_train/rejected": -19.75923728942871, "step": 5543 }, { "epoch": 2.73, "learning_rate": 2.2435067346188606e-08, "loss": 0.0, "step": 5544 }, { "epoch": 2.73, "logps_train/chosen": -83.95386505126953, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -333.1976623535156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.972339391708374, "rewards_train/margins": 18.64967370033264, "rewards_train/rejected": -20.622013092041016, "step": 5544 }, { "epoch": 2.73, "learning_rate": 2.2354755875055142e-08, "loss": 0.0, "step": 5545 }, { "epoch": 2.73, "logps_train/chosen": -83.92849731445312, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -334.2481689453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7857215404510498, "rewards_train/margins": 18.740071535110474, "rewards_train/rejected": -20.525793075561523, "step": 5545 }, { "epoch": 2.73, "learning_rate": 2.227458512143371e-08, "loss": 0.0, "step": 5546 }, { "epoch": 2.73, "logps_train/chosen": -83.88915252685547, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -325.2462158203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.065185070037842, "rewards_train/margins": 18.10880136489868, "rewards_train/rejected": -20.173986434936523, "step": 5546 }, { "epoch": 2.73, "learning_rate": 2.2194555108943024e-08, "loss": 0.0, "step": 5547 }, { "epoch": 2.73, "logps_train/chosen": -81.19790649414062, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -329.42938232421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.68937087059021, "rewards_train/margins": 18.60322403907776, "rewards_train/rejected": -20.29259490966797, "step": 5547 }, { "epoch": 2.73, "learning_rate": 2.211466586116051e-08, "loss": 0.0, "step": 5548 }, { "epoch": 2.73, "logps_train/chosen": -76.45025634765625, "logps_train/ref_chosen": -61.875, "logps_train/ref_rejected": -120.0, "logps_train/rejected": -314.5951843261719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4578181505203247, "rewards_train/margins": 18.004433512687683, "rewards_train/rejected": -19.462251663208008, "step": 5548 }, { "epoch": 2.73, "learning_rate": 2.203491740162211e-08, "loss": 0.0, "step": 5549 }, { "epoch": 2.73, "logps_train/chosen": -78.70132446289062, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -323.49127197265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3014311790466309, "rewards_train/margins": 18.378754138946533, "rewards_train/rejected": -19.680185317993164, "step": 5549 }, { "epoch": 2.73, "learning_rate": 2.195530975382226e-08, "loss": 0.0, "step": 5550 }, { "epoch": 2.73, "logps_train/chosen": -70.38334655761719, "logps_train/ref_chosen": -61.03125, "logps_train/ref_rejected": -117.875, "logps_train/rejected": -307.70123291015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9359912276268005, "rewards_train/margins": 18.04267507791519, "rewards_train/rejected": -18.978666305541992, "step": 5550 }, { "epoch": 2.73, "learning_rate": 2.1875842941213806e-08, "loss": 0.0, "step": 5551 }, { "epoch": 2.73, "logps_train/chosen": -84.69371032714844, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -323.92218017578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9127306938171387, "rewards_train/margins": 17.824459552764893, "rewards_train/rejected": -19.73719024658203, "step": 5551 }, { "epoch": 2.73, "learning_rate": 2.1796516987208358e-08, "loss": 0.0, "step": 5552 }, { "epoch": 2.73, "logps_train/chosen": -78.64485931396484, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -334.5550842285156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.484701156616211, "rewards_train/margins": 18.922466278076172, "rewards_train/rejected": -20.407167434692383, "step": 5552 }, { "epoch": 2.73, "learning_rate": 2.171733191517583e-08, "loss": 0.0, "step": 5553 }, { "epoch": 2.73, "logps_train/chosen": -79.910888671875, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -319.4090576171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6089115142822266, "rewards_train/margins": 17.96270751953125, "rewards_train/rejected": -19.571619033813477, "step": 5553 }, { "epoch": 2.73, "learning_rate": 2.1638287748444672e-08, "loss": 0.0001, "step": 5554 }, { "epoch": 2.73, "logps_train/chosen": -76.77278137207031, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -329.95721435546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2200520038604736, "rewards_train/margins": 19.159605741500854, "rewards_train/rejected": -20.379657745361328, "step": 5554 }, { "epoch": 2.74, "learning_rate": 2.155938451030176e-08, "loss": 0.0, "step": 5555 }, { "epoch": 2.74, "logps_train/chosen": -79.41508483886719, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -327.2449035644531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3602092266082764, "rewards_train/margins": 18.597094774246216, "rewards_train/rejected": -19.957304000854492, "step": 5555 }, { "epoch": 2.74, "learning_rate": 2.1480622223992606e-08, "loss": 0.0, "step": 5556 }, { "epoch": 2.74, "logps_train/chosen": -78.75212860107422, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -329.74139404296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2226247787475586, "rewards_train/margins": 19.276026725769043, "rewards_train/rejected": -20.4986515045166, "step": 5556 }, { "epoch": 2.74, "learning_rate": 2.1402000912721043e-08, "loss": 0.0, "step": 5557 }, { "epoch": 2.74, "logps_train/chosen": -84.18461608886719, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -327.7297668457031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8473188877105713, "rewards_train/margins": 17.908275365829468, "rewards_train/rejected": -19.75559425354004, "step": 5557 }, { "epoch": 2.74, "learning_rate": 2.132352059964948e-08, "loss": 0.0, "step": 5558 }, { "epoch": 2.74, "logps_train/chosen": -81.4401626586914, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -333.36505126953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.663743019104004, "rewards_train/margins": 18.72647190093994, "rewards_train/rejected": -20.390214920043945, "step": 5558 }, { "epoch": 2.74, "learning_rate": 2.1245181307898763e-08, "loss": 0.0, "step": 5559 }, { "epoch": 2.74, "logps_train/chosen": -83.61569213867188, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -336.1986999511719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9178681373596191, "rewards_train/margins": 19.003953456878662, "rewards_train/rejected": -20.92182159423828, "step": 5559 }, { "epoch": 2.74, "learning_rate": 2.1166983060548094e-08, "loss": 0.0, "step": 5560 }, { "epoch": 2.74, "logps_train/chosen": -77.21794891357422, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -122.3125, "logps_train/rejected": -326.6199951171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3916192054748535, "rewards_train/margins": 19.03805685043335, "rewards_train/rejected": -20.429676055908203, "step": 5560 }, { "epoch": 2.74, "learning_rate": 2.1088925880635377e-08, "loss": 0.0, "step": 5561 }, { "epoch": 2.74, "logps_train/chosen": -83.2831802368164, "logps_train/ref_chosen": -67.125, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -329.54156494140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6176246404647827, "rewards_train/margins": 18.644050240516663, "rewards_train/rejected": -20.261674880981445, "step": 5561 }, { "epoch": 2.74, "learning_rate": 2.1011009791156607e-08, "loss": 0.0, "step": 5562 }, { "epoch": 2.74, "logps_train/chosen": -85.01020812988281, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -324.2787170410156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0013132095336914, "rewards_train/margins": 18.00243854522705, "rewards_train/rejected": -20.003751754760742, "step": 5562 }, { "epoch": 2.74, "learning_rate": 2.0933234815066425e-08, "loss": 0.0, "step": 5563 }, { "epoch": 2.74, "logps_train/chosen": -76.9979248046875, "logps_train/ref_chosen": -60.84375, "logps_train/ref_rejected": -122.8125, "logps_train/rejected": -324.46954345703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6154906749725342, "rewards_train/margins": 18.549433946609497, "rewards_train/rejected": -20.16492462158203, "step": 5563 }, { "epoch": 2.74, "learning_rate": 2.0855600975277944e-08, "loss": 0.0001, "step": 5564 }, { "epoch": 2.74, "logps_train/chosen": -81.445556640625, "logps_train/ref_chosen": -67.1875, "logps_train/ref_rejected": -134.75, "logps_train/rejected": -350.14459228515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4255852699279785, "rewards_train/margins": 20.11133337020874, "rewards_train/rejected": -21.53691864013672, "step": 5564 }, { "epoch": 2.74, "learning_rate": 2.077810829466259e-08, "loss": 0.0, "step": 5565 }, { "epoch": 2.74, "logps_train/chosen": -81.00677490234375, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -120.4375, "logps_train/rejected": -321.4255676269531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.773969054222107, "rewards_train/margins": 18.32356894016266, "rewards_train/rejected": -20.097537994384766, "step": 5565 }, { "epoch": 2.74, "learning_rate": 2.0700756796050212e-08, "loss": 0.0002, "step": 5566 }, { "epoch": 2.74, "logps_train/chosen": -85.20609283447266, "logps_train/ref_chosen": -67.1875, "logps_train/ref_rejected": -131.875, "logps_train/rejected": -340.3150634765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7991738319396973, "rewards_train/margins": 19.040292263031006, "rewards_train/rejected": -20.839466094970703, "step": 5566 }, { "epoch": 2.74, "learning_rate": 2.0623546502228973e-08, "loss": 0.0001, "step": 5567 }, { "epoch": 2.74, "logps_train/chosen": -72.57908630371094, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -328.51434326171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9527572393417358, "rewards_train/margins": 19.389302849769592, "rewards_train/rejected": -20.342060089111328, "step": 5567 }, { "epoch": 2.74, "learning_rate": 2.054647743594573e-08, "loss": 0.0, "step": 5568 }, { "epoch": 2.74, "logps_train/chosen": -78.50263977050781, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -333.0355224609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3630074262619019, "rewards_train/margins": 19.256461262702942, "rewards_train/rejected": -20.619468688964844, "step": 5568 }, { "epoch": 2.74, "learning_rate": 2.0469549619905492e-08, "loss": 0.0001, "step": 5569 }, { "epoch": 2.74, "logps_train/chosen": -71.71624755859375, "logps_train/ref_chosen": -60.625, "logps_train/ref_rejected": -120.6875, "logps_train/rejected": -310.80340576171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1075375080108643, "rewards_train/margins": 17.904051542282104, "rewards_train/rejected": -19.01158905029297, "step": 5569 }, { "epoch": 2.74, "learning_rate": 2.0392763076771625e-08, "loss": 0.0, "step": 5570 }, { "epoch": 2.74, "logps_train/chosen": -77.30247497558594, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -123.3125, "logps_train/rejected": -318.00604248046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3552956581115723, "rewards_train/margins": 18.110835552215576, "rewards_train/rejected": -19.46613121032715, "step": 5570 }, { "epoch": 2.74, "learning_rate": 2.0316117829166146e-08, "loss": 0.0, "step": 5571 }, { "epoch": 2.74, "logps_train/chosen": -82.06788635253906, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -336.9039001464844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8054699897766113, "rewards_train/margins": 19.234286785125732, "rewards_train/rejected": -21.039756774902344, "step": 5571 }, { "epoch": 2.74, "learning_rate": 2.023961389966905e-08, "loss": 0.0, "step": 5572 }, { "epoch": 2.74, "logps_train/chosen": -80.38166809082031, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -328.58050537109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6551098823547363, "rewards_train/margins": 18.64693593978882, "rewards_train/rejected": -20.302045822143555, "step": 5572 }, { "epoch": 2.74, "learning_rate": 2.0163251310818975e-08, "loss": 0.0, "step": 5573 }, { "epoch": 2.74, "logps_train/chosen": -81.60496520996094, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -318.28045654296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.707127332687378, "rewards_train/margins": 17.740548372268677, "rewards_train/rejected": -19.447675704956055, "step": 5573 }, { "epoch": 2.74, "learning_rate": 2.0087030085113033e-08, "loss": 0.0, "step": 5574 }, { "epoch": 2.74, "logps_train/chosen": -83.08943176269531, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -131.125, "logps_train/rejected": -330.9377136230469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7072829008102417, "rewards_train/margins": 18.2697411775589, "rewards_train/rejected": -19.97702407836914, "step": 5574 }, { "epoch": 2.74, "learning_rate": 2.001095024500621e-08, "loss": 0.0, "step": 5575 }, { "epoch": 2.74, "logps_train/chosen": -79.3667221069336, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -323.3625183105469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4484399557113647, "rewards_train/margins": 18.410956025123596, "rewards_train/rejected": -19.85939598083496, "step": 5575 }, { "epoch": 2.75, "learning_rate": 1.9935011812912405e-08, "loss": 0.0, "step": 5576 }, { "epoch": 2.75, "logps_train/chosen": -80.96749877929688, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -329.2752380371094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7829804420471191, "rewards_train/margins": 18.54000234603882, "rewards_train/rejected": -20.322982788085938, "step": 5576 }, { "epoch": 2.75, "learning_rate": 1.9859214811203452e-08, "loss": 0.0, "step": 5577 }, { "epoch": 2.75, "logps_train/chosen": -70.66700744628906, "logps_train/ref_chosen": -61.625, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -320.70086669921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9046888947486877, "rewards_train/margins": 18.906706750392914, "rewards_train/rejected": -19.8113956451416, "step": 5577 }, { "epoch": 2.75, "learning_rate": 1.978355926220965e-08, "loss": 0.0, "step": 5578 }, { "epoch": 2.75, "logps_train/chosen": -81.30403137207031, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -315.42449951171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5447583198547363, "rewards_train/margins": 17.633723735809326, "rewards_train/rejected": -19.178482055664062, "step": 5578 }, { "epoch": 2.75, "learning_rate": 1.9708045188219725e-08, "loss": 0.0, "step": 5579 }, { "epoch": 2.75, "logps_train/chosen": -73.880859375, "logps_train/ref_chosen": -62.875, "logps_train/ref_rejected": -126.0, "logps_train/rejected": -322.1723937988281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1003904342651367, "rewards_train/margins": 18.51660442352295, "rewards_train/rejected": -19.616994857788086, "step": 5579 }, { "epoch": 2.75, "learning_rate": 1.9632672611480605e-08, "loss": 0.0, "step": 5580 }, { "epoch": 2.75, "logps_train/chosen": -79.19782257080078, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -330.220947265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5643622875213623, "rewards_train/margins": 19.03654408454895, "rewards_train/rejected": -20.600906372070312, "step": 5580 }, { "epoch": 2.75, "learning_rate": 1.9557441554197474e-08, "loss": 0.0, "step": 5581 }, { "epoch": 2.75, "logps_train/chosen": -79.78557586669922, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -325.0992431640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6102468967437744, "rewards_train/margins": 18.46945357322693, "rewards_train/rejected": -20.079700469970703, "step": 5581 }, { "epoch": 2.75, "learning_rate": 1.948235203853399e-08, "loss": 0.0, "step": 5582 }, { "epoch": 2.75, "logps_train/chosen": -79.90577697753906, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -325.5457763671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5381847620010376, "rewards_train/margins": 18.155746817588806, "rewards_train/rejected": -19.693931579589844, "step": 5582 }, { "epoch": 2.75, "learning_rate": 1.940740408661201e-08, "loss": 0.0, "step": 5583 }, { "epoch": 2.75, "logps_train/chosen": -86.43901062011719, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -317.49298095703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0442919731140137, "rewards_train/margins": 17.363306522369385, "rewards_train/rejected": -19.4075984954834, "step": 5583 }, { "epoch": 2.75, "learning_rate": 1.9332597720511767e-08, "loss": 0.0001, "step": 5584 }, { "epoch": 2.75, "logps_train/chosen": -82.243408203125, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -319.464111328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5661866664886475, "rewards_train/margins": 17.7080557346344, "rewards_train/rejected": -19.274242401123047, "step": 5584 }, { "epoch": 2.75, "learning_rate": 1.9257932962271572e-08, "loss": 0.0, "step": 5585 }, { "epoch": 2.75, "logps_train/chosen": -80.3946533203125, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -317.585205078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4432734251022339, "rewards_train/margins": 17.631752133369446, "rewards_train/rejected": -19.07502555847168, "step": 5585 }, { "epoch": 2.75, "learning_rate": 1.9183409833888387e-08, "loss": 0.0, "step": 5586 }, { "epoch": 2.75, "logps_train/chosen": -88.21949768066406, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -331.2843017578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.1868419647216797, "rewards_train/margins": 18.461706161499023, "rewards_train/rejected": -20.648548126220703, "step": 5586 }, { "epoch": 2.75, "learning_rate": 1.9109028357316982e-08, "loss": 0.0, "step": 5587 }, { "epoch": 2.75, "logps_train/chosen": -76.82720947265625, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -327.82672119140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3582582473754883, "rewards_train/margins": 18.946974754333496, "rewards_train/rejected": -20.305233001708984, "step": 5587 }, { "epoch": 2.75, "learning_rate": 1.9034788554470715e-08, "loss": 0.0, "step": 5588 }, { "epoch": 2.75, "logps_train/chosen": -77.14292907714844, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -325.49810791015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2855331897735596, "rewards_train/margins": 18.701679944992065, "rewards_train/rejected": -19.987213134765625, "step": 5588 }, { "epoch": 2.75, "learning_rate": 1.8960690447221373e-08, "loss": 0.0, "step": 5589 }, { "epoch": 2.75, "logps_train/chosen": -88.69740295410156, "logps_train/ref_chosen": -69.5, "logps_train/ref_rejected": -133.375, "logps_train/rejected": -341.9400634765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9216941595077515, "rewards_train/margins": 18.940770745277405, "rewards_train/rejected": -20.862464904785156, "step": 5589 }, { "epoch": 2.75, "learning_rate": 1.888673405739838e-08, "loss": 0.0, "step": 5590 }, { "epoch": 2.75, "logps_train/chosen": -77.44868469238281, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -325.47015380859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3775347471237183, "rewards_train/margins": 18.29838764667511, "rewards_train/rejected": -19.675922393798828, "step": 5590 }, { "epoch": 2.75, "learning_rate": 1.8812919406790083e-08, "loss": 0.0, "step": 5591 }, { "epoch": 2.75, "logps_train/chosen": -79.50425720214844, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -332.1708679199219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.329282522201538, "rewards_train/margins": 19.065439462661743, "rewards_train/rejected": -20.39472198486328, "step": 5591 }, { "epoch": 2.75, "learning_rate": 1.8739246517142647e-08, "loss": 0.0, "step": 5592 }, { "epoch": 2.75, "logps_train/chosen": -81.32434844970703, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -322.46533203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5328254699707031, "rewards_train/margins": 17.960439682006836, "rewards_train/rejected": -19.49326515197754, "step": 5592 }, { "epoch": 2.75, "learning_rate": 1.8665715410160542e-08, "loss": 0.0008, "step": 5593 }, { "epoch": 2.75, "logps_train/chosen": -77.21965026855469, "logps_train/ref_chosen": -61.25, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -325.10894775390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5967214107513428, "rewards_train/margins": 18.401426553726196, "rewards_train/rejected": -19.99814796447754, "step": 5593 }, { "epoch": 2.75, "learning_rate": 1.859232610750672e-08, "loss": 0.0, "step": 5594 }, { "epoch": 2.75, "logps_train/chosen": -78.14411926269531, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -120.6875, "logps_train/rejected": -318.6561279296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5145591497421265, "rewards_train/margins": 18.284108757972717, "rewards_train/rejected": -19.798667907714844, "step": 5594 }, { "epoch": 2.75, "learning_rate": 1.8519078630801986e-08, "loss": 0.0, "step": 5595 }, { "epoch": 2.75, "logps_train/chosen": -78.50143432617188, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -324.6313781738281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3304165601730347, "rewards_train/margins": 18.655572056770325, "rewards_train/rejected": -19.98598861694336, "step": 5595 }, { "epoch": 2.76, "learning_rate": 1.8445973001625648e-08, "loss": 0.0, "step": 5596 }, { "epoch": 2.76, "logps_train/chosen": -83.48957824707031, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -336.02587890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8426098823547363, "rewards_train/margins": 18.779704570770264, "rewards_train/rejected": -20.622314453125, "step": 5596 }, { "epoch": 2.76, "learning_rate": 1.837300924151497e-08, "loss": 0.0, "step": 5597 }, { "epoch": 2.76, "logps_train/chosen": -78.70924377441406, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -335.1936340332031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4919207096099854, "rewards_train/margins": 19.3742196559906, "rewards_train/rejected": -20.866140365600586, "step": 5597 }, { "epoch": 2.76, "learning_rate": 1.830018737196576e-08, "loss": 0.0, "step": 5598 }, { "epoch": 2.76, "logps_train/chosen": -79.94447326660156, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -122.125, "logps_train/rejected": -320.8409423828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5414204597473145, "rewards_train/margins": 18.331542491912842, "rewards_train/rejected": -19.872962951660156, "step": 5598 }, { "epoch": 2.76, "learning_rate": 1.8227507414431742e-08, "loss": 0.0, "step": 5599 }, { "epoch": 2.76, "logps_train/chosen": -88.50418090820312, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -126.6875, "logps_train/rejected": -329.28607177734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.2979769706726074, "rewards_train/margins": 17.96354341506958, "rewards_train/rejected": -20.261520385742188, "step": 5599 }, { "epoch": 2.76, "learning_rate": 1.8154969390324905e-08, "loss": 0.0, "step": 5600 }, { "epoch": 2.76, "logps_train/chosen": -77.8732681274414, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -120.625, "logps_train/rejected": -316.2552490234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.470578670501709, "rewards_train/margins": 18.090004444122314, "rewards_train/rejected": -19.560583114624023, "step": 5600 }, { "epoch": 2.76, "learning_rate": 1.808257332101548e-08, "loss": 0.0, "step": 5601 }, { "epoch": 2.76, "logps_train/chosen": -76.91545104980469, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -319.08135986328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3938157558441162, "rewards_train/margins": 18.126283884048462, "rewards_train/rejected": -19.520099639892578, "step": 5601 }, { "epoch": 2.76, "learning_rate": 1.8010319227831805e-08, "loss": 0.0, "step": 5602 }, { "epoch": 2.76, "logps_train/chosen": -83.02389526367188, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -317.63104248046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8579564094543457, "rewards_train/margins": 17.42438840866089, "rewards_train/rejected": -19.282344818115234, "step": 5602 }, { "epoch": 2.76, "learning_rate": 1.7938207132060458e-08, "loss": 0.0, "step": 5603 }, { "epoch": 2.76, "logps_train/chosen": -73.92974853515625, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -324.6156921386719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0187565088272095, "rewards_train/margins": 18.87103497982025, "rewards_train/rejected": -19.88979148864746, "step": 5603 }, { "epoch": 2.76, "learning_rate": 1.7866237054946166e-08, "loss": 0.0, "step": 5604 }, { "epoch": 2.76, "logps_train/chosen": -77.0624008178711, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -119.625, "logps_train/rejected": -320.20477294921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4815080165863037, "rewards_train/margins": 18.57705521583557, "rewards_train/rejected": -20.058563232421875, "step": 5604 }, { "epoch": 2.76, "learning_rate": 1.7794409017691635e-08, "loss": 0.0, "step": 5605 }, { "epoch": 2.76, "logps_train/chosen": -79.79495239257812, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -121.6875, "logps_train/rejected": -322.1805419921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6496119499206543, "rewards_train/margins": 18.40213441848755, "rewards_train/rejected": -20.051746368408203, "step": 5605 }, { "epoch": 2.76, "learning_rate": 1.772272304145811e-08, "loss": 0.0, "step": 5606 }, { "epoch": 2.76, "logps_train/chosen": -77.98898315429688, "logps_train/ref_chosen": -62.4375, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -318.737060546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5553436279296875, "rewards_train/margins": 18.030324935913086, "rewards_train/rejected": -19.585668563842773, "step": 5606 }, { "epoch": 2.76, "learning_rate": 1.7651179147364692e-08, "loss": 0.0, "step": 5607 }, { "epoch": 2.76, "logps_train/chosen": -75.71240234375, "logps_train/ref_chosen": -61.3125, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -324.4095458984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4410152435302734, "rewards_train/margins": 18.56209945678711, "rewards_train/rejected": -20.003114700317383, "step": 5607 }, { "epoch": 2.76, "learning_rate": 1.7579777356488633e-08, "loss": 0.0001, "step": 5608 }, { "epoch": 2.76, "logps_train/chosen": -83.23509216308594, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -337.37396240234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8580307960510254, "rewards_train/margins": 18.998216152191162, "rewards_train/rejected": -20.856246948242188, "step": 5608 }, { "epoch": 2.76, "learning_rate": 1.7508517689865332e-08, "loss": 0.0, "step": 5609 }, { "epoch": 2.76, "logps_train/chosen": -76.0721664428711, "logps_train/ref_chosen": -61.3125, "logps_train/ref_rejected": -122.125, "logps_train/rejected": -311.43896484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4751853942871094, "rewards_train/margins": 17.453968048095703, "rewards_train/rejected": -18.929153442382812, "step": 5609 }, { "epoch": 2.76, "learning_rate": 1.7437400168488602e-08, "loss": 0.0, "step": 5610 }, { "epoch": 2.76, "logps_train/chosen": -81.61436462402344, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -326.60101318359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6342880725860596, "rewards_train/margins": 18.45809245109558, "rewards_train/rejected": -20.09238052368164, "step": 5610 }, { "epoch": 2.76, "learning_rate": 1.736642481330991e-08, "loss": 0.0, "step": 5611 }, { "epoch": 2.76, "logps_train/chosen": -84.35846710205078, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -127.125, "logps_train/rejected": -324.0766296386719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.015876054763794, "rewards_train/margins": 17.678555250167847, "rewards_train/rejected": -19.69443130493164, "step": 5611 }, { "epoch": 2.76, "learning_rate": 1.7295591645239193e-08, "loss": 0.0, "step": 5612 }, { "epoch": 2.76, "logps_train/chosen": -83.18392181396484, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -337.4434814453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7786945104599, "rewards_train/margins": 18.939921975135803, "rewards_train/rejected": -20.718616485595703, "step": 5612 }, { "epoch": 2.76, "learning_rate": 1.7224900685144372e-08, "loss": 0.0, "step": 5613 }, { "epoch": 2.76, "logps_train/chosen": -82.71304321289062, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -334.5824279785156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8392002582550049, "rewards_train/margins": 18.8228976726532, "rewards_train/rejected": -20.662097930908203, "step": 5613 }, { "epoch": 2.76, "learning_rate": 1.7154351953851454e-08, "loss": 0.0, "step": 5614 }, { "epoch": 2.76, "logps_train/chosen": -80.71781158447266, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -317.0506591796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7159457206726074, "rewards_train/margins": 17.697323322296143, "rewards_train/rejected": -19.41326904296875, "step": 5614 }, { "epoch": 2.76, "learning_rate": 1.7083945472144478e-08, "loss": 0.0, "step": 5615 }, { "epoch": 2.76, "logps_train/chosen": -78.16294860839844, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -334.5216979980469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.383091688156128, "rewards_train/margins": 19.268052339553833, "rewards_train/rejected": -20.65114402770996, "step": 5615 }, { "epoch": 2.77, "learning_rate": 1.7013681260765912e-08, "loss": 0.0, "step": 5616 }, { "epoch": 2.77, "logps_train/chosen": -79.02568054199219, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -327.41229248046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.492655873298645, "rewards_train/margins": 18.764002680778503, "rewards_train/rejected": -20.25665855407715, "step": 5616 }, { "epoch": 2.77, "learning_rate": 1.694355934041586e-08, "loss": 0.0, "step": 5617 }, { "epoch": 2.77, "logps_train/chosen": -77.93412780761719, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -120.3125, "logps_train/rejected": -316.10174560546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4584035873413086, "rewards_train/margins": 18.119351387023926, "rewards_train/rejected": -19.577754974365234, "step": 5617 }, { "epoch": 2.77, "learning_rate": 1.6873579731752796e-08, "loss": 0.0, "step": 5618 }, { "epoch": 2.77, "logps_train/chosen": -80.61810302734375, "logps_train/ref_chosen": -62.875, "logps_train/ref_rejected": -121.375, "logps_train/rejected": -316.3551025390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.77418851852417, "rewards_train/margins": 17.72684907913208, "rewards_train/rejected": -19.50103759765625, "step": 5618 }, { "epoch": 2.77, "learning_rate": 1.6803742455393123e-08, "loss": 0.0, "step": 5619 }, { "epoch": 2.77, "logps_train/chosen": -79.22138977050781, "logps_train/ref_chosen": -62.28125, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -323.6963806152344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6942095756530762, "rewards_train/margins": 18.33465814590454, "rewards_train/rejected": -20.028867721557617, "step": 5619 }, { "epoch": 2.77, "learning_rate": 1.6734047531911433e-08, "loss": 0.0, "step": 5620 }, { "epoch": 2.77, "logps_train/chosen": -78.68376159667969, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -324.5126953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4462080001831055, "rewards_train/margins": 18.60755443572998, "rewards_train/rejected": -20.053762435913086, "step": 5620 }, { "epoch": 2.77, "learning_rate": 1.666449498184036e-08, "loss": 0.0, "step": 5621 }, { "epoch": 2.77, "logps_train/chosen": -80.95951080322266, "logps_train/ref_chosen": -62.03125, "logps_train/ref_rejected": -120.0, "logps_train/rejected": -321.19854736328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8919472694396973, "rewards_train/margins": 18.2273211479187, "rewards_train/rejected": -20.1192684173584, "step": 5621 }, { "epoch": 2.77, "learning_rate": 1.6595084825670403e-08, "loss": 0.0, "step": 5622 }, { "epoch": 2.77, "logps_train/chosen": -78.04222106933594, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -332.1583251953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2770977020263672, "rewards_train/margins": 19.138639450073242, "rewards_train/rejected": -20.41573715209961, "step": 5622 }, { "epoch": 2.77, "learning_rate": 1.652581708385048e-08, "loss": 0.0, "step": 5623 }, { "epoch": 2.77, "logps_train/chosen": -79.4002685546875, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -327.172607421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.568664789199829, "rewards_train/margins": 18.646934270858765, "rewards_train/rejected": -20.215599060058594, "step": 5623 }, { "epoch": 2.77, "learning_rate": 1.6456691776787103e-08, "loss": 0.0, "step": 5624 }, { "epoch": 2.77, "logps_train/chosen": -79.64862060546875, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -333.72540283203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5212585926055908, "rewards_train/margins": 19.163296461105347, "rewards_train/rejected": -20.684555053710938, "step": 5624 }, { "epoch": 2.77, "learning_rate": 1.6387708924845146e-08, "loss": 0.0, "step": 5625 }, { "epoch": 2.77, "logps_train/chosen": -78.17251586914062, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -321.767822265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.387027621269226, "rewards_train/margins": 18.501182675361633, "rewards_train/rejected": -19.88821029663086, "step": 5625 }, { "epoch": 2.77, "learning_rate": 1.6318868548347577e-08, "loss": 0.0, "step": 5626 }, { "epoch": 2.77, "logps_train/chosen": -82.63314819335938, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -335.2391357421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6867526769638062, "rewards_train/margins": 18.97153627872467, "rewards_train/rejected": -20.658288955688477, "step": 5626 }, { "epoch": 2.77, "learning_rate": 1.6250170667574947e-08, "loss": 0.0, "step": 5627 }, { "epoch": 2.77, "logps_train/chosen": -75.94364166259766, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -120.4375, "logps_train/rejected": -316.9322509765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2204387187957764, "rewards_train/margins": 18.427863359451294, "rewards_train/rejected": -19.64830207824707, "step": 5627 }, { "epoch": 2.77, "learning_rate": 1.618161530276635e-08, "loss": 0.0001, "step": 5628 }, { "epoch": 2.77, "logps_train/chosen": -81.68509674072266, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -131.375, "logps_train/rejected": -341.4873046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.587455153465271, "rewards_train/margins": 19.422308564186096, "rewards_train/rejected": -21.009763717651367, "step": 5628 }, { "epoch": 2.77, "learning_rate": 1.611320247411857e-08, "loss": 0.0, "step": 5629 }, { "epoch": 2.77, "logps_train/chosen": -73.61844635009766, "logps_train/ref_chosen": -61.21875, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -319.6288146972656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.240262508392334, "rewards_train/margins": 18.33463144302368, "rewards_train/rejected": -19.574893951416016, "step": 5629 }, { "epoch": 2.77, "learning_rate": 1.604493220178649e-08, "loss": 0.0001, "step": 5630 }, { "epoch": 2.77, "logps_train/chosen": -80.96418762207031, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -326.45806884765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7392897605895996, "rewards_train/margins": 18.569504261016846, "rewards_train/rejected": -20.308794021606445, "step": 5630 }, { "epoch": 2.77, "learning_rate": 1.597680450588296e-08, "loss": 0.0001, "step": 5631 }, { "epoch": 2.77, "logps_train/chosen": -79.93400573730469, "logps_train/ref_chosen": -63.59375, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -331.35931396484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6324138641357422, "rewards_train/margins": 19.009672164916992, "rewards_train/rejected": -20.642086029052734, "step": 5631 }, { "epoch": 2.77, "learning_rate": 1.590881940647898e-08, "loss": 0.0, "step": 5632 }, { "epoch": 2.77, "logps_train/chosen": -82.13609313964844, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -325.7322082519531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8183941841125488, "rewards_train/margins": 18.172988414764404, "rewards_train/rejected": -19.991382598876953, "step": 5632 }, { "epoch": 2.77, "learning_rate": 1.5840976923603255e-08, "loss": 0.0, "step": 5633 }, { "epoch": 2.77, "logps_train/chosen": -73.1798095703125, "logps_train/ref_chosen": -61.125, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -320.70220947265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2044068574905396, "rewards_train/margins": 18.45091998577118, "rewards_train/rejected": -19.65532684326172, "step": 5633 }, { "epoch": 2.77, "learning_rate": 1.5773277077242742e-08, "loss": 0.0, "step": 5634 }, { "epoch": 2.77, "logps_train/chosen": -80.2619400024414, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -321.7014465332031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5069067478179932, "rewards_train/margins": 18.11572813987732, "rewards_train/rejected": -19.622634887695312, "step": 5634 }, { "epoch": 2.77, "learning_rate": 1.570571988734226e-08, "loss": 0.0, "step": 5635 }, { "epoch": 2.77, "logps_train/chosen": -78.06580352783203, "logps_train/ref_chosen": -62.0, "logps_train/ref_rejected": -120.9375, "logps_train/rejected": -314.20611572265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6057994365692139, "rewards_train/margins": 17.7232563495636, "rewards_train/rejected": -19.329055786132812, "step": 5635 }, { "epoch": 2.77, "learning_rate": 1.5638305373804618e-08, "loss": 0.0001, "step": 5636 }, { "epoch": 2.77, "logps_train/chosen": -84.478515625, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -326.8752136230469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7458007335662842, "rewards_train/margins": 18.308417558670044, "rewards_train/rejected": -20.054218292236328, "step": 5636 }, { "epoch": 2.78, "learning_rate": 1.5571033556490477e-08, "loss": 0.0, "step": 5637 }, { "epoch": 2.78, "logps_train/chosen": -73.927978515625, "logps_train/ref_chosen": -63.8125, "logps_train/ref_rejected": -127.375, "logps_train/rejected": -329.6605224609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.013159155845642, "rewards_train/margins": 19.21368181705475, "rewards_train/rejected": -20.22684097290039, "step": 5637 }, { "epoch": 2.78, "learning_rate": 1.550390445521882e-08, "loss": 0.0, "step": 5638 }, { "epoch": 2.78, "logps_train/chosen": -78.8866195678711, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -326.14190673828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4975488185882568, "rewards_train/margins": 18.464197397232056, "rewards_train/rejected": -19.961746215820312, "step": 5638 }, { "epoch": 2.78, "learning_rate": 1.543691808976605e-08, "loss": 0.0, "step": 5639 }, { "epoch": 2.78, "logps_train/chosen": -78.78553771972656, "logps_train/ref_chosen": -60.1875, "logps_train/ref_rejected": -119.875, "logps_train/rejected": -315.6974182128906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8590962886810303, "rewards_train/margins": 17.72338891029358, "rewards_train/rejected": -19.58248519897461, "step": 5639 }, { "epoch": 2.78, "learning_rate": 1.537007447986699e-08, "loss": 0.0, "step": 5640 }, { "epoch": 2.78, "logps_train/chosen": -75.7373275756836, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -317.6175537109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2533469200134277, "rewards_train/margins": 18.110117435455322, "rewards_train/rejected": -19.36346435546875, "step": 5640 }, { "epoch": 2.78, "learning_rate": 1.5303373645214114e-08, "loss": 0.0, "step": 5641 }, { "epoch": 2.78, "logps_train/chosen": -84.04286193847656, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -122.8125, "logps_train/rejected": -319.81744384765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8893203735351562, "rewards_train/margins": 17.81151580810547, "rewards_train/rejected": -19.700836181640625, "step": 5641 }, { "epoch": 2.78, "learning_rate": 1.5236815605457976e-08, "loss": 0.0, "step": 5642 }, { "epoch": 2.78, "logps_train/chosen": -84.92634582519531, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -338.31414794921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8027421236038208, "rewards_train/margins": 19.07213008403778, "rewards_train/rejected": -20.8748722076416, "step": 5642 }, { "epoch": 2.78, "learning_rate": 1.517040038020706e-08, "loss": 0.0, "step": 5643 }, { "epoch": 2.78, "logps_train/chosen": -80.6463851928711, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -333.1507873535156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4579005241394043, "rewards_train/margins": 18.828176975250244, "rewards_train/rejected": -20.28607749938965, "step": 5643 }, { "epoch": 2.78, "learning_rate": 1.510412798902766e-08, "loss": 0.0, "step": 5644 }, { "epoch": 2.78, "logps_train/chosen": -78.62700653076172, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -327.2086181640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2854548692703247, "rewards_train/margins": 18.46782958507538, "rewards_train/rejected": -19.753284454345703, "step": 5644 }, { "epoch": 2.78, "learning_rate": 1.503799845144421e-08, "loss": 0.0006, "step": 5645 }, { "epoch": 2.78, "logps_train/chosen": -78.4588623046875, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -329.7310791015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5488650798797607, "rewards_train/margins": 18.71916699409485, "rewards_train/rejected": -20.26803207397461, "step": 5645 }, { "epoch": 2.78, "learning_rate": 1.4972011786938686e-08, "loss": 0.0, "step": 5646 }, { "epoch": 2.78, "logps_train/chosen": -81.07188415527344, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -331.46942138671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6250109672546387, "rewards_train/margins": 18.685551166534424, "rewards_train/rejected": -20.310562133789062, "step": 5646 }, { "epoch": 2.78, "learning_rate": 1.4906168014951427e-08, "loss": 0.0, "step": 5647 }, { "epoch": 2.78, "logps_train/chosen": -83.2115478515625, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -338.82257080078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.797058343887329, "rewards_train/margins": 19.318645238876343, "rewards_train/rejected": -21.115703582763672, "step": 5647 }, { "epoch": 2.78, "learning_rate": 1.4840467154880409e-08, "loss": 0.0, "step": 5648 }, { "epoch": 2.78, "logps_train/chosen": -78.99837493896484, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -346.48211669921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4996908903121948, "rewards_train/margins": 20.23221218585968, "rewards_train/rejected": -21.731903076171875, "step": 5648 }, { "epoch": 2.78, "learning_rate": 1.4774909226081378e-08, "loss": 0.0, "step": 5649 }, { "epoch": 2.78, "logps_train/chosen": -84.46066284179688, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -325.0215148925781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0190649032592773, "rewards_train/margins": 17.85623264312744, "rewards_train/rejected": -19.87529754638672, "step": 5649 }, { "epoch": 2.78, "learning_rate": 1.4709494247868381e-08, "loss": 0.0001, "step": 5650 }, { "epoch": 2.78, "logps_train/chosen": -75.57186889648438, "logps_train/ref_chosen": -62.5, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -319.084716796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3075772523880005, "rewards_train/margins": 18.1874178647995, "rewards_train/rejected": -19.4949951171875, "step": 5650 }, { "epoch": 2.78, "learning_rate": 1.4644222239513005e-08, "loss": 0.0, "step": 5651 }, { "epoch": 2.78, "logps_train/chosen": -83.27119445800781, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -131.625, "logps_train/rejected": -338.24267578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.698896050453186, "rewards_train/margins": 18.963602423667908, "rewards_train/rejected": -20.662498474121094, "step": 5651 }, { "epoch": 2.78, "learning_rate": 1.4579093220244753e-08, "loss": 0.0, "step": 5652 }, { "epoch": 2.78, "logps_train/chosen": -75.83815002441406, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -123.0, "logps_train/rejected": -322.0274963378906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.346362829208374, "rewards_train/margins": 18.553898096084595, "rewards_train/rejected": -19.90026092529297, "step": 5652 }, { "epoch": 2.78, "learning_rate": 1.451410720925117e-08, "loss": 0.0, "step": 5653 }, { "epoch": 2.78, "logps_train/chosen": -77.29364013671875, "logps_train/ref_chosen": -61.6875, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -323.78228759765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5592957735061646, "rewards_train/margins": 18.42039668560028, "rewards_train/rejected": -19.979692459106445, "step": 5653 }, { "epoch": 2.78, "learning_rate": 1.4449264225677604e-08, "loss": 0.0, "step": 5654 }, { "epoch": 2.78, "logps_train/chosen": -77.14171600341797, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -120.25, "logps_train/rejected": -321.21527099609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3570425510406494, "rewards_train/margins": 18.73938536643982, "rewards_train/rejected": -20.09642791748047, "step": 5654 }, { "epoch": 2.78, "learning_rate": 1.438456428862711e-08, "loss": 0.0, "step": 5655 }, { "epoch": 2.78, "logps_train/chosen": -75.56089782714844, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -319.32763671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1111196279525757, "rewards_train/margins": 18.39176046848297, "rewards_train/rejected": -19.502880096435547, "step": 5655 }, { "epoch": 2.78, "learning_rate": 1.4320007417160829e-08, "loss": 0.0, "step": 5656 }, { "epoch": 2.78, "logps_train/chosen": -77.44570922851562, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -321.49609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.293203353881836, "rewards_train/margins": 18.271150588989258, "rewards_train/rejected": -19.564353942871094, "step": 5656 }, { "epoch": 2.79, "learning_rate": 1.4255593630297602e-08, "loss": 0.0, "step": 5657 }, { "epoch": 2.79, "logps_train/chosen": -78.50765991210938, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -324.77587890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4503753185272217, "rewards_train/margins": 18.59796452522278, "rewards_train/rejected": -20.04833984375, "step": 5657 }, { "epoch": 2.79, "learning_rate": 1.4191322947014195e-08, "loss": 0.0001, "step": 5658 }, { "epoch": 2.79, "logps_train/chosen": -80.93891906738281, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -328.4794616699219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6197218894958496, "rewards_train/margins": 18.55410623550415, "rewards_train/rejected": -20.173828125, "step": 5658 }, { "epoch": 2.79, "learning_rate": 1.4127195386245127e-08, "loss": 0.0, "step": 5659 }, { "epoch": 2.79, "logps_train/chosen": -76.94593048095703, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -327.3041076660156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3759896755218506, "rewards_train/margins": 18.932790994644165, "rewards_train/rejected": -20.308780670166016, "step": 5659 }, { "epoch": 2.79, "learning_rate": 1.4063210966882899e-08, "loss": 0.0, "step": 5660 }, { "epoch": 2.79, "logps_train/chosen": -90.18693542480469, "logps_train/ref_chosen": -68.25, "logps_train/ref_rejected": -135.125, "logps_train/rejected": -352.83880615234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.195939540863037, "rewards_train/margins": 19.57846689224243, "rewards_train/rejected": -21.77440643310547, "step": 5660 }, { "epoch": 2.79, "learning_rate": 1.3999369707777652e-08, "loss": 0.0, "step": 5661 }, { "epoch": 2.79, "logps_train/chosen": -75.70354461669922, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -121.1875, "logps_train/rejected": -313.1806335449219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2473808526992798, "rewards_train/margins": 17.95261800289154, "rewards_train/rejected": -19.19999885559082, "step": 5661 }, { "epoch": 2.79, "learning_rate": 1.3935671627737566e-08, "loss": 0.0, "step": 5662 }, { "epoch": 2.79, "logps_train/chosen": -81.1835708618164, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -326.9029846191406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.608396291732788, "rewards_train/margins": 18.588545083999634, "rewards_train/rejected": -20.196941375732422, "step": 5662 }, { "epoch": 2.79, "learning_rate": 1.3872116745528462e-08, "loss": 0.0, "step": 5663 }, { "epoch": 2.79, "logps_train/chosen": -84.17715454101562, "logps_train/ref_chosen": -68.0, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -328.28314208984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.616445779800415, "rewards_train/margins": 18.360795736312866, "rewards_train/rejected": -19.97724151611328, "step": 5663 }, { "epoch": 2.79, "learning_rate": 1.3808705079873973e-08, "loss": 0.0, "step": 5664 }, { "epoch": 2.79, "logps_train/chosen": -76.12396240234375, "logps_train/ref_chosen": -61.71875, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -320.66448974609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4391791820526123, "rewards_train/margins": 18.33298420906067, "rewards_train/rejected": -19.77216339111328, "step": 5664 }, { "epoch": 2.79, "learning_rate": 1.3745436649455711e-08, "loss": 0.0, "step": 5665 }, { "epoch": 2.79, "logps_train/chosen": -81.70101165771484, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -326.43365478515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.649251937866211, "rewards_train/margins": 18.229318618774414, "rewards_train/rejected": -19.878570556640625, "step": 5665 }, { "epoch": 2.79, "learning_rate": 1.3682311472912988e-08, "loss": 0.0, "step": 5666 }, { "epoch": 2.79, "logps_train/chosen": -82.90010833740234, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -332.2528076171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.784199833869934, "rewards_train/margins": 18.611491084098816, "rewards_train/rejected": -20.39569091796875, "step": 5666 }, { "epoch": 2.79, "learning_rate": 1.3619329568842763e-08, "loss": 0.0, "step": 5667 }, { "epoch": 2.79, "logps_train/chosen": -80.09527587890625, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -332.16131591796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6130425930023193, "rewards_train/margins": 18.924965143203735, "rewards_train/rejected": -20.538007736206055, "step": 5667 }, { "epoch": 2.79, "learning_rate": 1.3556490955800082e-08, "loss": 0.0, "step": 5668 }, { "epoch": 2.79, "logps_train/chosen": -75.58854675292969, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -123.3125, "logps_train/rejected": -321.2183837890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2565360069274902, "rewards_train/margins": 18.531904697418213, "rewards_train/rejected": -19.788440704345703, "step": 5668 }, { "epoch": 2.79, "learning_rate": 1.3493795652297579e-08, "loss": 0.0, "step": 5669 }, { "epoch": 2.79, "logps_train/chosen": -78.064697265625, "logps_train/ref_chosen": -61.65625, "logps_train/ref_rejected": -119.6875, "logps_train/rejected": -309.4764709472656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6404048204421997, "rewards_train/margins": 17.335902094841003, "rewards_train/rejected": -18.976306915283203, "step": 5669 }, { "epoch": 2.79, "learning_rate": 1.3431243676805703e-08, "loss": 0.0001, "step": 5670 }, { "epoch": 2.79, "logps_train/chosen": -85.84861755371094, "logps_train/ref_chosen": -67.5, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -328.91839599609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.83613121509552, "rewards_train/margins": 18.127095103263855, "rewards_train/rejected": -19.963226318359375, "step": 5670 }, { "epoch": 2.79, "learning_rate": 1.3368835047752658e-08, "loss": 0.0, "step": 5671 }, { "epoch": 2.79, "logps_train/chosen": -79.39564514160156, "logps_train/ref_chosen": -62.59375, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -322.9470520019531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6794084310531616, "rewards_train/margins": 18.12389051914215, "rewards_train/rejected": -19.803298950195312, "step": 5671 }, { "epoch": 2.79, "learning_rate": 1.3306569783524513e-08, "loss": 0.0, "step": 5672 }, { "epoch": 2.79, "logps_train/chosen": -75.78032684326172, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -323.2791748046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1884815692901611, "rewards_train/margins": 18.716341733932495, "rewards_train/rejected": -19.904823303222656, "step": 5672 }, { "epoch": 2.79, "learning_rate": 1.3244447902464983e-08, "loss": 0.0, "step": 5673 }, { "epoch": 2.79, "logps_train/chosen": -82.84762573242188, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -321.88067626953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8950169086456299, "rewards_train/margins": 18.149691343307495, "rewards_train/rejected": -20.044708251953125, "step": 5673 }, { "epoch": 2.79, "learning_rate": 1.3182469422875598e-08, "loss": 0.0, "step": 5674 }, { "epoch": 2.79, "logps_train/chosen": -80.35674285888672, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -320.4583740234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5944143533706665, "rewards_train/margins": 17.998053908348083, "rewards_train/rejected": -19.59246826171875, "step": 5674 }, { "epoch": 2.79, "learning_rate": 1.3120634363015747e-08, "loss": 0.0, "step": 5675 }, { "epoch": 2.79, "logps_train/chosen": -74.63838195800781, "logps_train/ref_chosen": -62.8125, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -319.83770751953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1839063167572021, "rewards_train/margins": 18.369590997695923, "rewards_train/rejected": -19.553497314453125, "step": 5675 }, { "epoch": 2.79, "learning_rate": 1.3058942741102252e-08, "loss": 0.0, "step": 5676 }, { "epoch": 2.79, "logps_train/chosen": -81.18315887451172, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -120.375, "logps_train/rejected": -315.83349609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7749316692352295, "rewards_train/margins": 17.77399182319641, "rewards_train/rejected": -19.54892349243164, "step": 5676 }, { "epoch": 2.8, "learning_rate": 1.2997394575310018e-08, "loss": 0.0, "step": 5677 }, { "epoch": 2.8, "logps_train/chosen": -82.24217224121094, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -315.3861083984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7749501466751099, "rewards_train/margins": 17.619617342948914, "rewards_train/rejected": -19.394567489624023, "step": 5677 }, { "epoch": 2.8, "learning_rate": 1.2935989883771536e-08, "loss": 0.0, "step": 5678 }, { "epoch": 2.8, "logps_train/chosen": -83.19805908203125, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -318.1903381347656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7597227096557617, "rewards_train/margins": 17.464680671691895, "rewards_train/rejected": -19.224403381347656, "step": 5678 }, { "epoch": 2.8, "learning_rate": 1.2874728684576952e-08, "loss": 0.0, "step": 5679 }, { "epoch": 2.8, "logps_train/chosen": -80.65727233886719, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -328.218505859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7115767002105713, "rewards_train/margins": 18.611348867416382, "rewards_train/rejected": -20.322925567626953, "step": 5679 }, { "epoch": 2.8, "learning_rate": 1.2813610995774382e-08, "loss": 0.0, "step": 5680 }, { "epoch": 2.8, "logps_train/chosen": -76.88020324707031, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -127.0625, "logps_train/rejected": -324.01806640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2141921520233154, "rewards_train/margins": 18.48414921760559, "rewards_train/rejected": -19.698341369628906, "step": 5680 }, { "epoch": 2.8, "learning_rate": 1.2752636835369424e-08, "loss": 0.0001, "step": 5681 }, { "epoch": 2.8, "logps_train/chosen": -71.1589584350586, "logps_train/ref_chosen": -62.1875, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -315.3681335449219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.896608829498291, "rewards_train/margins": 18.537957668304443, "rewards_train/rejected": -19.434566497802734, "step": 5681 }, { "epoch": 2.8, "learning_rate": 1.2691806221325485e-08, "loss": 0.0003, "step": 5682 }, { "epoch": 2.8, "logps_train/chosen": -77.88370513916016, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -330.20428466796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3686928749084473, "rewards_train/margins": 19.11130666732788, "rewards_train/rejected": -20.479999542236328, "step": 5682 }, { "epoch": 2.8, "learning_rate": 1.263111917156362e-08, "loss": 0.0, "step": 5683 }, { "epoch": 2.8, "logps_train/chosen": -80.71936798095703, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -323.37744140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6038216352462769, "rewards_train/margins": 18.38987648487091, "rewards_train/rejected": -19.993698120117188, "step": 5683 }, { "epoch": 2.8, "learning_rate": 1.2570575703962749e-08, "loss": 0.0, "step": 5684 }, { "epoch": 2.8, "logps_train/chosen": -82.6527328491211, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -322.8808898925781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8641014099121094, "rewards_train/margins": 17.902748107910156, "rewards_train/rejected": -19.766849517822266, "step": 5684 }, { "epoch": 2.8, "learning_rate": 1.2510175836359327e-08, "loss": 0.0001, "step": 5685 }, { "epoch": 2.8, "logps_train/chosen": -82.46453094482422, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -133.75, "logps_train/rejected": -343.86663818359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6115403175354004, "rewards_train/margins": 19.406957149505615, "rewards_train/rejected": -21.018497467041016, "step": 5685 }, { "epoch": 2.8, "learning_rate": 1.2449919586547509e-08, "loss": 0.0, "step": 5686 }, { "epoch": 2.8, "logps_train/chosen": -79.35592651367188, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -330.6837158203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4300752878189087, "rewards_train/margins": 18.8325856924057, "rewards_train/rejected": -20.26266098022461, "step": 5686 }, { "epoch": 2.8, "learning_rate": 1.2389806972279371e-08, "loss": 0.0, "step": 5687 }, { "epoch": 2.8, "logps_train/chosen": -76.51701354980469, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -329.1529846191406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3011645078659058, "rewards_train/margins": 18.847334027290344, "rewards_train/rejected": -20.14849853515625, "step": 5687 }, { "epoch": 2.8, "learning_rate": 1.2329838011264304e-08, "loss": 0.0, "step": 5688 }, { "epoch": 2.8, "logps_train/chosen": -82.00040435791016, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -121.125, "logps_train/rejected": -316.37738037109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7286046743392944, "rewards_train/margins": 17.80024778842926, "rewards_train/rejected": -19.528852462768555, "step": 5688 }, { "epoch": 2.8, "learning_rate": 1.227001272116962e-08, "loss": 0.0001, "step": 5689 }, { "epoch": 2.8, "logps_train/chosen": -84.2874984741211, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -327.3895568847656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.910097599029541, "rewards_train/margins": 18.167481899261475, "rewards_train/rejected": -20.077579498291016, "step": 5689 }, { "epoch": 2.8, "learning_rate": 1.2210331119620332e-08, "loss": 0.0, "step": 5690 }, { "epoch": 2.8, "logps_train/chosen": -82.30136108398438, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -119.1875, "logps_train/rejected": -308.55206298828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9666590690612793, "rewards_train/margins": 16.968918323516846, "rewards_train/rejected": -18.935577392578125, "step": 5690 }, { "epoch": 2.8, "learning_rate": 1.215079322419893e-08, "loss": 0.0, "step": 5691 }, { "epoch": 2.8, "logps_train/chosen": -80.51194763183594, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -324.51983642578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6621323823928833, "rewards_train/margins": 18.268173098564148, "rewards_train/rejected": -19.93030548095703, "step": 5691 }, { "epoch": 2.8, "learning_rate": 1.2091399052445772e-08, "loss": 0.0, "step": 5692 }, { "epoch": 2.8, "logps_train/chosen": -77.20481872558594, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -322.6561279296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2825915813446045, "rewards_train/margins": 18.31827473640442, "rewards_train/rejected": -19.600866317749023, "step": 5692 }, { "epoch": 2.8, "learning_rate": 1.2032148621858751e-08, "loss": 0.0, "step": 5693 }, { "epoch": 2.8, "logps_train/chosen": -85.000244140625, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -332.82000732421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9470949172973633, "rewards_train/margins": 18.455756187438965, "rewards_train/rejected": -20.402851104736328, "step": 5693 }, { "epoch": 2.8, "learning_rate": 1.1973041949893348e-08, "loss": 0.0, "step": 5694 }, { "epoch": 2.8, "logps_train/chosen": -83.45991516113281, "logps_train/ref_chosen": -68.125, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -328.83843994140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5345165729522705, "rewards_train/margins": 18.696688413619995, "rewards_train/rejected": -20.231204986572266, "step": 5694 }, { "epoch": 2.8, "learning_rate": 1.1914079053962967e-08, "loss": 0.0, "step": 5695 }, { "epoch": 2.8, "logps_train/chosen": -81.58277893066406, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -125.5, "logps_train/rejected": -329.1271057128906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5547621250152588, "rewards_train/margins": 18.809707403182983, "rewards_train/rejected": -20.364469528198242, "step": 5695 }, { "epoch": 2.8, "learning_rate": 1.185525995143838e-08, "loss": 0.0, "step": 5696 }, { "epoch": 2.8, "logps_train/chosen": -78.23927307128906, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -134.0, "logps_train/rejected": -342.7271728515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3483655452728271, "rewards_train/margins": 19.520252466201782, "rewards_train/rejected": -20.86861801147461, "step": 5696 }, { "epoch": 2.81, "learning_rate": 1.1796584659648056e-08, "loss": 0.0, "step": 5697 }, { "epoch": 2.81, "logps_train/chosen": -85.52662658691406, "logps_train/ref_chosen": -63.65625, "logps_train/ref_rejected": -122.9375, "logps_train/rejected": -332.33251953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.1879167556762695, "rewards_train/margins": 18.750123023986816, "rewards_train/rejected": -20.938039779663086, "step": 5697 }, { "epoch": 2.81, "learning_rate": 1.1738053195878173e-08, "loss": 0.0, "step": 5698 }, { "epoch": 2.81, "logps_train/chosen": -86.28530883789062, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -322.824462890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.1521146297454834, "rewards_train/margins": 17.621639490127563, "rewards_train/rejected": -19.773754119873047, "step": 5698 }, { "epoch": 2.81, "learning_rate": 1.1679665577372488e-08, "loss": 0.0003, "step": 5699 }, { "epoch": 2.81, "logps_train/chosen": -77.55783081054688, "logps_train/ref_chosen": -62.8125, "logps_train/ref_rejected": -119.5625, "logps_train/rejected": -312.7330322265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4763394594192505, "rewards_train/margins": 17.841052889823914, "rewards_train/rejected": -19.317392349243164, "step": 5699 }, { "epoch": 2.81, "learning_rate": 1.1621421821332466e-08, "loss": 0.0001, "step": 5700 }, { "epoch": 2.81, "logps_train/chosen": -78.16656494140625, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -320.9693298339844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.341705083847046, "rewards_train/margins": 18.27314782142639, "rewards_train/rejected": -19.614852905273438, "step": 5700 }, { "epoch": 2.81, "learning_rate": 1.1563321944916937e-08, "loss": 0.0, "step": 5701 }, { "epoch": 2.81, "logps_train/chosen": -81.03614807128906, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -330.6944274902344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5603041648864746, "rewards_train/margins": 18.868807315826416, "rewards_train/rejected": -20.42911148071289, "step": 5701 }, { "epoch": 2.81, "learning_rate": 1.1505365965242708e-08, "loss": 0.0, "step": 5702 }, { "epoch": 2.81, "logps_train/chosen": -80.5026626586914, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -336.4065856933594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.524533987045288, "rewards_train/margins": 19.270323038101196, "rewards_train/rejected": -20.794857025146484, "step": 5702 }, { "epoch": 2.81, "learning_rate": 1.1447553899383844e-08, "loss": 0.0, "step": 5703 }, { "epoch": 2.81, "logps_train/chosen": -81.59549713134766, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -319.26141357421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.714871883392334, "rewards_train/margins": 17.822014331817627, "rewards_train/rejected": -19.53688621520996, "step": 5703 }, { "epoch": 2.81, "learning_rate": 1.138988576437222e-08, "loss": 0.0, "step": 5704 }, { "epoch": 2.81, "logps_train/chosen": -81.45398712158203, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -315.9105529785156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6783084869384766, "rewards_train/margins": 17.51157569885254, "rewards_train/rejected": -19.189884185791016, "step": 5704 }, { "epoch": 2.81, "learning_rate": 1.1332361577197413e-08, "loss": 0.0, "step": 5705 }, { "epoch": 2.81, "logps_train/chosen": -78.12214660644531, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -127.4375, "logps_train/rejected": -328.590087890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3343827724456787, "rewards_train/margins": 18.78150773048401, "rewards_train/rejected": -20.115890502929688, "step": 5705 }, { "epoch": 2.81, "learning_rate": 1.1274981354806146e-08, "loss": 0.0, "step": 5706 }, { "epoch": 2.81, "logps_train/chosen": -77.31120300292969, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -326.113037109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2743818759918213, "rewards_train/margins": 18.796884298324585, "rewards_train/rejected": -20.071266174316406, "step": 5706 }, { "epoch": 2.81, "learning_rate": 1.1217745114103227e-08, "loss": 0.0, "step": 5707 }, { "epoch": 2.81, "logps_train/chosen": -79.22157287597656, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -323.17291259765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4611221551895142, "rewards_train/margins": 18.206854224205017, "rewards_train/rejected": -19.66797637939453, "step": 5707 }, { "epoch": 2.81, "learning_rate": 1.1160652871950838e-08, "loss": 0.0, "step": 5708 }, { "epoch": 2.81, "logps_train/chosen": -82.32804870605469, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -131.25, "logps_train/rejected": -339.7093200683594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6924724578857422, "rewards_train/margins": 19.156780242919922, "rewards_train/rejected": -20.849252700805664, "step": 5708 }, { "epoch": 2.81, "learning_rate": 1.1103704645168576e-08, "loss": 0.0, "step": 5709 }, { "epoch": 2.81, "logps_train/chosen": -79.96337890625, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -124.1875, "logps_train/rejected": -319.226806640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5350098609924316, "rewards_train/margins": 17.966872692108154, "rewards_train/rejected": -19.501882553100586, "step": 5709 }, { "epoch": 2.81, "learning_rate": 1.1046900450533968e-08, "loss": 0.0, "step": 5710 }, { "epoch": 2.81, "logps_train/chosen": -75.8643798828125, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -131.375, "logps_train/rejected": -331.09197998046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2172479629516602, "rewards_train/margins": 18.758551597595215, "rewards_train/rejected": -19.975799560546875, "step": 5710 }, { "epoch": 2.81, "learning_rate": 1.0990240304781795e-08, "loss": 0.0004, "step": 5711 }, { "epoch": 2.81, "logps_train/chosen": -79.9637451171875, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -322.0535583496094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.663219690322876, "rewards_train/margins": 18.28471350669861, "rewards_train/rejected": -19.947933197021484, "step": 5711 }, { "epoch": 2.81, "learning_rate": 1.0933724224604534e-08, "loss": 0.0002, "step": 5712 }, { "epoch": 2.81, "logps_train/chosen": -80.3880615234375, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -330.6581726074219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4461793899536133, "rewards_train/margins": 18.6887788772583, "rewards_train/rejected": -20.134958267211914, "step": 5712 }, { "epoch": 2.81, "learning_rate": 1.0877352226652203e-08, "loss": 0.0, "step": 5713 }, { "epoch": 2.81, "logps_train/chosen": -80.1966552734375, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -117.6875, "logps_train/rejected": -307.37091064453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7897825241088867, "rewards_train/margins": 17.176020622253418, "rewards_train/rejected": -18.965803146362305, "step": 5713 }, { "epoch": 2.81, "learning_rate": 1.082112432753246e-08, "loss": 0.0, "step": 5714 }, { "epoch": 2.81, "logps_train/chosen": -77.38864135742188, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -335.4337158203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.475558876991272, "rewards_train/margins": 19.205803275108337, "rewards_train/rejected": -20.68136215209961, "step": 5714 }, { "epoch": 2.81, "learning_rate": 1.0765040543810333e-08, "loss": 0.0, "step": 5715 }, { "epoch": 2.81, "logps_train/chosen": -77.89295959472656, "logps_train/ref_chosen": -61.90625, "logps_train/ref_rejected": -122.125, "logps_train/rejected": -317.5999450683594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6002819538116455, "rewards_train/margins": 17.949751615524292, "rewards_train/rejected": -19.550033569335938, "step": 5715 }, { "epoch": 2.81, "learning_rate": 1.0709100892008439e-08, "loss": 0.0, "step": 5716 }, { "epoch": 2.81, "logps_train/chosen": -80.76275634765625, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -330.12933349609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4874083995819092, "rewards_train/margins": 18.800965547561646, "rewards_train/rejected": -20.288373947143555, "step": 5716 }, { "epoch": 2.81, "learning_rate": 1.0653305388607092e-08, "loss": 0.0, "step": 5717 }, { "epoch": 2.81, "logps_train/chosen": -77.85853576660156, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -325.68316650390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4946668148040771, "rewards_train/margins": 18.577556848526, "rewards_train/rejected": -20.072223663330078, "step": 5717 }, { "epoch": 2.82, "learning_rate": 1.0597654050043981e-08, "loss": 0.0, "step": 5718 }, { "epoch": 2.82, "logps_train/chosen": -81.27716064453125, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -329.6663818359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7084290981292725, "rewards_train/margins": 18.636724710464478, "rewards_train/rejected": -20.34515380859375, "step": 5718 }, { "epoch": 2.82, "learning_rate": 1.0542146892714377e-08, "loss": 0.0, "step": 5719 }, { "epoch": 2.82, "logps_train/chosen": -74.15177917480469, "logps_train/ref_chosen": -60.96875, "logps_train/ref_rejected": -119.25, "logps_train/rejected": -314.9862060546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.318522334098816, "rewards_train/margins": 18.255589604377747, "rewards_train/rejected": -19.574111938476562, "step": 5719 }, { "epoch": 2.82, "learning_rate": 1.0486783932970922e-08, "loss": 0.0, "step": 5720 }, { "epoch": 2.82, "logps_train/chosen": -73.087158203125, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -324.6482849121094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0071048736572266, "rewards_train/margins": 18.801671981811523, "rewards_train/rejected": -19.80877685546875, "step": 5720 }, { "epoch": 2.82, "learning_rate": 1.043156518712418e-08, "loss": 0.0, "step": 5721 }, { "epoch": 2.82, "logps_train/chosen": -83.07725524902344, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -322.6229248046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8082866668701172, "rewards_train/margins": 17.80961799621582, "rewards_train/rejected": -19.617904663085938, "step": 5721 }, { "epoch": 2.82, "learning_rate": 1.037649067144175e-08, "loss": 0.0, "step": 5722 }, { "epoch": 2.82, "logps_train/chosen": -76.24820709228516, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -320.8647766113281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.151578664779663, "rewards_train/margins": 18.31321930885315, "rewards_train/rejected": -19.464797973632812, "step": 5722 }, { "epoch": 2.82, "learning_rate": 1.0321560402149044e-08, "loss": 0.0001, "step": 5723 }, { "epoch": 2.82, "logps_train/chosen": -83.00298309326172, "logps_train/ref_chosen": -63.59375, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -326.5914306640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.939068078994751, "rewards_train/margins": 18.413875341415405, "rewards_train/rejected": -20.352943420410156, "step": 5723 }, { "epoch": 2.82, "learning_rate": 1.0266774395428945e-08, "loss": 0.0, "step": 5724 }, { "epoch": 2.82, "logps_train/chosen": -80.1375732421875, "logps_train/ref_chosen": -62.4375, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -319.68408203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7704225778579712, "rewards_train/margins": 17.951844573020935, "rewards_train/rejected": -19.722267150878906, "step": 5724 }, { "epoch": 2.82, "learning_rate": 1.0212132667421602e-08, "loss": 0.0, "step": 5725 }, { "epoch": 2.82, "logps_train/chosen": -76.71829223632812, "logps_train/ref_chosen": -61.96875, "logps_train/ref_rejected": -120.6875, "logps_train/rejected": -316.295166015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4743685722351074, "rewards_train/margins": 18.087963581085205, "rewards_train/rejected": -19.562332153320312, "step": 5725 }, { "epoch": 2.82, "learning_rate": 1.015763523422497e-08, "loss": 0.0, "step": 5726 }, { "epoch": 2.82, "logps_train/chosen": -81.68544006347656, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -329.6964111328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6193981170654297, "rewards_train/margins": 18.906347274780273, "rewards_train/rejected": -20.525745391845703, "step": 5726 }, { "epoch": 2.82, "learning_rate": 1.0103282111894429e-08, "loss": 0.0, "step": 5727 }, { "epoch": 2.82, "logps_train/chosen": -81.88267517089844, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -327.1557312011719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7016950845718384, "rewards_train/margins": 18.490928053855896, "rewards_train/rejected": -20.192623138427734, "step": 5727 }, { "epoch": 2.82, "learning_rate": 1.0049073316442559e-08, "loss": 0.0, "step": 5728 }, { "epoch": 2.82, "logps_train/chosen": -75.07180786132812, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -314.1956481933594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.276468276977539, "rewards_train/margins": 17.741046905517578, "rewards_train/rejected": -19.017515182495117, "step": 5728 }, { "epoch": 2.82, "learning_rate": 9.99500886383986e-09, "loss": 0.0, "step": 5729 }, { "epoch": 2.82, "logps_train/chosen": -82.65087127685547, "logps_train/ref_chosen": -63.84375, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -333.474853515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.881493330001831, "rewards_train/margins": 18.819703340530396, "rewards_train/rejected": -20.701196670532227, "step": 5729 }, { "epoch": 2.82, "learning_rate": 9.941088770013928e-09, "loss": 0.0, "step": 5730 }, { "epoch": 2.82, "logps_train/chosen": -85.14039611816406, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -337.1251220703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.038306951522827, "rewards_train/margins": 18.74656891822815, "rewards_train/rejected": -20.784875869750977, "step": 5730 }, { "epoch": 2.82, "learning_rate": 9.887313050850055e-09, "loss": 0.0004, "step": 5731 }, { "epoch": 2.82, "logps_train/chosen": -77.15016174316406, "logps_train/ref_chosen": -62.5625, "logps_train/ref_rejected": -120.0625, "logps_train/rejected": -310.479736328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4586443901062012, "rewards_train/margins": 17.58332109451294, "rewards_train/rejected": -19.04196548461914, "step": 5731 }, { "epoch": 2.82, "learning_rate": 9.8336817221909e-09, "loss": 0.0, "step": 5732 }, { "epoch": 2.82, "logps_train/chosen": -71.08487701416016, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -121.375, "logps_train/rejected": -315.5185546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.773770809173584, "rewards_train/margins": 18.641905307769775, "rewards_train/rejected": -19.41567611694336, "step": 5732 }, { "epoch": 2.82, "learning_rate": 9.780194799836716e-09, "loss": 0.0, "step": 5733 }, { "epoch": 2.82, "logps_train/chosen": -78.61831665039062, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -330.6159362792969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5659091472625732, "rewards_train/margins": 18.852912187576294, "rewards_train/rejected": -20.418821334838867, "step": 5733 }, { "epoch": 2.82, "learning_rate": 9.726852299544952e-09, "loss": 0.0, "step": 5734 }, { "epoch": 2.82, "logps_train/chosen": -86.2420654296875, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -339.5412902832031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.923961877822876, "rewards_train/margins": 19.14168906211853, "rewards_train/rejected": -21.065650939941406, "step": 5734 }, { "epoch": 2.82, "learning_rate": 9.67365423703076e-09, "loss": 0.0, "step": 5735 }, { "epoch": 2.82, "logps_train/chosen": -83.33794403076172, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -122.5625, "logps_train/rejected": -326.3188781738281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0407767295837402, "rewards_train/margins": 18.337446689605713, "rewards_train/rejected": -20.378223419189453, "step": 5735 }, { "epoch": 2.82, "learning_rate": 9.620600627966658e-09, "loss": 0.0, "step": 5736 }, { "epoch": 2.82, "logps_train/chosen": -92.11201477050781, "logps_train/ref_chosen": -68.4375, "logps_train/ref_rejected": -135.125, "logps_train/rejected": -349.9935302734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.369307518005371, "rewards_train/margins": 19.121893882751465, "rewards_train/rejected": -21.491201400756836, "step": 5736 }, { "epoch": 2.82, "learning_rate": 9.567691487982587e-09, "loss": 0.0, "step": 5737 }, { "epoch": 2.82, "logps_train/chosen": -80.86756896972656, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -326.28564453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4850969314575195, "rewards_train/margins": 18.742053031921387, "rewards_train/rejected": -20.227149963378906, "step": 5737 }, { "epoch": 2.83, "learning_rate": 9.51492683266586e-09, "loss": 0.0, "step": 5738 }, { "epoch": 2.83, "logps_train/chosen": -73.92584228515625, "logps_train/ref_chosen": -59.6875, "logps_train/ref_rejected": -118.875, "logps_train/rejected": -308.8111572265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4224183559417725, "rewards_train/margins": 17.56768250465393, "rewards_train/rejected": -18.990100860595703, "step": 5738 }, { "epoch": 2.83, "learning_rate": 9.462306677561483e-09, "loss": 0.0, "step": 5739 }, { "epoch": 2.83, "logps_train/chosen": -78.66911315917969, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -330.5953369140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.338932752609253, "rewards_train/margins": 19.16435170173645, "rewards_train/rejected": -20.503284454345703, "step": 5739 }, { "epoch": 2.83, "learning_rate": 9.4098310381715e-09, "loss": 0.0, "step": 5740 }, { "epoch": 2.83, "logps_train/chosen": -82.86114501953125, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -127.8125, "logps_train/rejected": -335.80621337890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.67429780960083, "rewards_train/margins": 19.125272274017334, "rewards_train/rejected": -20.799570083618164, "step": 5740 }, { "epoch": 2.83, "learning_rate": 9.357499929955659e-09, "loss": 0.0, "step": 5741 }, { "epoch": 2.83, "logps_train/chosen": -80.2486572265625, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -321.863525390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4555296897888184, "rewards_train/margins": 18.22940683364868, "rewards_train/rejected": -19.6849365234375, "step": 5741 }, { "epoch": 2.83, "learning_rate": 9.305313368331124e-09, "loss": 0.0, "step": 5742 }, { "epoch": 2.83, "logps_train/chosen": -82.6983642578125, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -337.0849609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.713538408279419, "rewards_train/margins": 18.979531049728394, "rewards_train/rejected": -20.693069458007812, "step": 5742 }, { "epoch": 2.83, "learning_rate": 9.253271368672322e-09, "loss": 0.0, "step": 5743 }, { "epoch": 2.83, "logps_train/chosen": -78.35380554199219, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -327.33575439453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.573563575744629, "rewards_train/margins": 18.657279014587402, "rewards_train/rejected": -20.23084259033203, "step": 5743 }, { "epoch": 2.83, "learning_rate": 9.201373946311264e-09, "loss": 0.0, "step": 5744 }, { "epoch": 2.83, "logps_train/chosen": -82.00199890136719, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -325.671630859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6700730323791504, "rewards_train/margins": 18.28092908859253, "rewards_train/rejected": -19.95100212097168, "step": 5744 }, { "epoch": 2.83, "learning_rate": 9.14962111653722e-09, "loss": 0.0, "step": 5745 }, { "epoch": 2.83, "logps_train/chosen": -78.0320816040039, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -329.1431884765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3727390766143799, "rewards_train/margins": 18.908522367477417, "rewards_train/rejected": -20.281261444091797, "step": 5745 }, { "epoch": 2.83, "learning_rate": 9.098012894596885e-09, "loss": 0.0, "step": 5746 }, { "epoch": 2.83, "logps_train/chosen": -79.80403137207031, "logps_train/ref_chosen": -63.03125, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -319.37445068359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6758618354797363, "rewards_train/margins": 18.05758047103882, "rewards_train/rejected": -19.733442306518555, "step": 5746 }, { "epoch": 2.83, "learning_rate": 9.046549295694538e-09, "loss": 0.0003, "step": 5747 }, { "epoch": 2.83, "logps_train/chosen": -81.8797607421875, "logps_train/ref_chosen": -62.0, "logps_train/ref_rejected": -120.9375, "logps_train/rejected": -319.94171142578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9895143508911133, "rewards_train/margins": 17.913399696350098, "rewards_train/rejected": -19.90291404724121, "step": 5747 }, { "epoch": 2.83, "learning_rate": 8.995230334991554e-09, "loss": 0.0, "step": 5748 }, { "epoch": 2.83, "logps_train/chosen": -78.42425537109375, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -117.625, "logps_train/rejected": -310.6693115234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4164975881576538, "rewards_train/margins": 17.890032410621643, "rewards_train/rejected": -19.306529998779297, "step": 5748 }, { "epoch": 2.83, "learning_rate": 8.944056027606895e-09, "loss": 0.0, "step": 5749 }, { "epoch": 2.83, "logps_train/chosen": -84.1755599975586, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -338.36639404296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8210231065750122, "rewards_train/margins": 19.254286646842957, "rewards_train/rejected": -21.07530975341797, "step": 5749 }, { "epoch": 2.83, "learning_rate": 8.893026388616831e-09, "loss": 0.0, "step": 5750 }, { "epoch": 2.83, "logps_train/chosen": -81.06686401367188, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -322.55352783203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7332980632781982, "rewards_train/margins": 18.0683434009552, "rewards_train/rejected": -19.8016414642334, "step": 5750 }, { "epoch": 2.83, "learning_rate": 8.842141433055118e-09, "loss": 0.0, "step": 5751 }, { "epoch": 2.83, "logps_train/chosen": -80.36540985107422, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -124.125, "logps_train/rejected": -318.08612060546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6429862976074219, "rewards_train/margins": 17.7509765625, "rewards_train/rejected": -19.393962860107422, "step": 5751 }, { "epoch": 2.83, "learning_rate": 8.791401175912705e-09, "loss": 0.0001, "step": 5752 }, { "epoch": 2.83, "logps_train/chosen": -81.99986267089844, "logps_train/ref_chosen": -67.4375, "logps_train/ref_rejected": -130.625, "logps_train/rejected": -337.8345031738281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4561381340026855, "rewards_train/margins": 19.266668796539307, "rewards_train/rejected": -20.722806930541992, "step": 5752 }, { "epoch": 2.83, "learning_rate": 8.740805632138082e-09, "loss": 0.0, "step": 5753 }, { "epoch": 2.83, "logps_train/chosen": -77.72517395019531, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -318.565673828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4050369262695312, "rewards_train/margins": 18.179847717285156, "rewards_train/rejected": -19.584884643554688, "step": 5753 }, { "epoch": 2.83, "learning_rate": 8.690354816637047e-09, "loss": 0.0, "step": 5754 }, { "epoch": 2.83, "logps_train/chosen": -88.13060760498047, "logps_train/ref_chosen": -66.8125, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -331.67938232421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.130199909210205, "rewards_train/margins": 18.212058544158936, "rewards_train/rejected": -20.34225845336914, "step": 5754 }, { "epoch": 2.83, "learning_rate": 8.64004874427271e-09, "loss": 0.0001, "step": 5755 }, { "epoch": 2.83, "logps_train/chosen": -77.94047546386719, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -121.25, "logps_train/rejected": -322.4981384277344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5320117473602295, "rewards_train/margins": 18.59148335456848, "rewards_train/rejected": -20.12349510192871, "step": 5755 }, { "epoch": 2.83, "learning_rate": 8.58988742986555e-09, "loss": 0.0002, "step": 5756 }, { "epoch": 2.83, "logps_train/chosen": -78.43458557128906, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -313.1708984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4414567947387695, "rewards_train/margins": 17.731005668640137, "rewards_train/rejected": -19.172462463378906, "step": 5756 }, { "epoch": 2.83, "learning_rate": 8.53987088819358e-09, "loss": 0.0, "step": 5757 }, { "epoch": 2.83, "logps_train/chosen": -75.13206481933594, "logps_train/ref_chosen": -63.59375, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -328.16912841796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1532459259033203, "rewards_train/margins": 19.1672306060791, "rewards_train/rejected": -20.320476531982422, "step": 5757 }, { "epoch": 2.84, "learning_rate": 8.489999133991788e-09, "loss": 0.0, "step": 5758 }, { "epoch": 2.84, "logps_train/chosen": -87.30381774902344, "logps_train/ref_chosen": -66.9375, "logps_train/ref_rejected": -131.625, "logps_train/rejected": -345.1810302734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.035703659057617, "rewards_train/margins": 19.32478141784668, "rewards_train/rejected": -21.360485076904297, "step": 5758 }, { "epoch": 2.84, "learning_rate": 8.440272181952868e-09, "loss": 0.0, "step": 5759 }, { "epoch": 2.84, "logps_train/chosen": -81.61701202392578, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -336.167236328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6264960765838623, "rewards_train/margins": 19.39842915534973, "rewards_train/rejected": -21.024925231933594, "step": 5759 }, { "epoch": 2.84, "learning_rate": 8.390690046726767e-09, "loss": 0.0, "step": 5760 }, { "epoch": 2.84, "logps_train/chosen": -79.27433013916016, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -324.2193603515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4976476430892944, "rewards_train/margins": 18.22350561618805, "rewards_train/rejected": -19.721153259277344, "step": 5760 }, { "epoch": 2.84, "learning_rate": 8.341252742920578e-09, "loss": 0.0, "step": 5761 }, { "epoch": 2.84, "logps_train/chosen": -87.34630584716797, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -330.1936340332031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.140514612197876, "rewards_train/margins": 18.297698736190796, "rewards_train/rejected": -20.438213348388672, "step": 5761 }, { "epoch": 2.84, "learning_rate": 8.291960285098875e-09, "loss": 0.0, "step": 5762 }, { "epoch": 2.84, "logps_train/chosen": -77.03895568847656, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -316.43365478515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4401265382766724, "rewards_train/margins": 17.975995898246765, "rewards_train/rejected": -19.416122436523438, "step": 5762 }, { "epoch": 2.84, "learning_rate": 8.242812687783707e-09, "loss": 0.0, "step": 5763 }, { "epoch": 2.84, "logps_train/chosen": -76.63966369628906, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -325.2615051269531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1955091953277588, "rewards_train/margins": 18.642656087875366, "rewards_train/rejected": -19.838165283203125, "step": 5763 }, { "epoch": 2.84, "learning_rate": 8.193809965454101e-09, "loss": 0.0, "step": 5764 }, { "epoch": 2.84, "logps_train/chosen": -79.20147705078125, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -328.13641357421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3880195617675781, "rewards_train/margins": 18.779430389404297, "rewards_train/rejected": -20.167449951171875, "step": 5764 }, { "epoch": 2.84, "learning_rate": 8.14495213254668e-09, "loss": 0.0, "step": 5765 }, { "epoch": 2.84, "logps_train/chosen": -78.06246948242188, "logps_train/ref_chosen": -67.375, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -327.84234619140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0715789794921875, "rewards_train/margins": 18.86851692199707, "rewards_train/rejected": -19.940095901489258, "step": 5765 }, { "epoch": 2.84, "learning_rate": 8.096239203455312e-09, "loss": 0.0, "step": 5766 }, { "epoch": 2.84, "logps_train/chosen": -84.31886291503906, "logps_train/ref_chosen": -67.25, "logps_train/ref_rejected": -132.5, "logps_train/rejected": -347.5877685546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7058604955673218, "rewards_train/margins": 19.798526406288147, "rewards_train/rejected": -21.50438690185547, "step": 5766 }, { "epoch": 2.84, "learning_rate": 8.047671192531136e-09, "loss": 0.0, "step": 5767 }, { "epoch": 2.84, "logps_train/chosen": -84.12703704833984, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -122.8125, "logps_train/rejected": -320.82598876953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8942465782165527, "rewards_train/margins": 17.90773630142212, "rewards_train/rejected": -19.801982879638672, "step": 5767 }, { "epoch": 2.84, "learning_rate": 7.999248114082536e-09, "loss": 0.0, "step": 5768 }, { "epoch": 2.84, "logps_train/chosen": -78.48174285888672, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -331.9393310546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4482229948043823, "rewards_train/margins": 19.098739743232727, "rewards_train/rejected": -20.54696273803711, "step": 5768 }, { "epoch": 2.84, "learning_rate": 7.950969982375433e-09, "loss": 0.0, "step": 5769 }, { "epoch": 2.84, "logps_train/chosen": -77.27543640136719, "logps_train/ref_chosen": -61.5625, "logps_train/ref_rejected": -120.25, "logps_train/rejected": -321.97119140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5725138187408447, "rewards_train/margins": 18.600536108016968, "rewards_train/rejected": -20.173049926757812, "step": 5769 }, { "epoch": 2.84, "learning_rate": 7.902836811632785e-09, "loss": 0.0, "step": 5770 }, { "epoch": 2.84, "logps_train/chosen": -80.35623168945312, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -326.0997314453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4753692150115967, "rewards_train/margins": 18.491441011428833, "rewards_train/rejected": -19.96681022644043, "step": 5770 }, { "epoch": 2.84, "learning_rate": 7.854848616034916e-09, "loss": 0.0, "step": 5771 }, { "epoch": 2.84, "logps_train/chosen": -75.01983642578125, "logps_train/ref_chosen": -60.0625, "logps_train/ref_rejected": -119.125, "logps_train/rejected": -309.1123962402344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4962222576141357, "rewards_train/margins": 17.504956483840942, "rewards_train/rejected": -19.001178741455078, "step": 5771 }, { "epoch": 2.84, "learning_rate": 7.807005409719513e-09, "loss": 0.0001, "step": 5772 }, { "epoch": 2.84, "logps_train/chosen": -81.63324737548828, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -335.4358825683594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7165228128433228, "rewards_train/margins": 19.061047911643982, "rewards_train/rejected": -20.777570724487305, "step": 5772 }, { "epoch": 2.84, "learning_rate": 7.759307206781585e-09, "loss": 0.0, "step": 5773 }, { "epoch": 2.84, "logps_train/chosen": -80.74388885498047, "logps_train/ref_chosen": -62.28125, "logps_train/ref_rejected": -120.5, "logps_train/rejected": -317.4220886230469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8473381996154785, "rewards_train/margins": 17.841989040374756, "rewards_train/rejected": -19.689327239990234, "step": 5773 }, { "epoch": 2.84, "learning_rate": 7.711754021273276e-09, "loss": 0.0, "step": 5774 }, { "epoch": 2.84, "logps_train/chosen": -83.51860046386719, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -330.33734130859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.752396821975708, "rewards_train/margins": 18.725477933883667, "rewards_train/rejected": -20.477874755859375, "step": 5774 }, { "epoch": 2.84, "learning_rate": 7.664345867203992e-09, "loss": 0.0, "step": 5775 }, { "epoch": 2.84, "logps_train/chosen": -78.66381072998047, "logps_train/ref_chosen": -61.84375, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -327.025390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.681078553199768, "rewards_train/margins": 18.678927779197693, "rewards_train/rejected": -20.36000633239746, "step": 5775 }, { "epoch": 2.84, "learning_rate": 7.617082758540672e-09, "loss": 0.0, "step": 5776 }, { "epoch": 2.84, "logps_train/chosen": -83.50656127929688, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -125.1875, "logps_train/rejected": -320.71966552734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7976527214050293, "rewards_train/margins": 17.754299640655518, "rewards_train/rejected": -19.551952362060547, "step": 5776 }, { "epoch": 2.84, "learning_rate": 7.569964709207177e-09, "loss": 0.0001, "step": 5777 }, { "epoch": 2.84, "logps_train/chosen": -84.5830078125, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -327.77410888671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7990481853485107, "rewards_train/margins": 18.191256761550903, "rewards_train/rejected": -19.990304946899414, "step": 5777 }, { "epoch": 2.84, "learning_rate": 7.522991733084905e-09, "loss": 0.0, "step": 5778 }, { "epoch": 2.84, "logps_train/chosen": -83.1093978881836, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -332.65081787109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7529807090759277, "rewards_train/margins": 18.872942447662354, "rewards_train/rejected": -20.62592315673828, "step": 5778 }, { "epoch": 2.85, "learning_rate": 7.47616384401245e-09, "loss": 0.0, "step": 5779 }, { "epoch": 2.85, "logps_train/chosen": -80.22381591796875, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -131.125, "logps_train/rejected": -333.6557922363281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4816102981567383, "rewards_train/margins": 18.774495124816895, "rewards_train/rejected": -20.256105422973633, "step": 5779 }, { "epoch": 2.85, "learning_rate": 7.429481055785502e-09, "loss": 0.0001, "step": 5780 }, { "epoch": 2.85, "logps_train/chosen": -76.49819946289062, "logps_train/ref_chosen": -60.53125, "logps_train/ref_rejected": -121.5, "logps_train/rejected": -315.36822509765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5969643592834473, "rewards_train/margins": 17.791032314300537, "rewards_train/rejected": -19.387996673583984, "step": 5780 }, { "epoch": 2.85, "learning_rate": 7.38294338215717e-09, "loss": 0.0, "step": 5781 }, { "epoch": 2.85, "logps_train/chosen": -90.01753997802734, "logps_train/ref_chosen": -68.875, "logps_train/ref_rejected": -133.625, "logps_train/rejected": -352.0203857421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.1118617057800293, "rewards_train/margins": 19.729435443878174, "rewards_train/rejected": -21.841297149658203, "step": 5781 }, { "epoch": 2.85, "learning_rate": 7.336550836837818e-09, "loss": 0.0, "step": 5782 }, { "epoch": 2.85, "logps_train/chosen": -82.66883850097656, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -339.86083984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6496474742889404, "rewards_train/margins": 19.310555696487427, "rewards_train/rejected": -20.960203170776367, "step": 5782 }, { "epoch": 2.85, "learning_rate": 7.290303433494904e-09, "loss": 0.0001, "step": 5783 }, { "epoch": 2.85, "logps_train/chosen": -77.10582733154297, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -320.935791015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3155633211135864, "rewards_train/margins": 18.277088046073914, "rewards_train/rejected": -19.5926513671875, "step": 5783 }, { "epoch": 2.85, "learning_rate": 7.244201185753362e-09, "loss": 0.0001, "step": 5784 }, { "epoch": 2.85, "logps_train/chosen": -84.84870910644531, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -331.1059265136719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9561108350753784, "rewards_train/margins": 18.361905217170715, "rewards_train/rejected": -20.318016052246094, "step": 5784 }, { "epoch": 2.85, "learning_rate": 7.198244107195106e-09, "loss": 0.0, "step": 5785 }, { "epoch": 2.85, "logps_train/chosen": -80.70268249511719, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -337.5079345703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5143108367919922, "rewards_train/margins": 19.225839614868164, "rewards_train/rejected": -20.740150451660156, "step": 5785 }, { "epoch": 2.85, "learning_rate": 7.152432211359472e-09, "loss": 0.0, "step": 5786 }, { "epoch": 2.85, "logps_train/chosen": -79.22560119628906, "logps_train/ref_chosen": -62.8125, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -330.8746643066406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6411147117614746, "rewards_train/margins": 18.919106006622314, "rewards_train/rejected": -20.56022071838379, "step": 5786 }, { "epoch": 2.85, "learning_rate": 7.106765511742885e-09, "loss": 0.0, "step": 5787 }, { "epoch": 2.85, "logps_train/chosen": -85.57801818847656, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -331.5384521484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.907484769821167, "rewards_train/margins": 18.377169847488403, "rewards_train/rejected": -20.28465461730957, "step": 5787 }, { "epoch": 2.85, "learning_rate": 7.06124402179914e-09, "loss": 0.0, "step": 5788 }, { "epoch": 2.85, "logps_train/chosen": -79.47457885742188, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -331.4727783203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5857878923416138, "rewards_train/margins": 18.966471552848816, "rewards_train/rejected": -20.55225944519043, "step": 5788 }, { "epoch": 2.85, "learning_rate": 7.01586775493912e-09, "loss": 0.0, "step": 5789 }, { "epoch": 2.85, "logps_train/chosen": -77.58180236816406, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -329.988037109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.240015983581543, "rewards_train/margins": 18.949217796325684, "rewards_train/rejected": -20.189233779907227, "step": 5789 }, { "epoch": 2.85, "learning_rate": 6.9706367245310205e-09, "loss": 0.0, "step": 5790 }, { "epoch": 2.85, "logps_train/chosen": -73.5281753540039, "logps_train/ref_chosen": -61.75, "logps_train/ref_rejected": -119.1875, "logps_train/rejected": -310.8992919921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1772316694259644, "rewards_train/margins": 17.992823719978333, "rewards_train/rejected": -19.170055389404297, "step": 5790 }, { "epoch": 2.85, "learning_rate": 6.925550943900183e-09, "loss": 0.0, "step": 5791 }, { "epoch": 2.85, "logps_train/chosen": -72.14362335205078, "logps_train/ref_chosen": -61.59375, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -314.09735107421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0560126304626465, "rewards_train/margins": 17.95103693008423, "rewards_train/rejected": -19.007049560546875, "step": 5791 }, { "epoch": 2.85, "learning_rate": 6.880610426329148e-09, "loss": 0.0, "step": 5792 }, { "epoch": 2.85, "logps_train/chosen": -80.31367492675781, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -326.57257080078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5935747623443604, "rewards_train/margins": 18.61387801170349, "rewards_train/rejected": -20.20745277404785, "step": 5792 }, { "epoch": 2.85, "learning_rate": 6.8358151850577695e-09, "loss": 0.0, "step": 5793 }, { "epoch": 2.85, "logps_train/chosen": -80.93415832519531, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -327.1631774902344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4646068811416626, "rewards_train/margins": 18.31636083126068, "rewards_train/rejected": -19.780967712402344, "step": 5793 }, { "epoch": 2.85, "learning_rate": 6.791165233282992e-09, "loss": 0.0, "step": 5794 }, { "epoch": 2.85, "logps_train/chosen": -80.40403747558594, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -319.66278076171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6030993461608887, "rewards_train/margins": 17.854732990264893, "rewards_train/rejected": -19.45783233642578, "step": 5794 }, { "epoch": 2.85, "learning_rate": 6.791165233282992e-09, "loss": 0.0008, "step": 5795 }, { "epoch": 2.85, "logps_train/chosen": -80.29550170898438, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -324.59857177734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5588958263397217, "rewards_train/margins": 18.22771954536438, "rewards_train/rejected": -19.7866153717041, "step": 5795 }, { "epoch": 2.85, "learning_rate": 6.746660584158959e-09, "loss": 0.0, "step": 5796 }, { "epoch": 2.85, "logps_train/chosen": -80.02278137207031, "logps_train/ref_chosen": -62.25, "logps_train/ref_rejected": -121.8125, "logps_train/rejected": -318.10565185546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7793285846710205, "rewards_train/margins": 17.851011991500854, "rewards_train/rejected": -19.630340576171875, "step": 5796 }, { "epoch": 2.85, "learning_rate": 6.702301250797127e-09, "loss": 0.0, "step": 5797 }, { "epoch": 2.85, "logps_train/chosen": -82.47608184814453, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -340.4056701660156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7022955417633057, "rewards_train/margins": 19.337833166122437, "rewards_train/rejected": -21.040128707885742, "step": 5797 }, { "epoch": 2.85, "learning_rate": 6.6580872462659864e-09, "loss": 0.0, "step": 5798 }, { "epoch": 2.85, "logps_train/chosen": -85.21686553955078, "logps_train/ref_chosen": -68.5, "logps_train/ref_rejected": -133.25, "logps_train/rejected": -336.52001953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6732490062713623, "rewards_train/margins": 18.65531849861145, "rewards_train/rejected": -20.328567504882812, "step": 5798 }, { "epoch": 2.86, "learning_rate": 6.614018583591285e-09, "loss": 0.0, "step": 5799 }, { "epoch": 2.86, "logps_train/chosen": -78.0177001953125, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -323.6026611328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4929075241088867, "rewards_train/margins": 18.33161449432373, "rewards_train/rejected": -19.824522018432617, "step": 5799 }, { "epoch": 2.86, "learning_rate": 6.570095275755916e-09, "loss": 0.0, "step": 5800 }, { "epoch": 2.86, "logps_train/chosen": -84.75386810302734, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -318.62945556640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9040248394012451, "rewards_train/margins": 17.479230165481567, "rewards_train/rejected": -19.383255004882812, "step": 5800 }, { "epoch": 2.86, "learning_rate": 6.526317335700082e-09, "loss": 0.0, "step": 5801 }, { "epoch": 2.86, "logps_train/chosen": -84.9970474243164, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -324.790283203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8891335725784302, "rewards_train/margins": 17.972755312919617, "rewards_train/rejected": -19.861888885498047, "step": 5801 }, { "epoch": 2.86, "learning_rate": 6.482684776321023e-09, "loss": 0.0003, "step": 5802 }, { "epoch": 2.86, "logps_train/chosen": -75.22805786132812, "logps_train/ref_chosen": -61.90625, "logps_train/ref_rejected": -121.5625, "logps_train/rejected": -322.07281494140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3328888416290283, "rewards_train/margins": 18.721123456954956, "rewards_train/rejected": -20.054012298583984, "step": 5802 }, { "epoch": 2.86, "learning_rate": 6.439197610473124e-09, "loss": 0.0, "step": 5803 }, { "epoch": 2.86, "logps_train/chosen": -82.96792602539062, "logps_train/ref_chosen": -67.5, "logps_train/ref_rejected": -135.75, "logps_train/rejected": -350.045166015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5485508441925049, "rewards_train/margins": 19.87373995780945, "rewards_train/rejected": -21.422290802001953, "step": 5803 }, { "epoch": 2.86, "learning_rate": 6.395855850968024e-09, "loss": 0.0001, "step": 5804 }, { "epoch": 2.86, "logps_train/chosen": -74.19236755371094, "logps_train/ref_chosen": -60.21875, "logps_train/ref_rejected": -120.1875, "logps_train/rejected": -315.2855224609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3961408138275146, "rewards_train/margins": 18.111950159072876, "rewards_train/rejected": -19.50809097290039, "step": 5804 }, { "epoch": 2.86, "learning_rate": 6.352659510574565e-09, "loss": 0.0, "step": 5805 }, { "epoch": 2.86, "logps_train/chosen": -80.31684875488281, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -318.0789489746094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5791940689086914, "rewards_train/margins": 17.798672676086426, "rewards_train/rejected": -19.377866744995117, "step": 5805 }, { "epoch": 2.86, "learning_rate": 6.309608602018568e-09, "loss": 0.0, "step": 5806 }, { "epoch": 2.86, "logps_train/chosen": -78.17633819580078, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -127.875, "logps_train/rejected": -330.8037109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4667061567306519, "rewards_train/margins": 18.823432087898254, "rewards_train/rejected": -20.290138244628906, "step": 5806 }, { "epoch": 2.86, "learning_rate": 6.26670313798322e-09, "loss": 0.0, "step": 5807 }, { "epoch": 2.86, "logps_train/chosen": -80.53268432617188, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -122.125, "logps_train/rejected": -323.498779296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7893037796020508, "rewards_train/margins": 18.35105609893799, "rewards_train/rejected": -20.14035987854004, "step": 5807 }, { "epoch": 2.86, "learning_rate": 6.223943131108744e-09, "loss": 0.0002, "step": 5808 }, { "epoch": 2.86, "logps_train/chosen": -74.4568862915039, "logps_train/ref_chosen": -61.28125, "logps_train/ref_rejected": -122.5, "logps_train/rejected": -320.04620361328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.317490577697754, "rewards_train/margins": 18.435324668884277, "rewards_train/rejected": -19.75281524658203, "step": 5808 }, { "epoch": 2.86, "learning_rate": 6.181328593992508e-09, "loss": 0.0, "step": 5809 }, { "epoch": 2.86, "logps_train/chosen": -78.43311309814453, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -326.9773864746094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3071784973144531, "rewards_train/margins": 18.542709350585938, "rewards_train/rejected": -19.84988784790039, "step": 5809 }, { "epoch": 2.86, "learning_rate": 6.138859539189023e-09, "loss": 0.0, "step": 5810 }, { "epoch": 2.86, "logps_train/chosen": -85.61200714111328, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -340.3851318359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.1257030963897705, "rewards_train/margins": 18.9775550365448, "rewards_train/rejected": -21.10325813293457, "step": 5810 }, { "epoch": 2.86, "learning_rate": 6.096535979209893e-09, "loss": 0.0, "step": 5811 }, { "epoch": 2.86, "logps_train/chosen": -81.25163269042969, "logps_train/ref_chosen": -63.71875, "logps_train/ref_rejected": -124.5, "logps_train/rejected": -321.7394714355469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7529464960098267, "rewards_train/margins": 17.970804572105408, "rewards_train/rejected": -19.723751068115234, "step": 5811 }, { "epoch": 2.86, "learning_rate": 6.05435792652409e-09, "loss": 0.0, "step": 5812 }, { "epoch": 2.86, "logps_train/chosen": -77.05130004882812, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -326.2587585449219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4689979553222656, "rewards_train/margins": 18.6865177154541, "rewards_train/rejected": -20.155515670776367, "step": 5812 }, { "epoch": 2.86, "learning_rate": 6.0123253935575045e-09, "loss": 0.0007, "step": 5813 }, { "epoch": 2.86, "logps_train/chosen": -75.26908874511719, "logps_train/ref_chosen": -62.8125, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -323.95068359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2446328401565552, "rewards_train/margins": 18.913324236869812, "rewards_train/rejected": -20.157957077026367, "step": 5813 }, { "epoch": 2.86, "learning_rate": 5.970438392693122e-09, "loss": 0.0, "step": 5814 }, { "epoch": 2.86, "logps_train/chosen": -78.04431915283203, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -316.79571533203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4700082540512085, "rewards_train/margins": 17.837982773780823, "rewards_train/rejected": -19.30799102783203, "step": 5814 }, { "epoch": 2.86, "learning_rate": 5.928696936271127e-09, "loss": 0.0002, "step": 5815 }, { "epoch": 2.86, "logps_train/chosen": -77.96002197265625, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -125.8125, "logps_train/rejected": -331.5679931640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.373151183128357, "rewards_train/margins": 19.202398896217346, "rewards_train/rejected": -20.575550079345703, "step": 5815 }, { "epoch": 2.86, "learning_rate": 5.88710103658896e-09, "loss": 0.0, "step": 5816 }, { "epoch": 2.86, "logps_train/chosen": -80.70780944824219, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -332.20599365234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5637986660003662, "rewards_train/margins": 18.793230295181274, "rewards_train/rejected": -20.35702896118164, "step": 5816 }, { "epoch": 2.86, "learning_rate": 5.845650705900984e-09, "loss": 0.0, "step": 5817 }, { "epoch": 2.86, "logps_train/chosen": -77.57515716552734, "logps_train/ref_chosen": -61.34375, "logps_train/ref_rejected": -119.75, "logps_train/rejected": -307.403564453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6225546598434448, "rewards_train/margins": 17.14309322834015, "rewards_train/rejected": -18.765647888183594, "step": 5817 }, { "epoch": 2.86, "learning_rate": 5.80434595641871e-09, "loss": 0.0002, "step": 5818 }, { "epoch": 2.86, "logps_train/chosen": -78.32965850830078, "logps_train/ref_chosen": -61.78125, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -323.85302734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6568429470062256, "rewards_train/margins": 18.439886331558228, "rewards_train/rejected": -20.096729278564453, "step": 5818 }, { "epoch": 2.87, "learning_rate": 5.763186800310849e-09, "loss": 0.0, "step": 5819 }, { "epoch": 2.87, "logps_train/chosen": -79.6610107421875, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -121.875, "logps_train/rejected": -319.6551513671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.56097412109375, "rewards_train/margins": 18.215478897094727, "rewards_train/rejected": -19.776453018188477, "step": 5819 }, { "epoch": 2.87, "learning_rate": 5.722173249703144e-09, "loss": 0.0, "step": 5820 }, { "epoch": 2.87, "logps_train/chosen": -79.60250854492188, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -318.1071472167969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6172335147857666, "rewards_train/margins": 17.57263159751892, "rewards_train/rejected": -19.189865112304688, "step": 5820 }, { "epoch": 2.87, "learning_rate": 5.681305316678486e-09, "loss": 0.0, "step": 5821 }, { "epoch": 2.87, "logps_train/chosen": -83.86602783203125, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -326.18328857421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.903106927871704, "rewards_train/margins": 18.026792287826538, "rewards_train/rejected": -19.929899215698242, "step": 5821 }, { "epoch": 2.87, "learning_rate": 5.640583013276801e-09, "loss": 0.0, "step": 5822 }, { "epoch": 2.87, "logps_train/chosen": -74.47412109375, "logps_train/ref_chosen": -61.21875, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -324.5451965332031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3249516487121582, "rewards_train/margins": 18.865310192108154, "rewards_train/rejected": -20.190261840820312, "step": 5822 }, { "epoch": 2.87, "learning_rate": 5.6000063514952126e-09, "loss": 0.0, "step": 5823 }, { "epoch": 2.87, "logps_train/chosen": -76.47807312011719, "logps_train/ref_chosen": -60.75, "logps_train/ref_rejected": -121.375, "logps_train/rejected": -316.01226806640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5733935832977295, "rewards_train/margins": 17.893460512161255, "rewards_train/rejected": -19.466854095458984, "step": 5823 }, { "epoch": 2.87, "learning_rate": 5.559575343287826e-09, "loss": 0.0, "step": 5824 }, { "epoch": 2.87, "logps_train/chosen": -78.13338470458984, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -324.6867980957031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4256190061569214, "rewards_train/margins": 18.64184010028839, "rewards_train/rejected": -20.067459106445312, "step": 5824 }, { "epoch": 2.87, "learning_rate": 5.51929000056589e-09, "loss": 0.0001, "step": 5825 }, { "epoch": 2.87, "logps_train/chosen": -77.14065551757812, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -325.497314453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2940949201583862, "rewards_train/margins": 18.65148651599884, "rewards_train/rejected": -19.945581436157227, "step": 5825 }, { "epoch": 2.87, "learning_rate": 5.479150335197802e-09, "loss": 0.0, "step": 5826 }, { "epoch": 2.87, "logps_train/chosen": -81.80204772949219, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -328.88323974609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.501835823059082, "rewards_train/margins": 18.71071147918701, "rewards_train/rejected": -20.212547302246094, "step": 5826 }, { "epoch": 2.87, "learning_rate": 5.439156359008934e-09, "loss": 0.0, "step": 5827 }, { "epoch": 2.87, "logps_train/chosen": -83.15138244628906, "logps_train/ref_chosen": -66.4375, "logps_train/ref_rejected": -122.6875, "logps_train/rejected": -318.3680419921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6714367866516113, "rewards_train/margins": 17.898520946502686, "rewards_train/rejected": -19.569957733154297, "step": 5827 }, { "epoch": 2.87, "learning_rate": 5.399308083781695e-09, "loss": 0.0, "step": 5828 }, { "epoch": 2.87, "logps_train/chosen": -85.08174133300781, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -122.625, "logps_train/rejected": -318.9531555175781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8112996816635132, "rewards_train/margins": 17.8221994638443, "rewards_train/rejected": -19.633499145507812, "step": 5828 }, { "epoch": 2.87, "learning_rate": 5.359605521255861e-09, "loss": 0.0, "step": 5829 }, { "epoch": 2.87, "logps_train/chosen": -80.33750915527344, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -317.492919921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.724961757659912, "rewards_train/margins": 17.59527826309204, "rewards_train/rejected": -19.320240020751953, "step": 5829 }, { "epoch": 2.87, "learning_rate": 5.320048683127909e-09, "loss": 0.0, "step": 5830 }, { "epoch": 2.87, "logps_train/chosen": -82.25286865234375, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -331.0943298339844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6480900049209595, "rewards_train/margins": 18.802896857261658, "rewards_train/rejected": -20.450986862182617, "step": 5830 }, { "epoch": 2.87, "learning_rate": 5.280637581051517e-09, "loss": 0.0, "step": 5831 }, { "epoch": 2.87, "logps_train/chosen": -78.9178695678711, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -127.5625, "logps_train/rejected": -334.0223693847656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3610254526138306, "rewards_train/margins": 19.285451292991638, "rewards_train/rejected": -20.64647674560547, "step": 5831 }, { "epoch": 2.87, "learning_rate": 5.241372226637675e-09, "loss": 0.0, "step": 5832 }, { "epoch": 2.87, "logps_train/chosen": -80.92022705078125, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -126.25, "logps_train/rejected": -320.72027587890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5179498195648193, "rewards_train/margins": 17.930395364761353, "rewards_train/rejected": -19.448345184326172, "step": 5832 }, { "epoch": 2.87, "learning_rate": 5.202252631454018e-09, "loss": 0.0, "step": 5833 }, { "epoch": 2.87, "logps_train/chosen": -85.84557342529297, "logps_train/ref_chosen": -67.0625, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -337.0804443359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.878014087677002, "rewards_train/margins": 18.759326457977295, "rewards_train/rejected": -20.637340545654297, "step": 5833 }, { "epoch": 2.87, "learning_rate": 5.163278807025495e-09, "loss": 0.0, "step": 5834 }, { "epoch": 2.87, "logps_train/chosen": -82.01612091064453, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -126.625, "logps_train/rejected": -328.070556640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8463876247406006, "rewards_train/margins": 18.295923471450806, "rewards_train/rejected": -20.142311096191406, "step": 5834 }, { "epoch": 2.87, "learning_rate": 5.1244507648341424e-09, "loss": 0.0, "step": 5835 }, { "epoch": 2.87, "logps_train/chosen": -82.98858642578125, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -330.50390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8002746105194092, "rewards_train/margins": 18.44098401069641, "rewards_train/rejected": -20.24125862121582, "step": 5835 }, { "epoch": 2.87, "learning_rate": 5.085768516318812e-09, "loss": 0.0, "step": 5836 }, { "epoch": 2.87, "logps_train/chosen": -85.70068359375, "logps_train/ref_chosen": -66.0625, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -331.95037841796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9629878997802734, "rewards_train/margins": 18.514371871948242, "rewards_train/rejected": -20.477359771728516, "step": 5836 }, { "epoch": 2.87, "learning_rate": 5.047232072875718e-09, "loss": 0.0, "step": 5837 }, { "epoch": 2.87, "logps_train/chosen": -75.0842056274414, "logps_train/ref_chosen": -62.09375, "logps_train/ref_rejected": -123.9375, "logps_train/rejected": -318.8927001953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2994849681854248, "rewards_train/margins": 18.197108507156372, "rewards_train/rejected": -19.496593475341797, "step": 5837 }, { "epoch": 2.87, "learning_rate": 5.008841445857836e-09, "loss": 0.0, "step": 5838 }, { "epoch": 2.87, "logps_train/chosen": -76.87702178955078, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -123.4375, "logps_train/rejected": -320.37823486328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2648992538452148, "rewards_train/margins": 18.42888355255127, "rewards_train/rejected": -19.693782806396484, "step": 5838 }, { "epoch": 2.87, "learning_rate": 4.970596646575398e-09, "loss": 0.0, "step": 5839 }, { "epoch": 2.87, "logps_train/chosen": -82.08766174316406, "logps_train/ref_chosen": -67.375, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -336.97064208984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4715099334716797, "rewards_train/margins": 19.319597244262695, "rewards_train/rejected": -20.791107177734375, "step": 5839 }, { "epoch": 2.88, "learning_rate": 4.932497686295445e-09, "loss": 0.0, "step": 5840 }, { "epoch": 2.88, "logps_train/chosen": -78.06576538085938, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -326.43743896484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3483250141143799, "rewards_train/margins": 18.41739535331726, "rewards_train/rejected": -19.76572036743164, "step": 5840 }, { "epoch": 2.88, "learning_rate": 4.894544576242332e-09, "loss": 0.0, "step": 5841 }, { "epoch": 2.88, "logps_train/chosen": -83.46151733398438, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -336.5252685546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7183678150177002, "rewards_train/margins": 19.347737073898315, "rewards_train/rejected": -21.066104888916016, "step": 5841 }, { "epoch": 2.88, "learning_rate": 4.856737327597227e-09, "loss": 0.0, "step": 5842 }, { "epoch": 2.88, "logps_train/chosen": -81.82389831542969, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -118.375, "logps_train/rejected": -310.05218505859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8375657796859741, "rewards_train/margins": 17.33176100254059, "rewards_train/rejected": -19.169326782226562, "step": 5842 }, { "epoch": 2.88, "learning_rate": 4.819075951498386e-09, "loss": 0.0, "step": 5843 }, { "epoch": 2.88, "logps_train/chosen": -81.56099700927734, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -124.9375, "logps_train/rejected": -332.643310546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7410612106323242, "rewards_train/margins": 19.030789375305176, "rewards_train/rejected": -20.7718505859375, "step": 5843 }, { "epoch": 2.88, "learning_rate": 4.781560459041211e-09, "loss": 0.0, "step": 5844 }, { "epoch": 2.88, "logps_train/chosen": -79.51606750488281, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -328.2944641113281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5273878574371338, "rewards_train/margins": 18.57959532737732, "rewards_train/rejected": -20.106983184814453, "step": 5844 }, { "epoch": 2.88, "learning_rate": 4.744190861277864e-09, "loss": 0.0, "step": 5845 }, { "epoch": 2.88, "logps_train/chosen": -81.39727783203125, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -334.4823303222656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5872867107391357, "rewards_train/margins": 19.175347566604614, "rewards_train/rejected": -20.76263427734375, "step": 5845 }, { "epoch": 2.88, "learning_rate": 4.706967169217757e-09, "loss": 0.0001, "step": 5846 }, { "epoch": 2.88, "logps_train/chosen": -76.46438598632812, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -325.96710205078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.148343563079834, "rewards_train/margins": 18.85978937149048, "rewards_train/rejected": -20.008132934570312, "step": 5846 }, { "epoch": 2.88, "learning_rate": 4.669889393827286e-09, "loss": 0.0, "step": 5847 }, { "epoch": 2.88, "logps_train/chosen": -83.27394104003906, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -126.125, "logps_train/rejected": -326.31103515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8780772686004639, "rewards_train/margins": 18.140916109085083, "rewards_train/rejected": -20.018993377685547, "step": 5847 }, { "epoch": 2.88, "learning_rate": 4.632957546029714e-09, "loss": 0.0001, "step": 5848 }, { "epoch": 2.88, "logps_train/chosen": -82.27310943603516, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -336.92474365234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6414960622787476, "rewards_train/margins": 19.137158036231995, "rewards_train/rejected": -20.778654098510742, "step": 5848 }, { "epoch": 2.88, "learning_rate": 4.596171636705504e-09, "loss": 0.0, "step": 5849 }, { "epoch": 2.88, "logps_train/chosen": -83.2607650756836, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -320.7562561035156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8026386499404907, "rewards_train/margins": 17.805702567100525, "rewards_train/rejected": -19.608341217041016, "step": 5849 }, { "epoch": 2.88, "learning_rate": 4.5595316766919855e-09, "loss": 0.0, "step": 5850 }, { "epoch": 2.88, "logps_train/chosen": -74.64733123779297, "logps_train/ref_chosen": -62.8125, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -329.80584716796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.183922529220581, "rewards_train/margins": 19.232306718826294, "rewards_train/rejected": -20.416229248046875, "step": 5850 }, { "epoch": 2.88, "learning_rate": 4.523037676783581e-09, "loss": 0.0, "step": 5851 }, { "epoch": 2.88, "logps_train/chosen": -90.32295989990234, "logps_train/ref_chosen": -68.4375, "logps_train/ref_rejected": -134.625, "logps_train/rejected": -346.5849609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.191866636276245, "rewards_train/margins": 19.003251314163208, "rewards_train/rejected": -21.195117950439453, "step": 5851 }, { "epoch": 2.88, "learning_rate": 4.486689647731578e-09, "loss": 0.0, "step": 5852 }, { "epoch": 2.88, "logps_train/chosen": -71.43510437011719, "logps_train/ref_chosen": -61.75, "logps_train/ref_rejected": -118.25, "logps_train/rejected": -309.0309143066406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.967387318611145, "rewards_train/margins": 18.10962951183319, "rewards_train/rejected": -19.077016830444336, "step": 5852 }, { "epoch": 2.88, "learning_rate": 4.450487600244468e-09, "loss": 0.0, "step": 5853 }, { "epoch": 2.88, "logps_train/chosen": -77.21039581298828, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -120.25, "logps_train/rejected": -318.2248229980469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.394086480140686, "rewards_train/margins": 18.406423926353455, "rewards_train/rejected": -19.80051040649414, "step": 5853 }, { "epoch": 2.88, "learning_rate": 4.414431544987551e-09, "loss": 0.0, "step": 5854 }, { "epoch": 2.88, "logps_train/chosen": -82.99314880371094, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -327.54754638671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6229966878890991, "rewards_train/margins": 18.366427063941956, "rewards_train/rejected": -19.989423751831055, "step": 5854 }, { "epoch": 2.88, "learning_rate": 4.378521492583165e-09, "loss": 0.0, "step": 5855 }, { "epoch": 2.88, "logps_train/chosen": -84.48190307617188, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -341.12176513671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7600557804107666, "rewards_train/margins": 19.271995306015015, "rewards_train/rejected": -21.03205108642578, "step": 5855 }, { "epoch": 2.88, "learning_rate": 4.342757453610679e-09, "loss": 0.0, "step": 5856 }, { "epoch": 2.88, "logps_train/chosen": -79.0719223022461, "logps_train/ref_chosen": -61.84375, "logps_train/ref_rejected": -121.25, "logps_train/rejected": -316.92498779296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7216452360153198, "rewards_train/margins": 17.84756100177765, "rewards_train/rejected": -19.56920623779297, "step": 5856 }, { "epoch": 2.88, "learning_rate": 4.307139438606444e-09, "loss": 0.0, "step": 5857 }, { "epoch": 2.88, "logps_train/chosen": -80.59524536132812, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -331.4820556640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4705588817596436, "rewards_train/margins": 19.024279832839966, "rewards_train/rejected": -20.49483871459961, "step": 5857 }, { "epoch": 2.88, "learning_rate": 4.271667458063732e-09, "loss": 0.0, "step": 5858 }, { "epoch": 2.88, "logps_train/chosen": -74.55622100830078, "logps_train/ref_chosen": -62.53125, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -321.4540710449219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2034250497817993, "rewards_train/margins": 18.615081191062927, "rewards_train/rejected": -19.818506240844727, "step": 5858 }, { "epoch": 2.88, "learning_rate": 4.236341522432907e-09, "loss": 0.0001, "step": 5859 }, { "epoch": 2.88, "logps_train/chosen": -80.08003234863281, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -130.125, "logps_train/rejected": -336.4578857421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3692824840545654, "rewards_train/margins": 19.26508069038391, "rewards_train/rejected": -20.634363174438477, "step": 5859 }, { "epoch": 2.89, "learning_rate": 4.201161642121087e-09, "loss": 0.0, "step": 5860 }, { "epoch": 2.89, "logps_train/chosen": -77.12228393554688, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -127.5625, "logps_train/rejected": -330.5474853515625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.379122257232666, "rewards_train/margins": 18.921281337738037, "rewards_train/rejected": -20.300403594970703, "step": 5860 }, { "epoch": 2.89, "learning_rate": 4.166127827492649e-09, "loss": 0.0, "step": 5861 }, { "epoch": 2.89, "logps_train/chosen": -79.72030639648438, "logps_train/ref_chosen": -61.78125, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -323.287109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7953708171844482, "rewards_train/margins": 18.073865175247192, "rewards_train/rejected": -19.86923599243164, "step": 5861 }, { "epoch": 2.89, "learning_rate": 4.1312400888686705e-09, "loss": 0.0, "step": 5862 }, { "epoch": 2.89, "logps_train/chosen": -79.83609008789062, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -123.1875, "logps_train/rejected": -323.0596008300781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6383451223373413, "rewards_train/margins": 18.348719239234924, "rewards_train/rejected": -19.987064361572266, "step": 5862 }, { "epoch": 2.89, "learning_rate": 4.0964984365273735e-09, "loss": 0.0, "step": 5863 }, { "epoch": 2.89, "logps_train/chosen": -81.69705200195312, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -331.5889892578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7745875120162964, "rewards_train/margins": 18.634897351264954, "rewards_train/rejected": -20.40948486328125, "step": 5863 }, { "epoch": 2.89, "learning_rate": 4.061902880703849e-09, "loss": 0.0, "step": 5864 }, { "epoch": 2.89, "logps_train/chosen": -82.60517883300781, "logps_train/ref_chosen": -63.9375, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -330.350830078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8659379482269287, "rewards_train/margins": 18.538187265396118, "rewards_train/rejected": -20.404125213623047, "step": 5864 }, { "epoch": 2.89, "learning_rate": 4.0274534315902774e-09, "loss": 0.0, "step": 5865 }, { "epoch": 2.89, "logps_train/chosen": -77.33565521240234, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -334.9517822265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2007286548614502, "rewards_train/margins": 19.454506635665894, "rewards_train/rejected": -20.655235290527344, "step": 5865 }, { "epoch": 2.89, "learning_rate": 3.993150099335596e-09, "loss": 0.0, "step": 5866 }, { "epoch": 2.89, "logps_train/chosen": -73.64830017089844, "logps_train/ref_chosen": -62.59375, "logps_train/ref_rejected": -121.0, "logps_train/rejected": -310.70635986328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1075541973114014, "rewards_train/margins": 17.861222505569458, "rewards_train/rejected": -18.96877670288086, "step": 5866 }, { "epoch": 2.89, "learning_rate": 3.958992894045776e-09, "loss": 0.0, "step": 5867 }, { "epoch": 2.89, "logps_train/chosen": -81.97466278076172, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -332.0374755859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7559623718261719, "rewards_train/margins": 18.912090301513672, "rewards_train/rejected": -20.668052673339844, "step": 5867 }, { "epoch": 2.89, "learning_rate": 3.924981825783879e-09, "loss": 0.0, "step": 5868 }, { "epoch": 2.89, "logps_train/chosen": -82.30724334716797, "logps_train/ref_chosen": -68.625, "logps_train/ref_rejected": -130.875, "logps_train/rejected": -337.656982421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3701045513153076, "rewards_train/margins": 19.309998750686646, "rewards_train/rejected": -20.680103302001953, "step": 5868 }, { "epoch": 2.89, "learning_rate": 3.8911169045697245e-09, "loss": 0.0, "step": 5869 }, { "epoch": 2.89, "logps_train/chosen": -81.03457641601562, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -333.91729736328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4330966472625732, "rewards_train/margins": 19.068007707595825, "rewards_train/rejected": -20.5011043548584, "step": 5869 }, { "epoch": 2.89, "learning_rate": 3.857398140380164e-09, "loss": 0.0, "step": 5870 }, { "epoch": 2.89, "logps_train/chosen": -78.83792877197266, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -325.8405456542969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4195351600646973, "rewards_train/margins": 18.482099056243896, "rewards_train/rejected": -19.901634216308594, "step": 5870 }, { "epoch": 2.89, "learning_rate": 3.823825543148918e-09, "loss": 0.0, "step": 5871 }, { "epoch": 2.89, "logps_train/chosen": -74.88809204101562, "logps_train/ref_chosen": -62.5625, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -318.4990234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.233926773071289, "rewards_train/margins": 18.414609909057617, "rewards_train/rejected": -19.648536682128906, "step": 5871 }, { "epoch": 2.89, "learning_rate": 3.790399122766797e-09, "loss": 0.0, "step": 5872 }, { "epoch": 2.89, "logps_train/chosen": -77.80313873291016, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -331.7412414550781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4745763540267944, "rewards_train/margins": 18.950621247291565, "rewards_train/rejected": -20.42519760131836, "step": 5872 }, { "epoch": 2.89, "learning_rate": 3.757118889081367e-09, "loss": 0.0, "step": 5873 }, { "epoch": 2.89, "logps_train/chosen": -78.28250122070312, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -120.8125, "logps_train/rejected": -313.9034423828125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5327427387237549, "rewards_train/margins": 17.776110410690308, "rewards_train/rejected": -19.308853149414062, "step": 5873 }, { "epoch": 2.89, "learning_rate": 3.7239848518972327e-09, "loss": 0.0, "step": 5874 }, { "epoch": 2.89, "logps_train/chosen": -85.06681823730469, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -326.4169006347656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.1026291847229004, "rewards_train/margins": 18.01659917831421, "rewards_train/rejected": -20.11922836303711, "step": 5874 }, { "epoch": 2.89, "learning_rate": 3.6909970209759723e-09, "loss": 0.0, "step": 5875 }, { "epoch": 2.89, "logps_train/chosen": -78.74991607666016, "logps_train/ref_chosen": -63.25, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -333.03533935546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5503822565078735, "rewards_train/margins": 19.199833273887634, "rewards_train/rejected": -20.750215530395508, "step": 5875 }, { "epoch": 2.89, "learning_rate": 3.658155406035923e-09, "loss": 0.0, "step": 5876 }, { "epoch": 2.89, "logps_train/chosen": -83.63685607910156, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -333.54638671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8479136228561401, "rewards_train/margins": 18.665708661079407, "rewards_train/rejected": -20.513622283935547, "step": 5876 }, { "epoch": 2.89, "learning_rate": 3.6254600167524576e-09, "loss": 0.0, "step": 5877 }, { "epoch": 2.89, "logps_train/chosen": -77.50163269042969, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -121.25, "logps_train/rejected": -316.5719909667969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.418766736984253, "rewards_train/margins": 18.116118669509888, "rewards_train/rejected": -19.53488540649414, "step": 5877 }, { "epoch": 2.89, "learning_rate": 3.592910862757981e-09, "loss": 0.0, "step": 5878 }, { "epoch": 2.89, "logps_train/chosen": -79.28997802734375, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -327.3621826171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5392029285430908, "rewards_train/margins": 18.57929301261902, "rewards_train/rejected": -20.11849594116211, "step": 5878 }, { "epoch": 2.89, "learning_rate": 3.5605079536414897e-09, "loss": 0.0, "step": 5879 }, { "epoch": 2.89, "logps_train/chosen": -81.77682495117188, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -336.33966064453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7429168224334717, "rewards_train/margins": 19.006869554519653, "rewards_train/rejected": -20.749786376953125, "step": 5879 }, { "epoch": 2.9, "learning_rate": 3.5282512989492363e-09, "loss": 0.0, "step": 5880 }, { "epoch": 2.9, "logps_train/chosen": -74.41625213623047, "logps_train/ref_chosen": -62.3125, "logps_train/ref_rejected": -118.8125, "logps_train/rejected": -305.03790283203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2098623514175415, "rewards_train/margins": 17.414042830467224, "rewards_train/rejected": -18.623905181884766, "step": 5880 }, { "epoch": 2.9, "learning_rate": 3.4961409081842864e-09, "loss": 0.0, "step": 5881 }, { "epoch": 2.9, "logps_train/chosen": -77.57178497314453, "logps_train/ref_chosen": -62.09375, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -323.8742370605469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5483895540237427, "rewards_train/margins": 18.393430590629578, "rewards_train/rejected": -19.94182014465332, "step": 5881 }, { "epoch": 2.9, "learning_rate": 3.4641767908064613e-09, "loss": 0.0, "step": 5882 }, { "epoch": 2.9, "logps_train/chosen": -79.9609146118164, "logps_train/ref_chosen": -65.1875, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -326.120849609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4803686141967773, "rewards_train/margins": 18.569804191589355, "rewards_train/rejected": -20.050172805786133, "step": 5882 }, { "epoch": 2.9, "learning_rate": 3.4323589562326727e-09, "loss": 0.0, "step": 5883 }, { "epoch": 2.9, "logps_train/chosen": -83.15325927734375, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -323.29754638671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6396428346633911, "rewards_train/margins": 18.046117663383484, "rewards_train/rejected": -19.685760498046875, "step": 5883 }, { "epoch": 2.9, "learning_rate": 3.400687413836645e-09, "loss": 0.0, "step": 5884 }, { "epoch": 2.9, "logps_train/chosen": -83.54065704345703, "logps_train/ref_chosen": -67.5625, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -334.5238342285156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5962047576904297, "rewards_train/margins": 19.047584533691406, "rewards_train/rejected": -20.643789291381836, "step": 5884 }, { "epoch": 2.9, "learning_rate": 3.369162172949025e-09, "loss": 0.0, "step": 5885 }, { "epoch": 2.9, "logps_train/chosen": -78.98509216308594, "logps_train/ref_chosen": -63.75, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -319.5231018066406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5220932960510254, "rewards_train/margins": 18.08109426498413, "rewards_train/rejected": -19.603187561035156, "step": 5885 }, { "epoch": 2.9, "learning_rate": 3.337783242857439e-09, "loss": 0.0001, "step": 5886 }, { "epoch": 2.9, "logps_train/chosen": -79.19593048095703, "logps_train/ref_chosen": -63.21875, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -330.4964599609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5984995365142822, "rewards_train/margins": 18.94645857810974, "rewards_train/rejected": -20.544958114624023, "step": 5886 }, { "epoch": 2.9, "learning_rate": 3.3065506328062152e-09, "loss": 0.0, "step": 5887 }, { "epoch": 2.9, "logps_train/chosen": -79.26337432861328, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -317.5183410644531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4969921112060547, "rewards_train/margins": 17.942584991455078, "rewards_train/rejected": -19.439577102661133, "step": 5887 }, { "epoch": 2.9, "learning_rate": 3.2754643519967705e-09, "loss": 0.0, "step": 5888 }, { "epoch": 2.9, "logps_train/chosen": -76.63343811035156, "logps_train/ref_chosen": -62.625, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -323.160400390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.402064561843872, "rewards_train/margins": 18.350403547286987, "rewards_train/rejected": -19.75246810913086, "step": 5888 }, { "epoch": 2.9, "learning_rate": 3.2445244095872793e-09, "loss": 0.0, "step": 5889 }, { "epoch": 2.9, "logps_train/chosen": -80.80196380615234, "logps_train/ref_chosen": -66.5625, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -326.4886474609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.421749234199524, "rewards_train/margins": 18.35192096233368, "rewards_train/rejected": -19.773670196533203, "step": 5889 }, { "epoch": 2.9, "learning_rate": 3.213730814692894e-09, "loss": 0.0, "step": 5890 }, { "epoch": 2.9, "logps_train/chosen": -78.26493835449219, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -333.8368225097656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4050092697143555, "rewards_train/margins": 19.115099906921387, "rewards_train/rejected": -20.520109176635742, "step": 5890 }, { "epoch": 2.9, "learning_rate": 3.1830835763856368e-09, "loss": 0.0, "step": 5891 }, { "epoch": 2.9, "logps_train/chosen": -79.85307312011719, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -122.75, "logps_train/rejected": -317.791748046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.65471613407135, "rewards_train/margins": 17.847065329551697, "rewards_train/rejected": -19.501781463623047, "step": 5891 }, { "epoch": 2.9, "learning_rate": 3.15258270369434e-09, "loss": 0.0, "step": 5892 }, { "epoch": 2.9, "logps_train/chosen": -79.16521453857422, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -122.0625, "logps_train/rejected": -325.0771179199219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5135431289672852, "rewards_train/margins": 18.785624504089355, "rewards_train/rejected": -20.29916763305664, "step": 5892 }, { "epoch": 2.9, "learning_rate": 3.12222820560476e-09, "loss": 0.0, "step": 5893 }, { "epoch": 2.9, "logps_train/chosen": -79.98600769042969, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -120.9375, "logps_train/rejected": -314.49041748046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.730876088142395, "rewards_train/margins": 17.625149607658386, "rewards_train/rejected": -19.35602569580078, "step": 5893 }, { "epoch": 2.9, "learning_rate": 3.0920200910595774e-09, "loss": 0.0, "step": 5894 }, { "epoch": 2.9, "logps_train/chosen": -77.27078247070312, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -125.125, "logps_train/rejected": -320.1714172363281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2170686721801758, "rewards_train/margins": 18.28859806060791, "rewards_train/rejected": -19.505666732788086, "step": 5894 }, { "epoch": 2.9, "learning_rate": 3.061958368958284e-09, "loss": 0.0, "step": 5895 }, { "epoch": 2.9, "logps_train/chosen": -79.91661071777344, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -121.5, "logps_train/rejected": -313.286865234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6381456851959229, "rewards_train/margins": 17.543372869491577, "rewards_train/rejected": -19.1815185546875, "step": 5895 }, { "epoch": 2.9, "learning_rate": 3.0320430481573513e-09, "loss": 0.0, "step": 5896 }, { "epoch": 2.9, "logps_train/chosen": -74.269287109375, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -318.1324462890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0894780158996582, "rewards_train/margins": 18.34510564804077, "rewards_train/rejected": -19.43458366394043, "step": 5896 }, { "epoch": 2.9, "learning_rate": 3.0022741374698403e-09, "loss": 0.0, "step": 5897 }, { "epoch": 2.9, "logps_train/chosen": -77.10126495361328, "logps_train/ref_chosen": -63.5625, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -326.173095703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3555853366851807, "rewards_train/margins": 18.463433980941772, "rewards_train/rejected": -19.819019317626953, "step": 5897 }, { "epoch": 2.9, "learning_rate": 2.972651645666069e-09, "loss": 0.0001, "step": 5898 }, { "epoch": 2.9, "logps_train/chosen": -82.66384887695312, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -337.09442138671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.855154275894165, "rewards_train/margins": 18.92479681968689, "rewards_train/rejected": -20.779951095581055, "step": 5898 }, { "epoch": 2.9, "learning_rate": 2.943175581472945e-09, "loss": 0.0, "step": 5899 }, { "epoch": 2.9, "logps_train/chosen": -83.45281219482422, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -129.125, "logps_train/rejected": -330.364990234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7460623979568481, "rewards_train/margins": 18.374714732170105, "rewards_train/rejected": -20.120777130126953, "step": 5899 }, { "epoch": 2.9, "learning_rate": 2.9138459535742455e-09, "loss": 0.0, "step": 5900 }, { "epoch": 2.9, "logps_train/chosen": -82.86402893066406, "logps_train/ref_chosen": -68.6875, "logps_train/ref_rejected": -130.25, "logps_train/rejected": -343.8349609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4169204235076904, "rewards_train/margins": 19.94108510017395, "rewards_train/rejected": -21.35800552368164, "step": 5900 }, { "epoch": 2.91, "learning_rate": 2.8846627706108352e-09, "loss": 0.0, "step": 5901 }, { "epoch": 2.91, "logps_train/chosen": -79.54641723632812, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -128.125, "logps_train/rejected": -331.2401123046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3808135986328125, "rewards_train/margins": 18.932945251464844, "rewards_train/rejected": -20.313758850097656, "step": 5901 }, { "epoch": 2.91, "learning_rate": 2.8556260411801147e-09, "loss": 0.0, "step": 5902 }, { "epoch": 2.91, "logps_train/chosen": -73.95523071289062, "logps_train/ref_chosen": -63.34375, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -314.1206359863281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0606114864349365, "rewards_train/margins": 18.025964975357056, "rewards_train/rejected": -19.086576461791992, "step": 5902 }, { "epoch": 2.91, "learning_rate": 2.8267357738366304e-09, "loss": 0.0, "step": 5903 }, { "epoch": 2.91, "logps_train/chosen": -80.62384033203125, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -131.375, "logps_train/rejected": -343.8252868652344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5963687896728516, "rewards_train/margins": 19.650907516479492, "rewards_train/rejected": -21.247276306152344, "step": 5903 }, { "epoch": 2.91, "learning_rate": 2.7979919770915737e-09, "loss": 0.0, "step": 5904 }, { "epoch": 2.91, "logps_train/chosen": -76.48619079589844, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -329.73193359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2073098421096802, "rewards_train/margins": 18.919395327568054, "rewards_train/rejected": -20.126705169677734, "step": 5904 }, { "epoch": 2.91, "learning_rate": 2.76939465941306e-09, "loss": 0.0, "step": 5905 }, { "epoch": 2.91, "logps_train/chosen": -81.90510559082031, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -131.875, "logps_train/rejected": -338.63092041015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.66165292263031, "rewards_train/margins": 19.01564848423004, "rewards_train/rejected": -20.67730140686035, "step": 5905 }, { "epoch": 2.91, "learning_rate": 2.7409438292261276e-09, "loss": 0.0, "step": 5906 }, { "epoch": 2.91, "logps_train/chosen": -78.13264465332031, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -339.17950439453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.269319772720337, "rewards_train/margins": 19.709470510482788, "rewards_train/rejected": -20.978790283203125, "step": 5906 }, { "epoch": 2.91, "learning_rate": 2.712639494912461e-09, "loss": 0.0, "step": 5907 }, { "epoch": 2.91, "logps_train/chosen": -84.45034790039062, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -330.79010009765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8944493532180786, "rewards_train/margins": 18.452821612358093, "rewards_train/rejected": -20.347270965576172, "step": 5907 }, { "epoch": 2.91, "learning_rate": 2.684481664810834e-09, "loss": 0.0001, "step": 5908 }, { "epoch": 2.91, "logps_train/chosen": -85.18537902832031, "logps_train/ref_chosen": -67.25, "logps_train/ref_rejected": -128.625, "logps_train/rejected": -335.168212890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7961742877960205, "rewards_train/margins": 18.853066205978394, "rewards_train/rejected": -20.649240493774414, "step": 5908 }, { "epoch": 2.91, "learning_rate": 2.656470347216666e-09, "loss": 0.0, "step": 5909 }, { "epoch": 2.91, "logps_train/chosen": -78.82682037353516, "logps_train/ref_chosen": -63.625, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -324.09063720703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.520572304725647, "rewards_train/margins": 18.282047629356384, "rewards_train/rejected": -19.80261993408203, "step": 5909 }, { "epoch": 2.91, "learning_rate": 2.628605550382246e-09, "loss": 0.0001, "step": 5910 }, { "epoch": 2.91, "logps_train/chosen": -82.44867706298828, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -321.8436279296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.745063066482544, "rewards_train/margins": 17.859366178512573, "rewards_train/rejected": -19.604429244995117, "step": 5910 }, { "epoch": 2.91, "learning_rate": 2.6008872825168393e-09, "loss": 0.0, "step": 5911 }, { "epoch": 2.91, "logps_train/chosen": -79.63006591796875, "logps_train/ref_chosen": -62.53125, "logps_train/ref_rejected": -121.25, "logps_train/rejected": -312.5679931640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7095398902893066, "rewards_train/margins": 17.424066066741943, "rewards_train/rejected": -19.13360595703125, "step": 5911 }, { "epoch": 2.91, "learning_rate": 2.5733155517863038e-09, "loss": 0.0, "step": 5912 }, { "epoch": 2.91, "logps_train/chosen": -79.68511962890625, "logps_train/ref_chosen": -62.1875, "logps_train/ref_rejected": -119.25, "logps_train/rejected": -314.83624267578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7490782737731934, "rewards_train/margins": 17.807154178619385, "rewards_train/rejected": -19.556232452392578, "step": 5912 }, { "epoch": 2.91, "learning_rate": 2.54589036631353e-09, "loss": 0.0, "step": 5913 }, { "epoch": 2.91, "logps_train/chosen": -83.96385192871094, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -330.8873596191406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.865476369857788, "rewards_train/margins": 18.521942377090454, "rewards_train/rejected": -20.387418746948242, "step": 5913 }, { "epoch": 2.91, "learning_rate": 2.518611734178222e-09, "loss": 0.0, "step": 5914 }, { "epoch": 2.91, "logps_train/chosen": -79.69449615478516, "logps_train/ref_chosen": -61.9375, "logps_train/ref_rejected": -120.0625, "logps_train/rejected": -313.63641357421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.776236653327942, "rewards_train/margins": 17.582083582878113, "rewards_train/rejected": -19.358320236206055, "step": 5914 }, { "epoch": 2.91, "learning_rate": 2.4914796634166736e-09, "loss": 0.0, "step": 5915 }, { "epoch": 2.91, "logps_train/chosen": -82.12863159179688, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -323.09564208984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8997290134429932, "rewards_train/margins": 17.93083357810974, "rewards_train/rejected": -19.830562591552734, "step": 5915 }, { "epoch": 2.91, "learning_rate": 2.4644941620223792e-09, "loss": 0.0, "step": 5916 }, { "epoch": 2.91, "logps_train/chosen": -80.6762924194336, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -331.03399658203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7288591861724854, "rewards_train/margins": 18.814971685409546, "rewards_train/rejected": -20.54383087158203, "step": 5916 }, { "epoch": 2.91, "learning_rate": 2.437655237945313e-09, "loss": 0.0, "step": 5917 }, { "epoch": 2.91, "logps_train/chosen": -81.12313842773438, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -339.672119140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6411707401275635, "rewards_train/margins": 19.455730199813843, "rewards_train/rejected": -21.096900939941406, "step": 5917 }, { "epoch": 2.91, "learning_rate": 2.4109628990924834e-09, "loss": 0.0, "step": 5918 }, { "epoch": 2.91, "logps_train/chosen": -76.5551528930664, "logps_train/ref_chosen": -61.09375, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -322.9515686035156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5468723773956299, "rewards_train/margins": 18.119477033615112, "rewards_train/rejected": -19.666349411010742, "step": 5918 }, { "epoch": 2.91, "learning_rate": 2.3844171533275448e-09, "loss": 0.0, "step": 5919 }, { "epoch": 2.91, "logps_train/chosen": -84.36805725097656, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -321.7840576171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7691293954849243, "rewards_train/margins": 17.928759932518005, "rewards_train/rejected": -19.69788932800293, "step": 5919 }, { "epoch": 2.91, "learning_rate": 2.35801800847113e-09, "loss": 0.0, "step": 5920 }, { "epoch": 2.91, "logps_train/chosen": -83.19033813476562, "logps_train/ref_chosen": -63.78125, "logps_train/ref_rejected": -127.1875, "logps_train/rejected": -338.7186279296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9404208660125732, "rewards_train/margins": 19.211812257766724, "rewards_train/rejected": -21.152233123779297, "step": 5920 }, { "epoch": 2.92, "learning_rate": 2.3317654723006287e-09, "loss": 0.0, "step": 5921 }, { "epoch": 2.92, "logps_train/chosen": -81.58746337890625, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -125.375, "logps_train/rejected": -328.2508239746094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.557232141494751, "rewards_train/margins": 18.73162055015564, "rewards_train/rejected": -20.28885269165039, "step": 5921 }, { "epoch": 2.92, "learning_rate": 2.3056595525500766e-09, "loss": 0.0, "step": 5922 }, { "epoch": 2.92, "logps_train/chosen": -76.7437744140625, "logps_train/ref_chosen": -62.875, "logps_train/ref_rejected": -125.4375, "logps_train/rejected": -327.16729736328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3886845111846924, "rewards_train/margins": 18.786736249923706, "rewards_train/rejected": -20.1754207611084, "step": 5922 }, { "epoch": 2.92, "learning_rate": 2.2797002569105993e-09, "loss": 0.0, "step": 5923 }, { "epoch": 2.92, "logps_train/chosen": -76.81085968017578, "logps_train/ref_chosen": -63.09375, "logps_train/ref_rejected": -120.625, "logps_train/rejected": -314.05621337890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3709295988082886, "rewards_train/margins": 17.971607327461243, "rewards_train/rejected": -19.34253692626953, "step": 5923 }, { "epoch": 2.92, "learning_rate": 2.2538875930299127e-09, "loss": 0.0, "step": 5924 }, { "epoch": 2.92, "logps_train/chosen": -77.8779067993164, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -320.83685302734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3789530992507935, "rewards_train/margins": 18.20658767223358, "rewards_train/rejected": -19.585540771484375, "step": 5924 }, { "epoch": 2.92, "learning_rate": 2.2282215685126004e-09, "loss": 0.0, "step": 5925 }, { "epoch": 2.92, "logps_train/chosen": -82.01554870605469, "logps_train/ref_chosen": -68.375, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -333.3455505371094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.361125111579895, "rewards_train/margins": 18.877434611320496, "rewards_train/rejected": -20.23855972290039, "step": 5925 }, { "epoch": 2.92, "learning_rate": 2.2027021909200584e-09, "loss": 0.0, "step": 5926 }, { "epoch": 2.92, "logps_train/chosen": -83.82134246826172, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -336.33795166015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8560600280761719, "rewards_train/margins": 18.906003952026367, "rewards_train/rejected": -20.76206398010254, "step": 5926 }, { "epoch": 2.92, "learning_rate": 2.1773294677704946e-09, "loss": 0.0, "step": 5927 }, { "epoch": 2.92, "logps_train/chosen": -78.60692596435547, "logps_train/ref_chosen": -61.78125, "logps_train/ref_rejected": -119.875, "logps_train/rejected": -315.17938232421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6808583736419678, "rewards_train/margins": 17.84665322303772, "rewards_train/rejected": -19.527511596679688, "step": 5927 }, { "epoch": 2.92, "learning_rate": 2.1521034065388188e-09, "loss": 0.0002, "step": 5928 }, { "epoch": 2.92, "logps_train/chosen": -73.37638092041016, "logps_train/ref_chosen": -62.90625, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -327.0953063964844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0481362342834473, "rewards_train/margins": 19.072137355804443, "rewards_train/rejected": -20.12027359008789, "step": 5928 }, { "epoch": 2.92, "learning_rate": 2.127024014656864e-09, "loss": 0.0, "step": 5929 }, { "epoch": 2.92, "logps_train/chosen": -74.21735382080078, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -132.0, "logps_train/rejected": -336.9530029296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8848699331283569, "rewards_train/margins": 19.605597138404846, "rewards_train/rejected": -20.490467071533203, "step": 5929 }, { "epoch": 2.92, "learning_rate": 2.1020912995131646e-09, "loss": 0.0, "step": 5930 }, { "epoch": 2.92, "logps_train/chosen": -80.49932861328125, "logps_train/ref_chosen": -67.3125, "logps_train/ref_rejected": -131.125, "logps_train/rejected": -338.7930603027344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3169246912002563, "rewards_train/margins": 19.447929739952087, "rewards_train/rejected": -20.764854431152344, "step": 5930 }, { "epoch": 2.92, "learning_rate": 2.0773052684530667e-09, "loss": 0.0, "step": 5931 }, { "epoch": 2.92, "logps_train/chosen": -78.74654388427734, "logps_train/ref_chosen": -63.1875, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -312.36053466796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5568807125091553, "rewards_train/margins": 17.205150842666626, "rewards_train/rejected": -18.76203155517578, "step": 5931 }, { "epoch": 2.92, "learning_rate": 2.052665928778674e-09, "loss": 0.0002, "step": 5932 }, { "epoch": 2.92, "logps_train/chosen": -82.35348510742188, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -327.19012451171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.570943832397461, "rewards_train/margins": 18.257638931274414, "rewards_train/rejected": -19.828582763671875, "step": 5932 }, { "epoch": 2.92, "learning_rate": 2.028173287748958e-09, "loss": 0.0, "step": 5933 }, { "epoch": 2.92, "logps_train/chosen": -78.88482666015625, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -127.6875, "logps_train/rejected": -329.93170166015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5511293411254883, "rewards_train/margins": 18.670506477355957, "rewards_train/rejected": -20.221635818481445, "step": 5933 }, { "epoch": 2.92, "learning_rate": 2.00382735257959e-09, "loss": 0.0001, "step": 5934 }, { "epoch": 2.92, "logps_train/chosen": -80.23074340820312, "logps_train/ref_chosen": -62.21875, "logps_train/ref_rejected": -124.4375, "logps_train/rejected": -320.8398742675781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.80198073387146, "rewards_train/margins": 17.842260599136353, "rewards_train/rejected": -19.644241333007812, "step": 5934 }, { "epoch": 2.92, "learning_rate": 1.9796281304430564e-09, "loss": 0.0, "step": 5935 }, { "epoch": 2.92, "logps_train/chosen": -85.58164978027344, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -330.76885986328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9839463233947754, "rewards_train/margins": 18.63542127609253, "rewards_train/rejected": -20.619367599487305, "step": 5935 }, { "epoch": 2.92, "learning_rate": 1.9555756284685423e-09, "loss": 0.0, "step": 5936 }, { "epoch": 2.92, "logps_train/chosen": -77.80804443359375, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -127.3125, "logps_train/rejected": -333.1814270019531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1463804244995117, "rewards_train/margins": 19.44178295135498, "rewards_train/rejected": -20.588163375854492, "step": 5936 }, { "epoch": 2.92, "learning_rate": 1.931669853742157e-09, "loss": 0.0, "step": 5937 }, { "epoch": 2.92, "logps_train/chosen": -77.54585266113281, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -123.8125, "logps_train/rejected": -322.54522705078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2499959468841553, "rewards_train/margins": 18.62234902381897, "rewards_train/rejected": -19.872344970703125, "step": 5937 }, { "epoch": 2.92, "learning_rate": 1.9079108133067124e-09, "loss": 0.0001, "step": 5938 }, { "epoch": 2.92, "logps_train/chosen": -83.35345458984375, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -324.9813232421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.910003662109375, "rewards_train/margins": 18.029144287109375, "rewards_train/rejected": -19.93914794921875, "step": 5938 }, { "epoch": 2.92, "learning_rate": 1.8842985141617198e-09, "loss": 0.0001, "step": 5939 }, { "epoch": 2.92, "logps_train/chosen": -80.2227783203125, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -129.375, "logps_train/rejected": -330.1981506347656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3335082530975342, "rewards_train/margins": 18.750075578689575, "rewards_train/rejected": -20.08358383178711, "step": 5939 }, { "epoch": 2.92, "learning_rate": 1.8608329632635588e-09, "loss": 0.0, "step": 5940 }, { "epoch": 2.92, "logps_train/chosen": -84.93142700195312, "logps_train/ref_chosen": -67.5625, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -340.65350341796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.735134482383728, "rewards_train/margins": 19.408342003822327, "rewards_train/rejected": -21.143476486206055, "step": 5940 }, { "epoch": 2.93, "learning_rate": 1.8375141675253113e-09, "loss": 0.0, "step": 5941 }, { "epoch": 2.93, "logps_train/chosen": -85.14234161376953, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -128.5, "logps_train/rejected": -327.3832092285156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8510503768920898, "rewards_train/margins": 18.043715476989746, "rewards_train/rejected": -19.894765853881836, "step": 5941 }, { "epoch": 2.93, "learning_rate": 1.8143421338168708e-09, "loss": 0.0, "step": 5942 }, { "epoch": 2.93, "logps_train/chosen": -72.96675109863281, "logps_train/ref_chosen": -60.28125, "logps_train/ref_rejected": -121.1875, "logps_train/rejected": -320.6870422363281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2688919305801392, "rewards_train/margins": 18.68227994441986, "rewards_train/rejected": -19.951171875, "step": 5942 }, { "epoch": 2.93, "learning_rate": 1.7913168689648872e-09, "loss": 0.0, "step": 5943 }, { "epoch": 2.93, "logps_train/chosen": -83.4224624633789, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -334.4630126953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7587013244628906, "rewards_train/margins": 18.98379135131836, "rewards_train/rejected": -20.74249267578125, "step": 5943 }, { "epoch": 2.93, "learning_rate": 1.768438379752768e-09, "loss": 0.0, "step": 5944 }, { "epoch": 2.93, "logps_train/chosen": -87.26014709472656, "logps_train/ref_chosen": -67.6875, "logps_train/ref_rejected": -130.5, "logps_train/rejected": -340.20977783203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9596084356307983, "rewards_train/margins": 19.012542366981506, "rewards_train/rejected": -20.972150802612305, "step": 5944 }, { "epoch": 2.93, "learning_rate": 1.7457066729206772e-09, "loss": 0.0, "step": 5945 }, { "epoch": 2.93, "logps_train/chosen": -77.70866394042969, "logps_train/ref_chosen": -63.6875, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -323.9031982421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.402165174484253, "rewards_train/margins": 18.50724720954895, "rewards_train/rejected": -19.909412384033203, "step": 5945 }, { "epoch": 2.93, "learning_rate": 1.7231217551654243e-09, "loss": 0.0001, "step": 5946 }, { "epoch": 2.93, "logps_train/chosen": -80.61839294433594, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -337.0179443359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6372301578521729, "rewards_train/margins": 19.142688512802124, "rewards_train/rejected": -20.779918670654297, "step": 5946 }, { "epoch": 2.93, "learning_rate": 1.7006836331407982e-09, "loss": 0.0, "step": 5947 }, { "epoch": 2.93, "logps_train/chosen": -77.56013488769531, "logps_train/ref_chosen": -63.375, "logps_train/ref_rejected": -122.0625, "logps_train/rejected": -318.95269775390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4171948432922363, "rewards_train/margins": 18.272700786590576, "rewards_train/rejected": -19.689895629882812, "step": 5947 }, { "epoch": 2.93, "learning_rate": 1.6783923134571776e-09, "loss": 0.0, "step": 5948 }, { "epoch": 2.93, "logps_train/chosen": -81.42021179199219, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -341.3526611328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5733692646026611, "rewards_train/margins": 19.67498278617859, "rewards_train/rejected": -21.24835205078125, "step": 5948 }, { "epoch": 2.93, "learning_rate": 1.6562478026816983e-09, "loss": 0.0, "step": 5949 }, { "epoch": 2.93, "logps_train/chosen": -79.55813598632812, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -342.8598327636719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3547143936157227, "rewards_train/margins": 19.83497905731201, "rewards_train/rejected": -21.189693450927734, "step": 5949 }, { "epoch": 2.93, "learning_rate": 1.634250107338364e-09, "loss": 0.0, "step": 5950 }, { "epoch": 2.93, "logps_train/chosen": -79.76383972167969, "logps_train/ref_chosen": -65.75, "logps_train/ref_rejected": -130.0, "logps_train/rejected": -336.30291748046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4007494449615479, "rewards_train/margins": 19.233596563339233, "rewards_train/rejected": -20.63434600830078, "step": 5950 }, { "epoch": 2.93, "learning_rate": 1.6123992339077685e-09, "loss": 0.0, "step": 5951 }, { "epoch": 2.93, "logps_train/chosen": -74.88175201416016, "logps_train/ref_chosen": -62.125, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -323.77105712890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.275626301765442, "rewards_train/margins": 18.41226851940155, "rewards_train/rejected": -19.687894821166992, "step": 5951 }, { "epoch": 2.93, "learning_rate": 1.5906951888273734e-09, "loss": 0.0, "step": 5952 }, { "epoch": 2.93, "logps_train/chosen": -81.28743743896484, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -329.45452880859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.692171335220337, "rewards_train/margins": 18.459726095199585, "rewards_train/rejected": -20.151897430419922, "step": 5952 }, { "epoch": 2.93, "learning_rate": 1.5691379784913416e-09, "loss": 0.0, "step": 5953 }, { "epoch": 2.93, "logps_train/chosen": -80.67407989501953, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -326.2383117675781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5208749771118164, "rewards_train/margins": 18.328932762145996, "rewards_train/rejected": -19.849807739257812, "step": 5953 }, { "epoch": 2.93, "learning_rate": 1.5477276092505377e-09, "loss": 0.0, "step": 5954 }, { "epoch": 2.93, "logps_train/chosen": -77.78910827636719, "logps_train/ref_chosen": -65.6875, "logps_train/ref_rejected": -129.0, "logps_train/rejected": -336.71026611328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2113820314407349, "rewards_train/margins": 19.563066363334656, "rewards_train/rejected": -20.77444839477539, "step": 5954 }, { "epoch": 2.93, "learning_rate": 1.5264640874126377e-09, "loss": 0.0, "step": 5955 }, { "epoch": 2.93, "logps_train/chosen": -76.53009796142578, "logps_train/ref_chosen": -62.875, "logps_train/ref_rejected": -121.9375, "logps_train/rejected": -313.2136535644531, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3664376735687256, "rewards_train/margins": 17.76357102394104, "rewards_train/rejected": -19.130008697509766, "step": 5955 }, { "epoch": 2.93, "learning_rate": 1.5053474192419647e-09, "loss": 0.0001, "step": 5956 }, { "epoch": 2.93, "logps_train/chosen": -81.21939086914062, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -123.875, "logps_train/rejected": -317.1949462890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8108556270599365, "rewards_train/margins": 17.524067163467407, "rewards_train/rejected": -19.334922790527344, "step": 5956 }, { "epoch": 2.93, "learning_rate": 1.4843776109597083e-09, "loss": 0.0, "step": 5957 }, { "epoch": 2.93, "logps_train/chosen": -75.14824676513672, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -321.8590087890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1830377578735352, "rewards_train/margins": 18.535090446472168, "rewards_train/rejected": -19.718128204345703, "step": 5957 }, { "epoch": 2.93, "learning_rate": 1.4635546687436494e-09, "loss": 0.0, "step": 5958 }, { "epoch": 2.93, "logps_train/chosen": -79.71379089355469, "logps_train/ref_chosen": -63.0, "logps_train/ref_rejected": -124.8125, "logps_train/rejected": -327.7347717285156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6714274883270264, "rewards_train/margins": 18.624463319778442, "rewards_train/rejected": -20.29589080810547, "step": 5958 }, { "epoch": 2.93, "learning_rate": 1.4428785987283809e-09, "loss": 0.0, "step": 5959 }, { "epoch": 2.93, "logps_train/chosen": -75.02615356445312, "logps_train/ref_chosen": -59.8125, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -320.4688720703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5205349922180176, "rewards_train/margins": 18.23890256881714, "rewards_train/rejected": -19.759437561035156, "step": 5959 }, { "epoch": 2.93, "learning_rate": 1.4223494070052522e-09, "loss": 0.0002, "step": 5960 }, { "epoch": 2.93, "logps_train/chosen": -75.87818908691406, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -123.375, "logps_train/rejected": -323.91888427734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3118916749954224, "rewards_train/margins": 18.742595553398132, "rewards_train/rejected": -20.054487228393555, "step": 5960 }, { "epoch": 2.94, "learning_rate": 1.4019670996222032e-09, "loss": 0.0, "step": 5961 }, { "epoch": 2.94, "logps_train/chosen": -79.98526000976562, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -326.086181640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.59159255027771, "rewards_train/margins": 18.563608407974243, "rewards_train/rejected": -20.155200958251953, "step": 5961 }, { "epoch": 2.94, "learning_rate": 1.381731682584153e-09, "loss": 0.0, "step": 5962 }, { "epoch": 2.94, "logps_train/chosen": -82.61891174316406, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -131.25, "logps_train/rejected": -335.6990966796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6451432704925537, "rewards_train/margins": 18.80552887916565, "rewards_train/rejected": -20.450672149658203, "step": 5962 }, { "epoch": 2.94, "learning_rate": 1.3616431618524438e-09, "loss": 0.0, "step": 5963 }, { "epoch": 2.94, "logps_train/chosen": -76.59941101074219, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -325.6575622558594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3618942499160767, "rewards_train/margins": 18.744194388389587, "rewards_train/rejected": -20.106088638305664, "step": 5963 }, { "epoch": 2.94, "learning_rate": 1.341701543345397e-09, "loss": 0.0, "step": 5964 }, { "epoch": 2.94, "logps_train/chosen": -73.52641296386719, "logps_train/ref_chosen": -61.28125, "logps_train/ref_rejected": -125.875, "logps_train/rejected": -328.6176452636719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2237844467163086, "rewards_train/margins": 19.04970073699951, "rewards_train/rejected": -20.27348518371582, "step": 5964 }, { "epoch": 2.94, "learning_rate": 1.321906832937869e-09, "loss": 0.0, "step": 5965 }, { "epoch": 2.94, "logps_train/chosen": -82.15983581542969, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -336.3481140136719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6283860206604004, "rewards_train/margins": 19.169217586517334, "rewards_train/rejected": -20.797603607177734, "step": 5965 }, { "epoch": 2.94, "learning_rate": 1.3022590364615838e-09, "loss": 0.0, "step": 5966 }, { "epoch": 2.94, "logps_train/chosen": -79.11029052734375, "logps_train/ref_chosen": -62.9375, "logps_train/ref_rejected": -122.375, "logps_train/rejected": -318.9666748046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.618841528892517, "rewards_train/margins": 18.041057229042053, "rewards_train/rejected": -19.65989875793457, "step": 5966 }, { "epoch": 2.94, "learning_rate": 1.2827581597048554e-09, "loss": 0.0, "step": 5967 }, { "epoch": 2.94, "logps_train/chosen": -77.54608917236328, "logps_train/ref_chosen": -62.75, "logps_train/ref_rejected": -120.6875, "logps_train/rejected": -314.77777099609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4808783531188965, "rewards_train/margins": 17.926634311676025, "rewards_train/rejected": -19.407512664794922, "step": 5967 }, { "epoch": 2.94, "learning_rate": 1.263404208412755e-09, "loss": 0.0, "step": 5968 }, { "epoch": 2.94, "logps_train/chosen": -78.30620574951172, "logps_train/ref_chosen": -66.6875, "logps_train/ref_rejected": -124.75, "logps_train/rejected": -327.63739013671875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1605520248413086, "rewards_train/margins": 19.126282691955566, "rewards_train/rejected": -20.286834716796875, "step": 5968 }, { "epoch": 2.94, "learning_rate": 1.244197188287166e-09, "loss": 0.0, "step": 5969 }, { "epoch": 2.94, "logps_train/chosen": -87.90909576416016, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -126.0625, "logps_train/rejected": -335.2691650390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.2004313468933105, "rewards_train/margins": 18.72174882888794, "rewards_train/rejected": -20.92218017578125, "step": 5969 }, { "epoch": 2.94, "learning_rate": 1.2251371049866177e-09, "loss": 0.0, "step": 5970 }, { "epoch": 2.94, "logps_train/chosen": -75.3194351196289, "logps_train/ref_chosen": -62.65625, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -329.15167236328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2668800354003906, "rewards_train/margins": 19.120750427246094, "rewards_train/rejected": -20.387630462646484, "step": 5970 }, { "epoch": 2.94, "learning_rate": 1.2062239641262295e-09, "loss": 0.0, "step": 5971 }, { "epoch": 2.94, "logps_train/chosen": -75.61457824707031, "logps_train/ref_chosen": -63.03125, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -318.2828063964844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2596023082733154, "rewards_train/margins": 18.20354390144348, "rewards_train/rejected": -19.463146209716797, "step": 5971 }, { "epoch": 2.94, "learning_rate": 1.1874577712780442e-09, "loss": 0.0, "step": 5972 }, { "epoch": 2.94, "logps_train/chosen": -74.49853515625, "logps_train/ref_chosen": -62.5625, "logps_train/ref_rejected": -123.6875, "logps_train/rejected": -319.08038330078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1949700117111206, "rewards_train/margins": 18.346076130867004, "rewards_train/rejected": -19.541046142578125, "step": 5972 }, { "epoch": 2.94, "learning_rate": 1.1688385319706395e-09, "loss": 0.0, "step": 5973 }, { "epoch": 2.94, "logps_train/chosen": -79.0904312133789, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -327.65740966796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.421347975730896, "rewards_train/margins": 18.705231308937073, "rewards_train/rejected": -20.12657928466797, "step": 5973 }, { "epoch": 2.94, "learning_rate": 1.1503662516894052e-09, "loss": 0.0, "step": 5974 }, { "epoch": 2.94, "logps_train/chosen": -78.04805755615234, "logps_train/ref_chosen": -62.03125, "logps_train/ref_rejected": -124.6875, "logps_train/rejected": -316.40106201171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6012165546417236, "rewards_train/margins": 17.568773984909058, "rewards_train/rejected": -19.16999053955078, "step": 5974 }, { "epoch": 2.94, "learning_rate": 1.1320409358763773e-09, "loss": 0.0, "step": 5975 }, { "epoch": 2.94, "logps_train/chosen": -86.71946716308594, "logps_train/ref_chosen": -64.9375, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -328.07061767578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.180589437484741, "rewards_train/margins": 18.062998056411743, "rewards_train/rejected": -20.243587493896484, "step": 5975 }, { "epoch": 2.94, "learning_rate": 1.1138625899303477e-09, "loss": 0.0, "step": 5976 }, { "epoch": 2.94, "logps_train/chosen": -77.63816833496094, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -124.375, "logps_train/rejected": -323.643310546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2306134700775146, "rewards_train/margins": 18.697681665420532, "rewards_train/rejected": -19.928295135498047, "step": 5976 }, { "epoch": 2.94, "learning_rate": 1.095831219206811e-09, "loss": 0.0, "step": 5977 }, { "epoch": 2.94, "logps_train/chosen": -83.13389587402344, "logps_train/ref_chosen": -67.3125, "logps_train/ref_rejected": -132.25, "logps_train/rejected": -341.5794677734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5810655355453491, "rewards_train/margins": 19.35071051120758, "rewards_train/rejected": -20.93177604675293, "step": 5977 }, { "epoch": 2.94, "learning_rate": 1.0779468290177951e-09, "loss": 0.0, "step": 5978 }, { "epoch": 2.94, "logps_train/chosen": -78.89368438720703, "logps_train/ref_chosen": -63.0625, "logps_train/ref_rejected": -119.1875, "logps_train/rejected": -309.1971435546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5815558433532715, "rewards_train/margins": 17.42106866836548, "rewards_train/rejected": -19.00262451171875, "step": 5978 }, { "epoch": 2.94, "learning_rate": 1.060209424632308e-09, "loss": 0.0001, "step": 5979 }, { "epoch": 2.94, "logps_train/chosen": -81.48860168457031, "logps_train/ref_chosen": -66.375, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -326.8410949707031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5091139078140259, "rewards_train/margins": 18.59584367275238, "rewards_train/rejected": -20.104957580566406, "step": 5979 }, { "epoch": 2.94, "learning_rate": 1.042619011275836e-09, "loss": 0.0, "step": 5980 }, { "epoch": 2.94, "logps_train/chosen": -85.30504608154297, "logps_train/ref_chosen": -67.25, "logps_train/ref_rejected": -130.75, "logps_train/rejected": -344.59136962890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8036006689071655, "rewards_train/margins": 19.583762764930725, "rewards_train/rejected": -21.38736343383789, "step": 5980 }, { "epoch": 2.94, "learning_rate": 1.0251755941306227e-09, "loss": 0.0, "step": 5981 }, { "epoch": 2.94, "logps_train/chosen": -81.08973693847656, "logps_train/ref_chosen": -63.875, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -335.11639404296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7203025817871094, "rewards_train/margins": 19.481229782104492, "rewards_train/rejected": -21.2015323638916, "step": 5981 }, { "epoch": 2.95, "learning_rate": 1.0078791783357243e-09, "loss": 0.0, "step": 5982 }, { "epoch": 2.95, "logps_train/chosen": -76.74408721923828, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -328.378662109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.275775671005249, "rewards_train/margins": 19.03093934059143, "rewards_train/rejected": -20.30671501159668, "step": 5982 }, { "epoch": 2.95, "learning_rate": 9.907297689866202e-10, "loss": 0.0, "step": 5983 }, { "epoch": 2.95, "logps_train/chosen": -78.94125366210938, "logps_train/ref_chosen": -62.15625, "logps_train/ref_rejected": -120.0, "logps_train/rejected": -312.27642822265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6776702404022217, "rewards_train/margins": 17.54719090461731, "rewards_train/rejected": -19.22486114501953, "step": 5983 }, { "epoch": 2.95, "learning_rate": 9.73727371135713e-10, "loss": 0.0001, "step": 5984 }, { "epoch": 2.95, "logps_train/chosen": -79.86810302734375, "logps_train/ref_chosen": -62.84375, "logps_train/ref_rejected": -121.5, "logps_train/rejected": -312.964111328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7040469646453857, "rewards_train/margins": 17.44011616706848, "rewards_train/rejected": -19.144163131713867, "step": 5984 }, { "epoch": 2.95, "learning_rate": 9.568719897921074e-10, "loss": 0.0, "step": 5985 }, { "epoch": 2.95, "logps_train/chosen": -81.54703521728516, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -331.84320068359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5675451755523682, "rewards_train/margins": 19.08640456199646, "rewards_train/rejected": -20.653949737548828, "step": 5985 }, { "epoch": 2.95, "learning_rate": 9.401636299213866e-10, "loss": 0.0, "step": 5986 }, { "epoch": 2.95, "logps_train/chosen": -82.29629516601562, "logps_train/ref_chosen": -65.5625, "logps_train/ref_rejected": -129.75, "logps_train/rejected": -335.95513916015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6751863956451416, "rewards_train/margins": 18.94425368309021, "rewards_train/rejected": -20.61944007873535, "step": 5986 }, { "epoch": 2.95, "learning_rate": 9.236022964460022e-10, "loss": 0.0005, "step": 5987 }, { "epoch": 2.95, "logps_train/chosen": -80.01858520507812, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -124.25, "logps_train/rejected": -324.39727783203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.451125979423523, "rewards_train/margins": 18.56643497943878, "rewards_train/rejected": -20.017560958862305, "step": 5987 }, { "epoch": 2.95, "learning_rate": 9.071879942450511e-10, "loss": 0.0, "step": 5988 }, { "epoch": 2.95, "logps_train/chosen": -69.69699096679688, "logps_train/ref_chosen": -62.6875, "logps_train/ref_rejected": -122.1875, "logps_train/rejected": -319.5449523925781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6996795535087585, "rewards_train/margins": 19.037384808063507, "rewards_train/rejected": -19.737064361572266, "step": 5988 }, { "epoch": 2.95, "learning_rate": 8.909207281542208e-10, "loss": 0.0, "step": 5989 }, { "epoch": 2.95, "logps_train/chosen": -77.61536407470703, "logps_train/ref_chosen": -62.40625, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -315.554443359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.521155595779419, "rewards_train/margins": 17.83126139640808, "rewards_train/rejected": -19.3524169921875, "step": 5989 }, { "epoch": 2.95, "learning_rate": 8.748005029660666e-10, "loss": 0.0, "step": 5990 }, { "epoch": 2.95, "logps_train/chosen": -78.17046356201172, "logps_train/ref_chosen": -62.71875, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -330.47808837890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5450735092163086, "rewards_train/margins": 18.996874809265137, "rewards_train/rejected": -20.541948318481445, "step": 5990 }, { "epoch": 2.95, "learning_rate": 8.588273234296228e-10, "loss": 0.0, "step": 5991 }, { "epoch": 2.95, "logps_train/chosen": -81.11598205566406, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -325.5457458496094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.631837248802185, "rewards_train/margins": 18.41834318637848, "rewards_train/rejected": -20.050180435180664, "step": 5991 }, { "epoch": 2.95, "learning_rate": 8.430011942507919e-10, "loss": 0.0, "step": 5992 }, { "epoch": 2.95, "logps_train/chosen": -73.5938949584961, "logps_train/ref_chosen": -60.4375, "logps_train/ref_rejected": -118.6875, "logps_train/rejected": -315.68548583984375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.315834403038025, "rewards_train/margins": 18.383721947669983, "rewards_train/rejected": -19.699556350708008, "step": 5992 }, { "epoch": 2.95, "learning_rate": 8.273221200919555e-10, "loss": 0.0001, "step": 5993 }, { "epoch": 2.95, "logps_train/chosen": -81.96636962890625, "logps_train/ref_chosen": -67.5625, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -329.1882019042969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.439361572265625, "rewards_train/margins": 18.55729103088379, "rewards_train/rejected": -19.996652603149414, "step": 5993 }, { "epoch": 2.95, "learning_rate": 8.117901055723075e-10, "loss": 0.0, "step": 5994 }, { "epoch": 2.95, "logps_train/chosen": -83.13177490234375, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -329.2207336425781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9676449298858643, "rewards_train/margins": 18.7762553691864, "rewards_train/rejected": -20.743900299072266, "step": 5994 }, { "epoch": 2.95, "learning_rate": 7.964051552677431e-10, "loss": 0.0, "step": 5995 }, { "epoch": 2.95, "logps_train/chosen": -83.52591705322266, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -132.25, "logps_train/rejected": -345.0682067871094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8850135803222656, "rewards_train/margins": 19.39524269104004, "rewards_train/rejected": -21.280256271362305, "step": 5995 }, { "epoch": 2.95, "learning_rate": 7.811672737106923e-10, "loss": 0.0, "step": 5996 }, { "epoch": 2.95, "logps_train/chosen": -75.11470794677734, "logps_train/ref_chosen": -60.59375, "logps_train/ref_rejected": -121.125, "logps_train/rejected": -308.20538330078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4528770446777344, "rewards_train/margins": 17.25418472290039, "rewards_train/rejected": -18.707061767578125, "step": 5996 }, { "epoch": 2.95, "learning_rate": 7.660764653903973e-10, "loss": 0.0, "step": 5997 }, { "epoch": 2.95, "logps_train/chosen": -82.38992309570312, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -331.49273681640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.676199197769165, "rewards_train/margins": 18.770925283432007, "rewards_train/rejected": -20.447124481201172, "step": 5997 }, { "epoch": 2.95, "learning_rate": 7.511327347526908e-10, "loss": 0.0, "step": 5998 }, { "epoch": 2.95, "logps_train/chosen": -77.31781005859375, "logps_train/ref_chosen": -61.5, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -322.4582214355469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5826603174209595, "rewards_train/margins": 18.208034873008728, "rewards_train/rejected": -19.790695190429688, "step": 5998 }, { "epoch": 2.95, "learning_rate": 7.363360862000512e-10, "loss": 0.0, "step": 5999 }, { "epoch": 2.95, "logps_train/chosen": -79.81828308105469, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -126.75, "logps_train/rejected": -324.6988830566406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4475021362304688, "rewards_train/margins": 18.34396743774414, "rewards_train/rejected": -19.79146957397461, "step": 5999 }, { "epoch": 2.95, "learning_rate": 7.216865240917691e-10, "loss": 0.0001, "step": 6000 }, { "epoch": 2.95, "logps_train/chosen": -78.69281005859375, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -121.0625, "logps_train/rejected": -316.53607177734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.556634545326233, "rewards_train/margins": 17.990087866783142, "rewards_train/rejected": -19.546722412109375, "step": 6000 }, { "epoch": 2.95, "learning_rate": 7.071840527436146e-10, "loss": 0.0, "step": 6001 }, { "epoch": 2.95, "logps_train/chosen": -79.14473724365234, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -121.6875, "logps_train/rejected": -313.24884033203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4969933032989502, "rewards_train/margins": 17.658947706222534, "rewards_train/rejected": -19.155941009521484, "step": 6001 }, { "epoch": 2.96, "learning_rate": 6.928286764281699e-10, "loss": 0.0001, "step": 6002 }, { "epoch": 2.96, "logps_train/chosen": -79.42424011230469, "logps_train/ref_chosen": -64.0625, "logps_train/ref_rejected": -127.75, "logps_train/rejected": -334.6748046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5349537134170532, "rewards_train/margins": 19.157331824302673, "rewards_train/rejected": -20.692285537719727, "step": 6002 }, { "epoch": 2.96, "learning_rate": 6.786203993745521e-10, "loss": 0.0, "step": 6003 }, { "epoch": 2.96, "logps_train/chosen": -80.95146179199219, "logps_train/ref_chosen": -64.875, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -328.9810485839844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6083781719207764, "rewards_train/margins": 18.856766939163208, "rewards_train/rejected": -20.465145111083984, "step": 6003 }, { "epoch": 2.96, "learning_rate": 6.64559225768746e-10, "loss": 0.0, "step": 6004 }, { "epoch": 2.96, "logps_train/chosen": -74.57231903076172, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -321.6932373046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.221245288848877, "rewards_train/margins": 18.57386350631714, "rewards_train/rejected": -19.795108795166016, "step": 6004 }, { "epoch": 2.96, "learning_rate": 6.506451597531049e-10, "loss": 0.0, "step": 6005 }, { "epoch": 2.96, "logps_train/chosen": -74.09024047851562, "logps_train/ref_chosen": -64.25, "logps_train/ref_rejected": -124.625, "logps_train/rejected": -315.8149108886719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.980800986289978, "rewards_train/margins": 18.136043190956116, "rewards_train/rejected": -19.116844177246094, "step": 6005 }, { "epoch": 2.96, "learning_rate": 6.368782054269606e-10, "loss": 0.0, "step": 6006 }, { "epoch": 2.96, "logps_train/chosen": -75.15900421142578, "logps_train/ref_chosen": -62.875, "logps_train/ref_rejected": -122.875, "logps_train/rejected": -317.2847900390625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2276678085327148, "rewards_train/margins": 18.21125888824463, "rewards_train/rejected": -19.438926696777344, "step": 6006 }, { "epoch": 2.96, "learning_rate": 6.232583668460134e-10, "loss": 0.0, "step": 6007 }, { "epoch": 2.96, "logps_train/chosen": -78.05393981933594, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -331.7522888183594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3436263799667358, "rewards_train/margins": 18.998984932899475, "rewards_train/rejected": -20.34261131286621, "step": 6007 }, { "epoch": 2.96, "learning_rate": 6.097856480228869e-10, "loss": 0.0, "step": 6008 }, { "epoch": 2.96, "logps_train/chosen": -80.31436157226562, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -126.875, "logps_train/rejected": -328.0882263183594, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.565542221069336, "rewards_train/margins": 18.555585861206055, "rewards_train/rejected": -20.12112808227539, "step": 6008 }, { "epoch": 2.96, "learning_rate": 5.964600529266283e-10, "loss": 0.0, "step": 6009 }, { "epoch": 2.96, "logps_train/chosen": -75.41558837890625, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -319.5604553222656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1387509107589722, "rewards_train/margins": 18.48989975452423, "rewards_train/rejected": -19.628650665283203, "step": 6009 }, { "epoch": 2.96, "learning_rate": 5.832815854830975e-10, "loss": 0.0, "step": 6010 }, { "epoch": 2.96, "logps_train/chosen": -80.02528381347656, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -322.41156005859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5741103887557983, "rewards_train/margins": 18.237456917762756, "rewards_train/rejected": -19.811567306518555, "step": 6010 }, { "epoch": 2.96, "learning_rate": 5.702502495747996e-10, "loss": 0.0, "step": 6011 }, { "epoch": 2.96, "logps_train/chosen": -80.52058410644531, "logps_train/ref_chosen": -62.375, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -321.5325927734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.814680814743042, "rewards_train/margins": 17.8528835773468, "rewards_train/rejected": -19.667564392089844, "step": 6011 }, { "epoch": 2.96, "learning_rate": 5.57366049040775e-10, "loss": 0.0, "step": 6012 }, { "epoch": 2.96, "logps_train/chosen": -79.95919036865234, "logps_train/ref_chosen": -66.75, "logps_train/ref_rejected": -131.5, "logps_train/rejected": -343.06634521484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3232626914978027, "rewards_train/margins": 19.838154315948486, "rewards_train/rejected": -21.16141700744629, "step": 6012 }, { "epoch": 2.96, "learning_rate": 5.446289876768206e-10, "loss": 0.0, "step": 6013 }, { "epoch": 2.96, "logps_train/chosen": -75.17308044433594, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -127.25, "logps_train/rejected": -322.9258728027344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0100326538085938, "rewards_train/margins": 18.558530807495117, "rewards_train/rejected": -19.56856346130371, "step": 6013 }, { "epoch": 2.96, "learning_rate": 5.320390692354348e-10, "loss": 0.0, "step": 6014 }, { "epoch": 2.96, "logps_train/chosen": -81.66258239746094, "logps_train/ref_chosen": -62.40625, "logps_train/ref_rejected": -122.1875, "logps_train/rejected": -324.3583068847656, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9248026609420776, "rewards_train/margins": 18.292863965034485, "rewards_train/rejected": -20.217666625976562, "step": 6014 }, { "epoch": 2.96, "learning_rate": 5.195962974255951e-10, "loss": 0.0001, "step": 6015 }, { "epoch": 2.96, "logps_train/chosen": -80.88121032714844, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -337.76226806640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.485386848449707, "rewards_train/margins": 19.360957145690918, "rewards_train/rejected": -20.846343994140625, "step": 6015 }, { "epoch": 2.96, "learning_rate": 5.073006759130361e-10, "loss": 0.0, "step": 6016 }, { "epoch": 2.96, "logps_train/chosen": -80.05188751220703, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -336.137451171875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4627573490142822, "rewards_train/margins": 19.16983389854431, "rewards_train/rejected": -20.632591247558594, "step": 6016 }, { "epoch": 2.96, "learning_rate": 4.951522083201376e-10, "loss": 0.0, "step": 6017 }, { "epoch": 2.96, "logps_train/chosen": -79.9283447265625, "logps_train/ref_chosen": -64.125, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -335.76727294921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5821906328201294, "rewards_train/margins": 19.0474671125412, "rewards_train/rejected": -20.629657745361328, "step": 6017 }, { "epoch": 2.96, "learning_rate": 4.831508982259813e-10, "loss": 0.0, "step": 6018 }, { "epoch": 2.96, "logps_train/chosen": -80.09268188476562, "logps_train/ref_chosen": -61.96875, "logps_train/ref_rejected": -122.6875, "logps_train/rejected": -313.9055480957031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8135656118392944, "rewards_train/margins": 17.309216141700745, "rewards_train/rejected": -19.12278175354004, "step": 6018 }, { "epoch": 2.96, "learning_rate": 4.712967491661834e-10, "loss": 0.0, "step": 6019 }, { "epoch": 2.96, "logps_train/chosen": -84.23773956298828, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -333.0256042480469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0269479751586914, "rewards_train/margins": 18.717211723327637, "rewards_train/rejected": -20.744159698486328, "step": 6019 }, { "epoch": 2.96, "learning_rate": 4.5958976463306154e-10, "loss": 0.0, "step": 6020 }, { "epoch": 2.96, "logps_train/chosen": -81.48226928710938, "logps_train/ref_chosen": -62.34375, "logps_train/ref_rejected": -126.375, "logps_train/rejected": -328.3974609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9152188301086426, "rewards_train/margins": 18.28712797164917, "rewards_train/rejected": -20.202346801757812, "step": 6020 }, { "epoch": 2.96, "learning_rate": 4.480299480755789e-10, "loss": 0.0, "step": 6021 }, { "epoch": 2.96, "logps_train/chosen": -78.29454040527344, "logps_train/ref_chosen": -63.28125, "logps_train/ref_rejected": -125.0625, "logps_train/rejected": -321.30291748046875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5007431507110596, "rewards_train/margins": 18.12471652030945, "rewards_train/rejected": -19.625459671020508, "step": 6021 }, { "epoch": 2.97, "learning_rate": 4.366173028993447e-10, "loss": 0.0001, "step": 6022 }, { "epoch": 2.97, "logps_train/chosen": -76.292236328125, "logps_train/ref_chosen": -61.6875, "logps_train/ref_rejected": -122.25, "logps_train/rejected": -317.4503479003906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4604978561401367, "rewards_train/margins": 18.05851459503174, "rewards_train/rejected": -19.519012451171875, "step": 6022 }, { "epoch": 2.97, "learning_rate": 4.2535183246655836e-10, "loss": 0.0, "step": 6023 }, { "epoch": 2.97, "logps_train/chosen": -86.39663696289062, "logps_train/ref_chosen": -67.3125, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -335.95782470703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9091943502426147, "rewards_train/margins": 18.864521622657776, "rewards_train/rejected": -20.77371597290039, "step": 6023 }, { "epoch": 2.97, "learning_rate": 4.1423354009617606e-10, "loss": 0.0, "step": 6024 }, { "epoch": 2.97, "logps_train/chosen": -81.6225814819336, "logps_train/ref_chosen": -65.5, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -329.8695068359375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.609621524810791, "rewards_train/margins": 18.49187994003296, "rewards_train/rejected": -20.10150146484375, "step": 6024 }, { "epoch": 2.97, "learning_rate": 4.032624290636333e-10, "loss": 0.0, "step": 6025 }, { "epoch": 2.97, "logps_train/chosen": -84.67436981201172, "logps_train/ref_chosen": -65.875, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -333.0047302246094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8828179836273193, "rewards_train/margins": 18.62053894996643, "rewards_train/rejected": -20.50335693359375, "step": 6025 }, { "epoch": 2.97, "learning_rate": 3.9243850260117783e-10, "loss": 0.0, "step": 6026 }, { "epoch": 2.97, "logps_train/chosen": -83.1656265258789, "logps_train/ref_chosen": -65.4375, "logps_train/ref_rejected": -125.0, "logps_train/rejected": -326.9096374511719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7729594707489014, "rewards_train/margins": 18.41683316230774, "rewards_train/rejected": -20.18979263305664, "step": 6026 }, { "epoch": 2.97, "learning_rate": 3.8176176389753677e-10, "loss": 0.0001, "step": 6027 }, { "epoch": 2.97, "logps_train/chosen": -81.27652740478516, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -126.3125, "logps_train/rejected": -335.3150939941406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7108561992645264, "rewards_train/margins": 19.19155240058899, "rewards_train/rejected": -20.902408599853516, "step": 6027 }, { "epoch": 2.97, "learning_rate": 3.71232216098194e-10, "loss": 0.0, "step": 6028 }, { "epoch": 2.97, "logps_train/chosen": -73.9169692993164, "logps_train/ref_chosen": -60.28125, "logps_train/ref_rejected": -120.4375, "logps_train/rejected": -311.35455322265625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.362107276916504, "rewards_train/margins": 17.727152824401855, "rewards_train/rejected": -19.08926010131836, "step": 6028 }, { "epoch": 2.97, "learning_rate": 3.6084986230522363e-10, "loss": 0.0, "step": 6029 }, { "epoch": 2.97, "logps_train/chosen": -76.67599487304688, "logps_train/ref_chosen": -63.125, "logps_train/ref_rejected": -125.5625, "logps_train/rejected": -325.92962646484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3566136360168457, "rewards_train/margins": 18.680049419403076, "rewards_train/rejected": -20.036663055419922, "step": 6029 }, { "epoch": 2.97, "learning_rate": 3.506147055773456e-10, "loss": 0.0, "step": 6030 }, { "epoch": 2.97, "logps_train/chosen": -76.48748016357422, "logps_train/ref_chosen": -63.4375, "logps_train/ref_rejected": -124.5625, "logps_train/rejected": -319.5771484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.303631067276001, "rewards_train/margins": 18.201057195663452, "rewards_train/rejected": -19.504688262939453, "step": 6030 }, { "epoch": 2.97, "learning_rate": 3.405267489298702e-10, "loss": 0.0, "step": 6031 }, { "epoch": 2.97, "logps_train/chosen": -80.20768737792969, "logps_train/ref_chosen": -65.8125, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -335.73480224609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.437760591506958, "rewards_train/margins": 19.300955533981323, "rewards_train/rejected": -20.73871612548828, "step": 6031 }, { "epoch": 2.97, "learning_rate": 3.305859953347534e-10, "loss": 0.0, "step": 6032 }, { "epoch": 2.97, "logps_train/chosen": -79.68489074707031, "logps_train/ref_chosen": -62.78125, "logps_train/ref_rejected": -121.5625, "logps_train/rejected": -313.10260009765625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6909503936767578, "rewards_train/margins": 17.46452522277832, "rewards_train/rejected": -19.155475616455078, "step": 6032 }, { "epoch": 2.97, "learning_rate": 3.20792447720708e-10, "loss": 0.0, "step": 6033 }, { "epoch": 2.97, "logps_train/chosen": -81.30127716064453, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -333.406494140625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.519482970237732, "rewards_train/margins": 18.993332505226135, "rewards_train/rejected": -20.512815475463867, "step": 6033 }, { "epoch": 2.97, "learning_rate": 3.111461089729261e-10, "loss": 0.0, "step": 6034 }, { "epoch": 2.97, "logps_train/chosen": -84.66497039794922, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -125.6875, "logps_train/rejected": -326.64825439453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9029719829559326, "rewards_train/margins": 18.19344449043274, "rewards_train/rejected": -20.096416473388672, "step": 6034 }, { "epoch": 2.97, "learning_rate": 3.0164698193324566e-10, "loss": 0.0002, "step": 6035 }, { "epoch": 2.97, "logps_train/chosen": -79.36812591552734, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -330.8120422363281, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4118130207061768, "rewards_train/margins": 18.974276304244995, "rewards_train/rejected": -20.386089324951172, "step": 6035 }, { "epoch": 2.97, "learning_rate": 2.9229506940026125e-10, "loss": 0.0, "step": 6036 }, { "epoch": 2.97, "logps_train/chosen": -85.19306945800781, "logps_train/ref_chosen": -64.375, "logps_train/ref_rejected": -123.625, "logps_train/rejected": -330.8788757324219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0819039344787598, "rewards_train/margins": 18.643775463104248, "rewards_train/rejected": -20.725679397583008, "step": 6036 }, { "epoch": 2.97, "learning_rate": 2.830903741290469e-10, "loss": 0.0001, "step": 6037 }, { "epoch": 2.97, "logps_train/chosen": -84.91633605957031, "logps_train/ref_chosen": -66.25, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -338.34698486328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8660483360290527, "rewards_train/margins": 19.0953106880188, "rewards_train/rejected": -20.96135902404785, "step": 6037 }, { "epoch": 2.97, "learning_rate": 2.740328988313223e-10, "loss": 0.0, "step": 6038 }, { "epoch": 2.97, "logps_train/chosen": -82.08570861816406, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -125.75, "logps_train/rejected": -323.3726806640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6769295930862427, "rewards_train/margins": 18.082311511039734, "rewards_train/rejected": -19.759241104125977, "step": 6038 }, { "epoch": 2.97, "learning_rate": 2.65122646175564e-10, "loss": 0.0, "step": 6039 }, { "epoch": 2.97, "logps_train/chosen": -83.51483154296875, "logps_train/ref_chosen": -67.625, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -336.2862854003906, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5913752317428589, "rewards_train/margins": 19.155758023262024, "rewards_train/rejected": -20.747133255004883, "step": 6039 }, { "epoch": 2.97, "learning_rate": 2.5635961878678334e-10, "loss": 0.0004, "step": 6040 }, { "epoch": 2.97, "logps_train/chosen": -77.0372314453125, "logps_train/ref_chosen": -65.25, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -327.0980529785156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1755003929138184, "rewards_train/margins": 18.709063053131104, "rewards_train/rejected": -19.884563446044922, "step": 6040 }, { "epoch": 2.97, "learning_rate": 2.4774381924663747e-10, "loss": 0.0, "step": 6041 }, { "epoch": 2.97, "logps_train/chosen": -79.41544342041016, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -125.625, "logps_train/rejected": -322.045654296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3239662647247314, "rewards_train/margins": 18.322787046432495, "rewards_train/rejected": -19.646753311157227, "step": 6041 }, { "epoch": 2.97, "learning_rate": 2.392752500933182e-10, "loss": 0.0001, "step": 6042 }, { "epoch": 2.97, "logps_train/chosen": -76.93547821044922, "logps_train/ref_chosen": -62.21875, "logps_train/ref_rejected": -121.5625, "logps_train/rejected": -324.7615966796875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4712334871292114, "rewards_train/margins": 18.84886920452118, "rewards_train/rejected": -20.32010269165039, "step": 6042 }, { "epoch": 2.98, "learning_rate": 2.3095391382182973e-10, "loss": 0.0, "step": 6043 }, { "epoch": 2.98, "logps_train/chosen": -81.23954772949219, "logps_train/ref_chosen": -64.8125, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -323.3847351074219, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6439255475997925, "rewards_train/margins": 18.047284245491028, "rewards_train/rejected": -19.69120979309082, "step": 6043 }, { "epoch": 2.98, "learning_rate": 2.2277981288360004e-10, "loss": 0.0, "step": 6044 }, { "epoch": 2.98, "logps_train/chosen": -79.40144348144531, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -324.9183349609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4843332767486572, "rewards_train/margins": 18.30520796775818, "rewards_train/rejected": -19.789541244506836, "step": 6044 }, { "epoch": 2.98, "learning_rate": 2.147529496868139e-10, "loss": 0.0, "step": 6045 }, { "epoch": 2.98, "logps_train/chosen": -85.83941650390625, "logps_train/ref_chosen": -67.8125, "logps_train/ref_rejected": -133.25, "logps_train/rejected": -340.99884033203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8032294511795044, "rewards_train/margins": 18.969799876213074, "rewards_train/rejected": -20.773029327392578, "step": 6045 }, { "epoch": 2.98, "learning_rate": 2.0687332659624634e-10, "loss": 0.0, "step": 6046 }, { "epoch": 2.98, "logps_train/chosen": -79.89447784423828, "logps_train/ref_chosen": -66.1875, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -331.5447692871094, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.373920202255249, "rewards_train/margins": 18.849062204360962, "rewards_train/rejected": -20.22298240661621, "step": 6046 }, { "epoch": 2.98, "learning_rate": 1.991409459332627e-10, "loss": 0.0, "step": 6047 }, { "epoch": 2.98, "logps_train/chosen": -75.34384155273438, "logps_train/ref_chosen": -63.3125, "logps_train/ref_rejected": -123.25, "logps_train/rejected": -317.13092041015625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.202523946762085, "rewards_train/margins": 18.18634819984436, "rewards_train/rejected": -19.388872146606445, "step": 6047 }, { "epoch": 2.98, "learning_rate": 1.9155580997592957e-10, "loss": 0.0, "step": 6048 }, { "epoch": 2.98, "logps_train/chosen": -74.30821228027344, "logps_train/ref_chosen": -61.21875, "logps_train/ref_rejected": -121.75, "logps_train/rejected": -316.626220703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3082866668701172, "rewards_train/margins": 18.17875099182129, "rewards_train/rejected": -19.487037658691406, "step": 6048 }, { "epoch": 2.98, "learning_rate": 1.8411792095884837e-10, "loss": 0.0, "step": 6049 }, { "epoch": 2.98, "logps_train/chosen": -76.13545227050781, "logps_train/ref_chosen": -61.90625, "logps_train/ref_rejected": -120.25, "logps_train/rejected": -319.68328857421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.421651005744934, "rewards_train/margins": 18.524508357048035, "rewards_train/rejected": -19.94615936279297, "step": 6049 }, { "epoch": 2.98, "learning_rate": 1.7682728107321076e-10, "loss": 0.0, "step": 6050 }, { "epoch": 2.98, "logps_train/chosen": -76.32768249511719, "logps_train/ref_chosen": -62.5625, "logps_train/ref_rejected": -123.125, "logps_train/rejected": -324.0097961425781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3776410818099976, "rewards_train/margins": 18.71034872531891, "rewards_train/rejected": -20.087989807128906, "step": 6050 }, { "epoch": 2.98, "learning_rate": 1.6968389246702075e-10, "loss": 0.0009, "step": 6051 }, { "epoch": 2.98, "logps_train/chosen": -86.465087890625, "logps_train/ref_chosen": -66.3125, "logps_train/ref_rejected": -127.625, "logps_train/rejected": -336.677001953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.012866258621216, "rewards_train/margins": 18.89584994316101, "rewards_train/rejected": -20.908716201782227, "step": 6051 }, { "epoch": 2.98, "learning_rate": 1.6268775724465056e-10, "loss": 0.0003, "step": 6052 }, { "epoch": 2.98, "logps_train/chosen": -78.28799438476562, "logps_train/ref_chosen": -63.90625, "logps_train/ref_rejected": -128.75, "logps_train/rejected": -332.1540222167969, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4392483234405518, "rewards_train/margins": 18.900179624557495, "rewards_train/rejected": -20.339427947998047, "step": 6052 }, { "epoch": 2.98, "learning_rate": 1.5583887746722924e-10, "loss": 0.0, "step": 6053 }, { "epoch": 2.98, "logps_train/chosen": -75.71739196777344, "logps_train/ref_chosen": -64.0, "logps_train/ref_rejected": -126.8125, "logps_train/rejected": -333.8078308105469, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.169249415397644, "rewards_train/margins": 19.530381321907043, "rewards_train/rejected": -20.699630737304688, "step": 6053 }, { "epoch": 2.98, "learning_rate": 1.4913725515253163e-10, "loss": 0.0, "step": 6054 }, { "epoch": 2.98, "logps_train/chosen": -82.00677490234375, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -124.875, "logps_train/rejected": -327.9551086425781, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7243592739105225, "rewards_train/margins": 18.582041025161743, "rewards_train/rejected": -20.306400299072266, "step": 6054 }, { "epoch": 2.98, "learning_rate": 1.425828922748673e-10, "loss": 0.0, "step": 6055 }, { "epoch": 2.98, "logps_train/chosen": -79.46576690673828, "logps_train/ref_chosen": -64.5, "logps_train/ref_rejected": -125.25, "logps_train/rejected": -318.40399169921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4976019859313965, "rewards_train/margins": 17.81437921524048, "rewards_train/rejected": -19.311981201171875, "step": 6055 }, { "epoch": 2.98, "learning_rate": 1.361757907651917e-10, "loss": 0.0001, "step": 6056 }, { "epoch": 2.98, "logps_train/chosen": -80.3022689819336, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -121.4375, "logps_train/rejected": -321.6598205566406, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6106956005096436, "rewards_train/margins": 18.41348958015442, "rewards_train/rejected": -20.024185180664062, "step": 6056 }, { "epoch": 2.98, "learning_rate": 1.2991595251110597e-10, "loss": 0.0, "step": 6057 }, { "epoch": 2.98, "logps_train/chosen": -82.36502838134766, "logps_train/ref_chosen": -65.375, "logps_train/ref_rejected": -124.3125, "logps_train/rejected": -331.37890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7008583545684814, "rewards_train/margins": 19.005005598068237, "rewards_train/rejected": -20.70586395263672, "step": 6057 }, { "epoch": 2.98, "learning_rate": 1.2380337935680164e-10, "loss": 0.0, "step": 6058 }, { "epoch": 2.98, "logps_train/chosen": -79.75341033935547, "logps_train/ref_chosen": -62.21875, "logps_train/ref_rejected": -122.1875, "logps_train/rejected": -323.0810546875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7526601552963257, "rewards_train/margins": 18.337328553199768, "rewards_train/rejected": -20.089988708496094, "step": 6058 }, { "epoch": 2.98, "learning_rate": 1.1783807310300487e-10, "loss": 0.0, "step": 6059 }, { "epoch": 2.98, "logps_train/chosen": -85.03507995605469, "logps_train/ref_chosen": -65.3125, "logps_train/ref_rejected": -128.25, "logps_train/rejected": -336.6761474609375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.97469961643219, "rewards_train/margins": 18.872798323631287, "rewards_train/rejected": -20.847497940063477, "step": 6059 }, { "epoch": 2.98, "learning_rate": 1.1202003550725425e-10, "loss": 0.0, "step": 6060 }, { "epoch": 2.98, "logps_train/chosen": -84.50106811523438, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -325.405029296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9418060779571533, "rewards_train/margins": 17.94547724723816, "rewards_train/rejected": -19.887283325195312, "step": 6060 }, { "epoch": 2.98, "learning_rate": 1.063492682835121e-10, "loss": 0.0001, "step": 6061 }, { "epoch": 2.98, "logps_train/chosen": -77.25554656982422, "logps_train/ref_chosen": -63.5, "logps_train/ref_rejected": -122.0, "logps_train/rejected": -322.9512939453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3751156330108643, "rewards_train/margins": 18.716889142990112, "rewards_train/rejected": -20.092004776000977, "step": 6061 }, { "epoch": 2.98, "learning_rate": 1.00825773102442e-10, "loss": 0.0, "step": 6062 }, { "epoch": 2.98, "logps_train/chosen": -75.44924926757812, "logps_train/ref_chosen": -61.8125, "logps_train/ref_rejected": -123.75, "logps_train/rejected": -319.6495056152344, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3628451824188232, "rewards_train/margins": 18.225396394729614, "rewards_train/rejected": -19.588241577148438, "step": 6062 }, { "epoch": 2.99, "learning_rate": 9.544955159129786e-11, "loss": 0.0, "step": 6063 }, { "epoch": 2.99, "logps_train/chosen": -77.79313659667969, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -128.375, "logps_train/rejected": -329.0595703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1793138980865479, "rewards_train/margins": 18.885040044784546, "rewards_train/rejected": -20.064353942871094, "step": 6063 }, { "epoch": 2.99, "learning_rate": 9.022060533392384e-11, "loss": 0.0, "step": 6064 }, { "epoch": 2.99, "logps_train/chosen": -80.75663757324219, "logps_train/ref_chosen": -66.5, "logps_train/ref_rejected": -131.0, "logps_train/rejected": -343.6029357910156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4273242950439453, "rewards_train/margins": 19.828279495239258, "rewards_train/rejected": -21.255603790283203, "step": 6064 }, { "epoch": 2.99, "learning_rate": 8.51389358708654e-11, "loss": 0.0, "step": 6065 }, { "epoch": 2.99, "logps_train/chosen": -78.84306335449219, "logps_train/ref_chosen": -66.0, "logps_train/ref_rejected": -127.5, "logps_train/rejected": -324.13055419921875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2830369472503662, "rewards_train/margins": 18.37830948829651, "rewards_train/rejected": -19.661346435546875, "step": 6065 }, { "epoch": 2.99, "learning_rate": 8.020454469914728e-11, "loss": 0.0, "step": 6066 }, { "epoch": 2.99, "logps_train/chosen": -83.2264404296875, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -329.9373779296875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.726453423500061, "rewards_train/margins": 18.56874692440033, "rewards_train/rejected": -20.29520034790039, "step": 6066 }, { "epoch": 2.99, "learning_rate": 7.541743327255102e-11, "loss": 0.0, "step": 6067 }, { "epoch": 2.99, "logps_train/chosen": -87.8567886352539, "logps_train/ref_chosen": -65.9375, "logps_train/ref_rejected": -129.25, "logps_train/rejected": -336.51739501953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.190366268157959, "rewards_train/margins": 18.541887760162354, "rewards_train/rejected": -20.732254028320312, "step": 6067 }, { "epoch": 2.99, "learning_rate": 7.077760300133739e-11, "loss": 0.0, "step": 6068 }, { "epoch": 2.99, "logps_train/chosen": -84.8466567993164, "logps_train/ref_chosen": -64.4375, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -326.5375671386719, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0428690910339355, "rewards_train/margins": 17.965330600738525, "rewards_train/rejected": -20.00819969177246, "step": 6068 }, { "epoch": 2.99, "learning_rate": 6.628505525246852e-11, "loss": 0.0, "step": 6069 }, { "epoch": 2.99, "logps_train/chosen": -80.01212310791016, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -121.1875, "logps_train/rejected": -314.37884521484375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5380778312683105, "rewards_train/margins": 17.77949571609497, "rewards_train/rejected": -19.31757354736328, "step": 6069 }, { "epoch": 2.99, "learning_rate": 6.193979134938576e-11, "loss": 0.0, "step": 6070 }, { "epoch": 2.99, "logps_train/chosen": -82.80441284179688, "logps_train/ref_chosen": -67.3125, "logps_train/ref_rejected": -129.875, "logps_train/rejected": -331.2024230957031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5491917133331299, "rewards_train/margins": 18.587942838668823, "rewards_train/rejected": -20.137134552001953, "step": 6070 }, { "epoch": 2.99, "learning_rate": 5.7741812572398293e-11, "loss": 0.0, "step": 6071 }, { "epoch": 2.99, "logps_train/chosen": -81.68711853027344, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -339.45556640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6035261154174805, "rewards_train/margins": 19.546765327453613, "rewards_train/rejected": -21.150291442871094, "step": 6071 }, { "epoch": 2.99, "learning_rate": 5.369112015807253e-11, "loss": 0.0, "step": 6072 }, { "epoch": 2.99, "logps_train/chosen": -82.50077819824219, "logps_train/ref_chosen": -62.59375, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -326.36407470703125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9915578365325928, "rewards_train/margins": 18.293628931045532, "rewards_train/rejected": -20.285186767578125, "step": 6072 }, { "epoch": 2.99, "learning_rate": 4.9787715299898224e-11, "loss": 0.0, "step": 6073 }, { "epoch": 2.99, "logps_train/chosen": -80.34505462646484, "logps_train/ref_chosen": -64.1875, "logps_train/ref_rejected": -129.625, "logps_train/rejected": -337.17083740234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6148273944854736, "rewards_train/margins": 19.14444613456726, "rewards_train/rejected": -20.759273529052734, "step": 6073 }, { "epoch": 2.99, "learning_rate": 4.603159914784438e-11, "loss": 0.0, "step": 6074 }, { "epoch": 2.99, "logps_train/chosen": -81.11465454101562, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -329.9046630859375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.640176773071289, "rewards_train/margins": 18.46371841430664, "rewards_train/rejected": -20.10389518737793, "step": 6074 }, { "epoch": 2.99, "learning_rate": 4.242277280841478e-11, "loss": 0.0, "step": 6075 }, { "epoch": 2.99, "logps_train/chosen": -80.87583923339844, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -126.4375, "logps_train/rejected": -325.586181640625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6330435276031494, "rewards_train/margins": 18.28040862083435, "rewards_train/rejected": -19.9134521484375, "step": 6075 }, { "epoch": 2.99, "learning_rate": 3.896123734481449e-11, "loss": 0.0, "step": 6076 }, { "epoch": 2.99, "logps_train/chosen": -80.52631378173828, "logps_train/ref_chosen": -64.6875, "logps_train/ref_rejected": -126.1875, "logps_train/rejected": -331.7027587890625, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5840764045715332, "rewards_train/margins": 18.96691083908081, "rewards_train/rejected": -20.550987243652344, "step": 6076 }, { "epoch": 2.99, "learning_rate": 3.5646993776894394e-11, "loss": 0.0, "step": 6077 }, { "epoch": 2.99, "logps_train/chosen": -79.83734893798828, "logps_train/ref_chosen": -65.0625, "logps_train/ref_rejected": -123.5, "logps_train/rejected": -322.4200439453125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4766545295715332, "rewards_train/margins": 18.41735029220581, "rewards_train/rejected": -19.894004821777344, "step": 6077 }, { "epoch": 2.99, "learning_rate": 3.248004308098462e-11, "loss": 0.0, "step": 6078 }, { "epoch": 2.99, "logps_train/chosen": -80.03105926513672, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -132.0, "logps_train/rejected": -336.72894287109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.44114351272583, "rewards_train/margins": 19.026671886444092, "rewards_train/rejected": -20.467815399169922, "step": 6078 }, { "epoch": 2.99, "learning_rate": 2.946038619011659e-11, "loss": 0.0, "step": 6079 }, { "epoch": 2.99, "logps_train/chosen": -78.11859893798828, "logps_train/ref_chosen": -65.625, "logps_train/ref_rejected": -126.9375, "logps_train/rejected": -336.89935302734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2474794387817383, "rewards_train/margins": 19.749293327331543, "rewards_train/rejected": -20.99677276611328, "step": 6079 }, { "epoch": 2.99, "learning_rate": 2.6588023993912024e-11, "loss": 0.0, "step": 6080 }, { "epoch": 2.99, "logps_train/chosen": -81.92399597167969, "logps_train/ref_chosen": -64.75, "logps_train/ref_rejected": -126.5, "logps_train/rejected": -332.04779052734375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7161786556243896, "rewards_train/margins": 18.838503122329712, "rewards_train/rejected": -20.5546817779541, "step": 6080 }, { "epoch": 2.99, "learning_rate": 2.386295733852739e-11, "loss": 0.0, "step": 6081 }, { "epoch": 2.99, "logps_train/chosen": -82.37710571289062, "logps_train/ref_chosen": -63.96875, "logps_train/ref_rejected": -127.0, "logps_train/rejected": -334.01397705078125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.842397689819336, "rewards_train/margins": 18.85509490966797, "rewards_train/rejected": -20.697492599487305, "step": 6081 }, { "epoch": 2.99, "learning_rate": 2.1285187026875982e-11, "loss": 0.0, "step": 6082 }, { "epoch": 2.99, "logps_train/chosen": -81.15603637695312, "logps_train/ref_chosen": -66.875, "logps_train/ref_rejected": -129.5, "logps_train/rejected": -334.79583740234375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4293240308761597, "rewards_train/margins": 19.10680115222931, "rewards_train/rejected": -20.53612518310547, "step": 6082 }, { "epoch": 3.0, "learning_rate": 1.8854713818350354e-11, "loss": 0.0, "step": 6083 }, { "epoch": 3.0, "logps_train/chosen": -86.34475708007812, "logps_train/ref_chosen": -67.0, "logps_train/ref_rejected": -125.3125, "logps_train/rejected": -325.6753845214844, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9314488172531128, "rewards_train/margins": 18.10420525074005, "rewards_train/rejected": -20.035654067993164, "step": 6083 }, { "epoch": 3.0, "learning_rate": 1.6571538428988842e-11, "loss": 0.0, "step": 6084 }, { "epoch": 3.0, "logps_train/chosen": -80.84585571289062, "logps_train/ref_chosen": -64.3125, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -324.8890686035156, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6507470607757568, "rewards_train/margins": 18.243627786636353, "rewards_train/rejected": -19.89437484741211, "step": 6084 }, { "epoch": 3.0, "learning_rate": 1.4435661531420062e-11, "loss": 0.0, "step": 6085 }, { "epoch": 3.0, "logps_train/chosen": -80.62020874023438, "logps_train/ref_chosen": -66.625, "logps_train/ref_rejected": -128.875, "logps_train/rejected": -339.74395751953125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4013274908065796, "rewards_train/margins": 19.68815767765045, "rewards_train/rejected": -21.08948516845703, "step": 6085 }, { "epoch": 3.0, "learning_rate": 1.2447083754862919e-11, "loss": 0.0, "step": 6086 }, { "epoch": 3.0, "logps_train/chosen": -78.21109771728516, "logps_train/ref_chosen": -62.96875, "logps_train/ref_rejected": -123.0625, "logps_train/rejected": -323.6644287109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5231117010116577, "rewards_train/margins": 18.53585970401764, "rewards_train/rejected": -20.058971405029297, "step": 6086 }, { "epoch": 3.0, "learning_rate": 1.0605805685237611e-11, "loss": 0.0, "step": 6087 }, { "epoch": 3.0, "logps_train/chosen": -83.68192291259766, "logps_train/ref_chosen": -64.625, "logps_train/ref_rejected": -124.0, "logps_train/rejected": -318.4796142578125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9030065536499023, "rewards_train/margins": 17.5414400100708, "rewards_train/rejected": -19.444446563720703, "step": 6087 }, { "epoch": 3.0, "learning_rate": 8.911827864888088e-12, "loss": 0.0, "step": 6088 }, { "epoch": 3.0, "logps_train/chosen": -79.75194549560547, "logps_train/ref_chosen": -65.0, "logps_train/ref_rejected": -122.4375, "logps_train/rejected": -315.284912109375, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4779291152954102, "rewards_train/margins": 17.807887077331543, "rewards_train/rejected": -19.285816192626953, "step": 6088 }, { "epoch": 3.0, "learning_rate": 7.36515079297062e-12, "loss": 0.0, "step": 6089 }, { "epoch": 3.0, "logps_train/chosen": -78.05223083496094, "logps_train/ref_chosen": -64.5625, "logps_train/ref_rejected": -127.9375, "logps_train/rejected": -332.99578857421875, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3500467538833618, "rewards_train/margins": 19.15832269191742, "rewards_train/rejected": -20.50836944580078, "step": 6089 }, { "epoch": 3.0, "learning_rate": 5.9657749251207364e-12, "loss": 0.0001, "step": 6090 }, { "epoch": 3.0, "logps_train/chosen": -81.60260009765625, "logps_train/ref_chosen": -66.125, "logps_train/ref_rejected": -128.0, "logps_train/rejected": -330.9397277832031, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.544904112815857, "rewards_train/margins": 18.74965536594391, "rewards_train/rejected": -20.294559478759766, "step": 6090 }, { "epoch": 3.0, "learning_rate": 4.713700673564247e-12, "loss": 0.0, "step": 6091 }, { "epoch": 3.0, "logps_train/chosen": -78.11026000976562, "logps_train/ref_chosen": -63.53125, "logps_train/ref_rejected": -126.5625, "logps_train/rejected": -323.3087158203125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4580482244491577, "rewards_train/margins": 18.21735918521881, "rewards_train/rejected": -19.67540740966797, "step": 6091 }, { "epoch": 3.0, "learning_rate": 3.6089284072282624e-12, "loss": 0.0001, "step": 6092 }, { "epoch": 3.0, "logps_train/chosen": -86.25201416015625, "logps_train/ref_chosen": -65.125, "logps_train/ref_rejected": -125.9375, "logps_train/rejected": -330.14776611328125, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.115386962890625, "rewards_train/margins": 18.307106018066406, "rewards_train/rejected": -20.42249298095703, "step": 6092 }, { "epoch": 3.0, "learning_rate": 2.6514584515191507e-12, "loss": 0.0001, "step": 6093 }, { "epoch": 3.0, "step": 6093, "total_flos": 0.0, "train_loss": 0.019245373732807495, "train_runtime": 95784.7885, "train_samples_per_second": 4.071, "train_steps_per_second": 0.064 } ], "logging_steps": 1.0, "max_steps": 6093, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }