{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.998972954467648, "eval_steps": 100, "global_step": 6570, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.5662100456621e-10, "logits/chosen": -4.212166786193848, "logits/rejected": -4.198864459991455, "logps/chosen": -405.7208557128906, "logps/rejected": -370.718505859375, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 4.5662100456621e-09, "logits/chosen": -4.189076900482178, "logits/rejected": -4.141063213348389, "logps/chosen": -426.806884765625, "logps/rejected": -266.2579345703125, "loss": 0.6923, "rewards/accuracies": 0.375, "rewards/chosen": -0.008347246795892715, "rewards/margins": -0.006428453605622053, "rewards/rejected": -0.001918792724609375, "step": 10 }, { "epoch": 0.01, "learning_rate": 9.1324200913242e-09, "logits/chosen": -4.183960914611816, "logits/rejected": -4.144045829772949, "logps/chosen": -432.1515197753906, "logps/rejected": -282.266357421875, "loss": 0.6914, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": 0.0014503479469567537, "rewards/margins": 0.0017772674327716231, "rewards/rejected": -0.0003269195440225303, "step": 20 }, { "epoch": 0.01, "learning_rate": 1.36986301369863e-08, "logits/chosen": -4.178932189941406, "logits/rejected": -4.130121231079102, "logps/chosen": -450.54052734375, "logps/rejected": -273.3040466308594, "loss": 0.6789, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.059673309326171875, "rewards/margins": 0.020709943026304245, "rewards/rejected": 0.03896336629986763, "step": 30 }, { "epoch": 0.02, "learning_rate": 1.82648401826484e-08, "logits/chosen": -4.164882659912109, "logits/rejected": -4.118993282318115, "logps/chosen": -446.67572021484375, "logps/rejected": -277.1219177246094, "loss": 0.6559, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.16719122231006622, "rewards/margins": 0.10298871994018555, "rewards/rejected": 0.06420250236988068, "step": 40 }, { "epoch": 0.02, "learning_rate": 2.28310502283105e-08, "logits/chosen": -4.158358573913574, "logits/rejected": -4.1271514892578125, "logps/chosen": -387.9163513183594, "logps/rejected": -261.80706787109375, "loss": 0.6392, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.19570569694042206, "rewards/margins": 0.08695220947265625, "rewards/rejected": 0.10875348746776581, "step": 50 }, { "epoch": 0.03, "learning_rate": 2.73972602739726e-08, "logits/chosen": -4.152237892150879, "logits/rejected": -4.111782073974609, "logps/chosen": -444.46466064453125, "logps/rejected": -275.1703796386719, "loss": 0.6083, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.44040411710739136, "rewards/margins": 0.23903369903564453, "rewards/rejected": 0.20137043297290802, "step": 60 }, { "epoch": 0.03, "learning_rate": 3.19634703196347e-08, "logits/chosen": -4.131418704986572, "logits/rejected": -4.097787380218506, "logps/chosen": -432.1783752441406, "logps/rejected": -279.2029724121094, "loss": 0.5722, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.5671070218086243, "rewards/margins": 0.3263644874095917, "rewards/rejected": 0.2407425343990326, "step": 70 }, { "epoch": 0.04, "learning_rate": 3.65296803652968e-08, "logits/chosen": -4.150516510009766, "logits/rejected": -4.1139373779296875, "logps/chosen": -425.799560546875, "logps/rejected": -278.6962890625, "loss": 0.5519, "rewards/accuracies": 0.75, "rewards/chosen": 0.7141006588935852, "rewards/margins": 0.400613009929657, "rewards/rejected": 0.3134876489639282, "step": 80 }, { "epoch": 0.04, "learning_rate": 4.10958904109589e-08, "logits/chosen": -4.123137474060059, "logits/rejected": -4.0936150550842285, "logps/chosen": -392.0987243652344, "logps/rejected": -262.5712585449219, "loss": 0.5323, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.7629528641700745, "rewards/margins": 0.4368765950202942, "rewards/rejected": 0.3260762691497803, "step": 90 }, { "epoch": 0.05, "learning_rate": 4.5662100456621e-08, "logits/chosen": -4.116466522216797, "logits/rejected": -4.078573703765869, "logps/chosen": -429.51641845703125, "logps/rejected": -275.288330078125, "loss": 0.5056, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.8561823964118958, "rewards/margins": 0.5154320597648621, "rewards/rejected": 0.3407503068447113, "step": 100 }, { "epoch": 0.05, "eval_logits/chosen": -4.093513488769531, "eval_logits/rejected": -4.062286376953125, "eval_logps/chosen": -410.0293273925781, "eval_logps/rejected": -264.459228515625, "eval_loss": 0.48802417516708374, "eval_rewards/accuracies": 0.7709497213363647, "eval_rewards/chosen": 0.937190055847168, "eval_rewards/margins": 0.5233241319656372, "eval_rewards/rejected": 0.4138658344745636, "eval_runtime": 232.6818, "eval_samples_per_second": 12.3, "eval_steps_per_second": 0.769, "step": 100 }, { "epoch": 0.05, "learning_rate": 5.02283105022831e-08, "logits/chosen": -4.125654697418213, "logits/rejected": -4.086058616638184, "logps/chosen": -433.34356689453125, "logps/rejected": -276.3565368652344, "loss": 0.4933, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 1.194875717163086, "rewards/margins": 0.714809775352478, "rewards/rejected": 0.4800659120082855, "step": 110 }, { "epoch": 0.05, "learning_rate": 5.47945205479452e-08, "logits/chosen": -4.123453140258789, "logits/rejected": -4.079206466674805, "logps/chosen": -455.00103759765625, "logps/rejected": -268.443115234375, "loss": 0.4715, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 1.2100108861923218, "rewards/margins": 0.7140718698501587, "rewards/rejected": 0.4959389567375183, "step": 120 }, { "epoch": 0.06, "learning_rate": 5.93607305936073e-08, "logits/chosen": -4.141152381896973, "logits/rejected": -4.098879337310791, "logps/chosen": -452.7408142089844, "logps/rejected": -273.12615966796875, "loss": 0.4433, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 1.4682495594024658, "rewards/margins": 0.9250625371932983, "rewards/rejected": 0.5431869626045227, "step": 130 }, { "epoch": 0.06, "learning_rate": 6.39269406392694e-08, "logits/chosen": -4.114712715148926, "logits/rejected": -4.0743913650512695, "logps/chosen": -436.9576110839844, "logps/rejected": -282.03692626953125, "loss": 0.422, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 1.5205237865447998, "rewards/margins": 0.9940062761306763, "rewards/rejected": 0.5265175700187683, "step": 140 }, { "epoch": 0.07, "learning_rate": 6.84931506849315e-08, "logits/chosen": -4.116663455963135, "logits/rejected": -4.072527885437012, "logps/chosen": -448.979736328125, "logps/rejected": -267.4598388671875, "loss": 0.4038, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 1.6759980916976929, "rewards/margins": 1.1759095191955566, "rewards/rejected": 0.5000885725021362, "step": 150 }, { "epoch": 0.07, "learning_rate": 7.30593607305936e-08, "logits/chosen": -4.114321231842041, "logits/rejected": -4.083165168762207, "logps/chosen": -410.10711669921875, "logps/rejected": -282.03326416015625, "loss": 0.3839, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 1.314010739326477, "rewards/margins": 0.9137832522392273, "rewards/rejected": 0.40022745728492737, "step": 160 }, { "epoch": 0.08, "learning_rate": 7.76255707762557e-08, "logits/chosen": -4.094411373138428, "logits/rejected": -4.0628228187561035, "logps/chosen": -425.72674560546875, "logps/rejected": -274.60693359375, "loss": 0.3598, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 1.7310760021209717, "rewards/margins": 1.1754043102264404, "rewards/rejected": 0.5556716322898865, "step": 170 }, { "epoch": 0.08, "learning_rate": 8.21917808219178e-08, "logits/chosen": -4.100892066955566, "logits/rejected": -4.052018165588379, "logps/chosen": -477.96575927734375, "logps/rejected": -275.4220886230469, "loss": 0.3589, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 2.231424331665039, "rewards/margins": 1.6545913219451904, "rewards/rejected": 0.5768328905105591, "step": 180 }, { "epoch": 0.09, "learning_rate": 8.67579908675799e-08, "logits/chosen": -4.083650588989258, "logits/rejected": -4.043179988861084, "logps/chosen": -436.105712890625, "logps/rejected": -263.28546142578125, "loss": 0.3278, "rewards/accuracies": 0.875, "rewards/chosen": 2.068589687347412, "rewards/margins": 1.5653464794158936, "rewards/rejected": 0.5032432079315186, "step": 190 }, { "epoch": 0.09, "learning_rate": 9.1324200913242e-08, "logits/chosen": -4.08852481842041, "logits/rejected": -4.056266784667969, "logps/chosen": -393.2903137207031, "logps/rejected": -265.4389343261719, "loss": 0.3169, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 1.793810486793518, "rewards/margins": 1.2856613397598267, "rewards/rejected": 0.5081491470336914, "step": 200 }, { "epoch": 0.09, "eval_logits/chosen": -4.0734100341796875, "eval_logits/rejected": -4.043949604034424, "eval_logps/chosen": -408.1737365722656, "eval_logps/rejected": -264.34954833984375, "eval_loss": 0.31823238730430603, "eval_rewards/accuracies": 0.8715083599090576, "eval_rewards/chosen": 1.864997386932373, "eval_rewards/margins": 1.396305799484253, "eval_rewards/rejected": 0.46869152784347534, "eval_runtime": 245.3758, "eval_samples_per_second": 11.664, "eval_steps_per_second": 0.729, "step": 200 }, { "epoch": 0.1, "learning_rate": 9.58904109589041e-08, "logits/chosen": -4.088995933532715, "logits/rejected": -4.05702018737793, "logps/chosen": -409.3319091796875, "logps/rejected": -264.1188049316406, "loss": 0.3238, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 1.9572970867156982, "rewards/margins": 1.4365333318710327, "rewards/rejected": 0.5207639336585999, "step": 210 }, { "epoch": 0.1, "learning_rate": 1.004566210045662e-07, "logits/chosen": -4.100948810577393, "logits/rejected": -4.071681499481201, "logps/chosen": -393.6602478027344, "logps/rejected": -270.72686767578125, "loss": 0.3152, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 2.030446767807007, "rewards/margins": 1.5107183456420898, "rewards/rejected": 0.519728422164917, "step": 220 }, { "epoch": 0.1, "learning_rate": 1.050228310502283e-07, "logits/chosen": -4.098292827606201, "logits/rejected": -4.058206081390381, "logps/chosen": -464.8057556152344, "logps/rejected": -274.8346862792969, "loss": 0.3078, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 2.4693195819854736, "rewards/margins": 1.8937368392944336, "rewards/rejected": 0.5755828619003296, "step": 230 }, { "epoch": 0.11, "learning_rate": 1.095890410958904e-07, "logits/chosen": -4.085076332092285, "logits/rejected": -4.052942276000977, "logps/chosen": -404.45697021484375, "logps/rejected": -263.5602111816406, "loss": 0.2946, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 2.038395404815674, "rewards/margins": 1.7134144306182861, "rewards/rejected": 0.32498103380203247, "step": 240 }, { "epoch": 0.11, "learning_rate": 1.141552511415525e-07, "logits/chosen": -4.091891288757324, "logits/rejected": -4.054966926574707, "logps/chosen": -425.76837158203125, "logps/rejected": -263.34881591796875, "loss": 0.3025, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 2.0233395099639893, "rewards/margins": 1.6882222890853882, "rewards/rejected": 0.33511701226234436, "step": 250 }, { "epoch": 0.12, "learning_rate": 1.187214611872146e-07, "logits/chosen": -4.09063720703125, "logits/rejected": -4.051945686340332, "logps/chosen": -432.9930725097656, "logps/rejected": -262.3544616699219, "loss": 0.301, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 2.624361753463745, "rewards/margins": 2.0358402729034424, "rewards/rejected": 0.588521420955658, "step": 260 }, { "epoch": 0.12, "learning_rate": 1.232876712328767e-07, "logits/chosen": -4.0815887451171875, "logits/rejected": -4.041827201843262, "logps/chosen": -462.268798828125, "logps/rejected": -264.4634094238281, "loss": 0.2772, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 2.6719746589660645, "rewards/margins": 2.2658581733703613, "rewards/rejected": 0.4061163365840912, "step": 270 }, { "epoch": 0.13, "learning_rate": 1.278538812785388e-07, "logits/chosen": -4.075201988220215, "logits/rejected": -4.047876834869385, "logps/chosen": -417.8416442871094, "logps/rejected": -284.02484130859375, "loss": 0.2776, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 2.4951508045196533, "rewards/margins": 1.8715187311172485, "rewards/rejected": 0.6236318945884705, "step": 280 }, { "epoch": 0.13, "learning_rate": 1.324200913242009e-07, "logits/chosen": -4.0798563957214355, "logits/rejected": -4.047537326812744, "logps/chosen": -425.5148010253906, "logps/rejected": -276.49267578125, "loss": 0.2587, "rewards/accuracies": 0.875, "rewards/chosen": 2.965132474899292, "rewards/margins": 2.2990920543670654, "rewards/rejected": 0.6660404205322266, "step": 290 }, { "epoch": 0.14, "learning_rate": 1.36986301369863e-07, "logits/chosen": -4.083587646484375, "logits/rejected": -4.0480194091796875, "logps/chosen": -419.197021484375, "logps/rejected": -272.2101135253906, "loss": 0.283, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 2.687791347503662, "rewards/margins": 2.1287665367126465, "rewards/rejected": 0.5590246319770813, "step": 300 }, { "epoch": 0.14, "eval_logits/chosen": -4.047587871551514, "eval_logits/rejected": -4.020656108856201, "eval_logps/chosen": -406.94097900390625, "eval_logps/rejected": -264.4837951660156, "eval_loss": 0.25923749804496765, "eval_rewards/accuracies": 0.8938547372817993, "eval_rewards/chosen": 2.4813857078552246, "eval_rewards/margins": 2.079826831817627, "eval_rewards/rejected": 0.40155842900276184, "eval_runtime": 196.6909, "eval_samples_per_second": 14.551, "eval_steps_per_second": 0.91, "step": 300 }, { "epoch": 0.14, "learning_rate": 1.415525114155251e-07, "logits/chosen": -4.0836262702941895, "logits/rejected": -4.0511064529418945, "logps/chosen": -397.8662109375, "logps/rejected": -258.64166259765625, "loss": 0.2812, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 2.7335610389709473, "rewards/margins": 2.2473812103271484, "rewards/rejected": 0.4861796498298645, "step": 310 }, { "epoch": 0.15, "learning_rate": 1.461187214611872e-07, "logits/chosen": -4.1076459884643555, "logits/rejected": -4.072136878967285, "logps/chosen": -452.25982666015625, "logps/rejected": -297.49688720703125, "loss": 0.2589, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 3.0233206748962402, "rewards/margins": 2.3415541648864746, "rewards/rejected": 0.6817664504051208, "step": 320 }, { "epoch": 0.15, "learning_rate": 1.506849315068493e-07, "logits/chosen": -4.096673011779785, "logits/rejected": -4.06699275970459, "logps/chosen": -406.5201721191406, "logps/rejected": -275.48583984375, "loss": 0.2452, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 2.7545294761657715, "rewards/margins": 2.33504056930542, "rewards/rejected": 0.41948890686035156, "step": 330 }, { "epoch": 0.16, "learning_rate": 1.552511415525114e-07, "logits/chosen": -4.090521812438965, "logits/rejected": -4.059487342834473, "logps/chosen": -399.48590087890625, "logps/rejected": -255.3171844482422, "loss": 0.2816, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 2.6917777061462402, "rewards/margins": 2.2513210773468018, "rewards/rejected": 0.4404570162296295, "step": 340 }, { "epoch": 0.16, "learning_rate": 1.598173515981735e-07, "logits/chosen": -4.1127610206604, "logits/rejected": -4.083159446716309, "logps/chosen": -417.5686950683594, "logps/rejected": -280.68841552734375, "loss": 0.2505, "rewards/accuracies": 0.9375, "rewards/chosen": 2.502535104751587, "rewards/margins": 2.404932975769043, "rewards/rejected": 0.09760208427906036, "step": 350 }, { "epoch": 0.16, "learning_rate": 1.643835616438356e-07, "logits/chosen": -4.107423305511475, "logits/rejected": -4.077065944671631, "logps/chosen": -421.57928466796875, "logps/rejected": -280.7342529296875, "loss": 0.222, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 3.036574125289917, "rewards/margins": 2.640514850616455, "rewards/rejected": 0.3960592448711395, "step": 360 }, { "epoch": 0.17, "learning_rate": 1.689497716894977e-07, "logits/chosen": -4.124993801116943, "logits/rejected": -4.094521522521973, "logps/chosen": -409.13128662109375, "logps/rejected": -276.92584228515625, "loss": 0.2557, "rewards/accuracies": 0.875, "rewards/chosen": 2.804875373840332, "rewards/margins": 2.2633109092712402, "rewards/rejected": 0.5415642857551575, "step": 370 }, { "epoch": 0.17, "learning_rate": 1.735159817351598e-07, "logits/chosen": -4.127420902252197, "logits/rejected": -4.088624000549316, "logps/chosen": -436.15362548828125, "logps/rejected": -253.9842071533203, "loss": 0.2625, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 3.2195563316345215, "rewards/margins": 2.7869300842285156, "rewards/rejected": 0.43262606859207153, "step": 380 }, { "epoch": 0.18, "learning_rate": 1.780821917808219e-07, "logits/chosen": -4.112975120544434, "logits/rejected": -4.078726291656494, "logps/chosen": -420.7740173339844, "logps/rejected": -255.58157348632812, "loss": 0.2451, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 3.0399105548858643, "rewards/margins": 2.727544069290161, "rewards/rejected": 0.31236642599105835, "step": 390 }, { "epoch": 0.18, "learning_rate": 1.82648401826484e-07, "logits/chosen": -4.131483554840088, "logits/rejected": -4.103003025054932, "logps/chosen": -385.6736755371094, "logps/rejected": -271.086181640625, "loss": 0.2269, "rewards/accuracies": 0.9375, "rewards/chosen": 3.0034279823303223, "rewards/margins": 2.6873748302459717, "rewards/rejected": 0.31605321168899536, "step": 400 }, { "epoch": 0.18, "eval_logits/chosen": -4.090097904205322, "eval_logits/rejected": -4.064701557159424, "eval_logps/chosen": -405.7353820800781, "eval_logps/rejected": -264.3902893066406, "eval_loss": 0.23337939381599426, "eval_rewards/accuracies": 0.8882681727409363, "eval_rewards/chosen": 3.0841965675354004, "eval_rewards/margins": 2.635887861251831, "eval_rewards/rejected": 0.4483083486557007, "eval_runtime": 249.3464, "eval_samples_per_second": 11.478, "eval_steps_per_second": 0.718, "step": 400 }, { "epoch": 0.19, "learning_rate": 1.872146118721461e-07, "logits/chosen": -4.128126621246338, "logits/rejected": -4.083017826080322, "logps/chosen": -479.29241943359375, "logps/rejected": -266.75738525390625, "loss": 0.217, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 3.754101514816284, "rewards/margins": 3.4601378440856934, "rewards/rejected": 0.29396337270736694, "step": 410 }, { "epoch": 0.19, "learning_rate": 1.917808219178082e-07, "logits/chosen": -4.105954647064209, "logits/rejected": -4.081549167633057, "logps/chosen": -391.1182861328125, "logps/rejected": -269.7347412109375, "loss": 0.2316, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 2.942401170730591, "rewards/margins": 2.7238264083862305, "rewards/rejected": 0.21857433021068573, "step": 420 }, { "epoch": 0.2, "learning_rate": 1.963470319634703e-07, "logits/chosen": -4.0939507484436035, "logits/rejected": -4.068431377410889, "logps/chosen": -394.3544006347656, "logps/rejected": -258.0218811035156, "loss": 0.2345, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 3.0573782920837402, "rewards/margins": 2.7123544216156006, "rewards/rejected": 0.3450236916542053, "step": 430 }, { "epoch": 0.2, "learning_rate": 2.009132420091324e-07, "logits/chosen": -4.126564025878906, "logits/rejected": -4.099458694458008, "logps/chosen": -415.0599670410156, "logps/rejected": -281.7992858886719, "loss": 0.2538, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 2.743464946746826, "rewards/margins": 2.4870033264160156, "rewards/rejected": 0.2564617097377777, "step": 440 }, { "epoch": 0.21, "learning_rate": 2.054794520547945e-07, "logits/chosen": -4.1211442947387695, "logits/rejected": -4.082815647125244, "logps/chosen": -412.2413635253906, "logps/rejected": -254.90097045898438, "loss": 0.2282, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 3.6943106651306152, "rewards/margins": 3.11075496673584, "rewards/rejected": 0.583555281162262, "step": 450 }, { "epoch": 0.21, "learning_rate": 2.100456621004566e-07, "logits/chosen": -4.110466480255127, "logits/rejected": -4.074952602386475, "logps/chosen": -442.8841247558594, "logps/rejected": -279.22662353515625, "loss": 0.2137, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 3.563941478729248, "rewards/margins": 3.180065870285034, "rewards/rejected": 0.38387632369995117, "step": 460 }, { "epoch": 0.21, "learning_rate": 2.146118721461187e-07, "logits/chosen": -4.123247146606445, "logits/rejected": -4.086281776428223, "logps/chosen": -438.7225036621094, "logps/rejected": -276.7365417480469, "loss": 0.2356, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 3.731687068939209, "rewards/margins": 2.860921859741211, "rewards/rejected": 0.8707650303840637, "step": 470 }, { "epoch": 0.22, "learning_rate": 2.191780821917808e-07, "logits/chosen": -4.140954971313477, "logits/rejected": -4.109364986419678, "logps/chosen": -400.33111572265625, "logps/rejected": -271.6776123046875, "loss": 0.2198, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 3.4695968627929688, "rewards/margins": 2.4520959854125977, "rewards/rejected": 1.0175005197525024, "step": 480 }, { "epoch": 0.22, "learning_rate": 2.237442922374429e-07, "logits/chosen": -4.133633136749268, "logits/rejected": -4.103890419006348, "logps/chosen": -420.04608154296875, "logps/rejected": -274.11328125, "loss": 0.2155, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 3.755746364593506, "rewards/margins": 3.3902416229248047, "rewards/rejected": 0.36550456285476685, "step": 490 }, { "epoch": 0.23, "learning_rate": 2.28310502283105e-07, "logits/chosen": -4.073807239532471, "logits/rejected": -4.037424564361572, "logps/chosen": -428.438232421875, "logps/rejected": -272.7869567871094, "loss": 0.1909, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 3.968928098678589, "rewards/margins": 3.576451539993286, "rewards/rejected": 0.39247649908065796, "step": 500 }, { "epoch": 0.23, "eval_logits/chosen": -4.091692924499512, "eval_logits/rejected": -4.062863349914551, "eval_logps/chosen": -405.08428955078125, "eval_logps/rejected": -264.5758361816406, "eval_loss": 0.21518714725971222, "eval_rewards/accuracies": 0.8826815485954285, "eval_rewards/chosen": 3.4097392559051514, "eval_rewards/margins": 3.05419921875, "eval_rewards/rejected": 0.35554030537605286, "eval_runtime": 180.2004, "eval_samples_per_second": 15.882, "eval_steps_per_second": 0.993, "step": 500 }, { "epoch": 0.23, "learning_rate": 2.328767123287671e-07, "logits/chosen": -4.096264839172363, "logits/rejected": -4.062567710876465, "logps/chosen": -399.7438049316406, "logps/rejected": -251.76754760742188, "loss": 0.2257, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 3.763711929321289, "rewards/margins": 3.422013521194458, "rewards/rejected": 0.3416985869407654, "step": 510 }, { "epoch": 0.24, "learning_rate": 2.374429223744292e-07, "logits/chosen": -4.0962233543396, "logits/rejected": -4.058864116668701, "logps/chosen": -442.80767822265625, "logps/rejected": -272.2812194824219, "loss": 0.1889, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.318647861480713, "rewards/margins": 3.586409091949463, "rewards/rejected": 0.7322388887405396, "step": 520 }, { "epoch": 0.24, "learning_rate": 2.420091324200913e-07, "logits/chosen": -4.131686210632324, "logits/rejected": -4.101659297943115, "logps/chosen": -418.96527099609375, "logps/rejected": -280.4732360839844, "loss": 0.216, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 3.7352375984191895, "rewards/margins": 3.250955581665039, "rewards/rejected": 0.48428231477737427, "step": 530 }, { "epoch": 0.25, "learning_rate": 2.465753424657534e-07, "logits/chosen": -4.132394790649414, "logits/rejected": -4.092496395111084, "logps/chosen": -438.9020080566406, "logps/rejected": -274.0657653808594, "loss": 0.1954, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.403537750244141, "rewards/margins": 3.936189651489258, "rewards/rejected": 0.46734824776649475, "step": 540 }, { "epoch": 0.25, "learning_rate": 2.511415525114155e-07, "logits/chosen": -4.148422718048096, "logits/rejected": -4.109923362731934, "logps/chosen": -432.8534240722656, "logps/rejected": -278.4627380371094, "loss": 0.2027, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 3.563136577606201, "rewards/margins": 3.009920597076416, "rewards/rejected": 0.5532161593437195, "step": 550 }, { "epoch": 0.26, "learning_rate": 2.557077625570776e-07, "logits/chosen": -4.179664134979248, "logits/rejected": -4.141193389892578, "logps/chosen": -410.2976989746094, "logps/rejected": -282.09991455078125, "loss": 0.2326, "rewards/accuracies": 0.875, "rewards/chosen": 3.9078965187072754, "rewards/margins": 3.2463676929473877, "rewards/rejected": 0.6615282893180847, "step": 560 }, { "epoch": 0.26, "learning_rate": 2.602739726027397e-07, "logits/chosen": -4.184179782867432, "logits/rejected": -4.139160633087158, "logps/chosen": -440.67236328125, "logps/rejected": -267.76153564453125, "loss": 0.1994, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 3.9988555908203125, "rewards/margins": 3.5466320514678955, "rewards/rejected": 0.45222440361976624, "step": 570 }, { "epoch": 0.26, "learning_rate": 2.648401826484018e-07, "logits/chosen": -4.156145095825195, "logits/rejected": -4.113019943237305, "logps/chosen": -404.7920837402344, "logps/rejected": -254.5971221923828, "loss": 0.1864, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 4.07761287689209, "rewards/margins": 3.606677293777466, "rewards/rejected": 0.47093600034713745, "step": 580 }, { "epoch": 0.27, "learning_rate": 2.694063926940639e-07, "logits/chosen": -4.143758296966553, "logits/rejected": -4.117724418640137, "logps/chosen": -411.47576904296875, "logps/rejected": -288.5257263183594, "loss": 0.2001, "rewards/accuracies": 0.875, "rewards/chosen": 3.7214341163635254, "rewards/margins": 2.9603395462036133, "rewards/rejected": 0.7610949277877808, "step": 590 }, { "epoch": 0.27, "learning_rate": 2.73972602739726e-07, "logits/chosen": -4.118420600891113, "logits/rejected": -4.081874370574951, "logps/chosen": -419.12432861328125, "logps/rejected": -264.7353515625, "loss": 0.2244, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 3.932356595993042, "rewards/margins": 3.3289496898651123, "rewards/rejected": 0.6034070253372192, "step": 600 }, { "epoch": 0.27, "eval_logits/chosen": -4.106067657470703, "eval_logits/rejected": -4.074832439422607, "eval_logps/chosen": -404.0330505371094, "eval_logps/rejected": -264.20159912109375, "eval_loss": 0.20270854234695435, "eval_rewards/accuracies": 0.8994413614273071, "eval_rewards/chosen": 3.9353408813476562, "eval_rewards/margins": 3.3926780223846436, "eval_rewards/rejected": 0.5426631569862366, "eval_runtime": 182.0941, "eval_samples_per_second": 15.717, "eval_steps_per_second": 0.983, "step": 600 }, { "epoch": 0.28, "learning_rate": 2.785388127853881e-07, "logits/chosen": -4.136101722717285, "logits/rejected": -4.0961480140686035, "logps/chosen": -430.060546875, "logps/rejected": -274.0495910644531, "loss": 0.2314, "rewards/accuracies": 0.9375, "rewards/chosen": 4.594294548034668, "rewards/margins": 3.9268696308135986, "rewards/rejected": 0.667425274848938, "step": 610 }, { "epoch": 0.28, "learning_rate": 2.831050228310502e-07, "logits/chosen": -4.10974645614624, "logits/rejected": -4.078618049621582, "logps/chosen": -422.267822265625, "logps/rejected": -287.4093933105469, "loss": 0.2169, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 3.7346396446228027, "rewards/margins": 3.0842525959014893, "rewards/rejected": 0.650387167930603, "step": 620 }, { "epoch": 0.29, "learning_rate": 2.876712328767123e-07, "logits/chosen": -4.14223575592041, "logits/rejected": -4.105758190155029, "logps/chosen": -409.4903869628906, "logps/rejected": -270.93463134765625, "loss": 0.206, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.351956367492676, "rewards/margins": 3.782641649246216, "rewards/rejected": 0.5693148970603943, "step": 630 }, { "epoch": 0.29, "learning_rate": 2.922374429223744e-07, "logits/chosen": -4.149957180023193, "logits/rejected": -4.109908103942871, "logps/chosen": -422.36737060546875, "logps/rejected": -264.7787170410156, "loss": 0.192, "rewards/accuracies": 0.9375, "rewards/chosen": 4.248743534088135, "rewards/margins": 3.774355411529541, "rewards/rejected": 0.4743878245353699, "step": 640 }, { "epoch": 0.3, "learning_rate": 2.968036529680365e-07, "logits/chosen": -4.185708045959473, "logits/rejected": -4.142598628997803, "logps/chosen": -429.4815368652344, "logps/rejected": -272.57220458984375, "loss": 0.1873, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.298845291137695, "rewards/margins": 3.8823604583740234, "rewards/rejected": 0.4164847731590271, "step": 650 }, { "epoch": 0.3, "learning_rate": 2.998477929984779e-07, "logits/chosen": -4.209115982055664, "logits/rejected": -4.159461975097656, "logps/chosen": -437.9283142089844, "logps/rejected": -266.4573059082031, "loss": 0.198, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 4.321707725524902, "rewards/margins": 3.8896515369415283, "rewards/rejected": 0.4320557117462158, "step": 660 }, { "epoch": 0.31, "learning_rate": 2.993404363267377e-07, "logits/chosen": -4.2221879959106445, "logits/rejected": -4.175808906555176, "logps/chosen": -441.20379638671875, "logps/rejected": -268.2593688964844, "loss": 0.225, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.126081943511963, "rewards/margins": 3.6276869773864746, "rewards/rejected": 0.4983953833580017, "step": 670 }, { "epoch": 0.31, "learning_rate": 2.9883307965499743e-07, "logits/chosen": -4.219667911529541, "logits/rejected": -4.178839206695557, "logps/chosen": -427.64788818359375, "logps/rejected": -281.82257080078125, "loss": 0.1873, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 3.9308841228485107, "rewards/margins": 3.464876174926758, "rewards/rejected": 0.4660082757472992, "step": 680 }, { "epoch": 0.31, "learning_rate": 2.983257229832572e-07, "logits/chosen": -4.1189751625061035, "logits/rejected": -4.083699703216553, "logps/chosen": -410.87908935546875, "logps/rejected": -275.31707763671875, "loss": 0.2192, "rewards/accuracies": 0.9375, "rewards/chosen": 3.9956748485565186, "rewards/margins": 3.9671969413757324, "rewards/rejected": 0.028477955609560013, "step": 690 }, { "epoch": 0.32, "learning_rate": 2.9781836631151696e-07, "logits/chosen": -4.219498157501221, "logits/rejected": -4.173437595367432, "logps/chosen": -441.61834716796875, "logps/rejected": -267.37066650390625, "loss": 0.2118, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 4.192904472351074, "rewards/margins": 3.602980375289917, "rewards/rejected": 0.5899245142936707, "step": 700 }, { "epoch": 0.32, "eval_logits/chosen": -4.173791885375977, "eval_logits/rejected": -4.1397833824157715, "eval_logps/chosen": -404.0215759277344, "eval_logps/rejected": -264.3592529296875, "eval_loss": 0.1949852705001831, "eval_rewards/accuracies": 0.9050279259681702, "eval_rewards/chosen": 3.9410858154296875, "eval_rewards/margins": 3.4772510528564453, "eval_rewards/rejected": 0.4638344645500183, "eval_runtime": 150.9544, "eval_samples_per_second": 18.959, "eval_steps_per_second": 1.186, "step": 700 }, { "epoch": 0.32, "learning_rate": 2.9731100963977676e-07, "logits/chosen": -4.183773994445801, "logits/rejected": -4.146794319152832, "logps/chosen": -433.4944763183594, "logps/rejected": -287.1102600097656, "loss": 0.1935, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.7404680252075195, "rewards/margins": 3.8502135276794434, "rewards/rejected": 0.8902549743652344, "step": 710 }, { "epoch": 0.33, "learning_rate": 2.968036529680365e-07, "logits/chosen": -4.196366310119629, "logits/rejected": -4.168686866760254, "logps/chosen": -395.640625, "logps/rejected": -288.086669921875, "loss": 0.1981, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 4.173182487487793, "rewards/margins": 3.1521525382995605, "rewards/rejected": 1.0210305452346802, "step": 720 }, { "epoch": 0.33, "learning_rate": 2.962962962962963e-07, "logits/chosen": -4.15021276473999, "logits/rejected": -4.113003730773926, "logps/chosen": -421.66998291015625, "logps/rejected": -272.64410400390625, "loss": 0.2097, "rewards/accuracies": 0.875, "rewards/chosen": 4.326259136199951, "rewards/margins": 3.6907806396484375, "rewards/rejected": 0.6354783177375793, "step": 730 }, { "epoch": 0.34, "learning_rate": 2.9578893962455603e-07, "logits/chosen": -4.1530022621154785, "logits/rejected": -4.110964775085449, "logps/chosen": -419.9805603027344, "logps/rejected": -267.90289306640625, "loss": 0.1892, "rewards/accuracies": 0.9375, "rewards/chosen": 4.160901069641113, "rewards/margins": 3.5975260734558105, "rewards/rejected": 0.5633751153945923, "step": 740 }, { "epoch": 0.34, "learning_rate": 2.952815829528158e-07, "logits/chosen": -4.182764530181885, "logits/rejected": -4.140324592590332, "logps/chosen": -427.1100158691406, "logps/rejected": -272.9124755859375, "loss": 0.1761, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.86968469619751, "rewards/margins": 4.50525426864624, "rewards/rejected": 0.3644307553768158, "step": 750 }, { "epoch": 0.35, "learning_rate": 2.9477422628107556e-07, "logits/chosen": -4.214711666107178, "logits/rejected": -4.161876678466797, "logps/chosen": -446.918701171875, "logps/rejected": -263.93157958984375, "loss": 0.1874, "rewards/accuracies": 0.9375, "rewards/chosen": 5.09863805770874, "rewards/margins": 4.415619373321533, "rewards/rejected": 0.683018684387207, "step": 760 }, { "epoch": 0.35, "learning_rate": 2.9426686960933536e-07, "logits/chosen": -4.242149353027344, "logits/rejected": -4.197558403015137, "logps/chosen": -418.9051818847656, "logps/rejected": -269.50323486328125, "loss": 0.196, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 4.592336654663086, "rewards/margins": 4.131098747253418, "rewards/rejected": 0.46123796701431274, "step": 770 }, { "epoch": 0.36, "learning_rate": 2.937595129375951e-07, "logits/chosen": -4.211565971374512, "logits/rejected": -4.169705390930176, "logps/chosen": -414.1048278808594, "logps/rejected": -273.3916015625, "loss": 0.2007, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 4.805819988250732, "rewards/margins": 3.7861087322235107, "rewards/rejected": 1.0197112560272217, "step": 780 }, { "epoch": 0.36, "learning_rate": 2.932521562658549e-07, "logits/chosen": -4.253178119659424, "logits/rejected": -4.212976932525635, "logps/chosen": -399.68304443359375, "logps/rejected": -266.843994140625, "loss": 0.191, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.364121437072754, "rewards/margins": 3.619459867477417, "rewards/rejected": 0.7446608543395996, "step": 790 }, { "epoch": 0.37, "learning_rate": 2.9274479959411463e-07, "logits/chosen": -4.187119483947754, "logits/rejected": -4.1452531814575195, "logps/chosen": -420.8622131347656, "logps/rejected": -282.3447265625, "loss": 0.1811, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 4.823877811431885, "rewards/margins": 3.8916053771972656, "rewards/rejected": 0.932272732257843, "step": 800 }, { "epoch": 0.37, "eval_logits/chosen": -4.147759914398193, "eval_logits/rejected": -4.115203857421875, "eval_logps/chosen": -402.8429260253906, "eval_logps/rejected": -263.91015625, "eval_loss": 0.19238564372062683, "eval_rewards/accuracies": 0.8994413614273071, "eval_rewards/chosen": 4.530416488647461, "eval_rewards/margins": 3.8420369625091553, "eval_rewards/rejected": 0.6883795261383057, "eval_runtime": 229.7832, "eval_samples_per_second": 12.455, "eval_steps_per_second": 0.779, "step": 800 }, { "epoch": 0.37, "learning_rate": 2.922374429223744e-07, "logits/chosen": -4.2307448387146, "logits/rejected": -4.192383766174316, "logps/chosen": -400.289306640625, "logps/rejected": -263.30584716796875, "loss": 0.1962, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.354440689086914, "rewards/margins": 4.112866401672363, "rewards/rejected": 0.24157476425170898, "step": 810 }, { "epoch": 0.37, "learning_rate": 2.9173008625063416e-07, "logits/chosen": -4.297841548919678, "logits/rejected": -4.253913402557373, "logps/chosen": -413.95233154296875, "logps/rejected": -272.9776916503906, "loss": 0.2062, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.2922163009643555, "rewards/margins": 3.5743491649627686, "rewards/rejected": 0.7178670763969421, "step": 820 }, { "epoch": 0.38, "learning_rate": 2.9122272957889396e-07, "logits/chosen": -4.282620906829834, "logits/rejected": -4.23444128036499, "logps/chosen": -427.3113708496094, "logps/rejected": -259.5711975097656, "loss": 0.192, "rewards/accuracies": 0.9375, "rewards/chosen": 4.7920966148376465, "rewards/margins": 3.7796459197998047, "rewards/rejected": 1.012450933456421, "step": 830 }, { "epoch": 0.38, "learning_rate": 2.907153729071537e-07, "logits/chosen": -4.269182205200195, "logits/rejected": -4.224325656890869, "logps/chosen": -423.260009765625, "logps/rejected": -276.0570068359375, "loss": 0.178, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.584607124328613, "rewards/margins": 4.113310813903809, "rewards/rejected": 0.4712963104248047, "step": 840 }, { "epoch": 0.39, "learning_rate": 2.902080162354135e-07, "logits/chosen": -4.26160192489624, "logits/rejected": -4.213840961456299, "logps/chosen": -417.0577697753906, "logps/rejected": -272.1130676269531, "loss": 0.1976, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.526599407196045, "rewards/margins": 3.719799757003784, "rewards/rejected": 0.8068000674247742, "step": 850 }, { "epoch": 0.39, "learning_rate": 2.8970065956367323e-07, "logits/chosen": -4.281061172485352, "logits/rejected": -4.228446006774902, "logps/chosen": -425.4596252441406, "logps/rejected": -266.59869384765625, "loss": 0.2089, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.073149681091309, "rewards/margins": 4.242137432098389, "rewards/rejected": -0.16898790001869202, "step": 860 }, { "epoch": 0.4, "learning_rate": 2.89193302891933e-07, "logits/chosen": -4.22875452041626, "logits/rejected": -4.181410312652588, "logps/chosen": -413.494384765625, "logps/rejected": -277.0962219238281, "loss": 0.1842, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.2723822593688965, "rewards/margins": 3.733464002609253, "rewards/rejected": 0.5389187932014465, "step": 870 }, { "epoch": 0.4, "learning_rate": 2.8868594622019276e-07, "logits/chosen": -4.213003635406494, "logits/rejected": -4.153162956237793, "logps/chosen": -436.41156005859375, "logps/rejected": -270.74176025390625, "loss": 0.209, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.831552028656006, "rewards/margins": 4.269440650939941, "rewards/rejected": 0.5621119737625122, "step": 880 }, { "epoch": 0.41, "learning_rate": 2.8817858954845256e-07, "logits/chosen": -4.295604228973389, "logits/rejected": -4.26129150390625, "logps/chosen": -382.20770263671875, "logps/rejected": -276.6923522949219, "loss": 0.2093, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 4.106089115142822, "rewards/margins": 3.655601978302002, "rewards/rejected": 0.4504874348640442, "step": 890 }, { "epoch": 0.41, "learning_rate": 2.876712328767123e-07, "logits/chosen": -4.260335922241211, "logits/rejected": -4.2171807289123535, "logps/chosen": -405.80792236328125, "logps/rejected": -268.5343933105469, "loss": 0.1802, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 4.247250556945801, "rewards/margins": 3.7043442726135254, "rewards/rejected": 0.5429067611694336, "step": 900 }, { "epoch": 0.41, "eval_logits/chosen": -4.290160655975342, "eval_logits/rejected": -4.2456488609313965, "eval_logps/chosen": -403.4390563964844, "eval_logps/rejected": -264.69061279296875, "eval_loss": 0.18701128661632538, "eval_rewards/accuracies": 0.9022346138954163, "eval_rewards/chosen": 4.232345104217529, "eval_rewards/margins": 3.9341819286346436, "eval_rewards/rejected": 0.29816320538520813, "eval_runtime": 360.2446, "eval_samples_per_second": 7.945, "eval_steps_per_second": 0.497, "step": 900 }, { "epoch": 0.42, "learning_rate": 2.871638762049721e-07, "logits/chosen": -4.311116695404053, "logits/rejected": -4.252312660217285, "logps/chosen": -439.2879943847656, "logps/rejected": -275.1027526855469, "loss": 0.1941, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.146880149841309, "rewards/margins": 4.627768516540527, "rewards/rejected": 0.5191121101379395, "step": 910 }, { "epoch": 0.42, "learning_rate": 2.8665651953323183e-07, "logits/chosen": -4.349404335021973, "logits/rejected": -4.28867244720459, "logps/chosen": -438.40380859375, "logps/rejected": -261.75579833984375, "loss": 0.172, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.756802558898926, "rewards/margins": 4.245242118835449, "rewards/rejected": 0.5115599632263184, "step": 920 }, { "epoch": 0.42, "learning_rate": 2.861491628614916e-07, "logits/chosen": -4.310251712799072, "logits/rejected": -4.253324031829834, "logps/chosen": -430.7648010253906, "logps/rejected": -270.4054260253906, "loss": 0.2097, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.8648223876953125, "rewards/margins": 4.301828861236572, "rewards/rejected": 0.5629938840866089, "step": 930 }, { "epoch": 0.43, "learning_rate": 2.8564180618975136e-07, "logits/chosen": -4.3355255126953125, "logits/rejected": -4.287189960479736, "logps/chosen": -421.4776306152344, "logps/rejected": -278.6324768066406, "loss": 0.1711, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.863546371459961, "rewards/margins": 4.304266452789307, "rewards/rejected": 0.5592796802520752, "step": 940 }, { "epoch": 0.43, "learning_rate": 2.8513444951801116e-07, "logits/chosen": -4.308553218841553, "logits/rejected": -4.261014461517334, "logps/chosen": -392.04608154296875, "logps/rejected": -257.5743408203125, "loss": 0.186, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 4.327346324920654, "rewards/margins": 3.899622678756714, "rewards/rejected": 0.42772403359413147, "step": 950 }, { "epoch": 0.44, "learning_rate": 2.846270928462709e-07, "logits/chosen": -4.304620742797852, "logits/rejected": -4.258695125579834, "logps/chosen": -419.928466796875, "logps/rejected": -287.6424255371094, "loss": 0.1919, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.3143205642700195, "rewards/margins": 3.8084816932678223, "rewards/rejected": 0.5058385729789734, "step": 960 }, { "epoch": 0.44, "learning_rate": 2.841197361745307e-07, "logits/chosen": -4.3021440505981445, "logits/rejected": -4.249146461486816, "logps/chosen": -429.81378173828125, "logps/rejected": -283.0129699707031, "loss": 0.1778, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 5.253724575042725, "rewards/margins": 4.843316078186035, "rewards/rejected": 0.4104085862636566, "step": 970 }, { "epoch": 0.45, "learning_rate": 2.8361237950279043e-07, "logits/chosen": -4.291184425354004, "logits/rejected": -4.223944187164307, "logps/chosen": -456.9485778808594, "logps/rejected": -265.01922607421875, "loss": 0.1717, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.599559783935547, "rewards/margins": 4.460396766662598, "rewards/rejected": 0.13916321098804474, "step": 980 }, { "epoch": 0.45, "learning_rate": 2.831050228310502e-07, "logits/chosen": -4.263381004333496, "logits/rejected": -4.209916591644287, "logps/chosen": -418.43389892578125, "logps/rejected": -274.9731750488281, "loss": 0.2164, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.56210470199585, "rewards/margins": 3.970785617828369, "rewards/rejected": 0.5913198590278625, "step": 990 }, { "epoch": 0.46, "learning_rate": 2.8259766615930996e-07, "logits/chosen": -4.310708522796631, "logits/rejected": -4.250560283660889, "logps/chosen": -441.808349609375, "logps/rejected": -271.27685546875, "loss": 0.1738, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.530468940734863, "rewards/margins": 4.7111005783081055, "rewards/rejected": 0.8193683624267578, "step": 1000 }, { "epoch": 0.46, "eval_logits/chosen": -4.3360185623168945, "eval_logits/rejected": -4.284560680389404, "eval_logps/chosen": -403.7695617675781, "eval_logps/rejected": -264.9965515136719, "eval_loss": 0.19607771933078766, "eval_rewards/accuracies": 0.8938547372817993, "eval_rewards/chosen": 4.067108154296875, "eval_rewards/margins": 3.921924352645874, "eval_rewards/rejected": 0.14518392086029053, "eval_runtime": 360.9223, "eval_samples_per_second": 7.93, "eval_steps_per_second": 0.496, "step": 1000 }, { "epoch": 0.46, "learning_rate": 2.8209030948756976e-07, "logits/chosen": -4.32918643951416, "logits/rejected": -4.27830696105957, "logps/chosen": -416.5372009277344, "logps/rejected": -274.45648193359375, "loss": 0.1858, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.442258358001709, "rewards/margins": 4.2470903396606445, "rewards/rejected": 0.19516849517822266, "step": 1010 }, { "epoch": 0.47, "learning_rate": 2.815829528158295e-07, "logits/chosen": -4.312193393707275, "logits/rejected": -4.270182132720947, "logps/chosen": -389.83270263671875, "logps/rejected": -261.42230224609375, "loss": 0.1679, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.291882514953613, "rewards/margins": 3.9460959434509277, "rewards/rejected": 0.3457861840724945, "step": 1020 }, { "epoch": 0.47, "learning_rate": 2.810755961440893e-07, "logits/chosen": -4.330978870391846, "logits/rejected": -4.284383773803711, "logps/chosen": -396.40155029296875, "logps/rejected": -280.8757019042969, "loss": 0.2009, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.800957679748535, "rewards/margins": 4.0810322761535645, "rewards/rejected": 0.7199259996414185, "step": 1030 }, { "epoch": 0.47, "learning_rate": 2.8056823947234903e-07, "logits/chosen": -4.355569839477539, "logits/rejected": -4.298050403594971, "logps/chosen": -429.83135986328125, "logps/rejected": -268.84405517578125, "loss": 0.1701, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 5.31143045425415, "rewards/margins": 4.6952900886535645, "rewards/rejected": 0.6161404848098755, "step": 1040 }, { "epoch": 0.48, "learning_rate": 2.800608828006088e-07, "logits/chosen": -4.3409104347229, "logits/rejected": -4.302095413208008, "logps/chosen": -389.56817626953125, "logps/rejected": -276.3115234375, "loss": 0.1637, "rewards/accuracies": 0.9375, "rewards/chosen": 4.556807518005371, "rewards/margins": 3.6151719093322754, "rewards/rejected": 0.9416353106498718, "step": 1050 }, { "epoch": 0.48, "learning_rate": 2.7955352612886856e-07, "logits/chosen": -4.349527835845947, "logits/rejected": -4.30363655090332, "logps/chosen": -389.2037048339844, "logps/rejected": -272.6180114746094, "loss": 0.1923, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 4.910339832305908, "rewards/margins": 3.980623722076416, "rewards/rejected": 0.9297159314155579, "step": 1060 }, { "epoch": 0.49, "learning_rate": 2.7904616945712836e-07, "logits/chosen": -4.328405857086182, "logits/rejected": -4.27632474899292, "logps/chosen": -426.80120849609375, "logps/rejected": -265.5594482421875, "loss": 0.1711, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 4.965883255004883, "rewards/margins": 4.277373790740967, "rewards/rejected": 0.6885095834732056, "step": 1070 }, { "epoch": 0.49, "learning_rate": 2.785388127853881e-07, "logits/chosen": -4.310082912445068, "logits/rejected": -4.253928184509277, "logps/chosen": -406.53546142578125, "logps/rejected": -244.21377563476562, "loss": 0.1802, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.0049567222595215, "rewards/margins": 4.5411553382873535, "rewards/rejected": 0.46380114555358887, "step": 1080 }, { "epoch": 0.5, "learning_rate": 2.780314561136479e-07, "logits/chosen": -4.404196739196777, "logits/rejected": -4.353032112121582, "logps/chosen": -408.02655029296875, "logps/rejected": -281.559326171875, "loss": 0.2169, "rewards/accuracies": 0.9375, "rewards/chosen": 5.284728050231934, "rewards/margins": 4.540365695953369, "rewards/rejected": 0.7443622350692749, "step": 1090 }, { "epoch": 0.5, "learning_rate": 2.7752409944190763e-07, "logits/chosen": -4.4333271980285645, "logits/rejected": -4.377909183502197, "logps/chosen": -409.1141357421875, "logps/rejected": -273.74920654296875, "loss": 0.1771, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.030637264251709, "rewards/margins": 4.559122562408447, "rewards/rejected": 0.4715147912502289, "step": 1100 }, { "epoch": 0.5, "eval_logits/chosen": -4.357529163360596, "eval_logits/rejected": -4.310585021972656, "eval_logps/chosen": -401.895263671875, "eval_logps/rejected": -263.4815673828125, "eval_loss": 0.1879216581583023, "eval_rewards/accuracies": 0.8966480493545532, "eval_rewards/chosen": 5.004257678985596, "eval_rewards/margins": 4.101582050323486, "eval_rewards/rejected": 0.9026751518249512, "eval_runtime": 202.9699, "eval_samples_per_second": 14.101, "eval_steps_per_second": 0.882, "step": 1100 }, { "epoch": 0.51, "learning_rate": 2.770167427701674e-07, "logits/chosen": -4.425228118896484, "logits/rejected": -4.364677429199219, "logps/chosen": -449.88653564453125, "logps/rejected": -280.2008056640625, "loss": 0.1772, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.938275337219238, "rewards/margins": 5.421079158782959, "rewards/rejected": 0.5171957015991211, "step": 1110 }, { "epoch": 0.51, "learning_rate": 2.7650938609842716e-07, "logits/chosen": -4.418746471405029, "logits/rejected": -4.359828472137451, "logps/chosen": -419.6585388183594, "logps/rejected": -265.8406982421875, "loss": 0.1755, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 5.153764724731445, "rewards/margins": 4.379703044891357, "rewards/rejected": 0.7740615606307983, "step": 1120 }, { "epoch": 0.52, "learning_rate": 2.7600202942668696e-07, "logits/chosen": -4.388796806335449, "logits/rejected": -4.328965187072754, "logps/chosen": -429.53472900390625, "logps/rejected": -268.2166442871094, "loss": 0.1601, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.379775047302246, "rewards/margins": 4.793489456176758, "rewards/rejected": 0.586286187171936, "step": 1130 }, { "epoch": 0.52, "learning_rate": 2.754946727549467e-07, "logits/chosen": -4.344653129577637, "logits/rejected": -4.291715621948242, "logps/chosen": -433.8997497558594, "logps/rejected": -294.9100036621094, "loss": 0.1664, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.244935035705566, "rewards/margins": 4.572360038757324, "rewards/rejected": 0.6725754141807556, "step": 1140 }, { "epoch": 0.52, "learning_rate": 2.749873160832065e-07, "logits/chosen": -4.360111713409424, "logits/rejected": -4.299374580383301, "logps/chosen": -408.0146179199219, "logps/rejected": -259.06109619140625, "loss": 0.1616, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.786743640899658, "rewards/margins": 4.564408302307129, "rewards/rejected": 0.22233542799949646, "step": 1150 }, { "epoch": 0.53, "learning_rate": 2.7447995941146623e-07, "logits/chosen": -4.358067989349365, "logits/rejected": -4.302337646484375, "logps/chosen": -426.9207458496094, "logps/rejected": -276.8738098144531, "loss": 0.1719, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 5.184803009033203, "rewards/margins": 4.847653388977051, "rewards/rejected": 0.3371497094631195, "step": 1160 }, { "epoch": 0.53, "learning_rate": 2.73972602739726e-07, "logits/chosen": -4.400092124938965, "logits/rejected": -4.350587844848633, "logps/chosen": -414.6220703125, "logps/rejected": -291.0278625488281, "loss": 0.1686, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 4.59894323348999, "rewards/margins": 3.6411423683166504, "rewards/rejected": 0.957800567150116, "step": 1170 }, { "epoch": 0.54, "learning_rate": 2.7346524606798576e-07, "logits/chosen": -4.355138301849365, "logits/rejected": -4.300421237945557, "logps/chosen": -414.13916015625, "logps/rejected": -279.5627746582031, "loss": 0.1693, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.466294765472412, "rewards/margins": 4.824118614196777, "rewards/rejected": -0.35782328248023987, "step": 1180 }, { "epoch": 0.54, "learning_rate": 2.7295788939624556e-07, "logits/chosen": -4.358715057373047, "logits/rejected": -4.2971296310424805, "logps/chosen": -437.0606384277344, "logps/rejected": -277.45428466796875, "loss": 0.1811, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.279383659362793, "rewards/margins": 4.793443202972412, "rewards/rejected": 0.4859410226345062, "step": 1190 }, { "epoch": 0.55, "learning_rate": 2.724505327245053e-07, "logits/chosen": -4.357456207275391, "logits/rejected": -4.301363945007324, "logps/chosen": -429.9264221191406, "logps/rejected": -271.985107421875, "loss": 0.1758, "rewards/accuracies": 0.9375, "rewards/chosen": 4.671680450439453, "rewards/margins": 4.669971466064453, "rewards/rejected": 0.0017088890308514237, "step": 1200 }, { "epoch": 0.55, "eval_logits/chosen": -4.330428123474121, "eval_logits/rejected": -4.284006118774414, "eval_logps/chosen": -402.4949951171875, "eval_logps/rejected": -264.4362487792969, "eval_loss": 0.1775824874639511, "eval_rewards/accuracies": 0.8994413614273071, "eval_rewards/chosen": 4.7043867111206055, "eval_rewards/margins": 4.2790422439575195, "eval_rewards/rejected": 0.42534491419792175, "eval_runtime": 190.679, "eval_samples_per_second": 15.01, "eval_steps_per_second": 0.939, "step": 1200 }, { "epoch": 0.55, "learning_rate": 2.719431760527651e-07, "logits/chosen": -4.3654069900512695, "logits/rejected": -4.3166913986206055, "logps/chosen": -385.2039794921875, "logps/rejected": -257.00360107421875, "loss": 0.1641, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.539787769317627, "rewards/margins": 3.742908477783203, "rewards/rejected": 0.796879231929779, "step": 1210 }, { "epoch": 0.56, "learning_rate": 2.7143581938102483e-07, "logits/chosen": -4.322198867797852, "logits/rejected": -4.259201526641846, "logps/chosen": -437.3424377441406, "logps/rejected": -267.7229919433594, "loss": 0.1713, "rewards/accuracies": 0.875, "rewards/chosen": 5.0920562744140625, "rewards/margins": 4.569535255432129, "rewards/rejected": 0.522520899772644, "step": 1220 }, { "epoch": 0.56, "learning_rate": 2.709284627092846e-07, "logits/chosen": -4.344841480255127, "logits/rejected": -4.287684440612793, "logps/chosen": -413.35302734375, "logps/rejected": -271.9473876953125, "loss": 0.1514, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.507546901702881, "rewards/margins": 4.733305931091309, "rewards/rejected": -0.22575941681861877, "step": 1230 }, { "epoch": 0.57, "learning_rate": 2.7042110603754436e-07, "logits/chosen": -4.366414546966553, "logits/rejected": -4.301076412200928, "logps/chosen": -440.35003662109375, "logps/rejected": -283.0614318847656, "loss": 0.1612, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.460962772369385, "rewards/margins": 4.724008560180664, "rewards/rejected": 0.7369540929794312, "step": 1240 }, { "epoch": 0.57, "learning_rate": 2.6991374936580416e-07, "logits/chosen": -4.39265775680542, "logits/rejected": -4.3354902267456055, "logps/chosen": -426.74493408203125, "logps/rejected": -284.55267333984375, "loss": 0.1749, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.159322261810303, "rewards/margins": 4.509133815765381, "rewards/rejected": 0.6501884460449219, "step": 1250 }, { "epoch": 0.58, "learning_rate": 2.694063926940639e-07, "logits/chosen": -4.413687229156494, "logits/rejected": -4.353660583496094, "logps/chosen": -437.8505859375, "logps/rejected": -268.86419677734375, "loss": 0.1664, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.07725191116333, "rewards/margins": 4.446320533752441, "rewards/rejected": 0.6309314966201782, "step": 1260 }, { "epoch": 0.58, "learning_rate": 2.688990360223237e-07, "logits/chosen": -4.397888660430908, "logits/rejected": -4.347235202789307, "logps/chosen": -413.70465087890625, "logps/rejected": -284.2705993652344, "loss": 0.174, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.635946750640869, "rewards/margins": 4.467560768127441, "rewards/rejected": 0.16838626563549042, "step": 1270 }, { "epoch": 0.58, "learning_rate": 2.6839167935058343e-07, "logits/chosen": -4.391224384307861, "logits/rejected": -4.344693183898926, "logps/chosen": -384.71435546875, "logps/rejected": -274.98492431640625, "loss": 0.1655, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.82876443862915, "rewards/margins": 4.032337188720703, "rewards/rejected": 0.7964270114898682, "step": 1280 }, { "epoch": 0.59, "learning_rate": 2.678843226788432e-07, "logits/chosen": -4.414493083953857, "logits/rejected": -4.359188079833984, "logps/chosen": -416.6834411621094, "logps/rejected": -290.2602233886719, "loss": 0.1808, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.524363994598389, "rewards/margins": 4.491707801818848, "rewards/rejected": 1.032657265663147, "step": 1290 }, { "epoch": 0.59, "learning_rate": 2.6737696600710296e-07, "logits/chosen": -4.3778395652771, "logits/rejected": -4.313540935516357, "logps/chosen": -433.55572509765625, "logps/rejected": -274.5830993652344, "loss": 0.175, "rewards/accuracies": 0.9375, "rewards/chosen": 5.457139015197754, "rewards/margins": 5.197794437408447, "rewards/rejected": 0.2593439519405365, "step": 1300 }, { "epoch": 0.59, "eval_logits/chosen": -4.372620582580566, "eval_logits/rejected": -4.321435928344727, "eval_logps/chosen": -402.5318603515625, "eval_logps/rejected": -264.57806396484375, "eval_loss": 0.17272613942623138, "eval_rewards/accuracies": 0.910614550113678, "eval_rewards/chosen": 4.685904026031494, "eval_rewards/margins": 4.331483364105225, "eval_rewards/rejected": 0.35442137718200684, "eval_runtime": 265.9703, "eval_samples_per_second": 10.761, "eval_steps_per_second": 0.673, "step": 1300 }, { "epoch": 0.6, "learning_rate": 2.6686960933536276e-07, "logits/chosen": -4.428188323974609, "logits/rejected": -4.381823539733887, "logps/chosen": -390.6050109863281, "logps/rejected": -275.87103271484375, "loss": 0.1743, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.130678653717041, "rewards/margins": 3.8588154315948486, "rewards/rejected": 0.2718631327152252, "step": 1310 }, { "epoch": 0.6, "learning_rate": 2.663622526636225e-07, "logits/chosen": -4.377615451812744, "logits/rejected": -4.337802886962891, "logps/chosen": -384.69659423828125, "logps/rejected": -274.11309814453125, "loss": 0.1895, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.005585193634033, "rewards/margins": 3.9639923572540283, "rewards/rejected": 1.041593074798584, "step": 1320 }, { "epoch": 0.61, "learning_rate": 2.658548959918823e-07, "logits/chosen": -4.41627311706543, "logits/rejected": -4.355051517486572, "logps/chosen": -454.8443298339844, "logps/rejected": -291.37017822265625, "loss": 0.1632, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.983343601226807, "rewards/margins": 5.177704334259033, "rewards/rejected": 0.8056389093399048, "step": 1330 }, { "epoch": 0.61, "learning_rate": 2.6534753932014203e-07, "logits/chosen": -4.3591108322143555, "logits/rejected": -4.305520057678223, "logps/chosen": -397.17547607421875, "logps/rejected": -264.7599792480469, "loss": 0.1599, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 5.010995864868164, "rewards/margins": 4.313994884490967, "rewards/rejected": 0.6970014572143555, "step": 1340 }, { "epoch": 0.62, "learning_rate": 2.648401826484018e-07, "logits/chosen": -4.385621547698975, "logits/rejected": -4.311063766479492, "logps/chosen": -449.44561767578125, "logps/rejected": -270.2893371582031, "loss": 0.175, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.735474586486816, "rewards/margins": 5.07749080657959, "rewards/rejected": 0.6579844355583191, "step": 1350 }, { "epoch": 0.62, "learning_rate": 2.6433282597666156e-07, "logits/chosen": -4.373836517333984, "logits/rejected": -4.306999206542969, "logps/chosen": -431.8389587402344, "logps/rejected": -263.03826904296875, "loss": 0.1623, "rewards/accuracies": 0.9375, "rewards/chosen": 6.083526134490967, "rewards/margins": 5.356041431427002, "rewards/rejected": 0.7274845838546753, "step": 1360 }, { "epoch": 0.63, "learning_rate": 2.6382546930492135e-07, "logits/chosen": -4.411145210266113, "logits/rejected": -4.363010883331299, "logps/chosen": -410.60345458984375, "logps/rejected": -274.31201171875, "loss": 0.188, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.842541217803955, "rewards/margins": 4.711003303527832, "rewards/rejected": 0.13153724372386932, "step": 1370 }, { "epoch": 0.63, "learning_rate": 2.633181126331811e-07, "logits/chosen": -4.467243194580078, "logits/rejected": -4.41967248916626, "logps/chosen": -381.033203125, "logps/rejected": -260.02899169921875, "loss": 0.1739, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 4.5908613204956055, "rewards/margins": 3.9631030559539795, "rewards/rejected": 0.6277579069137573, "step": 1380 }, { "epoch": 0.63, "learning_rate": 2.628107559614409e-07, "logits/chosen": -4.4827117919921875, "logits/rejected": -4.42117166519165, "logps/chosen": -411.9617614746094, "logps/rejected": -267.61981201171875, "loss": 0.2022, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 5.20654821395874, "rewards/margins": 4.773781776428223, "rewards/rejected": 0.4327661991119385, "step": 1390 }, { "epoch": 0.64, "learning_rate": 2.6230339928970063e-07, "logits/chosen": -4.4382524490356445, "logits/rejected": -4.3928327560424805, "logps/chosen": -378.07110595703125, "logps/rejected": -267.78118896484375, "loss": 0.164, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.487858772277832, "rewards/margins": 4.19870138168335, "rewards/rejected": 0.289156436920166, "step": 1400 }, { "epoch": 0.64, "eval_logits/chosen": -4.371476650238037, "eval_logits/rejected": -4.319361686706543, "eval_logps/chosen": -402.01519775390625, "eval_logps/rejected": -264.3507995605469, "eval_loss": 0.17241604626178741, "eval_rewards/accuracies": 0.9078212380409241, "eval_rewards/chosen": 4.9442901611328125, "eval_rewards/margins": 4.476222038269043, "eval_rewards/rejected": 0.46806800365448, "eval_runtime": 247.0499, "eval_samples_per_second": 11.585, "eval_steps_per_second": 0.725, "step": 1400 }, { "epoch": 0.64, "learning_rate": 2.617960426179604e-07, "logits/chosen": -4.417299270629883, "logits/rejected": -4.354534149169922, "logps/chosen": -440.31182861328125, "logps/rejected": -285.1217956542969, "loss": 0.1752, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.238030910491943, "rewards/margins": 4.515671730041504, "rewards/rejected": 0.7223596572875977, "step": 1410 }, { "epoch": 0.65, "learning_rate": 2.6128868594622016e-07, "logits/chosen": -4.422516822814941, "logits/rejected": -4.371415615081787, "logps/chosen": -372.64080810546875, "logps/rejected": -256.39874267578125, "loss": 0.1802, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.572682857513428, "rewards/margins": 3.9749386310577393, "rewards/rejected": 0.597744345664978, "step": 1420 }, { "epoch": 0.65, "learning_rate": 2.6078132927447995e-07, "logits/chosen": -4.403166770935059, "logits/rejected": -4.348055839538574, "logps/chosen": -415.2976989746094, "logps/rejected": -279.9350280761719, "loss": 0.1445, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.567896366119385, "rewards/margins": 4.9451727867126465, "rewards/rejected": 0.6227229833602905, "step": 1430 }, { "epoch": 0.66, "learning_rate": 2.602739726027397e-07, "logits/chosen": -4.452329158782959, "logits/rejected": -4.381457328796387, "logps/chosen": -426.6133728027344, "logps/rejected": -264.4897155761719, "loss": 0.1579, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.08850622177124, "rewards/margins": 5.064419746398926, "rewards/rejected": 0.02408733405172825, "step": 1440 }, { "epoch": 0.66, "learning_rate": 2.597666159309995e-07, "logits/chosen": -4.495135307312012, "logits/rejected": -4.431539535522461, "logps/chosen": -416.81353759765625, "logps/rejected": -266.808837890625, "loss": 0.1812, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.971259117126465, "rewards/margins": 4.354701042175293, "rewards/rejected": 0.6165581345558167, "step": 1450 }, { "epoch": 0.67, "learning_rate": 2.5925925925925923e-07, "logits/chosen": -4.526634216308594, "logits/rejected": -4.458277702331543, "logps/chosen": -426.47216796875, "logps/rejected": -271.3341064453125, "loss": 0.1588, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.458561420440674, "rewards/margins": 4.924861431121826, "rewards/rejected": 0.5336992740631104, "step": 1460 }, { "epoch": 0.67, "learning_rate": 2.58751902587519e-07, "logits/chosen": -4.48293924331665, "logits/rejected": -4.405966281890869, "logps/chosen": -425.5380859375, "logps/rejected": -261.43084716796875, "loss": 0.1786, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.525833606719971, "rewards/margins": 5.293367862701416, "rewards/rejected": 0.23246555030345917, "step": 1470 }, { "epoch": 0.68, "learning_rate": 2.5824454591577876e-07, "logits/chosen": -4.487465858459473, "logits/rejected": -4.420768737792969, "logps/chosen": -392.27447509765625, "logps/rejected": -254.05252075195312, "loss": 0.1786, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.2108893394470215, "rewards/margins": 4.667431831359863, "rewards/rejected": 0.5434574484825134, "step": 1480 }, { "epoch": 0.68, "learning_rate": 2.5773718924403855e-07, "logits/chosen": -4.463377952575684, "logits/rejected": -4.402941703796387, "logps/chosen": -396.9203186035156, "logps/rejected": -258.8115234375, "loss": 0.1699, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.88924503326416, "rewards/margins": 4.606551170349121, "rewards/rejected": 0.282693475484848, "step": 1490 }, { "epoch": 0.68, "learning_rate": 2.572298325722983e-07, "logits/chosen": -4.433585166931152, "logits/rejected": -4.365602970123291, "logps/chosen": -436.4053649902344, "logps/rejected": -277.13641357421875, "loss": 0.1452, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.6584978103637695, "rewards/margins": 4.772089958190918, "rewards/rejected": -0.11359219253063202, "step": 1500 }, { "epoch": 0.68, "eval_logits/chosen": -4.415165424346924, "eval_logits/rejected": -4.355190277099609, "eval_logps/chosen": -402.333740234375, "eval_logps/rejected": -264.8377990722656, "eval_loss": 0.1732957661151886, "eval_rewards/accuracies": 0.8994413614273071, "eval_rewards/chosen": 4.785008430480957, "eval_rewards/margins": 4.56046724319458, "eval_rewards/rejected": 0.2245408445596695, "eval_runtime": 239.0872, "eval_samples_per_second": 11.971, "eval_steps_per_second": 0.749, "step": 1500 }, { "epoch": 0.69, "learning_rate": 2.567224759005581e-07, "logits/chosen": -4.438943386077881, "logits/rejected": -4.3645920753479, "logps/chosen": -428.20086669921875, "logps/rejected": -256.2311096191406, "loss": 0.1729, "rewards/accuracies": 0.9375, "rewards/chosen": 5.720515251159668, "rewards/margins": 5.347445487976074, "rewards/rejected": 0.3730693459510803, "step": 1510 }, { "epoch": 0.69, "learning_rate": 2.5621511922881783e-07, "logits/chosen": -4.472426891326904, "logits/rejected": -4.394861698150635, "logps/chosen": -415.8504333496094, "logps/rejected": -268.88458251953125, "loss": 0.1503, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.499445915222168, "rewards/margins": 5.302393913269043, "rewards/rejected": 0.1970522403717041, "step": 1520 }, { "epoch": 0.7, "learning_rate": 2.557077625570776e-07, "logits/chosen": -4.462099552154541, "logits/rejected": -4.411559104919434, "logps/chosen": -402.39837646484375, "logps/rejected": -282.028564453125, "loss": 0.1859, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.286023139953613, "rewards/margins": 4.159365653991699, "rewards/rejected": 0.12665767967700958, "step": 1530 }, { "epoch": 0.7, "learning_rate": 2.5520040588533736e-07, "logits/chosen": -4.456170558929443, "logits/rejected": -4.384518623352051, "logps/chosen": -444.78631591796875, "logps/rejected": -269.0514221191406, "loss": 0.1769, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.811339855194092, "rewards/margins": 5.570575714111328, "rewards/rejected": 0.24076423048973083, "step": 1540 }, { "epoch": 0.71, "learning_rate": 2.5469304921359715e-07, "logits/chosen": -4.466643810272217, "logits/rejected": -4.4056196212768555, "logps/chosen": -409.94793701171875, "logps/rejected": -273.4974670410156, "loss": 0.1708, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.865522861480713, "rewards/margins": 4.546541690826416, "rewards/rejected": 0.31898126006126404, "step": 1550 }, { "epoch": 0.71, "learning_rate": 2.541856925418569e-07, "logits/chosen": -4.524622440338135, "logits/rejected": -4.467017650604248, "logps/chosen": -403.84454345703125, "logps/rejected": -272.0582275390625, "loss": 0.1925, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 4.279751777648926, "rewards/margins": 4.330625534057617, "rewards/rejected": -0.05087399482727051, "step": 1560 }, { "epoch": 0.72, "learning_rate": 2.536783358701167e-07, "logits/chosen": -4.524371147155762, "logits/rejected": -4.4550395011901855, "logps/chosen": -430.81121826171875, "logps/rejected": -272.2115783691406, "loss": 0.2107, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 5.127749443054199, "rewards/margins": 4.673842430114746, "rewards/rejected": 0.45390695333480835, "step": 1570 }, { "epoch": 0.72, "learning_rate": 2.5317097919837643e-07, "logits/chosen": -4.486627101898193, "logits/rejected": -4.430063724517822, "logps/chosen": -411.3312072753906, "logps/rejected": -286.0501403808594, "loss": 0.1733, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.149035453796387, "rewards/margins": 3.866406202316284, "rewards/rejected": 0.28262948989868164, "step": 1580 }, { "epoch": 0.73, "learning_rate": 2.526636225266362e-07, "logits/chosen": -4.489851951599121, "logits/rejected": -4.416788578033447, "logps/chosen": -425.87493896484375, "logps/rejected": -271.1956787109375, "loss": 0.164, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.201760292053223, "rewards/margins": 4.812933444976807, "rewards/rejected": 0.38882723450660706, "step": 1590 }, { "epoch": 0.73, "learning_rate": 2.5215626585489596e-07, "logits/chosen": -4.441145420074463, "logits/rejected": -4.387523174285889, "logps/chosen": -387.686767578125, "logps/rejected": -259.6787109375, "loss": 0.1607, "rewards/accuracies": 0.9375, "rewards/chosen": 4.6412553787231445, "rewards/margins": 4.38083553314209, "rewards/rejected": 0.26041942834854126, "step": 1600 }, { "epoch": 0.73, "eval_logits/chosen": -4.383084297180176, "eval_logits/rejected": -4.334245681762695, "eval_logps/chosen": -402.24835205078125, "eval_logps/rejected": -264.7025146484375, "eval_loss": 0.18377768993377686, "eval_rewards/accuracies": 0.9134078025817871, "eval_rewards/chosen": 4.827717304229736, "eval_rewards/margins": 4.535507678985596, "eval_rewards/rejected": 0.2922096252441406, "eval_runtime": 238.4477, "eval_samples_per_second": 12.003, "eval_steps_per_second": 0.751, "step": 1600 }, { "epoch": 0.73, "learning_rate": 2.5164890918315575e-07, "logits/chosen": -4.483403205871582, "logits/rejected": -4.4213738441467285, "logps/chosen": -420.90740966796875, "logps/rejected": -278.93707275390625, "loss": 0.1872, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.419531345367432, "rewards/margins": 4.174765110015869, "rewards/rejected": 0.24476651847362518, "step": 1610 }, { "epoch": 0.74, "learning_rate": 2.511415525114155e-07, "logits/chosen": -4.517584323883057, "logits/rejected": -4.463850021362305, "logps/chosen": -391.39813232421875, "logps/rejected": -263.4396667480469, "loss": 0.1658, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.772282123565674, "rewards/margins": 4.309974670410156, "rewards/rejected": 0.46230775117874146, "step": 1620 }, { "epoch": 0.74, "learning_rate": 2.506341958396753e-07, "logits/chosen": -4.511186122894287, "logits/rejected": -4.451176643371582, "logps/chosen": -410.41888427734375, "logps/rejected": -267.0570373535156, "loss": 0.1715, "rewards/accuracies": 0.875, "rewards/chosen": 4.733063697814941, "rewards/margins": 4.703683376312256, "rewards/rejected": 0.029380034655332565, "step": 1630 }, { "epoch": 0.75, "learning_rate": 2.5012683916793503e-07, "logits/chosen": -4.479667663574219, "logits/rejected": -4.4175286293029785, "logps/chosen": -440.5326232910156, "logps/rejected": -266.63848876953125, "loss": 0.1534, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 6.132332801818848, "rewards/margins": 6.0439958572387695, "rewards/rejected": 0.08833713829517365, "step": 1640 }, { "epoch": 0.75, "learning_rate": 2.496194824961948e-07, "logits/chosen": -4.488329887390137, "logits/rejected": -4.434225559234619, "logps/chosen": -411.2500915527344, "logps/rejected": -280.1176452636719, "loss": 0.1527, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.518690586090088, "rewards/margins": 4.6796159744262695, "rewards/rejected": -0.1609257161617279, "step": 1650 }, { "epoch": 0.76, "learning_rate": 2.4911212582445456e-07, "logits/chosen": -4.452143669128418, "logits/rejected": -4.4000349044799805, "logps/chosen": -404.4725341796875, "logps/rejected": -276.4758605957031, "loss": 0.1534, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.304643630981445, "rewards/margins": 4.80792236328125, "rewards/rejected": 0.496721088886261, "step": 1660 }, { "epoch": 0.76, "learning_rate": 2.4860476915271435e-07, "logits/chosen": -4.443146705627441, "logits/rejected": -4.386309623718262, "logps/chosen": -432.103759765625, "logps/rejected": -285.84368896484375, "loss": 0.1699, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.531009197235107, "rewards/margins": 5.0866312980651855, "rewards/rejected": 0.4443773329257965, "step": 1670 }, { "epoch": 0.77, "learning_rate": 2.480974124809741e-07, "logits/chosen": -4.509986877441406, "logits/rejected": -4.432333946228027, "logps/chosen": -464.96014404296875, "logps/rejected": -283.7665100097656, "loss": 0.16, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.337653160095215, "rewards/margins": 5.344212532043457, "rewards/rejected": -0.006559705827385187, "step": 1680 }, { "epoch": 0.77, "learning_rate": 2.475900558092339e-07, "logits/chosen": -4.537201404571533, "logits/rejected": -4.469505310058594, "logps/chosen": -403.14251708984375, "logps/rejected": -258.81890869140625, "loss": 0.1944, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.148838520050049, "rewards/margins": 4.981331825256348, "rewards/rejected": 0.16750702261924744, "step": 1690 }, { "epoch": 0.78, "learning_rate": 2.4708269913749363e-07, "logits/chosen": -4.514194011688232, "logits/rejected": -4.446475982666016, "logps/chosen": -424.048583984375, "logps/rejected": -275.9847717285156, "loss": 0.1611, "rewards/accuracies": 0.9375, "rewards/chosen": 4.838525295257568, "rewards/margins": 4.840458869934082, "rewards/rejected": -0.0019339561695232987, "step": 1700 }, { "epoch": 0.78, "eval_logits/chosen": -4.483520030975342, "eval_logits/rejected": -4.427080154418945, "eval_logps/chosen": -402.70037841796875, "eval_logps/rejected": -265.12188720703125, "eval_loss": 0.17200621962547302, "eval_rewards/accuracies": 0.9078212380409241, "eval_rewards/chosen": 4.601683139801025, "eval_rewards/margins": 4.5191650390625, "eval_rewards/rejected": 0.082518070936203, "eval_runtime": 215.2625, "eval_samples_per_second": 13.295, "eval_steps_per_second": 0.832, "step": 1700 }, { "epoch": 0.78, "learning_rate": 2.465753424657534e-07, "logits/chosen": -4.571104049682617, "logits/rejected": -4.506728172302246, "logps/chosen": -403.62615966796875, "logps/rejected": -268.719482421875, "loss": 0.1626, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.759049415588379, "rewards/margins": 4.908803939819336, "rewards/rejected": -0.1497546136379242, "step": 1710 }, { "epoch": 0.79, "learning_rate": 2.4606798579401316e-07, "logits/chosen": -4.485417366027832, "logits/rejected": -4.420697212219238, "logps/chosen": -422.61700439453125, "logps/rejected": -269.18450927734375, "loss": 0.161, "rewards/accuracies": 0.9375, "rewards/chosen": 5.163121223449707, "rewards/margins": 4.9524126052856445, "rewards/rejected": 0.21070890128612518, "step": 1720 }, { "epoch": 0.79, "learning_rate": 2.4556062912227295e-07, "logits/chosen": -4.475164890289307, "logits/rejected": -4.403307914733887, "logps/chosen": -417.5039978027344, "logps/rejected": -251.69482421875, "loss": 0.1765, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.525936126708984, "rewards/margins": 4.733728885650635, "rewards/rejected": -0.2077922821044922, "step": 1730 }, { "epoch": 0.79, "learning_rate": 2.450532724505327e-07, "logits/chosen": -4.4733476638793945, "logits/rejected": -4.400755405426025, "logps/chosen": -430.858154296875, "logps/rejected": -262.5831298828125, "loss": 0.1614, "rewards/accuracies": 0.9375, "rewards/chosen": 5.3758955001831055, "rewards/margins": 4.953577995300293, "rewards/rejected": 0.4223175048828125, "step": 1740 }, { "epoch": 0.8, "learning_rate": 2.445459157787925e-07, "logits/chosen": -4.408218860626221, "logits/rejected": -4.343362808227539, "logps/chosen": -424.46875, "logps/rejected": -262.0726623535156, "loss": 0.1583, "rewards/accuracies": 0.9375, "rewards/chosen": 5.523038387298584, "rewards/margins": 5.293150901794434, "rewards/rejected": 0.22988729178905487, "step": 1750 }, { "epoch": 0.8, "learning_rate": 2.4403855910705223e-07, "logits/chosen": -4.511113166809082, "logits/rejected": -4.455735206604004, "logps/chosen": -396.0439758300781, "logps/rejected": -270.16357421875, "loss": 0.1956, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 4.647899150848389, "rewards/margins": 4.655453681945801, "rewards/rejected": -0.007554483599960804, "step": 1760 }, { "epoch": 0.81, "learning_rate": 2.43531202435312e-07, "logits/chosen": -4.547673225402832, "logits/rejected": -4.467702388763428, "logps/chosen": -440.41357421875, "logps/rejected": -260.62005615234375, "loss": 0.1581, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 5.400877952575684, "rewards/margins": 5.202000617980957, "rewards/rejected": 0.19887714087963104, "step": 1770 }, { "epoch": 0.81, "learning_rate": 2.4302384576357176e-07, "logits/chosen": -4.567178726196289, "logits/rejected": -4.5145344734191895, "logps/chosen": -415.9549255371094, "logps/rejected": -300.63592529296875, "loss": 0.1629, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 5.006296634674072, "rewards/margins": 4.788304328918457, "rewards/rejected": 0.21799302101135254, "step": 1780 }, { "epoch": 0.82, "learning_rate": 2.4251648909183155e-07, "logits/chosen": -4.540877342224121, "logits/rejected": -4.479895114898682, "logps/chosen": -423.14056396484375, "logps/rejected": -282.12872314453125, "loss": 0.1778, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.216141700744629, "rewards/margins": 5.2195658683776855, "rewards/rejected": -0.003423547837883234, "step": 1790 }, { "epoch": 0.82, "learning_rate": 2.420091324200913e-07, "logits/chosen": -4.533786773681641, "logits/rejected": -4.484832286834717, "logps/chosen": -410.8390197753906, "logps/rejected": -301.4473571777344, "loss": 0.1895, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 4.642655372619629, "rewards/margins": 4.6054887771606445, "rewards/rejected": 0.03716700151562691, "step": 1800 }, { "epoch": 0.82, "eval_logits/chosen": -4.443898677825928, "eval_logits/rejected": -4.392253875732422, "eval_logps/chosen": -402.24493408203125, "eval_logps/rejected": -264.771484375, "eval_loss": 0.17237447202205658, "eval_rewards/accuracies": 0.916201114654541, "eval_rewards/chosen": 4.829391956329346, "eval_rewards/margins": 4.571688175201416, "eval_rewards/rejected": 0.25770482420921326, "eval_runtime": 255.9459, "eval_samples_per_second": 11.182, "eval_steps_per_second": 0.699, "step": 1800 }, { "epoch": 0.83, "learning_rate": 2.415017757483511e-07, "logits/chosen": -4.498022556304932, "logits/rejected": -4.438601016998291, "logps/chosen": -415.36181640625, "logps/rejected": -275.1888732910156, "loss": 0.1609, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.908847332000732, "rewards/margins": 5.0465192794799805, "rewards/rejected": -0.1376717984676361, "step": 1810 }, { "epoch": 0.83, "learning_rate": 2.409944190766108e-07, "logits/chosen": -4.541338920593262, "logits/rejected": -4.472410678863525, "logps/chosen": -421.87054443359375, "logps/rejected": -282.64044189453125, "loss": 0.1779, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 5.11863374710083, "rewards/margins": 4.5419697761535645, "rewards/rejected": 0.576663613319397, "step": 1820 }, { "epoch": 0.84, "learning_rate": 2.404870624048706e-07, "logits/chosen": -4.589247703552246, "logits/rejected": -4.529891490936279, "logps/chosen": -410.45721435546875, "logps/rejected": -277.8152770996094, "loss": 0.1839, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.644155502319336, "rewards/margins": 5.010619163513184, "rewards/rejected": 0.6335360407829285, "step": 1830 }, { "epoch": 0.84, "learning_rate": 2.3997970573313036e-07, "logits/chosen": -4.593600273132324, "logits/rejected": -4.530243873596191, "logps/chosen": -410.0294494628906, "logps/rejected": -261.29412841796875, "loss": 0.1643, "rewards/accuracies": 0.9375, "rewards/chosen": 4.880810737609863, "rewards/margins": 4.5101704597473145, "rewards/rejected": 0.37064018845558167, "step": 1840 }, { "epoch": 0.84, "learning_rate": 2.3947234906139015e-07, "logits/chosen": -4.615656852722168, "logits/rejected": -4.543457508087158, "logps/chosen": -431.28375244140625, "logps/rejected": -276.77728271484375, "loss": 0.154, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 6.0550031661987305, "rewards/margins": 5.649577617645264, "rewards/rejected": 0.4054257869720459, "step": 1850 }, { "epoch": 0.85, "learning_rate": 2.389649923896499e-07, "logits/chosen": -4.628702640533447, "logits/rejected": -4.573164463043213, "logps/chosen": -402.3510437011719, "logps/rejected": -266.1717224121094, "loss": 0.1619, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.843657493591309, "rewards/margins": 4.895514488220215, "rewards/rejected": -0.05185704305768013, "step": 1860 }, { "epoch": 0.85, "learning_rate": 2.384576357179097e-07, "logits/chosen": -4.591099739074707, "logits/rejected": -4.5256171226501465, "logps/chosen": -416.304443359375, "logps/rejected": -277.9515380859375, "loss": 0.165, "rewards/accuracies": 0.875, "rewards/chosen": 5.372960567474365, "rewards/margins": 5.2500433921813965, "rewards/rejected": 0.12291707843542099, "step": 1870 }, { "epoch": 0.86, "learning_rate": 2.3795027904616943e-07, "logits/chosen": -4.632084369659424, "logits/rejected": -4.562806129455566, "logps/chosen": -406.490478515625, "logps/rejected": -266.46075439453125, "loss": 0.1784, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 4.202516078948975, "rewards/margins": 4.384525775909424, "rewards/rejected": -0.18200913071632385, "step": 1880 }, { "epoch": 0.86, "learning_rate": 2.374429223744292e-07, "logits/chosen": -4.595376491546631, "logits/rejected": -4.541309833526611, "logps/chosen": -406.84735107421875, "logps/rejected": -273.3302001953125, "loss": 0.1931, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 4.958785533905029, "rewards/margins": 4.527499198913574, "rewards/rejected": 0.43128663301467896, "step": 1890 }, { "epoch": 0.87, "learning_rate": 2.3693556570268896e-07, "logits/chosen": -4.542021751403809, "logits/rejected": -4.487260341644287, "logps/chosen": -403.89459228515625, "logps/rejected": -285.2601013183594, "loss": 0.1553, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 4.689157485961914, "rewards/margins": 4.529534339904785, "rewards/rejected": 0.15962281823158264, "step": 1900 }, { "epoch": 0.87, "eval_logits/chosen": -4.490891933441162, "eval_logits/rejected": -4.440150260925293, "eval_logps/chosen": -401.9498596191406, "eval_logps/rejected": -264.7819519042969, "eval_loss": 0.1675945371389389, "eval_rewards/accuracies": 0.910614550113678, "eval_rewards/chosen": 4.976942539215088, "eval_rewards/margins": 4.724472522735596, "eval_rewards/rejected": 0.25247013568878174, "eval_runtime": 461.2949, "eval_samples_per_second": 6.204, "eval_steps_per_second": 0.388, "step": 1900 }, { "epoch": 0.87, "learning_rate": 2.3642820903094873e-07, "logits/chosen": -4.5298051834106445, "logits/rejected": -4.458852291107178, "logps/chosen": -455.91571044921875, "logps/rejected": -284.51123046875, "loss": 0.1362, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.9968414306640625, "rewards/margins": 5.480269432067871, "rewards/rejected": 0.5165713429450989, "step": 1910 }, { "epoch": 0.88, "learning_rate": 2.359208523592085e-07, "logits/chosen": -4.508334159851074, "logits/rejected": -4.451658725738525, "logps/chosen": -398.7845458984375, "logps/rejected": -265.28802490234375, "loss": 0.1642, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.730782508850098, "rewards/margins": 4.622853755950928, "rewards/rejected": 0.10792865604162216, "step": 1920 }, { "epoch": 0.88, "learning_rate": 2.3541349568746826e-07, "logits/chosen": -4.588617324829102, "logits/rejected": -4.518343925476074, "logps/chosen": -431.79962158203125, "logps/rejected": -258.20037841796875, "loss": 0.1638, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.54833984375, "rewards/margins": 5.038142204284668, "rewards/rejected": -0.48980244994163513, "step": 1930 }, { "epoch": 0.89, "learning_rate": 2.3490613901572803e-07, "logits/chosen": -4.567276954650879, "logits/rejected": -4.52529764175415, "logps/chosen": -384.0699462890625, "logps/rejected": -273.1277160644531, "loss": 0.1749, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.106245994567871, "rewards/margins": 4.154184341430664, "rewards/rejected": -0.04793829843401909, "step": 1940 }, { "epoch": 0.89, "learning_rate": 2.343987823439878e-07, "logits/chosen": -4.515659809112549, "logits/rejected": -4.458474159240723, "logps/chosen": -409.35174560546875, "logps/rejected": -264.0165100097656, "loss": 0.1508, "rewards/accuracies": 0.9375, "rewards/chosen": 4.799483299255371, "rewards/margins": 5.166934013366699, "rewards/rejected": -0.367450475692749, "step": 1950 }, { "epoch": 0.89, "learning_rate": 2.3389142567224756e-07, "logits/chosen": -4.502835273742676, "logits/rejected": -4.460237979888916, "logps/chosen": -413.63909912109375, "logps/rejected": -300.35064697265625, "loss": 0.1488, "rewards/accuracies": 0.9375, "rewards/chosen": 4.283700942993164, "rewards/margins": 4.694336891174316, "rewards/rejected": -0.410635769367218, "step": 1960 }, { "epoch": 0.9, "learning_rate": 2.3338406900050733e-07, "logits/chosen": -4.567854881286621, "logits/rejected": -4.503543853759766, "logps/chosen": -440.4073181152344, "logps/rejected": -280.6188049316406, "loss": 0.1745, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.4340410232543945, "rewards/margins": 5.190110683441162, "rewards/rejected": 0.2439301460981369, "step": 1970 }, { "epoch": 0.9, "learning_rate": 2.328767123287671e-07, "logits/chosen": -4.612586975097656, "logits/rejected": -4.550424098968506, "logps/chosen": -433.07672119140625, "logps/rejected": -278.91961669921875, "loss": 0.1612, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.997767448425293, "rewards/margins": 5.415538787841797, "rewards/rejected": -0.41777148842811584, "step": 1980 }, { "epoch": 0.91, "learning_rate": 2.3236935565702686e-07, "logits/chosen": -4.667189121246338, "logits/rejected": -4.601871490478516, "logps/chosen": -426.22174072265625, "logps/rejected": -280.7257995605469, "loss": 0.1749, "rewards/accuracies": 0.9375, "rewards/chosen": 5.314779758453369, "rewards/margins": 5.056300163269043, "rewards/rejected": 0.2584792971611023, "step": 1990 }, { "epoch": 0.91, "learning_rate": 2.3186199898528663e-07, "logits/chosen": -4.63146448135376, "logits/rejected": -4.578326225280762, "logps/chosen": -419.3214416503906, "logps/rejected": -288.25311279296875, "loss": 0.1555, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 4.800443172454834, "rewards/margins": 4.3753662109375, "rewards/rejected": 0.4250775873661041, "step": 2000 }, { "epoch": 0.91, "eval_logits/chosen": -4.542140007019043, "eval_logits/rejected": -4.489096641540527, "eval_logps/chosen": -402.5832824707031, "eval_logps/rejected": -265.29669189453125, "eval_loss": 0.16511675715446472, "eval_rewards/accuracies": 0.9134078025817871, "eval_rewards/chosen": 4.660248756408691, "eval_rewards/margins": 4.66511869430542, "eval_rewards/rejected": -0.004871038254350424, "eval_runtime": 361.8885, "eval_samples_per_second": 7.909, "eval_steps_per_second": 0.495, "step": 2000 }, { "epoch": 0.92, "learning_rate": 2.313546423135464e-07, "logits/chosen": -4.590828895568848, "logits/rejected": -4.525210380554199, "logps/chosen": -428.13128662109375, "logps/rejected": -272.3417663574219, "loss": 0.1577, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.846682071685791, "rewards/margins": 4.844321250915527, "rewards/rejected": 0.002360606100410223, "step": 2010 }, { "epoch": 0.92, "learning_rate": 2.3084728564180616e-07, "logits/chosen": -4.58351469039917, "logits/rejected": -4.536841869354248, "logps/chosen": -385.2735900878906, "logps/rejected": -276.7143859863281, "loss": 0.1772, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.063285827636719, "rewards/margins": 4.793701648712158, "rewards/rejected": 0.26958388090133667, "step": 2020 }, { "epoch": 0.93, "learning_rate": 2.3033992897006593e-07, "logits/chosen": -4.553132057189941, "logits/rejected": -4.48330545425415, "logps/chosen": -439.46551513671875, "logps/rejected": -271.4604187011719, "loss": 0.1604, "rewards/accuracies": 0.9375, "rewards/chosen": 5.261726379394531, "rewards/margins": 4.909426689147949, "rewards/rejected": 0.3522997498512268, "step": 2030 }, { "epoch": 0.93, "learning_rate": 2.298325722983257e-07, "logits/chosen": -4.569801330566406, "logits/rejected": -4.494760036468506, "logps/chosen": -444.6923828125, "logps/rejected": -272.12054443359375, "loss": 0.1747, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.73537540435791, "rewards/margins": 5.877743721008301, "rewards/rejected": -0.1423684060573578, "step": 2040 }, { "epoch": 0.94, "learning_rate": 2.2932521562658546e-07, "logits/chosen": -4.589688301086426, "logits/rejected": -4.519567966461182, "logps/chosen": -420.65673828125, "logps/rejected": -262.73187255859375, "loss": 0.1597, "rewards/accuracies": 0.9375, "rewards/chosen": 5.032346725463867, "rewards/margins": 5.011034965515137, "rewards/rejected": 0.021311331540346146, "step": 2050 }, { "epoch": 0.94, "learning_rate": 2.2881785895484523e-07, "logits/chosen": -4.565917015075684, "logits/rejected": -4.504647731781006, "logps/chosen": -418.061279296875, "logps/rejected": -275.18035888671875, "loss": 0.1567, "rewards/accuracies": 0.9375, "rewards/chosen": 4.764811038970947, "rewards/margins": 4.495087623596191, "rewards/rejected": 0.2697228789329529, "step": 2060 }, { "epoch": 0.94, "learning_rate": 2.28310502283105e-07, "logits/chosen": -4.583676338195801, "logits/rejected": -4.529091835021973, "logps/chosen": -407.1875305175781, "logps/rejected": -260.7109375, "loss": 0.1593, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.689192295074463, "rewards/margins": 5.170809745788574, "rewards/rejected": -0.48161691427230835, "step": 2070 }, { "epoch": 0.95, "learning_rate": 2.2780314561136476e-07, "logits/chosen": -4.58168888092041, "logits/rejected": -4.523324012756348, "logps/chosen": -417.3694763183594, "logps/rejected": -271.89019775390625, "loss": 0.1435, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 4.5714521408081055, "rewards/margins": 4.729221343994141, "rewards/rejected": -0.15776872634887695, "step": 2080 }, { "epoch": 0.95, "learning_rate": 2.2729578893962453e-07, "logits/chosen": -4.609949588775635, "logits/rejected": -4.5528564453125, "logps/chosen": -392.3988342285156, "logps/rejected": -274.916748046875, "loss": 0.1681, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.082572937011719, "rewards/margins": 4.938451290130615, "rewards/rejected": 0.1441211700439453, "step": 2090 }, { "epoch": 0.96, "learning_rate": 2.267884322678843e-07, "logits/chosen": -4.615639686584473, "logits/rejected": -4.5505242347717285, "logps/chosen": -401.02545166015625, "logps/rejected": -252.4737548828125, "loss": 0.1583, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.75829553604126, "rewards/margins": 4.711214542388916, "rewards/rejected": 0.04708065837621689, "step": 2100 }, { "epoch": 0.96, "eval_logits/chosen": -4.546866416931152, "eval_logits/rejected": -4.48971700668335, "eval_logps/chosen": -401.98944091796875, "eval_logps/rejected": -265.0967712402344, "eval_loss": 0.1643914133310318, "eval_rewards/accuracies": 0.9134078025817871, "eval_rewards/chosen": 4.95715856552124, "eval_rewards/margins": 4.862085819244385, "eval_rewards/rejected": 0.09507280588150024, "eval_runtime": 276.1478, "eval_samples_per_second": 10.364, "eval_steps_per_second": 0.648, "step": 2100 }, { "epoch": 0.96, "learning_rate": 2.2628107559614406e-07, "logits/chosen": -4.589270114898682, "logits/rejected": -4.544071674346924, "logps/chosen": -388.0083312988281, "logps/rejected": -288.11724853515625, "loss": 0.1564, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 4.566590785980225, "rewards/margins": 3.916243076324463, "rewards/rejected": 0.6503478288650513, "step": 2110 }, { "epoch": 0.97, "learning_rate": 2.2577371892440383e-07, "logits/chosen": -4.630921363830566, "logits/rejected": -4.567738056182861, "logps/chosen": -417.51959228515625, "logps/rejected": -282.10760498046875, "loss": 0.1652, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 4.728931427001953, "rewards/margins": 4.830723762512207, "rewards/rejected": -0.10179267078638077, "step": 2120 }, { "epoch": 0.97, "learning_rate": 2.252663622526636e-07, "logits/chosen": -4.633877754211426, "logits/rejected": -4.56465482711792, "logps/chosen": -418.76556396484375, "logps/rejected": -278.3013000488281, "loss": 0.1716, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 5.111180305480957, "rewards/margins": 5.085168361663818, "rewards/rejected": 0.026010990142822266, "step": 2130 }, { "epoch": 0.98, "learning_rate": 2.2475900558092336e-07, "logits/chosen": -4.60957670211792, "logits/rejected": -4.547099590301514, "logps/chosen": -413.037109375, "logps/rejected": -274.35003662109375, "loss": 0.178, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.22531270980835, "rewards/margins": 5.203864097595215, "rewards/rejected": 0.02144799195230007, "step": 2140 }, { "epoch": 0.98, "learning_rate": 2.2425164890918313e-07, "logits/chosen": -4.59743595123291, "logits/rejected": -4.523187160491943, "logps/chosen": -468.9046325683594, "logps/rejected": -286.56927490234375, "loss": 0.161, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.846278667449951, "rewards/margins": 5.291247367858887, "rewards/rejected": 0.555031418800354, "step": 2150 }, { "epoch": 0.99, "learning_rate": 2.237442922374429e-07, "logits/chosen": -4.62969446182251, "logits/rejected": -4.566431999206543, "logps/chosen": -407.6806335449219, "logps/rejected": -266.2337951660156, "loss": 0.16, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.681333065032959, "rewards/margins": 4.356958389282227, "rewards/rejected": 0.32437554001808167, "step": 2160 }, { "epoch": 0.99, "learning_rate": 2.2323693556570266e-07, "logits/chosen": -4.585515975952148, "logits/rejected": -4.515237331390381, "logps/chosen": -457.56878662109375, "logps/rejected": -284.44854736328125, "loss": 0.1536, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 5.8800458908081055, "rewards/margins": 5.733695983886719, "rewards/rejected": 0.1463499516248703, "step": 2170 }, { "epoch": 1.0, "learning_rate": 2.2272957889396242e-07, "logits/chosen": -4.6434645652771, "logits/rejected": -4.579050540924072, "logps/chosen": -411.5159606933594, "logps/rejected": -269.6261291503906, "loss": 0.1621, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.417605400085449, "rewards/margins": 5.1453704833984375, "rewards/rejected": 0.2722352147102356, "step": 2180 }, { "epoch": 1.0, "learning_rate": 2.222222222222222e-07, "logits/chosen": -4.716753959655762, "logits/rejected": -4.65456485748291, "logps/chosen": -403.6361083984375, "logps/rejected": -266.50128173828125, "loss": 0.1442, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.8592848777771, "rewards/margins": 4.6472063064575195, "rewards/rejected": 0.21207837760448456, "step": 2190 }, { "epoch": 1.0, "learning_rate": 2.2171486555048196e-07, "logits/chosen": -4.707067489624023, "logits/rejected": -4.635984897613525, "logps/chosen": -452.47430419921875, "logps/rejected": -295.72454833984375, "loss": 0.1414, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.708849906921387, "rewards/margins": 5.2992634773254395, "rewards/rejected": 0.4095870852470398, "step": 2200 }, { "epoch": 1.0, "eval_logits/chosen": -4.622661590576172, "eval_logits/rejected": -4.564619541168213, "eval_logps/chosen": -402.4035949707031, "eval_logps/rejected": -265.51629638671875, "eval_loss": 0.16473622620105743, "eval_rewards/accuracies": 0.9022346138954163, "eval_rewards/chosen": 4.75007438659668, "eval_rewards/margins": 4.864765167236328, "eval_rewards/rejected": -0.11469125002622604, "eval_runtime": 241.1009, "eval_samples_per_second": 11.871, "eval_steps_per_second": 0.742, "step": 2200 }, { "epoch": 1.01, "learning_rate": 2.2120750887874172e-07, "logits/chosen": -4.691455364227295, "logits/rejected": -4.626311302185059, "logps/chosen": -427.0602111816406, "logps/rejected": -282.3951416015625, "loss": 0.1538, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.415336608886719, "rewards/margins": 5.328603267669678, "rewards/rejected": 0.0867331475019455, "step": 2210 }, { "epoch": 1.01, "learning_rate": 2.207001522070015e-07, "logits/chosen": -4.653100967407227, "logits/rejected": -4.589020252227783, "logps/chosen": -439.33868408203125, "logps/rejected": -298.8544921875, "loss": 0.1453, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.7958083152771, "rewards/margins": 5.009117126464844, "rewards/rejected": 0.7866915464401245, "step": 2220 }, { "epoch": 1.02, "learning_rate": 2.2019279553526126e-07, "logits/chosen": -4.6317458152771, "logits/rejected": -4.553036212921143, "logps/chosen": -420.63360595703125, "logps/rejected": -249.65792846679688, "loss": 0.139, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.916329383850098, "rewards/margins": 5.496735095977783, "rewards/rejected": 0.4195937216281891, "step": 2230 }, { "epoch": 1.02, "learning_rate": 2.1968543886352102e-07, "logits/chosen": -4.659050941467285, "logits/rejected": -4.598803520202637, "logps/chosen": -389.9103698730469, "logps/rejected": -265.2038269042969, "loss": 0.1352, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.60361909866333, "rewards/margins": 4.522326469421387, "rewards/rejected": 0.0812920555472374, "step": 2240 }, { "epoch": 1.03, "learning_rate": 2.191780821917808e-07, "logits/chosen": -4.569336891174316, "logits/rejected": -4.519515037536621, "logps/chosen": -390.0275573730469, "logps/rejected": -269.519287109375, "loss": 0.1426, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 4.4411091804504395, "rewards/margins": 4.313481330871582, "rewards/rejected": 0.12762793898582458, "step": 2250 }, { "epoch": 1.03, "learning_rate": 2.1867072552004056e-07, "logits/chosen": -4.5951642990112305, "logits/rejected": -4.535480976104736, "logps/chosen": -404.6128845214844, "logps/rejected": -270.63818359375, "loss": 0.1345, "rewards/accuracies": 0.9375, "rewards/chosen": 5.147619724273682, "rewards/margins": 4.96987247467041, "rewards/rejected": 0.17774729430675507, "step": 2260 }, { "epoch": 1.04, "learning_rate": 2.1816336884830032e-07, "logits/chosen": -4.623845100402832, "logits/rejected": -4.566351890563965, "logps/chosen": -390.02410888671875, "logps/rejected": -256.20220947265625, "loss": 0.1351, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.8381757736206055, "rewards/margins": 5.005610942840576, "rewards/rejected": -0.16743476688861847, "step": 2270 }, { "epoch": 1.04, "learning_rate": 2.176560121765601e-07, "logits/chosen": -4.639063358306885, "logits/rejected": -4.574423789978027, "logps/chosen": -434.8050842285156, "logps/rejected": -290.1243591308594, "loss": 0.1538, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.499131202697754, "rewards/margins": 5.612876892089844, "rewards/rejected": -0.11374549567699432, "step": 2280 }, { "epoch": 1.05, "learning_rate": 2.1714865550481986e-07, "logits/chosen": -4.651917457580566, "logits/rejected": -4.577620029449463, "logps/chosen": -432.62249755859375, "logps/rejected": -270.78338623046875, "loss": 0.1394, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.557310104370117, "rewards/margins": 5.459046363830566, "rewards/rejected": 0.0982634574174881, "step": 2290 }, { "epoch": 1.05, "learning_rate": 2.1664129883307962e-07, "logits/chosen": -4.6372785568237305, "logits/rejected": -4.58381462097168, "logps/chosen": -403.2381591796875, "logps/rejected": -288.7150573730469, "loss": 0.1572, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 4.891635894775391, "rewards/margins": 4.456629753112793, "rewards/rejected": 0.43500566482543945, "step": 2300 }, { "epoch": 1.05, "eval_logits/chosen": -4.545873165130615, "eval_logits/rejected": -4.487427711486816, "eval_logps/chosen": -401.9750671386719, "eval_logps/rejected": -265.334716796875, "eval_loss": 0.16128751635551453, "eval_rewards/accuracies": 0.9134078025817871, "eval_rewards/chosen": 4.964333534240723, "eval_rewards/margins": 4.988239765167236, "eval_rewards/rejected": -0.023905673995614052, "eval_runtime": 170.7899, "eval_samples_per_second": 16.757, "eval_steps_per_second": 1.048, "step": 2300 }, { "epoch": 1.05, "learning_rate": 2.161339421613394e-07, "logits/chosen": -4.623624324798584, "logits/rejected": -4.561903476715088, "logps/chosen": -405.3636169433594, "logps/rejected": -279.37799072265625, "loss": 0.148, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.8197808265686035, "rewards/margins": 4.811264991760254, "rewards/rejected": 0.008516359142959118, "step": 2310 }, { "epoch": 1.06, "learning_rate": 2.1562658548959916e-07, "logits/chosen": -4.590320110321045, "logits/rejected": -4.5214033126831055, "logps/chosen": -449.48419189453125, "logps/rejected": -284.75994873046875, "loss": 0.1252, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.389808654785156, "rewards/margins": 5.88195276260376, "rewards/rejected": -0.49214357137680054, "step": 2320 }, { "epoch": 1.06, "learning_rate": 2.1511922881785892e-07, "logits/chosen": -4.610832214355469, "logits/rejected": -4.556483268737793, "logps/chosen": -400.5215759277344, "logps/rejected": -274.66021728515625, "loss": 0.1656, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.815681457519531, "rewards/margins": 5.0928215980529785, "rewards/rejected": -0.27713990211486816, "step": 2330 }, { "epoch": 1.07, "learning_rate": 2.146118721461187e-07, "logits/chosen": -4.594456672668457, "logits/rejected": -4.525969982147217, "logps/chosen": -428.5234375, "logps/rejected": -288.2721862792969, "loss": 0.1186, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.482345104217529, "rewards/margins": 5.192389965057373, "rewards/rejected": 0.28995516896247864, "step": 2340 }, { "epoch": 1.07, "learning_rate": 2.1410451547437846e-07, "logits/chosen": -4.580326080322266, "logits/rejected": -4.528518199920654, "logps/chosen": -413.868408203125, "logps/rejected": -293.4358825683594, "loss": 0.1566, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.750143051147461, "rewards/margins": 4.470927715301514, "rewards/rejected": 0.27921542525291443, "step": 2350 }, { "epoch": 1.08, "learning_rate": 2.1359715880263822e-07, "logits/chosen": -4.595850467681885, "logits/rejected": -4.523636817932129, "logps/chosen": -425.35821533203125, "logps/rejected": -271.9968566894531, "loss": 0.1455, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.878283500671387, "rewards/margins": 4.9924750328063965, "rewards/rejected": 0.8858088254928589, "step": 2360 }, { "epoch": 1.08, "learning_rate": 2.13089802130898e-07, "logits/chosen": -4.621100425720215, "logits/rejected": -4.5505266189575195, "logps/chosen": -408.945068359375, "logps/rejected": -276.8642578125, "loss": 0.1414, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.91372537612915, "rewards/margins": 5.19683837890625, "rewards/rejected": -0.28311339020729065, "step": 2370 }, { "epoch": 1.09, "learning_rate": 2.1258244545915776e-07, "logits/chosen": -4.704928398132324, "logits/rejected": -4.629445552825928, "logps/chosen": -410.17132568359375, "logps/rejected": -289.75201416015625, "loss": 0.1521, "rewards/accuracies": 0.9375, "rewards/chosen": 5.2707719802856445, "rewards/margins": 4.987483024597168, "rewards/rejected": 0.2832895815372467, "step": 2380 }, { "epoch": 1.09, "learning_rate": 2.1207508878741752e-07, "logits/chosen": -4.704293251037598, "logits/rejected": -4.638591766357422, "logps/chosen": -369.72784423828125, "logps/rejected": -248.36294555664062, "loss": 0.1563, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 4.4744343757629395, "rewards/margins": 5.0368428230285645, "rewards/rejected": -0.562408983707428, "step": 2390 }, { "epoch": 1.1, "learning_rate": 2.115677321156773e-07, "logits/chosen": -4.679817199707031, "logits/rejected": -4.6026763916015625, "logps/chosen": -442.40289306640625, "logps/rejected": -292.56707763671875, "loss": 0.1271, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.903312683105469, "rewards/margins": 5.563991546630859, "rewards/rejected": 0.3393217623233795, "step": 2400 }, { "epoch": 1.1, "eval_logits/chosen": -4.596662521362305, "eval_logits/rejected": -4.533384799957275, "eval_logps/chosen": -402.0570373535156, "eval_logps/rejected": -265.4513854980469, "eval_loss": 0.1592414528131485, "eval_rewards/accuracies": 0.9050279259681702, "eval_rewards/chosen": 4.923354625701904, "eval_rewards/margins": 5.00559139251709, "eval_rewards/rejected": -0.08223670721054077, "eval_runtime": 228.4243, "eval_samples_per_second": 12.529, "eval_steps_per_second": 0.784, "step": 2400 }, { "epoch": 1.1, "learning_rate": 2.1106037544393706e-07, "logits/chosen": -4.635499000549316, "logits/rejected": -4.57289981842041, "logps/chosen": -419.62811279296875, "logps/rejected": -288.00347900390625, "loss": 0.1426, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.549483299255371, "rewards/margins": 5.433948040008545, "rewards/rejected": 0.115534208714962, "step": 2410 }, { "epoch": 1.1, "learning_rate": 2.1055301877219682e-07, "logits/chosen": -4.641855716705322, "logits/rejected": -4.553411483764648, "logps/chosen": -450.4766540527344, "logps/rejected": -267.71380615234375, "loss": 0.1368, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 5.73415470123291, "rewards/margins": 6.039590835571289, "rewards/rejected": -0.3054371774196625, "step": 2420 }, { "epoch": 1.11, "learning_rate": 2.100456621004566e-07, "logits/chosen": -4.638934135437012, "logits/rejected": -4.588161468505859, "logps/chosen": -366.949462890625, "logps/rejected": -263.68341064453125, "loss": 0.1481, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.824484348297119, "rewards/margins": 4.980218887329102, "rewards/rejected": -0.15573473274707794, "step": 2430 }, { "epoch": 1.11, "learning_rate": 2.0953830542871636e-07, "logits/chosen": -4.640177249908447, "logits/rejected": -4.567892551422119, "logps/chosen": -436.56243896484375, "logps/rejected": -282.61834716796875, "loss": 0.1195, "rewards/accuracies": 0.9375, "rewards/chosen": 5.8879828453063965, "rewards/margins": 5.6131591796875, "rewards/rejected": 0.2748235762119293, "step": 2440 }, { "epoch": 1.12, "learning_rate": 2.0903094875697612e-07, "logits/chosen": -4.665534019470215, "logits/rejected": -4.581798553466797, "logps/chosen": -455.39764404296875, "logps/rejected": -277.80584716796875, "loss": 0.1551, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.901768207550049, "rewards/margins": 5.911789894104004, "rewards/rejected": -0.010022163391113281, "step": 2450 }, { "epoch": 1.12, "learning_rate": 2.085235920852359e-07, "logits/chosen": -4.682118892669678, "logits/rejected": -4.614459991455078, "logps/chosen": -405.5885925292969, "logps/rejected": -277.41986083984375, "loss": 0.1302, "rewards/accuracies": 0.9375, "rewards/chosen": 4.336734771728516, "rewards/margins": 5.042340278625488, "rewards/rejected": -0.7056052684783936, "step": 2460 }, { "epoch": 1.13, "learning_rate": 2.0801623541349566e-07, "logits/chosen": -4.66549825668335, "logits/rejected": -4.597802639007568, "logps/chosen": -423.67340087890625, "logps/rejected": -281.371826171875, "loss": 0.1417, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.0633134841918945, "rewards/margins": 4.710551738739014, "rewards/rejected": 0.3527612090110779, "step": 2470 }, { "epoch": 1.13, "learning_rate": 2.0750887874175542e-07, "logits/chosen": -4.718007564544678, "logits/rejected": -4.649482727050781, "logps/chosen": -426.1044921875, "logps/rejected": -289.7950134277344, "loss": 0.1247, "rewards/accuracies": 0.9375, "rewards/chosen": 5.0616350173950195, "rewards/margins": 5.805014133453369, "rewards/rejected": -0.7433785200119019, "step": 2480 }, { "epoch": 1.14, "learning_rate": 2.070015220700152e-07, "logits/chosen": -4.722356796264648, "logits/rejected": -4.649219036102295, "logps/chosen": -444.7149963378906, "logps/rejected": -271.2923278808594, "loss": 0.1223, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.024299621582031, "rewards/margins": 5.731832027435303, "rewards/rejected": -0.7075319290161133, "step": 2490 }, { "epoch": 1.14, "learning_rate": 2.0649416539827496e-07, "logits/chosen": -4.688532829284668, "logits/rejected": -4.612643241882324, "logps/chosen": -434.0406188964844, "logps/rejected": -282.1349182128906, "loss": 0.128, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 5.506690979003906, "rewards/margins": 5.450496673583984, "rewards/rejected": 0.05619411543011665, "step": 2500 }, { "epoch": 1.14, "eval_logits/chosen": -4.592950820922852, "eval_logits/rejected": -4.533634662628174, "eval_logps/chosen": -401.494140625, "eval_logps/rejected": -265.15155029296875, "eval_loss": 0.15845036506652832, "eval_rewards/accuracies": 0.916201114654541, "eval_rewards/chosen": 5.204805850982666, "eval_rewards/margins": 5.137132167816162, "eval_rewards/rejected": 0.06767434626817703, "eval_runtime": 174.2943, "eval_samples_per_second": 16.421, "eval_steps_per_second": 1.027, "step": 2500 }, { "epoch": 1.15, "learning_rate": 2.0598680872653472e-07, "logits/chosen": -4.683821678161621, "logits/rejected": -4.612093448638916, "logps/chosen": -428.28485107421875, "logps/rejected": -273.9698181152344, "loss": 0.1502, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.7181620597839355, "rewards/margins": 5.6464996337890625, "rewards/rejected": 0.07166238129138947, "step": 2510 }, { "epoch": 1.15, "learning_rate": 2.054794520547945e-07, "logits/chosen": -4.729501247406006, "logits/rejected": -4.661202907562256, "logps/chosen": -416.16680908203125, "logps/rejected": -279.8552551269531, "loss": 0.1352, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.7415571212768555, "rewards/margins": 5.462691307067871, "rewards/rejected": 0.2788657248020172, "step": 2520 }, { "epoch": 1.15, "learning_rate": 2.0497209538305426e-07, "logits/chosen": -4.670582294464111, "logits/rejected": -4.60074520111084, "logps/chosen": -412.16607666015625, "logps/rejected": -266.31353759765625, "loss": 0.1366, "rewards/accuracies": 0.9375, "rewards/chosen": 5.376128196716309, "rewards/margins": 5.501835346221924, "rewards/rejected": -0.12570667266845703, "step": 2530 }, { "epoch": 1.16, "learning_rate": 2.0446473871131402e-07, "logits/chosen": -4.6687541007995605, "logits/rejected": -4.598757266998291, "logps/chosen": -437.97857666015625, "logps/rejected": -295.8390808105469, "loss": 0.1436, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.664647579193115, "rewards/margins": 5.903940677642822, "rewards/rejected": -0.23929262161254883, "step": 2540 }, { "epoch": 1.16, "learning_rate": 2.039573820395738e-07, "logits/chosen": -4.643807888031006, "logits/rejected": -4.565986633300781, "logps/chosen": -438.4078674316406, "logps/rejected": -275.6295471191406, "loss": 0.1527, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 6.1801958084106445, "rewards/margins": 6.178706169128418, "rewards/rejected": 0.0014888762962073088, "step": 2550 }, { "epoch": 1.17, "learning_rate": 2.0345002536783356e-07, "logits/chosen": -4.668728351593018, "logits/rejected": -4.599949836730957, "logps/chosen": -416.12860107421875, "logps/rejected": -260.29376220703125, "loss": 0.1403, "rewards/accuracies": 0.9375, "rewards/chosen": 4.81443452835083, "rewards/margins": 5.2935895919799805, "rewards/rejected": -0.4791547358036041, "step": 2560 }, { "epoch": 1.17, "learning_rate": 2.0294266869609332e-07, "logits/chosen": -4.749151706695557, "logits/rejected": -4.67990779876709, "logps/chosen": -404.9203186035156, "logps/rejected": -260.6658935546875, "loss": 0.1414, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.249196529388428, "rewards/margins": 5.418937683105469, "rewards/rejected": -0.16974106431007385, "step": 2570 }, { "epoch": 1.18, "learning_rate": 2.024353120243531e-07, "logits/chosen": -4.745065689086914, "logits/rejected": -4.680525302886963, "logps/chosen": -419.73052978515625, "logps/rejected": -292.0650329589844, "loss": 0.1497, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.573615074157715, "rewards/margins": 5.036746978759766, "rewards/rejected": 0.5368689298629761, "step": 2580 }, { "epoch": 1.18, "learning_rate": 2.0192795535261286e-07, "logits/chosen": -4.7524518966674805, "logits/rejected": -4.670641899108887, "logps/chosen": -433.12799072265625, "logps/rejected": -275.57073974609375, "loss": 0.1312, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.394001007080078, "rewards/margins": 6.280089378356934, "rewards/rejected": -0.8860880732536316, "step": 2590 }, { "epoch": 1.19, "learning_rate": 2.0142059868087262e-07, "logits/chosen": -4.741904258728027, "logits/rejected": -4.668903350830078, "logps/chosen": -410.4855041503906, "logps/rejected": -272.53778076171875, "loss": 0.1276, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.720524787902832, "rewards/margins": 5.795670509338379, "rewards/rejected": -0.07514572143554688, "step": 2600 }, { "epoch": 1.19, "eval_logits/chosen": -4.626635551452637, "eval_logits/rejected": -4.563098907470703, "eval_logps/chosen": -401.83624267578125, "eval_logps/rejected": -265.490966796875, "eval_loss": 0.15984460711479187, "eval_rewards/accuracies": 0.9329608678817749, "eval_rewards/chosen": 5.033752918243408, "eval_rewards/margins": 5.135770320892334, "eval_rewards/rejected": -0.1020173728466034, "eval_runtime": 358.2861, "eval_samples_per_second": 7.988, "eval_steps_per_second": 0.5, "step": 2600 }, { "epoch": 1.19, "learning_rate": 2.009132420091324e-07, "logits/chosen": -4.686434745788574, "logits/rejected": -4.629500389099121, "logps/chosen": -379.0474548339844, "logps/rejected": -270.8832092285156, "loss": 0.1711, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.508579730987549, "rewards/margins": 5.389525413513184, "rewards/rejected": 0.11905422061681747, "step": 2610 }, { "epoch": 1.2, "learning_rate": 2.0040588533739216e-07, "logits/chosen": -4.747511863708496, "logits/rejected": -4.66859245300293, "logps/chosen": -435.533203125, "logps/rejected": -279.46734619140625, "loss": 0.1387, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 5.606345176696777, "rewards/margins": 5.951859474182129, "rewards/rejected": -0.3455139994621277, "step": 2620 }, { "epoch": 1.2, "learning_rate": 1.9989852866565192e-07, "logits/chosen": -4.728833198547363, "logits/rejected": -4.659870147705078, "logps/chosen": -415.07696533203125, "logps/rejected": -277.5272216796875, "loss": 0.1224, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 6.0965142250061035, "rewards/margins": 5.454050064086914, "rewards/rejected": 0.6424643397331238, "step": 2630 }, { "epoch": 1.21, "learning_rate": 1.993911719939117e-07, "logits/chosen": -4.790101051330566, "logits/rejected": -4.7091264724731445, "logps/chosen": -415.70794677734375, "logps/rejected": -264.314208984375, "loss": 0.1487, "rewards/accuracies": 0.9375, "rewards/chosen": 4.987692356109619, "rewards/margins": 4.897696495056152, "rewards/rejected": 0.08999605476856232, "step": 2640 }, { "epoch": 1.21, "learning_rate": 1.9888381532217146e-07, "logits/chosen": -4.743100166320801, "logits/rejected": -4.68099308013916, "logps/chosen": -383.8597106933594, "logps/rejected": -262.56396484375, "loss": 0.1186, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.855942249298096, "rewards/margins": 5.423813343048096, "rewards/rejected": -0.5678711533546448, "step": 2650 }, { "epoch": 1.21, "learning_rate": 1.9837645865043122e-07, "logits/chosen": -4.716538906097412, "logits/rejected": -4.645163059234619, "logps/chosen": -415.4833068847656, "logps/rejected": -268.6138000488281, "loss": 0.1484, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.089824676513672, "rewards/margins": 5.364691734313965, "rewards/rejected": -0.2748669981956482, "step": 2660 }, { "epoch": 1.22, "learning_rate": 1.97869101978691e-07, "logits/chosen": -4.71204948425293, "logits/rejected": -4.652632236480713, "logps/chosen": -395.47869873046875, "logps/rejected": -271.0872497558594, "loss": 0.1187, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.570948600769043, "rewards/margins": 4.550985813140869, "rewards/rejected": 0.01996307447552681, "step": 2670 }, { "epoch": 1.22, "learning_rate": 1.9736174530695076e-07, "logits/chosen": -4.709834575653076, "logits/rejected": -4.647024631500244, "logps/chosen": -393.42181396484375, "logps/rejected": -263.7567443847656, "loss": 0.1474, "rewards/accuracies": 0.9375, "rewards/chosen": 4.477217674255371, "rewards/margins": 4.908478736877441, "rewards/rejected": -0.43126052618026733, "step": 2680 }, { "epoch": 1.23, "learning_rate": 1.9685438863521052e-07, "logits/chosen": -4.687317848205566, "logits/rejected": -4.624991416931152, "logps/chosen": -400.67535400390625, "logps/rejected": -270.1904296875, "loss": 0.1467, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.255560398101807, "rewards/margins": 4.8730058670043945, "rewards/rejected": 0.38255470991134644, "step": 2690 }, { "epoch": 1.23, "learning_rate": 1.963470319634703e-07, "logits/chosen": -4.819007873535156, "logits/rejected": -4.75001859664917, "logps/chosen": -410.330810546875, "logps/rejected": -273.15338134765625, "loss": 0.1377, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.249783515930176, "rewards/margins": 5.343628883361816, "rewards/rejected": -0.09384489059448242, "step": 2700 }, { "epoch": 1.23, "eval_logits/chosen": -4.708266735076904, "eval_logits/rejected": -4.646238327026367, "eval_logps/chosen": -401.6972351074219, "eval_logps/rejected": -265.3082275390625, "eval_loss": 0.16181820631027222, "eval_rewards/accuracies": 0.9189944267272949, "eval_rewards/chosen": 5.103267192840576, "eval_rewards/margins": 5.1139116287231445, "eval_rewards/rejected": -0.010644582100212574, "eval_runtime": 219.4982, "eval_samples_per_second": 13.039, "eval_steps_per_second": 0.815, "step": 2700 }, { "epoch": 1.24, "learning_rate": 1.9583967529173006e-07, "logits/chosen": -4.762511730194092, "logits/rejected": -4.684549808502197, "logps/chosen": -436.26666259765625, "logps/rejected": -284.36236572265625, "loss": 0.1325, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 6.2178635597229, "rewards/margins": 5.885565757751465, "rewards/rejected": 0.3322971761226654, "step": 2710 }, { "epoch": 1.24, "learning_rate": 1.9533231861998982e-07, "logits/chosen": -4.71683406829834, "logits/rejected": -4.641875267028809, "logps/chosen": -451.26025390625, "logps/rejected": -287.6884765625, "loss": 0.1532, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.5788397789001465, "rewards/margins": 5.926271438598633, "rewards/rejected": -0.3474324643611908, "step": 2720 }, { "epoch": 1.25, "learning_rate": 1.948249619482496e-07, "logits/chosen": -4.744879245758057, "logits/rejected": -4.675485610961914, "logps/chosen": -405.8666687011719, "logps/rejected": -263.1166687011719, "loss": 0.1527, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 3.92828631401062, "rewards/margins": 4.984368801116943, "rewards/rejected": -1.0560823678970337, "step": 2730 }, { "epoch": 1.25, "learning_rate": 1.9431760527650936e-07, "logits/chosen": -4.751509666442871, "logits/rejected": -4.656686305999756, "logps/chosen": -463.6688537597656, "logps/rejected": -274.2791748046875, "loss": 0.1459, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 6.410833835601807, "rewards/margins": 6.696615695953369, "rewards/rejected": -0.28578147292137146, "step": 2740 }, { "epoch": 1.26, "learning_rate": 1.9381024860476912e-07, "logits/chosen": -4.785609245300293, "logits/rejected": -4.703537464141846, "logps/chosen": -453.5716247558594, "logps/rejected": -272.9024658203125, "loss": 0.1499, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.403480052947998, "rewards/margins": 5.613043785095215, "rewards/rejected": -0.20956353843212128, "step": 2750 }, { "epoch": 1.26, "learning_rate": 1.933028919330289e-07, "logits/chosen": -4.788984775543213, "logits/rejected": -4.726633071899414, "logps/chosen": -415.7835998535156, "logps/rejected": -279.831298828125, "loss": 0.1499, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 5.6923089027404785, "rewards/margins": 5.368066787719727, "rewards/rejected": 0.32424196600914, "step": 2760 }, { "epoch": 1.26, "learning_rate": 1.9279553526128866e-07, "logits/chosen": -4.810788154602051, "logits/rejected": -4.7394609451293945, "logps/chosen": -420.9473571777344, "logps/rejected": -274.3715515136719, "loss": 0.1726, "rewards/accuracies": 0.9375, "rewards/chosen": 5.51529598236084, "rewards/margins": 6.101782321929932, "rewards/rejected": -0.5864860415458679, "step": 2770 }, { "epoch": 1.27, "learning_rate": 1.9228817858954842e-07, "logits/chosen": -4.836386203765869, "logits/rejected": -4.770260334014893, "logps/chosen": -411.0437927246094, "logps/rejected": -278.07440185546875, "loss": 0.1278, "rewards/accuracies": 0.9375, "rewards/chosen": 5.438403129577637, "rewards/margins": 5.277481555938721, "rewards/rejected": 0.16092152893543243, "step": 2780 }, { "epoch": 1.27, "learning_rate": 1.917808219178082e-07, "logits/chosen": -4.7758378982543945, "logits/rejected": -4.71877908706665, "logps/chosen": -383.00714111328125, "logps/rejected": -278.8111877441406, "loss": 0.1459, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.809662818908691, "rewards/margins": 4.953597068786621, "rewards/rejected": -0.14393410086631775, "step": 2790 }, { "epoch": 1.28, "learning_rate": 1.9127346524606796e-07, "logits/chosen": -4.8130340576171875, "logits/rejected": -4.744383335113525, "logps/chosen": -414.734375, "logps/rejected": -268.0003356933594, "loss": 0.1489, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.613664150238037, "rewards/margins": 5.636666774749756, "rewards/rejected": -0.02300238609313965, "step": 2800 }, { "epoch": 1.28, "eval_logits/chosen": -4.714895725250244, "eval_logits/rejected": -4.656703948974609, "eval_logps/chosen": -401.8643798828125, "eval_logps/rejected": -265.4261169433594, "eval_loss": 0.15762589871883392, "eval_rewards/accuracies": 0.9273743033409119, "eval_rewards/chosen": 5.019688129425049, "eval_rewards/margins": 5.089291572570801, "eval_rewards/rejected": -0.06960343569517136, "eval_runtime": 243.5672, "eval_samples_per_second": 11.75, "eval_steps_per_second": 0.735, "step": 2800 }, { "epoch": 1.28, "learning_rate": 1.9076610857432772e-07, "logits/chosen": -4.7702317237854, "logits/rejected": -4.698868751525879, "logps/chosen": -425.22222900390625, "logps/rejected": -285.938720703125, "loss": 0.1289, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.5780181884765625, "rewards/margins": 5.588247776031494, "rewards/rejected": -0.010229158215224743, "step": 2810 }, { "epoch": 1.29, "learning_rate": 1.902587519025875e-07, "logits/chosen": -4.747796058654785, "logits/rejected": -4.671156883239746, "logps/chosen": -440.99591064453125, "logps/rejected": -275.7499084472656, "loss": 0.114, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.831236839294434, "rewards/margins": 6.020218372344971, "rewards/rejected": -0.18898148834705353, "step": 2820 }, { "epoch": 1.29, "learning_rate": 1.8975139523084726e-07, "logits/chosen": -4.770453453063965, "logits/rejected": -4.707358360290527, "logps/chosen": -416.5846252441406, "logps/rejected": -286.60430908203125, "loss": 0.1504, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.750993251800537, "rewards/margins": 5.363245010375977, "rewards/rejected": -0.6122517585754395, "step": 2830 }, { "epoch": 1.3, "learning_rate": 1.8924403855910702e-07, "logits/chosen": -4.806455612182617, "logits/rejected": -4.736222267150879, "logps/chosen": -414.2118225097656, "logps/rejected": -272.4441223144531, "loss": 0.1364, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.827682018280029, "rewards/margins": 5.28678035736084, "rewards/rejected": -0.4590977132320404, "step": 2840 }, { "epoch": 1.3, "learning_rate": 1.887366818873668e-07, "logits/chosen": -4.800880432128906, "logits/rejected": -4.73232889175415, "logps/chosen": -410.1888122558594, "logps/rejected": -279.577392578125, "loss": 0.1131, "rewards/accuracies": 0.9375, "rewards/chosen": 5.066840171813965, "rewards/margins": 4.99465799331665, "rewards/rejected": 0.07218246161937714, "step": 2850 }, { "epoch": 1.31, "learning_rate": 1.8822932521562656e-07, "logits/chosen": -4.791386604309082, "logits/rejected": -4.72270393371582, "logps/chosen": -420.3934631347656, "logps/rejected": -287.21466064453125, "loss": 0.1289, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.1452484130859375, "rewards/margins": 5.138092517852783, "rewards/rejected": 0.007155990693718195, "step": 2860 }, { "epoch": 1.31, "learning_rate": 1.8772196854388632e-07, "logits/chosen": -4.810246467590332, "logits/rejected": -4.745818614959717, "logps/chosen": -404.50518798828125, "logps/rejected": -275.2052001953125, "loss": 0.1456, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.067367076873779, "rewards/margins": 5.245428085327148, "rewards/rejected": -0.17806124687194824, "step": 2870 }, { "epoch": 1.31, "learning_rate": 1.872146118721461e-07, "logits/chosen": -4.786719799041748, "logits/rejected": -4.697300910949707, "logps/chosen": -460.42431640625, "logps/rejected": -272.08758544921875, "loss": 0.1271, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 6.117160320281982, "rewards/margins": 6.0278000831604, "rewards/rejected": 0.08935976028442383, "step": 2880 }, { "epoch": 1.32, "learning_rate": 1.8670725520040586e-07, "logits/chosen": -4.7514262199401855, "logits/rejected": -4.681183815002441, "logps/chosen": -430.7438049316406, "logps/rejected": -282.2538146972656, "loss": 0.1248, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.8138251304626465, "rewards/margins": 5.940483093261719, "rewards/rejected": -0.12665757536888123, "step": 2890 }, { "epoch": 1.32, "learning_rate": 1.8619989852866562e-07, "logits/chosen": -4.7802019119262695, "logits/rejected": -4.721458911895752, "logps/chosen": -391.18505859375, "logps/rejected": -269.9803771972656, "loss": 0.1252, "rewards/accuracies": 0.9375, "rewards/chosen": 5.017066478729248, "rewards/margins": 5.21406888961792, "rewards/rejected": -0.19700197875499725, "step": 2900 }, { "epoch": 1.32, "eval_logits/chosen": -4.749572277069092, "eval_logits/rejected": -4.688549041748047, "eval_logps/chosen": -402.2606201171875, "eval_logps/rejected": -266.0809326171875, "eval_loss": 0.15943273901939392, "eval_rewards/accuracies": 0.9217877388000488, "eval_rewards/chosen": 4.8215556144714355, "eval_rewards/margins": 5.2185564041137695, "eval_rewards/rejected": -0.3970007300376892, "eval_runtime": 500.4671, "eval_samples_per_second": 5.719, "eval_steps_per_second": 0.358, "step": 2900 }, { "epoch": 1.33, "learning_rate": 1.856925418569254e-07, "logits/chosen": -4.803264617919922, "logits/rejected": -4.725480079650879, "logps/chosen": -443.0115661621094, "logps/rejected": -280.8976745605469, "loss": 0.1214, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.92267370223999, "rewards/margins": 6.436093807220459, "rewards/rejected": -0.5134209990501404, "step": 2910 }, { "epoch": 1.33, "learning_rate": 1.8518518518518516e-07, "logits/chosen": -4.765622138977051, "logits/rejected": -4.7021164894104, "logps/chosen": -403.44873046875, "logps/rejected": -276.02337646484375, "loss": 0.1588, "rewards/accuracies": 0.9375, "rewards/chosen": 4.565531253814697, "rewards/margins": 5.2337141036987305, "rewards/rejected": -0.668183445930481, "step": 2920 }, { "epoch": 1.34, "learning_rate": 1.8467782851344492e-07, "logits/chosen": -4.731475830078125, "logits/rejected": -4.67299222946167, "logps/chosen": -390.0352478027344, "logps/rejected": -269.45648193359375, "loss": 0.1542, "rewards/accuracies": 0.9375, "rewards/chosen": 5.30756950378418, "rewards/margins": 5.882264614105225, "rewards/rejected": -0.5746955275535583, "step": 2930 }, { "epoch": 1.34, "learning_rate": 1.841704718417047e-07, "logits/chosen": -4.8074774742126465, "logits/rejected": -4.736169338226318, "logps/chosen": -426.4104919433594, "logps/rejected": -271.7747497558594, "loss": 0.1428, "rewards/accuracies": 0.9375, "rewards/chosen": 5.5315937995910645, "rewards/margins": 6.1569085121154785, "rewards/rejected": -0.6253147721290588, "step": 2940 }, { "epoch": 1.35, "learning_rate": 1.8366311516996446e-07, "logits/chosen": -4.809752464294434, "logits/rejected": -4.745334625244141, "logps/chosen": -400.6762390136719, "logps/rejected": -259.08270263671875, "loss": 0.1072, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.367508888244629, "rewards/margins": 5.14243745803833, "rewards/rejected": 0.22507119178771973, "step": 2950 }, { "epoch": 1.35, "learning_rate": 1.8315575849822422e-07, "logits/chosen": -4.77707052230835, "logits/rejected": -4.713869094848633, "logps/chosen": -417.20819091796875, "logps/rejected": -284.25982666015625, "loss": 0.1388, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.330142021179199, "rewards/margins": 5.237193584442139, "rewards/rejected": 0.09294833987951279, "step": 2960 }, { "epoch": 1.36, "learning_rate": 1.82648401826484e-07, "logits/chosen": -4.7584991455078125, "logits/rejected": -4.691665172576904, "logps/chosen": -402.3412780761719, "logps/rejected": -263.42657470703125, "loss": 0.1584, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.15602970123291, "rewards/margins": 5.074208736419678, "rewards/rejected": 0.08182082325220108, "step": 2970 }, { "epoch": 1.36, "learning_rate": 1.8214104515474375e-07, "logits/chosen": -4.770436763763428, "logits/rejected": -4.696353435516357, "logps/chosen": -430.2450256347656, "logps/rejected": -280.1198425292969, "loss": 0.1301, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.352513313293457, "rewards/margins": 6.116093635559082, "rewards/rejected": -0.763581395149231, "step": 2980 }, { "epoch": 1.36, "learning_rate": 1.8163368848300352e-07, "logits/chosen": -4.805212497711182, "logits/rejected": -4.746600151062012, "logps/chosen": -404.6134033203125, "logps/rejected": -291.4418029785156, "loss": 0.1493, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.081612586975098, "rewards/margins": 5.287693500518799, "rewards/rejected": -0.20608015358448029, "step": 2990 }, { "epoch": 1.37, "learning_rate": 1.811263318112633e-07, "logits/chosen": -4.778563976287842, "logits/rejected": -4.717883586883545, "logps/chosen": -397.737548828125, "logps/rejected": -272.70013427734375, "loss": 0.1177, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.185258865356445, "rewards/margins": 5.105632781982422, "rewards/rejected": 0.079625703394413, "step": 3000 }, { "epoch": 1.37, "eval_logits/chosen": -4.717940807342529, "eval_logits/rejected": -4.6551713943481445, "eval_logps/chosen": -401.6279602050781, "eval_logps/rejected": -265.67547607421875, "eval_loss": 0.1561262160539627, "eval_rewards/accuracies": 0.9189944267272949, "eval_rewards/chosen": 5.1378960609436035, "eval_rewards/margins": 5.332164287567139, "eval_rewards/rejected": -0.19426818192005157, "eval_runtime": 462.3356, "eval_samples_per_second": 6.19, "eval_steps_per_second": 0.387, "step": 3000 }, { "epoch": 1.37, "learning_rate": 1.8061897513952305e-07, "logits/chosen": -4.777006149291992, "logits/rejected": -4.696858882904053, "logps/chosen": -433.67047119140625, "logps/rejected": -277.02337646484375, "loss": 0.1235, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 6.403310298919678, "rewards/margins": 6.462069511413574, "rewards/rejected": -0.05876011773943901, "step": 3010 }, { "epoch": 1.38, "learning_rate": 1.8011161846778282e-07, "logits/chosen": -4.8407158851623535, "logits/rejected": -4.756444454193115, "logps/chosen": -436.6886291503906, "logps/rejected": -272.11688232421875, "loss": 0.1614, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.475863456726074, "rewards/margins": 5.956229209899902, "rewards/rejected": -0.480365514755249, "step": 3020 }, { "epoch": 1.38, "learning_rate": 1.796042617960426e-07, "logits/chosen": -4.831148624420166, "logits/rejected": -4.748528957366943, "logps/chosen": -433.33099365234375, "logps/rejected": -268.96380615234375, "loss": 0.1273, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.4567365646362305, "rewards/margins": 5.92402982711792, "rewards/rejected": -0.4672931730747223, "step": 3030 }, { "epoch": 1.39, "learning_rate": 1.7909690512430235e-07, "logits/chosen": -4.822821617126465, "logits/rejected": -4.74285888671875, "logps/chosen": -423.15740966796875, "logps/rejected": -284.204345703125, "loss": 0.132, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 6.0500383377075195, "rewards/margins": 6.7750372886657715, "rewards/rejected": -0.7249988317489624, "step": 3040 }, { "epoch": 1.39, "learning_rate": 1.7858954845256212e-07, "logits/chosen": -4.777098178863525, "logits/rejected": -4.72080135345459, "logps/chosen": -392.68255615234375, "logps/rejected": -297.8228454589844, "loss": 0.1341, "rewards/accuracies": 0.9375, "rewards/chosen": 5.101828098297119, "rewards/margins": 5.039213180541992, "rewards/rejected": 0.06261472404003143, "step": 3050 }, { "epoch": 1.4, "learning_rate": 1.780821917808219e-07, "logits/chosen": -4.79052734375, "logits/rejected": -4.732372760772705, "logps/chosen": -427.8304748535156, "logps/rejected": -301.1048278808594, "loss": 0.1357, "rewards/accuracies": 0.9375, "rewards/chosen": 4.837908744812012, "rewards/margins": 5.030834197998047, "rewards/rejected": -0.19292545318603516, "step": 3060 }, { "epoch": 1.4, "learning_rate": 1.7757483510908165e-07, "logits/chosen": -4.746830940246582, "logits/rejected": -4.685823917388916, "logps/chosen": -396.97442626953125, "logps/rejected": -280.0191345214844, "loss": 0.156, "rewards/accuracies": 0.9375, "rewards/chosen": 5.097185134887695, "rewards/margins": 5.40142297744751, "rewards/rejected": -0.3042370676994324, "step": 3070 }, { "epoch": 1.41, "learning_rate": 1.7706747843734142e-07, "logits/chosen": -4.768489837646484, "logits/rejected": -4.681058406829834, "logps/chosen": -406.5540466308594, "logps/rejected": -250.9787139892578, "loss": 0.1268, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.154329299926758, "rewards/margins": 5.676821708679199, "rewards/rejected": -0.5224921703338623, "step": 3080 }, { "epoch": 1.41, "learning_rate": 1.765601217656012e-07, "logits/chosen": -4.812024116516113, "logits/rejected": -4.730838775634766, "logps/chosen": -433.43829345703125, "logps/rejected": -272.6005554199219, "loss": 0.127, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.181927680969238, "rewards/margins": 5.7364397048950195, "rewards/rejected": -0.5545116662979126, "step": 3090 }, { "epoch": 1.42, "learning_rate": 1.7605276509386095e-07, "logits/chosen": -4.762328624725342, "logits/rejected": -4.698655605316162, "logps/chosen": -407.59515380859375, "logps/rejected": -277.57391357421875, "loss": 0.1338, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.305472373962402, "rewards/margins": 4.9136528968811035, "rewards/rejected": -0.6081802845001221, "step": 3100 }, { "epoch": 1.42, "eval_logits/chosen": -4.712358474731445, "eval_logits/rejected": -4.646889686584473, "eval_logps/chosen": -402.30035400390625, "eval_logps/rejected": -266.2644958496094, "eval_loss": 0.1595572829246521, "eval_rewards/accuracies": 0.9217877388000488, "eval_rewards/chosen": 4.801685810089111, "eval_rewards/margins": 5.290464401245117, "eval_rewards/rejected": -0.4887782633304596, "eval_runtime": 246.3794, "eval_samples_per_second": 11.616, "eval_steps_per_second": 0.727, "step": 3100 }, { "epoch": 1.42, "learning_rate": 1.7554540842212072e-07, "logits/chosen": -4.787060260772705, "logits/rejected": -4.701440334320068, "logps/chosen": -423.6065368652344, "logps/rejected": -264.43896484375, "loss": 0.1283, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.916399955749512, "rewards/margins": 5.012326240539551, "rewards/rejected": -0.09592647850513458, "step": 3110 }, { "epoch": 1.42, "learning_rate": 1.750380517503805e-07, "logits/chosen": -4.765745639801025, "logits/rejected": -4.686514377593994, "logps/chosen": -420.5269470214844, "logps/rejected": -271.1041259765625, "loss": 0.1311, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.2386369705200195, "rewards/margins": 5.768832206726074, "rewards/rejected": -0.5301946401596069, "step": 3120 }, { "epoch": 1.43, "learning_rate": 1.7453069507864025e-07, "logits/chosen": -4.782876491546631, "logits/rejected": -4.706012725830078, "logps/chosen": -413.84307861328125, "logps/rejected": -264.0591125488281, "loss": 0.1689, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.296800136566162, "rewards/margins": 5.425093173980713, "rewards/rejected": -1.1282932758331299, "step": 3130 }, { "epoch": 1.43, "learning_rate": 1.7402333840690002e-07, "logits/chosen": -4.781687259674072, "logits/rejected": -4.707230091094971, "logps/chosen": -401.7633056640625, "logps/rejected": -254.99783325195312, "loss": 0.1534, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 4.400811195373535, "rewards/margins": 4.717905044555664, "rewards/rejected": -0.317094087600708, "step": 3140 }, { "epoch": 1.44, "learning_rate": 1.735159817351598e-07, "logits/chosen": -4.834644317626953, "logits/rejected": -4.762909889221191, "logps/chosen": -404.98089599609375, "logps/rejected": -282.34710693359375, "loss": 0.125, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.372402191162109, "rewards/margins": 5.774404048919678, "rewards/rejected": -0.40200185775756836, "step": 3150 }, { "epoch": 1.44, "learning_rate": 1.7300862506341955e-07, "logits/chosen": -4.846747398376465, "logits/rejected": -4.76505184173584, "logps/chosen": -429.6842346191406, "logps/rejected": -274.89373779296875, "loss": 0.1381, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 5.06003999710083, "rewards/margins": 5.866089820861816, "rewards/rejected": -0.8060493469238281, "step": 3160 }, { "epoch": 1.45, "learning_rate": 1.7250126839167932e-07, "logits/chosen": -4.845246315002441, "logits/rejected": -4.7736711502075195, "logps/chosen": -430.8252868652344, "logps/rejected": -289.3251037597656, "loss": 0.1422, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.190143585205078, "rewards/margins": 5.32658576965332, "rewards/rejected": -0.13644209504127502, "step": 3170 }, { "epoch": 1.45, "learning_rate": 1.719939117199391e-07, "logits/chosen": -4.785200119018555, "logits/rejected": -4.713950157165527, "logps/chosen": -425.682373046875, "logps/rejected": -283.3774719238281, "loss": 0.1364, "rewards/accuracies": 0.9375, "rewards/chosen": 5.5864434242248535, "rewards/margins": 5.401054859161377, "rewards/rejected": 0.18538837134838104, "step": 3180 }, { "epoch": 1.46, "learning_rate": 1.7148655504819885e-07, "logits/chosen": -4.776744842529297, "logits/rejected": -4.705835342407227, "logps/chosen": -425.938720703125, "logps/rejected": -282.21331787109375, "loss": 0.1284, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.197883129119873, "rewards/margins": 5.647789478302002, "rewards/rejected": -0.4499061107635498, "step": 3190 }, { "epoch": 1.46, "learning_rate": 1.7097919837645862e-07, "logits/chosen": -4.774179935455322, "logits/rejected": -4.70543098449707, "logps/chosen": -414.49969482421875, "logps/rejected": -275.3302001953125, "loss": 0.1393, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.5719146728515625, "rewards/margins": 5.305825710296631, "rewards/rejected": -0.733911395072937, "step": 3200 }, { "epoch": 1.46, "eval_logits/chosen": -4.704594135284424, "eval_logits/rejected": -4.638712406158447, "eval_logps/chosen": -401.77239990234375, "eval_logps/rejected": -265.6769714355469, "eval_loss": 0.15575671195983887, "eval_rewards/accuracies": 0.9273743033409119, "eval_rewards/chosen": 5.0656819343566895, "eval_rewards/margins": 5.260718822479248, "eval_rewards/rejected": -0.19503676891326904, "eval_runtime": 214.9211, "eval_samples_per_second": 13.317, "eval_steps_per_second": 0.833, "step": 3200 }, { "epoch": 1.47, "learning_rate": 1.704718417047184e-07, "logits/chosen": -4.782650947570801, "logits/rejected": -4.7215189933776855, "logps/chosen": -385.34466552734375, "logps/rejected": -269.16339111328125, "loss": 0.1372, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 3.9977965354919434, "rewards/margins": 4.4492878913879395, "rewards/rejected": -0.4514910578727722, "step": 3210 }, { "epoch": 1.47, "learning_rate": 1.6996448503297815e-07, "logits/chosen": -4.73641300201416, "logits/rejected": -4.668841361999512, "logps/chosen": -391.7770080566406, "logps/rejected": -273.7413635253906, "loss": 0.1288, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.829885959625244, "rewards/margins": 5.521437168121338, "rewards/rejected": -0.6915513277053833, "step": 3220 }, { "epoch": 1.47, "learning_rate": 1.6945712836123792e-07, "logits/chosen": -4.735869884490967, "logits/rejected": -4.6583476066589355, "logps/chosen": -408.921142578125, "logps/rejected": -267.76129150390625, "loss": 0.1391, "rewards/accuracies": 0.9375, "rewards/chosen": 5.187173366546631, "rewards/margins": 5.352859020233154, "rewards/rejected": -0.16568522155284882, "step": 3230 }, { "epoch": 1.48, "learning_rate": 1.689497716894977e-07, "logits/chosen": -4.763653755187988, "logits/rejected": -4.6911163330078125, "logps/chosen": -407.8968811035156, "logps/rejected": -278.49786376953125, "loss": 0.1372, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.553576469421387, "rewards/margins": 5.697615146636963, "rewards/rejected": -1.144038200378418, "step": 3240 }, { "epoch": 1.48, "learning_rate": 1.6844241501775745e-07, "logits/chosen": -4.776995658874512, "logits/rejected": -4.701313495635986, "logps/chosen": -410.92718505859375, "logps/rejected": -280.94061279296875, "loss": 0.1427, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.296577453613281, "rewards/margins": 5.283516883850098, "rewards/rejected": 0.013060855679214, "step": 3250 }, { "epoch": 1.49, "learning_rate": 1.6793505834601722e-07, "logits/chosen": -4.80420446395874, "logits/rejected": -4.737746238708496, "logps/chosen": -396.3955993652344, "logps/rejected": -282.39654541015625, "loss": 0.1439, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.8195672035217285, "rewards/margins": 5.177999973297119, "rewards/rejected": -0.3584325909614563, "step": 3260 }, { "epoch": 1.49, "learning_rate": 1.67427701674277e-07, "logits/chosen": -4.824585914611816, "logits/rejected": -4.745080471038818, "logps/chosen": -408.71343994140625, "logps/rejected": -263.7518615722656, "loss": 0.1362, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.045434474945068, "rewards/margins": 5.222501754760742, "rewards/rejected": -0.17706727981567383, "step": 3270 }, { "epoch": 1.5, "learning_rate": 1.6692034500253675e-07, "logits/chosen": -4.830862045288086, "logits/rejected": -4.759037494659424, "logps/chosen": -416.78253173828125, "logps/rejected": -283.1253967285156, "loss": 0.1715, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 5.269645690917969, "rewards/margins": 5.9218854904174805, "rewards/rejected": -0.6522405743598938, "step": 3280 }, { "epoch": 1.5, "learning_rate": 1.6641298833079652e-07, "logits/chosen": -4.797753810882568, "logits/rejected": -4.714999198913574, "logps/chosen": -413.4232482910156, "logps/rejected": -261.8997497558594, "loss": 0.133, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.9916090965271, "rewards/margins": 5.5544586181640625, "rewards/rejected": -0.562849760055542, "step": 3290 }, { "epoch": 1.51, "learning_rate": 1.659056316590563e-07, "logits/chosen": -4.812762260437012, "logits/rejected": -4.744751930236816, "logps/chosen": -365.1100769042969, "logps/rejected": -257.6358947753906, "loss": 0.1268, "rewards/accuracies": 0.9375, "rewards/chosen": 4.667060852050781, "rewards/margins": 4.981400966644287, "rewards/rejected": -0.31434041261672974, "step": 3300 }, { "epoch": 1.51, "eval_logits/chosen": -4.786074161529541, "eval_logits/rejected": -4.713245391845703, "eval_logps/chosen": -402.5906982421875, "eval_logps/rejected": -266.3041076660156, "eval_loss": 0.15603595972061157, "eval_rewards/accuracies": 0.9134078025817871, "eval_rewards/chosen": 4.65652322769165, "eval_rewards/margins": 5.165135860443115, "eval_rewards/rejected": -0.5086129307746887, "eval_runtime": 354.7717, "eval_samples_per_second": 8.067, "eval_steps_per_second": 0.505, "step": 3300 }, { "epoch": 1.51, "learning_rate": 1.6539827498731605e-07, "logits/chosen": -4.808962821960449, "logits/rejected": -4.727324962615967, "logps/chosen": -401.99468994140625, "logps/rejected": -264.54852294921875, "loss": 0.1546, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.40731954574585, "rewards/margins": 5.375855445861816, "rewards/rejected": 0.0314638614654541, "step": 3310 }, { "epoch": 1.52, "learning_rate": 1.6489091831557582e-07, "logits/chosen": -4.802215099334717, "logits/rejected": -4.724505424499512, "logps/chosen": -415.7156677246094, "logps/rejected": -277.2547607421875, "loss": 0.1422, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.248006343841553, "rewards/margins": 5.29903507232666, "rewards/rejected": -0.051029205322265625, "step": 3320 }, { "epoch": 1.52, "learning_rate": 1.643835616438356e-07, "logits/chosen": -4.827422142028809, "logits/rejected": -4.759106636047363, "logps/chosen": -396.0538330078125, "logps/rejected": -276.63311767578125, "loss": 0.1291, "rewards/accuracies": 0.9375, "rewards/chosen": 4.4300079345703125, "rewards/margins": 4.936140537261963, "rewards/rejected": -0.5061327219009399, "step": 3330 }, { "epoch": 1.52, "learning_rate": 1.6387620497209535e-07, "logits/chosen": -4.805856227874756, "logits/rejected": -4.730849266052246, "logps/chosen": -404.5299377441406, "logps/rejected": -269.18353271484375, "loss": 0.1188, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.365715026855469, "rewards/margins": 5.497616291046143, "rewards/rejected": -0.1319015473127365, "step": 3340 }, { "epoch": 1.53, "learning_rate": 1.6336884830035512e-07, "logits/chosen": -4.751157283782959, "logits/rejected": -4.679110527038574, "logps/chosen": -403.8088073730469, "logps/rejected": -277.25592041015625, "loss": 0.1392, "rewards/accuracies": 0.9375, "rewards/chosen": 5.14719820022583, "rewards/margins": 5.467467308044434, "rewards/rejected": -0.3202696442604065, "step": 3350 }, { "epoch": 1.53, "learning_rate": 1.6286149162861489e-07, "logits/chosen": -4.7955217361450195, "logits/rejected": -4.707754611968994, "logps/chosen": -407.8445129394531, "logps/rejected": -257.2816162109375, "loss": 0.1232, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.613996982574463, "rewards/margins": 5.818697929382324, "rewards/rejected": -0.20470114052295685, "step": 3360 }, { "epoch": 1.54, "learning_rate": 1.6235413495687465e-07, "logits/chosen": -4.807827949523926, "logits/rejected": -4.7258124351501465, "logps/chosen": -394.56561279296875, "logps/rejected": -250.92196655273438, "loss": 0.138, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.448158264160156, "rewards/margins": 5.156351566314697, "rewards/rejected": -0.7081928253173828, "step": 3370 }, { "epoch": 1.54, "learning_rate": 1.6184677828513442e-07, "logits/chosen": -4.84067440032959, "logits/rejected": -4.757169723510742, "logps/chosen": -420.3226623535156, "logps/rejected": -266.6034240722656, "loss": 0.1685, "rewards/accuracies": 0.9375, "rewards/chosen": 5.521374702453613, "rewards/margins": 5.801348686218262, "rewards/rejected": -0.2799742817878723, "step": 3380 }, { "epoch": 1.55, "learning_rate": 1.613394216133942e-07, "logits/chosen": -4.879509925842285, "logits/rejected": -4.803898811340332, "logps/chosen": -422.73583984375, "logps/rejected": -278.46087646484375, "loss": 0.1191, "rewards/accuracies": 0.9375, "rewards/chosen": 5.4318647384643555, "rewards/margins": 5.696895599365234, "rewards/rejected": -0.26503095030784607, "step": 3390 }, { "epoch": 1.55, "learning_rate": 1.6083206494165398e-07, "logits/chosen": -4.930531978607178, "logits/rejected": -4.849920272827148, "logps/chosen": -423.6416931152344, "logps/rejected": -280.6522216796875, "loss": 0.14, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.314401149749756, "rewards/margins": 5.490204811096191, "rewards/rejected": -0.17580337822437286, "step": 3400 }, { "epoch": 1.55, "eval_logits/chosen": -4.860458850860596, "eval_logits/rejected": -4.791549205780029, "eval_logps/chosen": -402.0388488769531, "eval_logps/rejected": -266.013427734375, "eval_loss": 0.15384715795516968, "eval_rewards/accuracies": 0.916201114654541, "eval_rewards/chosen": 4.932440280914307, "eval_rewards/margins": 5.295705795288086, "eval_rewards/rejected": -0.363266259431839, "eval_runtime": 372.995, "eval_samples_per_second": 7.673, "eval_steps_per_second": 0.48, "step": 3400 }, { "epoch": 1.56, "learning_rate": 1.6032470826991375e-07, "logits/chosen": -4.881518840789795, "logits/rejected": -4.8138861656188965, "logps/chosen": -409.6473083496094, "logps/rejected": -273.3043212890625, "loss": 0.1216, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.973959922790527, "rewards/margins": 5.089644908905029, "rewards/rejected": -0.1156853660941124, "step": 3410 }, { "epoch": 1.56, "learning_rate": 1.598173515981735e-07, "logits/chosen": -4.875397682189941, "logits/rejected": -4.789037704467773, "logps/chosen": -431.323974609375, "logps/rejected": -269.9063415527344, "loss": 0.1416, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 5.741078853607178, "rewards/margins": 5.951551914215088, "rewards/rejected": -0.21047282218933105, "step": 3420 }, { "epoch": 1.57, "learning_rate": 1.5930999492643328e-07, "logits/chosen": -4.856462001800537, "logits/rejected": -4.777732849121094, "logps/chosen": -423.88946533203125, "logps/rejected": -279.8882751464844, "loss": 0.1399, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.006382942199707, "rewards/margins": 5.556227207183838, "rewards/rejected": -0.5498436689376831, "step": 3430 }, { "epoch": 1.57, "learning_rate": 1.5880263825469305e-07, "logits/chosen": -4.869896411895752, "logits/rejected": -4.791926383972168, "logps/chosen": -402.3299865722656, "logps/rejected": -259.65142822265625, "loss": 0.1275, "rewards/accuracies": 0.9375, "rewards/chosen": 5.280472278594971, "rewards/margins": 6.0745649337768555, "rewards/rejected": -0.7940927743911743, "step": 3440 }, { "epoch": 1.57, "learning_rate": 1.582952815829528e-07, "logits/chosen": -4.869402885437012, "logits/rejected": -4.782708168029785, "logps/chosen": -429.22705078125, "logps/rejected": -276.6224060058594, "loss": 0.1234, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.499544620513916, "rewards/margins": 5.8404951095581055, "rewards/rejected": -0.340951144695282, "step": 3450 }, { "epoch": 1.58, "learning_rate": 1.5778792491121258e-07, "logits/chosen": -4.796781063079834, "logits/rejected": -4.717053413391113, "logps/chosen": -416.6231384277344, "logps/rejected": -272.57464599609375, "loss": 0.1206, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.828265190124512, "rewards/margins": 6.367212295532227, "rewards/rejected": -0.5389474034309387, "step": 3460 }, { "epoch": 1.58, "learning_rate": 1.5728056823947235e-07, "logits/chosen": -4.855520725250244, "logits/rejected": -4.786080360412598, "logps/chosen": -383.80316162109375, "logps/rejected": -258.72418212890625, "loss": 0.1532, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 5.406111240386963, "rewards/margins": 6.089895248413086, "rewards/rejected": -0.6837841868400574, "step": 3470 }, { "epoch": 1.59, "learning_rate": 1.567732115677321e-07, "logits/chosen": -4.851085662841797, "logits/rejected": -4.784983158111572, "logps/chosen": -415.5584411621094, "logps/rejected": -298.3888244628906, "loss": 0.1532, "rewards/accuracies": 0.9375, "rewards/chosen": 5.319189548492432, "rewards/margins": 5.690426826477051, "rewards/rejected": -0.37123697996139526, "step": 3480 }, { "epoch": 1.59, "learning_rate": 1.5626585489599188e-07, "logits/chosen": -4.8977861404418945, "logits/rejected": -4.824227333068848, "logps/chosen": -420.14312744140625, "logps/rejected": -290.7713317871094, "loss": 0.1302, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.458127021789551, "rewards/margins": 5.0913004875183105, "rewards/rejected": -0.6331741809844971, "step": 3490 }, { "epoch": 1.6, "learning_rate": 1.5575849822425165e-07, "logits/chosen": -4.840010643005371, "logits/rejected": -4.765267848968506, "logps/chosen": -409.0025329589844, "logps/rejected": -270.3695068359375, "loss": 0.144, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 5.536954402923584, "rewards/margins": 5.734356880187988, "rewards/rejected": -0.1974021941423416, "step": 3500 }, { "epoch": 1.6, "eval_logits/chosen": -4.782894611358643, "eval_logits/rejected": -4.7135796546936035, "eval_logps/chosen": -401.62872314453125, "eval_logps/rejected": -265.6830139160156, "eval_loss": 0.154439777135849, "eval_rewards/accuracies": 0.924580991268158, "eval_rewards/chosen": 5.137514114379883, "eval_rewards/margins": 5.3355584144592285, "eval_rewards/rejected": -0.19804365932941437, "eval_runtime": 326.3873, "eval_samples_per_second": 8.769, "eval_steps_per_second": 0.548, "step": 3500 }, { "epoch": 1.6, "learning_rate": 1.552511415525114e-07, "logits/chosen": -4.854730606079102, "logits/rejected": -4.777928352355957, "logps/chosen": -407.8537902832031, "logps/rejected": -262.1612548828125, "loss": 0.1411, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.826956748962402, "rewards/margins": 5.4741644859313965, "rewards/rejected": -0.6472080945968628, "step": 3510 }, { "epoch": 1.61, "learning_rate": 1.5474378488077118e-07, "logits/chosen": -4.868206977844238, "logits/rejected": -4.797361373901367, "logps/chosen": -418.659423828125, "logps/rejected": -292.6211242675781, "loss": 0.131, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.85814094543457, "rewards/margins": 5.269218444824219, "rewards/rejected": -0.4110773503780365, "step": 3520 }, { "epoch": 1.61, "learning_rate": 1.5423642820903095e-07, "logits/chosen": -4.832231521606445, "logits/rejected": -4.767117500305176, "logps/chosen": -402.9268493652344, "logps/rejected": -279.7882385253906, "loss": 0.1459, "rewards/accuracies": 0.875, "rewards/chosen": 5.000153541564941, "rewards/margins": 5.198324680328369, "rewards/rejected": -0.19817037880420685, "step": 3530 }, { "epoch": 1.62, "learning_rate": 1.537290715372907e-07, "logits/chosen": -4.804234027862549, "logits/rejected": -4.732181549072266, "logps/chosen": -404.46929931640625, "logps/rejected": -268.2332458496094, "loss": 0.1159, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.378109931945801, "rewards/margins": 5.408408164978027, "rewards/rejected": -0.03029813803732395, "step": 3540 }, { "epoch": 1.62, "learning_rate": 1.5322171486555048e-07, "logits/chosen": -4.848740577697754, "logits/rejected": -4.74808406829834, "logps/chosen": -482.76043701171875, "logps/rejected": -285.25750732421875, "loss": 0.127, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 6.341855525970459, "rewards/margins": 6.363237380981445, "rewards/rejected": -0.02138195000588894, "step": 3550 }, { "epoch": 1.63, "learning_rate": 1.5271435819381025e-07, "logits/chosen": -4.801382541656494, "logits/rejected": -4.726290702819824, "logps/chosen": -425.5838317871094, "logps/rejected": -284.0330505371094, "loss": 0.1155, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 5.878510475158691, "rewards/margins": 6.09785270690918, "rewards/rejected": -0.21934270858764648, "step": 3560 }, { "epoch": 1.63, "learning_rate": 1.5220700152207e-07, "logits/chosen": -4.777207374572754, "logits/rejected": -4.6835832595825195, "logps/chosen": -453.79888916015625, "logps/rejected": -271.0815124511719, "loss": 0.11, "rewards/accuracies": 0.9375, "rewards/chosen": 6.3752827644348145, "rewards/margins": 6.843291282653809, "rewards/rejected": -0.4680088460445404, "step": 3570 }, { "epoch": 1.63, "learning_rate": 1.5169964485032978e-07, "logits/chosen": -4.854363441467285, "logits/rejected": -4.7885894775390625, "logps/chosen": -393.6365051269531, "logps/rejected": -274.2425537109375, "loss": 0.1288, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.884383201599121, "rewards/margins": 5.578164100646973, "rewards/rejected": -0.6937803030014038, "step": 3580 }, { "epoch": 1.64, "learning_rate": 1.5119228817858955e-07, "logits/chosen": -4.86555290222168, "logits/rejected": -4.7786455154418945, "logps/chosen": -430.27587890625, "logps/rejected": -280.77374267578125, "loss": 0.1277, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.5163445472717285, "rewards/margins": 5.982257843017578, "rewards/rejected": -0.4659128189086914, "step": 3590 }, { "epoch": 1.64, "learning_rate": 1.506849315068493e-07, "logits/chosen": -4.82952356338501, "logits/rejected": -4.746293067932129, "logps/chosen": -408.99853515625, "logps/rejected": -264.8529357910156, "loss": 0.1293, "rewards/accuracies": 0.9375, "rewards/chosen": 5.322381973266602, "rewards/margins": 5.217175006866455, "rewards/rejected": 0.10520720481872559, "step": 3600 }, { "epoch": 1.64, "eval_logits/chosen": -4.743781089782715, "eval_logits/rejected": -4.675158500671387, "eval_logps/chosen": -402.1170959472656, "eval_logps/rejected": -266.3995361328125, "eval_loss": 0.15348190069198608, "eval_rewards/accuracies": 0.9217877388000488, "eval_rewards/chosen": 4.893320560455322, "eval_rewards/margins": 5.44963264465332, "eval_rewards/rejected": -0.556312084197998, "eval_runtime": 396.4641, "eval_samples_per_second": 7.219, "eval_steps_per_second": 0.451, "step": 3600 }, { "epoch": 1.65, "learning_rate": 1.5017757483510908e-07, "logits/chosen": -4.821118354797363, "logits/rejected": -4.743973731994629, "logps/chosen": -434.16357421875, "logps/rejected": -291.2716064453125, "loss": 0.1372, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.5478973388671875, "rewards/margins": 5.9900994300842285, "rewards/rejected": -0.44220179319381714, "step": 3610 }, { "epoch": 1.65, "learning_rate": 1.4967021816336885e-07, "logits/chosen": -4.858708381652832, "logits/rejected": -4.775480270385742, "logps/chosen": -411.1961364746094, "logps/rejected": -255.7838134765625, "loss": 0.1522, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.687599182128906, "rewards/margins": 5.4713335037231445, "rewards/rejected": -0.783734142780304, "step": 3620 }, { "epoch": 1.66, "learning_rate": 1.491628614916286e-07, "logits/chosen": -4.886147499084473, "logits/rejected": -4.811054229736328, "logps/chosen": -426.3934631347656, "logps/rejected": -297.427490234375, "loss": 0.1086, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.8400678634643555, "rewards/margins": 5.6957807540893555, "rewards/rejected": 0.14428739249706268, "step": 3630 }, { "epoch": 1.66, "learning_rate": 1.4865550481988838e-07, "logits/chosen": -4.8693132400512695, "logits/rejected": -4.794482707977295, "logps/chosen": -416.00311279296875, "logps/rejected": -289.7362060546875, "loss": 0.161, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 4.819903373718262, "rewards/margins": 5.364016532897949, "rewards/rejected": -0.5441136360168457, "step": 3640 }, { "epoch": 1.67, "learning_rate": 1.4814814814814815e-07, "logits/chosen": -4.8955769538879395, "logits/rejected": -4.8063459396362305, "logps/chosen": -420.66015625, "logps/rejected": -274.5057678222656, "loss": 0.1301, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.75004768371582, "rewards/margins": 5.239798545837402, "rewards/rejected": -0.48975086212158203, "step": 3650 }, { "epoch": 1.67, "learning_rate": 1.476407914764079e-07, "logits/chosen": -4.89899206161499, "logits/rejected": -4.815016746520996, "logps/chosen": -431.0458068847656, "logps/rejected": -280.3574523925781, "loss": 0.1151, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.5052618980407715, "rewards/margins": 5.813795566558838, "rewards/rejected": -0.30853399634361267, "step": 3660 }, { "epoch": 1.68, "learning_rate": 1.4713343480466768e-07, "logits/chosen": -4.885598659515381, "logits/rejected": -4.808058261871338, "logps/chosen": -396.3288269042969, "logps/rejected": -269.12078857421875, "loss": 0.1336, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.382796287536621, "rewards/margins": 5.643939971923828, "rewards/rejected": -0.2611432671546936, "step": 3670 }, { "epoch": 1.68, "learning_rate": 1.4662607813292745e-07, "logits/chosen": -4.924083709716797, "logits/rejected": -4.8247575759887695, "logps/chosen": -425.57049560546875, "logps/rejected": -260.6233215332031, "loss": 0.1291, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.617369174957275, "rewards/margins": 6.059269905090332, "rewards/rejected": -0.4419013559818268, "step": 3680 }, { "epoch": 1.68, "learning_rate": 1.461187214611872e-07, "logits/chosen": -4.900703430175781, "logits/rejected": -4.811938285827637, "logps/chosen": -423.801025390625, "logps/rejected": -268.89520263671875, "loss": 0.1363, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.755816459655762, "rewards/margins": 6.004420757293701, "rewards/rejected": -0.24860462546348572, "step": 3690 }, { "epoch": 1.69, "learning_rate": 1.4561136478944698e-07, "logits/chosen": -4.897640228271484, "logits/rejected": -4.81813907623291, "logps/chosen": -411.6173400878906, "logps/rejected": -278.7688903808594, "loss": 0.1503, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.211131572723389, "rewards/margins": 5.253973484039307, "rewards/rejected": -0.04284205287694931, "step": 3700 }, { "epoch": 1.69, "eval_logits/chosen": -4.788158416748047, "eval_logits/rejected": -4.717593193054199, "eval_logps/chosen": -401.7884521484375, "eval_logps/rejected": -265.9451599121094, "eval_loss": 0.1544504314661026, "eval_rewards/accuracies": 0.9134078025817871, "eval_rewards/chosen": 5.057638168334961, "eval_rewards/margins": 5.386771202087402, "eval_rewards/rejected": -0.3291333317756653, "eval_runtime": 347.4182, "eval_samples_per_second": 8.238, "eval_steps_per_second": 0.515, "step": 3700 }, { "epoch": 1.69, "learning_rate": 1.4510400811770675e-07, "logits/chosen": -4.9061384201049805, "logits/rejected": -4.792243957519531, "logps/chosen": -479.3064880371094, "logps/rejected": -279.6571350097656, "loss": 0.1436, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 6.235214710235596, "rewards/margins": 6.317317008972168, "rewards/rejected": -0.08210253715515137, "step": 3710 }, { "epoch": 1.7, "learning_rate": 1.445966514459665e-07, "logits/chosen": -4.84390926361084, "logits/rejected": -4.76339864730835, "logps/chosen": -410.469970703125, "logps/rejected": -275.3158874511719, "loss": 0.1445, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.171835422515869, "rewards/margins": 5.63153076171875, "rewards/rejected": -0.45969611406326294, "step": 3720 }, { "epoch": 1.7, "learning_rate": 1.4408929477422628e-07, "logits/chosen": -4.859330177307129, "logits/rejected": -4.7885637283325195, "logps/chosen": -404.4298095703125, "logps/rejected": -273.4940490722656, "loss": 0.137, "rewards/accuracies": 0.9375, "rewards/chosen": 5.375868320465088, "rewards/margins": 5.881934642791748, "rewards/rejected": -0.5060666799545288, "step": 3730 }, { "epoch": 1.71, "learning_rate": 1.4358193810248604e-07, "logits/chosen": -4.885931015014648, "logits/rejected": -4.809623718261719, "logps/chosen": -399.83331298828125, "logps/rejected": -277.1554260253906, "loss": 0.1276, "rewards/accuracies": 0.9375, "rewards/chosen": 4.595635890960693, "rewards/margins": 5.387939929962158, "rewards/rejected": -0.7923033237457275, "step": 3740 }, { "epoch": 1.71, "learning_rate": 1.430745814307458e-07, "logits/chosen": -4.876900672912598, "logits/rejected": -4.795269012451172, "logps/chosen": -407.26129150390625, "logps/rejected": -264.84326171875, "loss": 0.1469, "rewards/accuracies": 0.9375, "rewards/chosen": 5.391351699829102, "rewards/margins": 5.874351501464844, "rewards/rejected": -0.4829998016357422, "step": 3750 }, { "epoch": 1.72, "learning_rate": 1.4256722475900558e-07, "logits/chosen": -4.906834125518799, "logits/rejected": -4.829418182373047, "logps/chosen": -402.30242919921875, "logps/rejected": -264.7235412597656, "loss": 0.1388, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.199213981628418, "rewards/margins": 6.0583367347717285, "rewards/rejected": -0.8591222763061523, "step": 3760 }, { "epoch": 1.72, "learning_rate": 1.4205986808726534e-07, "logits/chosen": -4.877079963684082, "logits/rejected": -4.811589241027832, "logps/chosen": -419.886474609375, "logps/rejected": -292.5098876953125, "loss": 0.1373, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.915735244750977, "rewards/margins": 5.689780235290527, "rewards/rejected": -0.7740451097488403, "step": 3770 }, { "epoch": 1.73, "learning_rate": 1.415525114155251e-07, "logits/chosen": -4.892580032348633, "logits/rejected": -4.807557582855225, "logps/chosen": -435.90460205078125, "logps/rejected": -273.6236572265625, "loss": 0.1362, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.331456661224365, "rewards/margins": 6.359336853027344, "rewards/rejected": -1.0278804302215576, "step": 3780 }, { "epoch": 1.73, "learning_rate": 1.4104515474378488e-07, "logits/chosen": -4.809112548828125, "logits/rejected": -4.734879970550537, "logps/chosen": -425.7713317871094, "logps/rejected": -270.1158447265625, "loss": 0.1389, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.006583213806152, "rewards/margins": 5.3523268699646, "rewards/rejected": -0.34574347734451294, "step": 3790 }, { "epoch": 1.73, "learning_rate": 1.4053779807204464e-07, "logits/chosen": -4.8211750984191895, "logits/rejected": -4.743138313293457, "logps/chosen": -395.36505126953125, "logps/rejected": -255.6575164794922, "loss": 0.1313, "rewards/accuracies": 0.9375, "rewards/chosen": 5.343501091003418, "rewards/margins": 5.479709148406982, "rewards/rejected": -0.13620814681053162, "step": 3800 }, { "epoch": 1.73, "eval_logits/chosen": -4.773489475250244, "eval_logits/rejected": -4.707581043243408, "eval_logps/chosen": -401.828857421875, "eval_logps/rejected": -266.0661315917969, "eval_loss": 0.1492536962032318, "eval_rewards/accuracies": 0.9134078025817871, "eval_rewards/chosen": 5.037442207336426, "eval_rewards/margins": 5.427051067352295, "eval_rewards/rejected": -0.38960981369018555, "eval_runtime": 357.2432, "eval_samples_per_second": 8.011, "eval_steps_per_second": 0.501, "step": 3800 }, { "epoch": 1.74, "learning_rate": 1.400304414003044e-07, "logits/chosen": -4.839893817901611, "logits/rejected": -4.773051738739014, "logps/chosen": -408.8924255371094, "logps/rejected": -284.0409240722656, "loss": 0.1256, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.691256046295166, "rewards/margins": 5.78350830078125, "rewards/rejected": -1.0922523736953735, "step": 3810 }, { "epoch": 1.74, "learning_rate": 1.3952308472856418e-07, "logits/chosen": -4.837934970855713, "logits/rejected": -4.758072853088379, "logps/chosen": -402.15179443359375, "logps/rejected": -261.89007568359375, "loss": 0.1187, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.467240333557129, "rewards/margins": 5.953133583068848, "rewards/rejected": -0.48589402437210083, "step": 3820 }, { "epoch": 1.75, "learning_rate": 1.3901572805682394e-07, "logits/chosen": -4.898125648498535, "logits/rejected": -4.809311866760254, "logps/chosen": -455.2549743652344, "logps/rejected": -291.0647277832031, "loss": 0.1252, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 5.4069061279296875, "rewards/margins": 6.211111545562744, "rewards/rejected": -0.8042048215866089, "step": 3830 }, { "epoch": 1.75, "learning_rate": 1.385083713850837e-07, "logits/chosen": -4.880120754241943, "logits/rejected": -4.787081718444824, "logps/chosen": -428.466064453125, "logps/rejected": -263.0096740722656, "loss": 0.1407, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.054627418518066, "rewards/margins": 5.818029880523682, "rewards/rejected": -0.7634022831916809, "step": 3840 }, { "epoch": 1.76, "learning_rate": 1.3800101471334348e-07, "logits/chosen": -4.868729591369629, "logits/rejected": -4.771548271179199, "logps/chosen": -418.08050537109375, "logps/rejected": -254.17080688476562, "loss": 0.1182, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 5.2043561935424805, "rewards/margins": 6.103675365447998, "rewards/rejected": -0.8993192911148071, "step": 3850 }, { "epoch": 1.76, "learning_rate": 1.3749365804160324e-07, "logits/chosen": -4.812873840332031, "logits/rejected": -4.731348037719727, "logps/chosen": -441.2776794433594, "logps/rejected": -285.8269348144531, "loss": 0.1295, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.475584983825684, "rewards/margins": 6.132378578186035, "rewards/rejected": -0.6567937731742859, "step": 3860 }, { "epoch": 1.77, "learning_rate": 1.36986301369863e-07, "logits/chosen": -4.823525428771973, "logits/rejected": -4.74614953994751, "logps/chosen": -423.47625732421875, "logps/rejected": -291.06427001953125, "loss": 0.1224, "rewards/accuracies": 0.9375, "rewards/chosen": 5.722263336181641, "rewards/margins": 6.219607353210449, "rewards/rejected": -0.4973435401916504, "step": 3870 }, { "epoch": 1.77, "learning_rate": 1.3647894469812278e-07, "logits/chosen": -4.83687162399292, "logits/rejected": -4.754541873931885, "logps/chosen": -427.55859375, "logps/rejected": -273.13275146484375, "loss": 0.1332, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.706133842468262, "rewards/margins": 6.195132732391357, "rewards/rejected": -0.4889994263648987, "step": 3880 }, { "epoch": 1.78, "learning_rate": 1.3597158802638254e-07, "logits/chosen": -4.822018623352051, "logits/rejected": -4.749094486236572, "logps/chosen": -412.400146484375, "logps/rejected": -273.47076416015625, "loss": 0.1414, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.243111610412598, "rewards/margins": 6.106514930725098, "rewards/rejected": -0.8634039163589478, "step": 3890 }, { "epoch": 1.78, "learning_rate": 1.354642313546423e-07, "logits/chosen": -4.855406761169434, "logits/rejected": -4.773202419281006, "logps/chosen": -407.1507568359375, "logps/rejected": -265.33135986328125, "loss": 0.1312, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.709498405456543, "rewards/margins": 5.240758419036865, "rewards/rejected": -0.5312596559524536, "step": 3900 }, { "epoch": 1.78, "eval_logits/chosen": -4.8094658851623535, "eval_logits/rejected": -4.735960483551025, "eval_logps/chosen": -401.8134460449219, "eval_logps/rejected": -265.99249267578125, "eval_loss": 0.1479773223400116, "eval_rewards/accuracies": 0.916201114654541, "eval_rewards/chosen": 5.045133113861084, "eval_rewards/margins": 5.3979172706604, "eval_rewards/rejected": -0.3527843654155731, "eval_runtime": 204.5422, "eval_samples_per_second": 13.992, "eval_steps_per_second": 0.875, "step": 3900 }, { "epoch": 1.78, "learning_rate": 1.3495687468290208e-07, "logits/chosen": -4.887848854064941, "logits/rejected": -4.78920841217041, "logps/chosen": -430.08013916015625, "logps/rejected": -257.2960205078125, "loss": 0.1183, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.606698036193848, "rewards/margins": 6.159076690673828, "rewards/rejected": -0.5523785352706909, "step": 3910 }, { "epoch": 1.79, "learning_rate": 1.3444951801116184e-07, "logits/chosen": -4.847224235534668, "logits/rejected": -4.755773544311523, "logps/chosen": -403.518310546875, "logps/rejected": -251.7811279296875, "loss": 0.1362, "rewards/accuracies": 0.9375, "rewards/chosen": 5.2600226402282715, "rewards/margins": 5.889854431152344, "rewards/rejected": -0.629831850528717, "step": 3920 }, { "epoch": 1.79, "learning_rate": 1.339421613394216e-07, "logits/chosen": -4.880440711975098, "logits/rejected": -4.802273750305176, "logps/chosen": -413.08953857421875, "logps/rejected": -284.208740234375, "loss": 0.1284, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.7693095207214355, "rewards/margins": 5.340995788574219, "rewards/rejected": -0.5716865658760071, "step": 3930 }, { "epoch": 1.8, "learning_rate": 1.3343480466768138e-07, "logits/chosen": -4.860099792480469, "logits/rejected": -4.774273872375488, "logps/chosen": -416.45489501953125, "logps/rejected": -260.15277099609375, "loss": 0.1417, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.506192207336426, "rewards/margins": 5.792212963104248, "rewards/rejected": -0.2860206961631775, "step": 3940 }, { "epoch": 1.8, "learning_rate": 1.3292744799594114e-07, "logits/chosen": -4.849244594573975, "logits/rejected": -4.775120735168457, "logps/chosen": -410.3631896972656, "logps/rejected": -279.70587158203125, "loss": 0.1491, "rewards/accuracies": 0.9375, "rewards/chosen": 5.332108497619629, "rewards/margins": 5.563758850097656, "rewards/rejected": -0.23165054619312286, "step": 3950 }, { "epoch": 1.81, "learning_rate": 1.324200913242009e-07, "logits/chosen": -4.863145351409912, "logits/rejected": -4.788730621337891, "logps/chosen": -408.65765380859375, "logps/rejected": -266.03900146484375, "loss": 0.1395, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.161596775054932, "rewards/margins": 5.920387268066406, "rewards/rejected": -0.7587906718254089, "step": 3960 }, { "epoch": 1.81, "learning_rate": 1.3191273465246068e-07, "logits/chosen": -4.917438983917236, "logits/rejected": -4.849202632904053, "logps/chosen": -372.0543518066406, "logps/rejected": -264.3670349121094, "loss": 0.1112, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.275931358337402, "rewards/margins": 5.630006790161133, "rewards/rejected": -1.3540751934051514, "step": 3970 }, { "epoch": 1.82, "learning_rate": 1.3140537798072044e-07, "logits/chosen": -4.894449234008789, "logits/rejected": -4.8133931159973145, "logps/chosen": -408.71051025390625, "logps/rejected": -263.5215148925781, "loss": 0.114, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.761112689971924, "rewards/margins": 6.205967903137207, "rewards/rejected": -0.4448555111885071, "step": 3980 }, { "epoch": 1.82, "learning_rate": 1.308980213089802e-07, "logits/chosen": -4.882841110229492, "logits/rejected": -4.802196025848389, "logps/chosen": -414.21728515625, "logps/rejected": -267.473388671875, "loss": 0.1174, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.910806655883789, "rewards/margins": 6.241650104522705, "rewards/rejected": -0.33084338903427124, "step": 3990 }, { "epoch": 1.83, "learning_rate": 1.3039066463723998e-07, "logits/chosen": -4.922643184661865, "logits/rejected": -4.825827121734619, "logps/chosen": -416.2288513183594, "logps/rejected": -259.6297302246094, "loss": 0.1227, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.223371982574463, "rewards/margins": 5.85031795501709, "rewards/rejected": -0.6269458532333374, "step": 4000 }, { "epoch": 1.83, "eval_logits/chosen": -4.833512783050537, "eval_logits/rejected": -4.764825820922852, "eval_logps/chosen": -402.1415710449219, "eval_logps/rejected": -266.551513671875, "eval_loss": 0.14720118045806885, "eval_rewards/accuracies": 0.916201114654541, "eval_rewards/chosen": 4.881073951721191, "eval_rewards/margins": 5.513360023498535, "eval_rewards/rejected": -0.6322864294052124, "eval_runtime": 195.2247, "eval_samples_per_second": 14.66, "eval_steps_per_second": 0.917, "step": 4000 }, { "epoch": 1.83, "learning_rate": 1.2988330796549974e-07, "logits/chosen": -4.88839864730835, "logits/rejected": -4.807201862335205, "logps/chosen": -432.52069091796875, "logps/rejected": -276.1604309082031, "loss": 0.127, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.464041709899902, "rewards/margins": 6.259596824645996, "rewards/rejected": -0.795554518699646, "step": 4010 }, { "epoch": 1.83, "learning_rate": 1.293759512937595e-07, "logits/chosen": -4.899007320404053, "logits/rejected": -4.827846050262451, "logps/chosen": -412.6627502441406, "logps/rejected": -280.35479736328125, "loss": 0.1502, "rewards/accuracies": 0.9375, "rewards/chosen": 4.863406181335449, "rewards/margins": 5.623223304748535, "rewards/rejected": -0.7598163485527039, "step": 4020 }, { "epoch": 1.84, "learning_rate": 1.2886859462201928e-07, "logits/chosen": -4.87841272354126, "logits/rejected": -4.805886268615723, "logps/chosen": -410.55291748046875, "logps/rejected": -282.27752685546875, "loss": 0.1321, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.271296977996826, "rewards/margins": 5.799007892608643, "rewards/rejected": -0.5277117490768433, "step": 4030 }, { "epoch": 1.84, "learning_rate": 1.2836123795027904e-07, "logits/chosen": -4.877324104309082, "logits/rejected": -4.812979221343994, "logps/chosen": -402.6602478027344, "logps/rejected": -278.23626708984375, "loss": 0.1139, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.5662970542907715, "rewards/margins": 5.044180393218994, "rewards/rejected": -0.477883517742157, "step": 4040 }, { "epoch": 1.85, "learning_rate": 1.278538812785388e-07, "logits/chosen": -4.874928951263428, "logits/rejected": -4.792891979217529, "logps/chosen": -434.03076171875, "logps/rejected": -280.65081787109375, "loss": 0.1528, "rewards/accuracies": 0.9375, "rewards/chosen": 5.873688697814941, "rewards/margins": 6.848775386810303, "rewards/rejected": -0.9750869870185852, "step": 4050 }, { "epoch": 1.85, "learning_rate": 1.2734652460679858e-07, "logits/chosen": -4.831969261169434, "logits/rejected": -4.752859592437744, "logps/chosen": -423.628173828125, "logps/rejected": -280.2437438964844, "loss": 0.1468, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.160457611083984, "rewards/margins": 5.989943027496338, "rewards/rejected": -0.8294855356216431, "step": 4060 }, { "epoch": 1.86, "learning_rate": 1.2683916793505834e-07, "logits/chosen": -4.858640670776367, "logits/rejected": -4.793429374694824, "logps/chosen": -389.1492614746094, "logps/rejected": -282.86358642578125, "loss": 0.1423, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.167361736297607, "rewards/margins": 5.107675552368164, "rewards/rejected": -0.9403136968612671, "step": 4070 }, { "epoch": 1.86, "learning_rate": 1.263318112633181e-07, "logits/chosen": -4.938597679138184, "logits/rejected": -4.8506178855896, "logps/chosen": -438.7210998535156, "logps/rejected": -276.87762451171875, "loss": 0.146, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.04889440536499, "rewards/margins": 5.49309778213501, "rewards/rejected": -0.44420352578163147, "step": 4080 }, { "epoch": 1.87, "learning_rate": 1.2582445459157788e-07, "logits/chosen": -4.941633224487305, "logits/rejected": -4.862156391143799, "logps/chosen": -424.306396484375, "logps/rejected": -270.0606384277344, "loss": 0.1496, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.29885196685791, "rewards/margins": 5.821725368499756, "rewards/rejected": -0.5228734016418457, "step": 4090 }, { "epoch": 1.87, "learning_rate": 1.2531709791983764e-07, "logits/chosen": -4.944653034210205, "logits/rejected": -4.859512805938721, "logps/chosen": -454.162109375, "logps/rejected": -292.4343566894531, "loss": 0.1364, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 6.076595306396484, "rewards/margins": 6.78316593170166, "rewards/rejected": -0.706570029258728, "step": 4100 }, { "epoch": 1.87, "eval_logits/chosen": -4.833913326263428, "eval_logits/rejected": -4.768759727478027, "eval_logps/chosen": -402.13677978515625, "eval_logps/rejected": -266.53778076171875, "eval_loss": 0.14637863636016846, "eval_rewards/accuracies": 0.9189944267272949, "eval_rewards/chosen": 4.883488178253174, "eval_rewards/margins": 5.508914947509766, "eval_rewards/rejected": -0.6254265904426575, "eval_runtime": 366.4995, "eval_samples_per_second": 7.809, "eval_steps_per_second": 0.488, "step": 4100 }, { "epoch": 1.88, "learning_rate": 1.248097412480974e-07, "logits/chosen": -4.897359371185303, "logits/rejected": -4.818748474121094, "logps/chosen": -443.74237060546875, "logps/rejected": -280.29144287109375, "loss": 0.1048, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.85573673248291, "rewards/margins": 5.737174987792969, "rewards/rejected": -0.8814382553100586, "step": 4110 }, { "epoch": 1.88, "learning_rate": 1.2430238457635718e-07, "logits/chosen": -4.865406036376953, "logits/rejected": -4.798456192016602, "logps/chosen": -418.35595703125, "logps/rejected": -285.4429626464844, "loss": 0.1314, "rewards/accuracies": 0.9375, "rewards/chosen": 4.99585485458374, "rewards/margins": 5.68489933013916, "rewards/rejected": -0.6890440583229065, "step": 4120 }, { "epoch": 1.89, "learning_rate": 1.2379502790461694e-07, "logits/chosen": -4.91237735748291, "logits/rejected": -4.826327323913574, "logps/chosen": -400.58203125, "logps/rejected": -254.1546630859375, "loss": 0.1552, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 5.029053688049316, "rewards/margins": 5.599238395690918, "rewards/rejected": -0.5701853036880493, "step": 4130 }, { "epoch": 1.89, "learning_rate": 1.232876712328767e-07, "logits/chosen": -4.88876485824585, "logits/rejected": -4.802396774291992, "logps/chosen": -420.10760498046875, "logps/rejected": -259.2742919921875, "loss": 0.1485, "rewards/accuracies": 0.875, "rewards/chosen": 5.9407057762146, "rewards/margins": 6.22561502456665, "rewards/rejected": -0.28490862250328064, "step": 4140 }, { "epoch": 1.89, "learning_rate": 1.2278031456113648e-07, "logits/chosen": -4.921999454498291, "logits/rejected": -4.829245567321777, "logps/chosen": -461.9033203125, "logps/rejected": -272.88494873046875, "loss": 0.1073, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 6.403397560119629, "rewards/margins": 6.932803153991699, "rewards/rejected": -0.5294064879417419, "step": 4150 }, { "epoch": 1.9, "learning_rate": 1.2227295788939624e-07, "logits/chosen": -4.910717487335205, "logits/rejected": -4.831747531890869, "logps/chosen": -425.89422607421875, "logps/rejected": -266.9718322753906, "loss": 0.1362, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.269554138183594, "rewards/margins": 5.832782745361328, "rewards/rejected": -0.5632286667823792, "step": 4160 }, { "epoch": 1.9, "learning_rate": 1.21765601217656e-07, "logits/chosen": -4.952414512634277, "logits/rejected": -4.885561943054199, "logps/chosen": -399.10546875, "logps/rejected": -277.2208251953125, "loss": 0.1123, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.1805524826049805, "rewards/margins": 5.1715192794799805, "rewards/rejected": 0.009033012203872204, "step": 4170 }, { "epoch": 1.91, "learning_rate": 1.2125824454591578e-07, "logits/chosen": -4.947776794433594, "logits/rejected": -4.872069835662842, "logps/chosen": -412.47064208984375, "logps/rejected": -277.737548828125, "loss": 0.1435, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.961889266967773, "rewards/margins": 6.1371026039123535, "rewards/rejected": -0.17521362006664276, "step": 4180 }, { "epoch": 1.91, "learning_rate": 1.2075088787417554e-07, "logits/chosen": -4.923088073730469, "logits/rejected": -4.850353240966797, "logps/chosen": -420.84619140625, "logps/rejected": -272.2101745605469, "loss": 0.1391, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.157086372375488, "rewards/margins": 5.8646931648254395, "rewards/rejected": -0.7076066732406616, "step": 4190 }, { "epoch": 1.92, "learning_rate": 1.202435312024353e-07, "logits/chosen": -4.8929548263549805, "logits/rejected": -4.824667453765869, "logps/chosen": -422.65753173828125, "logps/rejected": -286.6756591796875, "loss": 0.1472, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.241342067718506, "rewards/margins": 5.386134147644043, "rewards/rejected": -0.1447918862104416, "step": 4200 }, { "epoch": 1.92, "eval_logits/chosen": -4.840590953826904, "eval_logits/rejected": -4.777237892150879, "eval_logps/chosen": -401.89349365234375, "eval_logps/rejected": -266.2981262207031, "eval_loss": 0.14613699913024902, "eval_rewards/accuracies": 0.9189944267272949, "eval_rewards/chosen": 5.0051140785217285, "eval_rewards/margins": 5.5107221603393555, "eval_rewards/rejected": -0.5056081414222717, "eval_runtime": 331.4918, "eval_samples_per_second": 8.634, "eval_steps_per_second": 0.54, "step": 4200 }, { "epoch": 1.92, "learning_rate": 1.1973617453069508e-07, "logits/chosen": -4.915150165557861, "logits/rejected": -4.831782817840576, "logps/chosen": -438.6371154785156, "logps/rejected": -276.69061279296875, "loss": 0.111, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.81505823135376, "rewards/margins": 6.422762393951416, "rewards/rejected": -0.6077035665512085, "step": 4210 }, { "epoch": 1.93, "learning_rate": 1.1922881785895484e-07, "logits/chosen": -4.8684210777282715, "logits/rejected": -4.803809642791748, "logps/chosen": -408.12957763671875, "logps/rejected": -291.8279113769531, "loss": 0.1346, "rewards/accuracies": 0.9375, "rewards/chosen": 5.21033239364624, "rewards/margins": 5.362514495849609, "rewards/rejected": -0.15218181908130646, "step": 4220 }, { "epoch": 1.93, "learning_rate": 1.187214611872146e-07, "logits/chosen": -4.824395179748535, "logits/rejected": -4.7538557052612305, "logps/chosen": -413.96807861328125, "logps/rejected": -280.8396911621094, "loss": 0.115, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.84457540512085, "rewards/margins": 5.483084678649902, "rewards/rejected": -0.638509213924408, "step": 4230 }, { "epoch": 1.94, "learning_rate": 1.1821410451547436e-07, "logits/chosen": -4.886212348937988, "logits/rejected": -4.8206562995910645, "logps/chosen": -410.9671936035156, "logps/rejected": -279.1222229003906, "loss": 0.1604, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.444996356964111, "rewards/margins": 5.494571685791016, "rewards/rejected": -0.04957570880651474, "step": 4240 }, { "epoch": 1.94, "learning_rate": 1.1770674784373413e-07, "logits/chosen": -4.926669120788574, "logits/rejected": -4.878637313842773, "logps/chosen": -374.8077392578125, "logps/rejected": -283.6404113769531, "loss": 0.1503, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.317763328552246, "rewards/margins": 4.766018867492676, "rewards/rejected": -0.4482554495334625, "step": 4250 }, { "epoch": 1.94, "learning_rate": 1.171993911719939e-07, "logits/chosen": -4.914463043212891, "logits/rejected": -4.852496147155762, "logps/chosen": -399.0956115722656, "logps/rejected": -272.5018005371094, "loss": 0.1531, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.758635520935059, "rewards/margins": 5.472343921661377, "rewards/rejected": -0.7137077450752258, "step": 4260 }, { "epoch": 1.95, "learning_rate": 1.1669203450025366e-07, "logits/chosen": -4.904605865478516, "logits/rejected": -4.836545467376709, "logps/chosen": -430.7564392089844, "logps/rejected": -281.4437561035156, "loss": 0.1274, "rewards/accuracies": 0.9375, "rewards/chosen": 5.589218616485596, "rewards/margins": 5.915493965148926, "rewards/rejected": -0.326275497674942, "step": 4270 }, { "epoch": 1.95, "learning_rate": 1.1618467782851343e-07, "logits/chosen": -4.911101341247559, "logits/rejected": -4.833901405334473, "logps/chosen": -429.5088806152344, "logps/rejected": -282.09539794921875, "loss": 0.1069, "rewards/accuracies": 0.9375, "rewards/chosen": 5.597308158874512, "rewards/margins": 5.848171710968018, "rewards/rejected": -0.25086361169815063, "step": 4280 }, { "epoch": 1.96, "learning_rate": 1.156773211567732e-07, "logits/chosen": -4.888250827789307, "logits/rejected": -4.804991722106934, "logps/chosen": -405.09515380859375, "logps/rejected": -253.31884765625, "loss": 0.1295, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.265164852142334, "rewards/margins": 6.043637752532959, "rewards/rejected": -0.7784733176231384, "step": 4290 }, { "epoch": 1.96, "learning_rate": 1.1516996448503296e-07, "logits/chosen": -4.91660737991333, "logits/rejected": -4.83841609954834, "logps/chosen": -428.55523681640625, "logps/rejected": -282.8753967285156, "loss": 0.1187, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 6.493180274963379, "rewards/margins": 6.719484806060791, "rewards/rejected": -0.2263043373823166, "step": 4300 }, { "epoch": 1.96, "eval_logits/chosen": -4.896408557891846, "eval_logits/rejected": -4.827836036682129, "eval_logps/chosen": -401.7568664550781, "eval_logps/rejected": -266.26361083984375, "eval_loss": 0.1459536999464035, "eval_rewards/accuracies": 0.924580991268158, "eval_rewards/chosen": 5.07344388961792, "eval_rewards/margins": 5.561788558959961, "eval_rewards/rejected": -0.48834389448165894, "eval_runtime": 290.6078, "eval_samples_per_second": 9.848, "eval_steps_per_second": 0.616, "step": 4300 }, { "epoch": 1.97, "learning_rate": 1.1466260781329273e-07, "logits/chosen": -4.901521682739258, "logits/rejected": -4.831603050231934, "logps/chosen": -409.1568908691406, "logps/rejected": -277.5765380859375, "loss": 0.1325, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.250007629394531, "rewards/margins": 5.333972454071045, "rewards/rejected": -0.08396444469690323, "step": 4310 }, { "epoch": 1.97, "learning_rate": 1.141552511415525e-07, "logits/chosen": -4.90706729888916, "logits/rejected": -4.820285320281982, "logps/chosen": -437.1853942871094, "logps/rejected": -273.72088623046875, "loss": 0.1142, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 6.2223968505859375, "rewards/margins": 6.585951328277588, "rewards/rejected": -0.3635541796684265, "step": 4320 }, { "epoch": 1.98, "learning_rate": 1.1364789446981226e-07, "logits/chosen": -4.913697242736816, "logits/rejected": -4.836630821228027, "logps/chosen": -432.58551025390625, "logps/rejected": -287.257568359375, "loss": 0.1275, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.466434001922607, "rewards/margins": 6.108981609344482, "rewards/rejected": -0.6425477266311646, "step": 4330 }, { "epoch": 1.98, "learning_rate": 1.1314053779807203e-07, "logits/chosen": -4.862757682800293, "logits/rejected": -4.791149616241455, "logps/chosen": -390.869140625, "logps/rejected": -261.21527099609375, "loss": 0.1318, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.568188667297363, "rewards/margins": 5.716704368591309, "rewards/rejected": -0.14851494133472443, "step": 4340 }, { "epoch": 1.99, "learning_rate": 1.126331811263318e-07, "logits/chosen": -4.844531059265137, "logits/rejected": -4.767332553863525, "logps/chosen": -420.93377685546875, "logps/rejected": -269.09283447265625, "loss": 0.0957, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.1644134521484375, "rewards/margins": 5.494332790374756, "rewards/rejected": -0.32991963624954224, "step": 4350 }, { "epoch": 1.99, "learning_rate": 1.1212582445459156e-07, "logits/chosen": -4.85477352142334, "logits/rejected": -4.7679572105407715, "logps/chosen": -441.2588806152344, "logps/rejected": -272.7862243652344, "loss": 0.1319, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.857962608337402, "rewards/margins": 6.373206615447998, "rewards/rejected": -0.5152438879013062, "step": 4360 }, { "epoch": 1.99, "learning_rate": 1.1161846778285133e-07, "logits/chosen": -4.893786430358887, "logits/rejected": -4.816659927368164, "logps/chosen": -419.46112060546875, "logps/rejected": -273.40264892578125, "loss": 0.1377, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 6.0630292892456055, "rewards/margins": 6.531370639801025, "rewards/rejected": -0.4683413505554199, "step": 4370 }, { "epoch": 2.0, "learning_rate": 1.111111111111111e-07, "logits/chosen": -4.8774285316467285, "logits/rejected": -4.793211936950684, "logps/chosen": -430.4993591308594, "logps/rejected": -272.1214904785156, "loss": 0.1115, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.9691619873046875, "rewards/margins": 6.982408046722412, "rewards/rejected": -1.0132465362548828, "step": 4380 }, { "epoch": 2.0, "learning_rate": 1.1060375443937086e-07, "logits/chosen": -4.88178014755249, "logits/rejected": -4.799417018890381, "logps/chosen": -422.9811096191406, "logps/rejected": -260.16375732421875, "loss": 0.1214, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.532773017883301, "rewards/margins": 6.650575160980225, "rewards/rejected": -1.1178025007247925, "step": 4390 }, { "epoch": 2.01, "learning_rate": 1.1009639776763063e-07, "logits/chosen": -4.855304718017578, "logits/rejected": -4.792481422424316, "logps/chosen": -409.60467529296875, "logps/rejected": -278.6858825683594, "loss": 0.1212, "rewards/accuracies": 0.9375, "rewards/chosen": 4.45530891418457, "rewards/margins": 5.473141670227051, "rewards/rejected": -1.0178325176239014, "step": 4400 }, { "epoch": 2.01, "eval_logits/chosen": -4.8055524826049805, "eval_logits/rejected": -4.740081787109375, "eval_logps/chosen": -402.6217346191406, "eval_logps/rejected": -267.2349548339844, "eval_loss": 0.14755307137966156, "eval_rewards/accuracies": 0.9217877388000488, "eval_rewards/chosen": 4.641017913818359, "eval_rewards/margins": 5.6150312423706055, "eval_rewards/rejected": -0.9740130305290222, "eval_runtime": 407.4719, "eval_samples_per_second": 7.024, "eval_steps_per_second": 0.439, "step": 4400 }, { "epoch": 2.01, "learning_rate": 1.095890410958904e-07, "logits/chosen": -4.832887172698975, "logits/rejected": -4.76766300201416, "logps/chosen": -415.42987060546875, "logps/rejected": -284.4585876464844, "loss": 0.0847, "rewards/accuracies": 0.9375, "rewards/chosen": 5.4743242263793945, "rewards/margins": 6.31908655166626, "rewards/rejected": -0.8447621464729309, "step": 4410 }, { "epoch": 2.02, "learning_rate": 1.0908168442415016e-07, "logits/chosen": -4.877171039581299, "logits/rejected": -4.801630973815918, "logps/chosen": -392.74444580078125, "logps/rejected": -256.5757141113281, "loss": 0.1333, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.546787261962891, "rewards/margins": 6.340831279754639, "rewards/rejected": -0.7940441370010376, "step": 4420 }, { "epoch": 2.02, "learning_rate": 1.0857432775240993e-07, "logits/chosen": -4.875015735626221, "logits/rejected": -4.812748908996582, "logps/chosen": -398.28424072265625, "logps/rejected": -279.46771240234375, "loss": 0.1235, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.245759010314941, "rewards/margins": 5.775721073150635, "rewards/rejected": -0.5299624800682068, "step": 4430 }, { "epoch": 2.03, "learning_rate": 1.080669710806697e-07, "logits/chosen": -4.868747711181641, "logits/rejected": -4.790489673614502, "logps/chosen": -415.05499267578125, "logps/rejected": -266.7918701171875, "loss": 0.1004, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 5.509837627410889, "rewards/margins": 6.501837730407715, "rewards/rejected": -0.9920004606246948, "step": 4440 }, { "epoch": 2.03, "learning_rate": 1.0755961440892946e-07, "logits/chosen": -4.854556083679199, "logits/rejected": -4.787578105926514, "logps/chosen": -388.72918701171875, "logps/rejected": -261.45428466796875, "loss": 0.1198, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.813766002655029, "rewards/margins": 5.6340436935424805, "rewards/rejected": -0.8202775120735168, "step": 4450 }, { "epoch": 2.04, "learning_rate": 1.0705225773718923e-07, "logits/chosen": -4.866793155670166, "logits/rejected": -4.800551414489746, "logps/chosen": -405.4156188964844, "logps/rejected": -269.2408447265625, "loss": 0.111, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.056650638580322, "rewards/margins": 5.393206596374512, "rewards/rejected": -0.3365555703639984, "step": 4460 }, { "epoch": 2.04, "learning_rate": 1.06544901065449e-07, "logits/chosen": -4.907196998596191, "logits/rejected": -4.8301897048950195, "logps/chosen": -425.328369140625, "logps/rejected": -273.4461364746094, "loss": 0.1283, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.11956262588501, "rewards/margins": 5.604405403137207, "rewards/rejected": -0.4848434329032898, "step": 4470 }, { "epoch": 2.04, "learning_rate": 1.0603754439370876e-07, "logits/chosen": -4.924157619476318, "logits/rejected": -4.832226753234863, "logps/chosen": -442.025390625, "logps/rejected": -270.24188232421875, "loss": 0.0991, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 5.797433853149414, "rewards/margins": 6.504227638244629, "rewards/rejected": -0.7067936658859253, "step": 4480 }, { "epoch": 2.05, "learning_rate": 1.0553018772196853e-07, "logits/chosen": -4.893965721130371, "logits/rejected": -4.816378593444824, "logps/chosen": -409.865478515625, "logps/rejected": -273.190185546875, "loss": 0.0891, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.488694190979004, "rewards/margins": 6.04306697845459, "rewards/rejected": -0.5543726682662964, "step": 4490 }, { "epoch": 2.05, "learning_rate": 1.050228310502283e-07, "logits/chosen": -4.908883094787598, "logits/rejected": -4.841439247131348, "logps/chosen": -389.5849609375, "logps/rejected": -276.5188903808594, "loss": 0.0998, "rewards/accuracies": 0.9375, "rewards/chosen": 4.790728569030762, "rewards/margins": 5.585550785064697, "rewards/rejected": -0.7948225140571594, "step": 4500 }, { "epoch": 2.05, "eval_logits/chosen": -4.845737457275391, "eval_logits/rejected": -4.777454853057861, "eval_logps/chosen": -402.12298583984375, "eval_logps/rejected": -266.8114318847656, "eval_loss": 0.14526955783367157, "eval_rewards/accuracies": 0.9189944267272949, "eval_rewards/chosen": 4.89035701751709, "eval_rewards/margins": 5.652605056762695, "eval_rewards/rejected": -0.7622482776641846, "eval_runtime": 232.2669, "eval_samples_per_second": 12.322, "eval_steps_per_second": 0.771, "step": 4500 }, { "epoch": 2.06, "learning_rate": 1.0451547437848806e-07, "logits/chosen": -4.921597480773926, "logits/rejected": -4.830770969390869, "logps/chosen": -452.7974548339844, "logps/rejected": -281.0848083496094, "loss": 0.1077, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.788046836853027, "rewards/margins": 6.4459686279296875, "rewards/rejected": -0.6579217314720154, "step": 4510 }, { "epoch": 2.06, "learning_rate": 1.0400811770674783e-07, "logits/chosen": -4.917908191680908, "logits/rejected": -4.845015525817871, "logps/chosen": -399.94134521484375, "logps/rejected": -270.0165100097656, "loss": 0.1105, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.640042781829834, "rewards/margins": 6.789925575256348, "rewards/rejected": -1.1498830318450928, "step": 4520 }, { "epoch": 2.07, "learning_rate": 1.035007610350076e-07, "logits/chosen": -4.9208574295043945, "logits/rejected": -4.846704959869385, "logps/chosen": -412.40155029296875, "logps/rejected": -275.77789306640625, "loss": 0.1351, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.865345001220703, "rewards/margins": 5.185548305511475, "rewards/rejected": -0.32020360231399536, "step": 4530 }, { "epoch": 2.07, "learning_rate": 1.0299340436326736e-07, "logits/chosen": -4.863717079162598, "logits/rejected": -4.809562683105469, "logps/chosen": -378.0865173339844, "logps/rejected": -285.5669860839844, "loss": 0.102, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.7488555908203125, "rewards/margins": 4.648155212402344, "rewards/rejected": 0.10070104897022247, "step": 4540 }, { "epoch": 2.08, "learning_rate": 1.0248604769152713e-07, "logits/chosen": -4.855767250061035, "logits/rejected": -4.789978981018066, "logps/chosen": -391.6422119140625, "logps/rejected": -278.743896484375, "loss": 0.1256, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.670050621032715, "rewards/margins": 5.278546333312988, "rewards/rejected": -0.6084957122802734, "step": 4550 }, { "epoch": 2.08, "learning_rate": 1.019786910197869e-07, "logits/chosen": -4.913547039031982, "logits/rejected": -4.842885971069336, "logps/chosen": -384.5548400878906, "logps/rejected": -265.2785949707031, "loss": 0.1008, "rewards/accuracies": 0.9375, "rewards/chosen": 4.434451103210449, "rewards/margins": 5.179271697998047, "rewards/rejected": -0.7448214292526245, "step": 4560 }, { "epoch": 2.09, "learning_rate": 1.0147133434804666e-07, "logits/chosen": -4.921236991882324, "logits/rejected": -4.828045845031738, "logps/chosen": -440.589111328125, "logps/rejected": -270.3564758300781, "loss": 0.1068, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 5.477619647979736, "rewards/margins": 6.183921813964844, "rewards/rejected": -0.7063025236129761, "step": 4570 }, { "epoch": 2.09, "learning_rate": 1.0096397767630643e-07, "logits/chosen": -4.879881381988525, "logits/rejected": -4.796624183654785, "logps/chosen": -423.9095764160156, "logps/rejected": -261.0993347167969, "loss": 0.1008, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 6.055180549621582, "rewards/margins": 7.1468000411987305, "rewards/rejected": -1.0916197299957275, "step": 4580 }, { "epoch": 2.1, "learning_rate": 1.004566210045662e-07, "logits/chosen": -4.90405797958374, "logits/rejected": -4.833564758300781, "logps/chosen": -415.65447998046875, "logps/rejected": -277.6321716308594, "loss": 0.1151, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.455759525299072, "rewards/margins": 5.79727840423584, "rewards/rejected": -0.3415187895298004, "step": 4590 }, { "epoch": 2.1, "learning_rate": 9.994926433282596e-08, "logits/chosen": -4.893523693084717, "logits/rejected": -4.815844535827637, "logps/chosen": -403.8277282714844, "logps/rejected": -273.79046630859375, "loss": 0.1119, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.801535606384277, "rewards/margins": 6.423213005065918, "rewards/rejected": -0.6216781139373779, "step": 4600 }, { "epoch": 2.1, "eval_logits/chosen": -4.837458610534668, "eval_logits/rejected": -4.76613187789917, "eval_logps/chosen": -402.1892395019531, "eval_logps/rejected": -266.9855651855469, "eval_loss": 0.1471233069896698, "eval_rewards/accuracies": 0.924580991268158, "eval_rewards/chosen": 4.857246398925781, "eval_rewards/margins": 5.706577301025391, "eval_rewards/rejected": -0.8493306040763855, "eval_runtime": 215.7586, "eval_samples_per_second": 13.265, "eval_steps_per_second": 0.83, "step": 4600 }, { "epoch": 2.1, "learning_rate": 9.944190766108573e-08, "logits/chosen": -4.939009666442871, "logits/rejected": -4.856883525848389, "logps/chosen": -412.0589904785156, "logps/rejected": -266.16448974609375, "loss": 0.1158, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.313650608062744, "rewards/margins": 5.450104713439941, "rewards/rejected": -0.13645382225513458, "step": 4610 }, { "epoch": 2.11, "learning_rate": 9.89345509893455e-08, "logits/chosen": -4.901943683624268, "logits/rejected": -4.835053443908691, "logps/chosen": -394.895751953125, "logps/rejected": -275.56329345703125, "loss": 0.1291, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.285827159881592, "rewards/margins": 6.178305625915527, "rewards/rejected": -0.8924779891967773, "step": 4620 }, { "epoch": 2.11, "learning_rate": 9.842719431760526e-08, "logits/chosen": -4.90523624420166, "logits/rejected": -4.812392234802246, "logps/chosen": -436.1763610839844, "logps/rejected": -273.03594970703125, "loss": 0.1156, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 5.966449737548828, "rewards/margins": 6.913215637207031, "rewards/rejected": -0.9467660188674927, "step": 4630 }, { "epoch": 2.12, "learning_rate": 9.791983764586503e-08, "logits/chosen": -4.946971416473389, "logits/rejected": -4.850774765014648, "logps/chosen": -411.6910095214844, "logps/rejected": -251.10546875, "loss": 0.1445, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 5.642851829528809, "rewards/margins": 6.211692810058594, "rewards/rejected": -0.5688411593437195, "step": 4640 }, { "epoch": 2.12, "learning_rate": 9.74124809741248e-08, "logits/chosen": -4.972862243652344, "logits/rejected": -4.887436389923096, "logps/chosen": -412.7035217285156, "logps/rejected": -272.003173828125, "loss": 0.1126, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.466536521911621, "rewards/margins": 5.860616683959961, "rewards/rejected": -0.3940798342227936, "step": 4650 }, { "epoch": 2.13, "learning_rate": 9.690512430238456e-08, "logits/chosen": -4.986374855041504, "logits/rejected": -4.896953582763672, "logps/chosen": -429.959716796875, "logps/rejected": -268.0804443359375, "loss": 0.111, "rewards/accuracies": 0.9375, "rewards/chosen": 5.6851701736450195, "rewards/margins": 6.596620082855225, "rewards/rejected": -0.9114507436752319, "step": 4660 }, { "epoch": 2.13, "learning_rate": 9.639776763064433e-08, "logits/chosen": -4.944240570068359, "logits/rejected": -4.866330146789551, "logps/chosen": -427.99749755859375, "logps/rejected": -284.3707275390625, "loss": 0.1333, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 5.510575771331787, "rewards/margins": 5.576759338378906, "rewards/rejected": -0.06618337333202362, "step": 4670 }, { "epoch": 2.14, "learning_rate": 9.58904109589041e-08, "logits/chosen": -4.952332973480225, "logits/rejected": -4.85488224029541, "logps/chosen": -419.88812255859375, "logps/rejected": -261.99444580078125, "loss": 0.1208, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.377404689788818, "rewards/margins": 5.8292460441589355, "rewards/rejected": -0.4518415331840515, "step": 4680 }, { "epoch": 2.14, "learning_rate": 9.538305428716386e-08, "logits/chosen": -4.9548492431640625, "logits/rejected": -4.875304222106934, "logps/chosen": -414.1785583496094, "logps/rejected": -275.9156494140625, "loss": 0.1223, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 5.936046600341797, "rewards/margins": 6.323944568634033, "rewards/rejected": -0.3878982961177826, "step": 4690 }, { "epoch": 2.15, "learning_rate": 9.487569761542363e-08, "logits/chosen": -4.965857028961182, "logits/rejected": -4.880849361419678, "logps/chosen": -418.05999755859375, "logps/rejected": -257.09100341796875, "loss": 0.1175, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.957980155944824, "rewards/margins": 6.946159362792969, "rewards/rejected": -0.9881793260574341, "step": 4700 }, { "epoch": 2.15, "eval_logits/chosen": -4.895074844360352, "eval_logits/rejected": -4.825559139251709, "eval_logps/chosen": -402.114013671875, "eval_logps/rejected": -266.9460144042969, "eval_loss": 0.14797915518283844, "eval_rewards/accuracies": 0.9134078025817871, "eval_rewards/chosen": 4.894859313964844, "eval_rewards/margins": 5.724398136138916, "eval_rewards/rejected": -0.829538881778717, "eval_runtime": 356.2315, "eval_samples_per_second": 8.034, "eval_steps_per_second": 0.502, "step": 4700 }, { "epoch": 2.15, "learning_rate": 9.43683409436834e-08, "logits/chosen": -4.9392876625061035, "logits/rejected": -4.874741077423096, "logps/chosen": -420.13690185546875, "logps/rejected": -293.1278991699219, "loss": 0.1058, "rewards/accuracies": 0.9375, "rewards/chosen": 5.169350624084473, "rewards/margins": 6.15454626083374, "rewards/rejected": -0.9851959347724915, "step": 4710 }, { "epoch": 2.15, "learning_rate": 9.386098427194316e-08, "logits/chosen": -4.939810752868652, "logits/rejected": -4.855282306671143, "logps/chosen": -423.61328125, "logps/rejected": -267.7987060546875, "loss": 0.1017, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.260304927825928, "rewards/margins": 6.520499229431152, "rewards/rejected": -1.2601946592330933, "step": 4720 }, { "epoch": 2.16, "learning_rate": 9.335362760020293e-08, "logits/chosen": -4.978135585784912, "logits/rejected": -4.8993000984191895, "logps/chosen": -400.39129638671875, "logps/rejected": -260.77081298828125, "loss": 0.1389, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 3.9541046619415283, "rewards/margins": 4.892237186431885, "rewards/rejected": -0.9381322860717773, "step": 4730 }, { "epoch": 2.16, "learning_rate": 9.28462709284627e-08, "logits/chosen": -4.93670129776001, "logits/rejected": -4.858290672302246, "logps/chosen": -414.9359436035156, "logps/rejected": -265.9009704589844, "loss": 0.1035, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.426181793212891, "rewards/margins": 6.721365451812744, "rewards/rejected": -1.2951838970184326, "step": 4740 }, { "epoch": 2.17, "learning_rate": 9.233891425672246e-08, "logits/chosen": -4.93917179107666, "logits/rejected": -4.859288215637207, "logps/chosen": -411.41552734375, "logps/rejected": -267.6401062011719, "loss": 0.1212, "rewards/accuracies": 0.9375, "rewards/chosen": 5.8256120681762695, "rewards/margins": 6.604338645935059, "rewards/rejected": -0.7787266969680786, "step": 4750 }, { "epoch": 2.17, "learning_rate": 9.183155758498223e-08, "logits/chosen": -4.946847915649414, "logits/rejected": -4.87341833114624, "logps/chosen": -402.25726318359375, "logps/rejected": -271.4898376464844, "loss": 0.1086, "rewards/accuracies": 0.9375, "rewards/chosen": 4.882460594177246, "rewards/margins": 6.579829216003418, "rewards/rejected": -1.6973682641983032, "step": 4760 }, { "epoch": 2.18, "learning_rate": 9.1324200913242e-08, "logits/chosen": -4.974339485168457, "logits/rejected": -4.901873588562012, "logps/chosen": -422.5555114746094, "logps/rejected": -292.73785400390625, "loss": 0.1235, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.000344276428223, "rewards/margins": 4.982012748718262, "rewards/rejected": 0.018331050872802734, "step": 4770 }, { "epoch": 2.18, "learning_rate": 9.081684424150176e-08, "logits/chosen": -4.9155731201171875, "logits/rejected": -4.839664936065674, "logps/chosen": -430.61029052734375, "logps/rejected": -284.0818786621094, "loss": 0.1027, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.856447696685791, "rewards/margins": 6.118809223175049, "rewards/rejected": -0.2623615264892578, "step": 4780 }, { "epoch": 2.19, "learning_rate": 9.030948756976153e-08, "logits/chosen": -4.933501243591309, "logits/rejected": -4.844796180725098, "logps/chosen": -442.38104248046875, "logps/rejected": -264.9707946777344, "loss": 0.1029, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.808096408843994, "rewards/margins": 7.046914577484131, "rewards/rejected": -1.2388185262680054, "step": 4790 }, { "epoch": 2.19, "learning_rate": 8.98021308980213e-08, "logits/chosen": -4.953664779663086, "logits/rejected": -4.878441333770752, "logps/chosen": -418.5375061035156, "logps/rejected": -277.14776611328125, "loss": 0.1046, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.459037780761719, "rewards/margins": 6.463744163513184, "rewards/rejected": -1.0047073364257812, "step": 4800 }, { "epoch": 2.19, "eval_logits/chosen": -4.850941181182861, "eval_logits/rejected": -4.784369468688965, "eval_logps/chosen": -401.92584228515625, "eval_logps/rejected": -266.7252197265625, "eval_loss": 0.14572064578533173, "eval_rewards/accuracies": 0.916201114654541, "eval_rewards/chosen": 4.9889421463012695, "eval_rewards/margins": 5.708088397979736, "eval_rewards/rejected": -0.7191460728645325, "eval_runtime": 173.7003, "eval_samples_per_second": 16.477, "eval_steps_per_second": 1.031, "step": 4800 }, { "epoch": 2.2, "learning_rate": 8.929477422628106e-08, "logits/chosen": -4.928905010223389, "logits/rejected": -4.8359527587890625, "logps/chosen": -431.77972412109375, "logps/rejected": -276.5912170410156, "loss": 0.1126, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.947134971618652, "rewards/margins": 6.539161682128906, "rewards/rejected": -0.5920271873474121, "step": 4810 }, { "epoch": 2.2, "learning_rate": 8.878741755454083e-08, "logits/chosen": -4.963539123535156, "logits/rejected": -4.876279354095459, "logps/chosen": -433.887939453125, "logps/rejected": -278.1951599121094, "loss": 0.1367, "rewards/accuracies": 0.9375, "rewards/chosen": 4.715191841125488, "rewards/margins": 5.488936424255371, "rewards/rejected": -0.7737458944320679, "step": 4820 }, { "epoch": 2.2, "learning_rate": 8.82800608828006e-08, "logits/chosen": -4.9363884925842285, "logits/rejected": -4.8639750480651855, "logps/chosen": -415.55029296875, "logps/rejected": -270.5118408203125, "loss": 0.1137, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.740364074707031, "rewards/margins": 5.3627543449401855, "rewards/rejected": -0.6223903894424438, "step": 4830 }, { "epoch": 2.21, "learning_rate": 8.777270421106036e-08, "logits/chosen": -4.976495742797852, "logits/rejected": -4.903439044952393, "logps/chosen": -407.7938537597656, "logps/rejected": -274.5249938964844, "loss": 0.114, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 4.366100311279297, "rewards/margins": 5.371488571166992, "rewards/rejected": -1.0053876638412476, "step": 4840 }, { "epoch": 2.21, "learning_rate": 8.726534753932013e-08, "logits/chosen": -4.981869697570801, "logits/rejected": -4.908095359802246, "logps/chosen": -394.3970031738281, "logps/rejected": -271.2975769042969, "loss": 0.1384, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 4.310720443725586, "rewards/margins": 5.318100929260254, "rewards/rejected": -1.007380485534668, "step": 4850 }, { "epoch": 2.22, "learning_rate": 8.67579908675799e-08, "logits/chosen": -4.953721523284912, "logits/rejected": -4.888033866882324, "logps/chosen": -395.9389343261719, "logps/rejected": -272.9427490234375, "loss": 0.1087, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.902317523956299, "rewards/margins": 5.503976345062256, "rewards/rejected": -0.6016592979431152, "step": 4860 }, { "epoch": 2.22, "learning_rate": 8.625063419583966e-08, "logits/chosen": -4.893526077270508, "logits/rejected": -4.820248603820801, "logps/chosen": -396.91314697265625, "logps/rejected": -264.3607482910156, "loss": 0.1006, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.5225019454956055, "rewards/margins": 5.815639972686768, "rewards/rejected": -0.29313772916793823, "step": 4870 }, { "epoch": 2.23, "learning_rate": 8.574327752409943e-08, "logits/chosen": -4.871951103210449, "logits/rejected": -4.804513454437256, "logps/chosen": -386.0025939941406, "logps/rejected": -271.83380126953125, "loss": 0.1045, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.69227933883667, "rewards/margins": 5.2295684814453125, "rewards/rejected": -0.5372893810272217, "step": 4880 }, { "epoch": 2.23, "learning_rate": 8.52359208523592e-08, "logits/chosen": -4.943947792053223, "logits/rejected": -4.854506015777588, "logps/chosen": -434.2711486816406, "logps/rejected": -269.70501708984375, "loss": 0.1344, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.765451908111572, "rewards/margins": 6.52508544921875, "rewards/rejected": -0.7596337199211121, "step": 4890 }, { "epoch": 2.24, "learning_rate": 8.472856418061896e-08, "logits/chosen": -4.933685779571533, "logits/rejected": -4.845057487487793, "logps/chosen": -436.9561462402344, "logps/rejected": -271.1663513183594, "loss": 0.1267, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.287613868713379, "rewards/margins": 6.664238929748535, "rewards/rejected": -1.3766252994537354, "step": 4900 }, { "epoch": 2.24, "eval_logits/chosen": -4.891152858734131, "eval_logits/rejected": -4.82438325881958, "eval_logps/chosen": -403.1356201171875, "eval_logps/rejected": -267.7229919433594, "eval_loss": 0.14908429980278015, "eval_rewards/accuracies": 0.9189944267272949, "eval_rewards/chosen": 4.384059429168701, "eval_rewards/margins": 5.602104187011719, "eval_rewards/rejected": -1.2180449962615967, "eval_runtime": 316.8538, "eval_samples_per_second": 9.033, "eval_steps_per_second": 0.565, "step": 4900 }, { "epoch": 2.24, "learning_rate": 8.422120750887873e-08, "logits/chosen": -4.950315475463867, "logits/rejected": -4.866333484649658, "logps/chosen": -439.230224609375, "logps/rejected": -265.25885009765625, "loss": 0.1387, "rewards/accuracies": 0.9375, "rewards/chosen": 5.89914608001709, "rewards/margins": 6.5950212478637695, "rewards/rejected": -0.6958745718002319, "step": 4910 }, { "epoch": 2.25, "learning_rate": 8.37138508371385e-08, "logits/chosen": -4.966004371643066, "logits/rejected": -4.884432792663574, "logps/chosen": -426.314208984375, "logps/rejected": -273.4166259765625, "loss": 0.1246, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 6.195490837097168, "rewards/margins": 7.2494049072265625, "rewards/rejected": -1.0539133548736572, "step": 4920 }, { "epoch": 2.25, "learning_rate": 8.320649416539826e-08, "logits/chosen": -4.949981689453125, "logits/rejected": -4.871314525604248, "logps/chosen": -419.2696838378906, "logps/rejected": -283.3854064941406, "loss": 0.12, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.112128257751465, "rewards/margins": 5.861917972564697, "rewards/rejected": -0.7497900724411011, "step": 4930 }, { "epoch": 2.25, "learning_rate": 8.269913749365803e-08, "logits/chosen": -4.916971683502197, "logits/rejected": -4.840315818786621, "logps/chosen": -438.3779296875, "logps/rejected": -290.32135009765625, "loss": 0.127, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 4.964548587799072, "rewards/margins": 5.371851444244385, "rewards/rejected": -0.40730294585227966, "step": 4940 }, { "epoch": 2.26, "learning_rate": 8.21917808219178e-08, "logits/chosen": -4.940596580505371, "logits/rejected": -4.865884304046631, "logps/chosen": -429.7864685058594, "logps/rejected": -292.4789123535156, "loss": 0.1182, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.005914688110352, "rewards/margins": 6.13593053817749, "rewards/rejected": -1.1300158500671387, "step": 4950 }, { "epoch": 2.26, "learning_rate": 8.168442415017756e-08, "logits/chosen": -4.933268070220947, "logits/rejected": -4.852160453796387, "logps/chosen": -415.7066955566406, "logps/rejected": -264.57794189453125, "loss": 0.1318, "rewards/accuracies": 0.9375, "rewards/chosen": 4.843461513519287, "rewards/margins": 6.281542778015137, "rewards/rejected": -1.4380815029144287, "step": 4960 }, { "epoch": 2.27, "learning_rate": 8.117706747843733e-08, "logits/chosen": -4.920778751373291, "logits/rejected": -4.845213890075684, "logps/chosen": -412.7632751464844, "logps/rejected": -272.46380615234375, "loss": 0.1058, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.523646354675293, "rewards/margins": 6.303154945373535, "rewards/rejected": -0.7795082330703735, "step": 4970 }, { "epoch": 2.27, "learning_rate": 8.06697108066971e-08, "logits/chosen": -4.894315719604492, "logits/rejected": -4.812371730804443, "logps/chosen": -428.7547912597656, "logps/rejected": -265.3595886230469, "loss": 0.1227, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.306768894195557, "rewards/margins": 6.813757419586182, "rewards/rejected": -1.506988286972046, "step": 4980 }, { "epoch": 2.28, "learning_rate": 8.016235413495687e-08, "logits/chosen": -4.961252689361572, "logits/rejected": -4.900317192077637, "logps/chosen": -395.6136779785156, "logps/rejected": -286.63641357421875, "loss": 0.1083, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.203457355499268, "rewards/margins": 5.187814235687256, "rewards/rejected": -0.9843567609786987, "step": 4990 }, { "epoch": 2.28, "learning_rate": 7.965499746321664e-08, "logits/chosen": -4.975726127624512, "logits/rejected": -4.887692451477051, "logps/chosen": -439.3357849121094, "logps/rejected": -289.05224609375, "loss": 0.1188, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.263838768005371, "rewards/margins": 6.590150356292725, "rewards/rejected": -1.3263108730316162, "step": 5000 }, { "epoch": 2.28, "eval_logits/chosen": -4.869111061096191, "eval_logits/rejected": -4.801842212677002, "eval_logps/chosen": -402.1961364746094, "eval_logps/rejected": -266.8502197265625, "eval_loss": 0.14445415139198303, "eval_rewards/accuracies": 0.9189944267272949, "eval_rewards/chosen": 4.853791236877441, "eval_rewards/margins": 5.635433197021484, "eval_rewards/rejected": -0.7816421389579773, "eval_runtime": 437.1418, "eval_samples_per_second": 6.547, "eval_steps_per_second": 0.409, "step": 5000 }, { "epoch": 2.29, "learning_rate": 7.91476407914764e-08, "logits/chosen": -4.935378074645996, "logits/rejected": -4.862700462341309, "logps/chosen": -403.7070617675781, "logps/rejected": -269.7569885253906, "loss": 0.1273, "rewards/accuracies": 0.9375, "rewards/chosen": 4.781562805175781, "rewards/margins": 5.172652244567871, "rewards/rejected": -0.3910895884037018, "step": 5010 }, { "epoch": 2.29, "learning_rate": 7.864028411973617e-08, "logits/chosen": -4.970273494720459, "logits/rejected": -4.896665573120117, "logps/chosen": -400.79632568359375, "logps/rejected": -263.92791748046875, "loss": 0.1373, "rewards/accuracies": 0.9375, "rewards/chosen": 5.150163173675537, "rewards/margins": 5.754698753356934, "rewards/rejected": -0.6045348644256592, "step": 5020 }, { "epoch": 2.3, "learning_rate": 7.813292744799594e-08, "logits/chosen": -4.989663124084473, "logits/rejected": -4.913241386413574, "logps/chosen": -426.0763244628906, "logps/rejected": -281.7149963378906, "loss": 0.0901, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.004095554351807, "rewards/margins": 6.267375469207764, "rewards/rejected": -1.2632801532745361, "step": 5030 }, { "epoch": 2.3, "learning_rate": 7.76255707762557e-08, "logits/chosen": -4.9605841636657715, "logits/rejected": -4.892775535583496, "logps/chosen": -379.1793212890625, "logps/rejected": -268.114013671875, "loss": 0.1281, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.635950088500977, "rewards/margins": 5.475337505340576, "rewards/rejected": -0.8393872380256653, "step": 5040 }, { "epoch": 2.31, "learning_rate": 7.711821410451547e-08, "logits/chosen": -5.007980823516846, "logits/rejected": -4.9325408935546875, "logps/chosen": -418.32080078125, "logps/rejected": -274.6629943847656, "loss": 0.116, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.806796550750732, "rewards/margins": 6.233861446380615, "rewards/rejected": -1.4270646572113037, "step": 5050 }, { "epoch": 2.31, "learning_rate": 7.661085743277524e-08, "logits/chosen": -4.988902568817139, "logits/rejected": -4.908154010772705, "logps/chosen": -438.2625427246094, "logps/rejected": -292.6121520996094, "loss": 0.1328, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 5.4749250411987305, "rewards/margins": 6.453294277191162, "rewards/rejected": -0.978369414806366, "step": 5060 }, { "epoch": 2.31, "learning_rate": 7.6103500761035e-08, "logits/chosen": -4.9929094314575195, "logits/rejected": -4.919339656829834, "logps/chosen": -413.50262451171875, "logps/rejected": -273.2449645996094, "loss": 0.1284, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.636375904083252, "rewards/margins": 5.5855913162231445, "rewards/rejected": -0.949215292930603, "step": 5070 }, { "epoch": 2.32, "learning_rate": 7.559614408929477e-08, "logits/chosen": -5.008554935455322, "logits/rejected": -4.917429447174072, "logps/chosen": -442.903076171875, "logps/rejected": -281.75323486328125, "loss": 0.1189, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 6.027169704437256, "rewards/margins": 6.616377830505371, "rewards/rejected": -0.5892075300216675, "step": 5080 }, { "epoch": 2.32, "learning_rate": 7.508878741755454e-08, "logits/chosen": -5.009249687194824, "logits/rejected": -4.92385721206665, "logps/chosen": -425.30706787109375, "logps/rejected": -267.21820068359375, "loss": 0.1067, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 5.628747463226318, "rewards/margins": 6.654224395751953, "rewards/rejected": -1.0254766941070557, "step": 5090 }, { "epoch": 2.33, "learning_rate": 7.45814307458143e-08, "logits/chosen": -5.019815921783447, "logits/rejected": -4.9420037269592285, "logps/chosen": -432.5726013183594, "logps/rejected": -287.12005615234375, "loss": 0.1105, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.014256954193115, "rewards/margins": 5.804014205932617, "rewards/rejected": -0.789757251739502, "step": 5100 }, { "epoch": 2.33, "eval_logits/chosen": -4.93497896194458, "eval_logits/rejected": -4.86862850189209, "eval_logps/chosen": -402.595947265625, "eval_logps/rejected": -267.2577819824219, "eval_loss": 0.14500565826892853, "eval_rewards/accuracies": 0.9217877388000488, "eval_rewards/chosen": 4.653924465179443, "eval_rewards/margins": 5.6393327713012695, "eval_rewards/rejected": -0.9854086637496948, "eval_runtime": 594.4869, "eval_samples_per_second": 4.814, "eval_steps_per_second": 0.301, "step": 5100 }, { "epoch": 2.33, "learning_rate": 7.407407407407407e-08, "logits/chosen": -4.959694862365723, "logits/rejected": -4.8935346603393555, "logps/chosen": -391.87176513671875, "logps/rejected": -270.29730224609375, "loss": 0.125, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.914012432098389, "rewards/margins": 5.5261101722717285, "rewards/rejected": -0.6120975613594055, "step": 5110 }, { "epoch": 2.34, "learning_rate": 7.356671740233384e-08, "logits/chosen": -4.94987678527832, "logits/rejected": -4.862252235412598, "logps/chosen": -424.09149169921875, "logps/rejected": -259.6130065917969, "loss": 0.1419, "rewards/accuracies": 0.9375, "rewards/chosen": 4.669945240020752, "rewards/margins": 5.864947319030762, "rewards/rejected": -1.195001482963562, "step": 5120 }, { "epoch": 2.34, "learning_rate": 7.30593607305936e-08, "logits/chosen": -4.980747222900391, "logits/rejected": -4.911506175994873, "logps/chosen": -418.5074157714844, "logps/rejected": -274.6866760253906, "loss": 0.1258, "rewards/accuracies": 0.9375, "rewards/chosen": 5.17288064956665, "rewards/margins": 6.053205490112305, "rewards/rejected": -0.8803244829177856, "step": 5130 }, { "epoch": 2.35, "learning_rate": 7.255200405885337e-08, "logits/chosen": -4.95458459854126, "logits/rejected": -4.880162239074707, "logps/chosen": -413.6949768066406, "logps/rejected": -281.94561767578125, "loss": 0.096, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.372581481933594, "rewards/margins": 5.800955772399902, "rewards/rejected": -0.428374707698822, "step": 5140 }, { "epoch": 2.35, "learning_rate": 7.204464738711314e-08, "logits/chosen": -4.942924976348877, "logits/rejected": -4.8632330894470215, "logps/chosen": -431.28924560546875, "logps/rejected": -274.4268493652344, "loss": 0.141, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.9338178634643555, "rewards/margins": 6.473837375640869, "rewards/rejected": -1.540019154548645, "step": 5150 }, { "epoch": 2.36, "learning_rate": 7.15372907153729e-08, "logits/chosen": -4.985091686248779, "logits/rejected": -4.901397705078125, "logps/chosen": -423.67169189453125, "logps/rejected": -274.3668518066406, "loss": 0.129, "rewards/accuracies": 0.9375, "rewards/chosen": 4.892068386077881, "rewards/margins": 6.101632118225098, "rewards/rejected": -1.2095637321472168, "step": 5160 }, { "epoch": 2.36, "learning_rate": 7.102993404363267e-08, "logits/chosen": -4.965261459350586, "logits/rejected": -4.888654708862305, "logps/chosen": -422.13800048828125, "logps/rejected": -282.96722412109375, "loss": 0.0986, "rewards/accuracies": 0.9375, "rewards/chosen": 6.372607231140137, "rewards/margins": 6.842642784118652, "rewards/rejected": -0.470036119222641, "step": 5170 }, { "epoch": 2.36, "learning_rate": 7.052257737189244e-08, "logits/chosen": -4.946279048919678, "logits/rejected": -4.878521919250488, "logps/chosen": -404.4757995605469, "logps/rejected": -267.77203369140625, "loss": 0.1076, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.3603105545043945, "rewards/margins": 6.353577613830566, "rewards/rejected": -0.9932675361633301, "step": 5180 }, { "epoch": 2.37, "learning_rate": 7.00152207001522e-08, "logits/chosen": -4.992409706115723, "logits/rejected": -4.913617134094238, "logps/chosen": -414.3451232910156, "logps/rejected": -273.63525390625, "loss": 0.1248, "rewards/accuracies": 1.0, "rewards/chosen": 5.477243900299072, "rewards/margins": 6.704034328460693, "rewards/rejected": -1.2267907857894897, "step": 5190 }, { "epoch": 2.37, "learning_rate": 6.950786402841197e-08, "logits/chosen": -4.954291343688965, "logits/rejected": -4.882170677185059, "logps/chosen": -417.62652587890625, "logps/rejected": -272.7351989746094, "loss": 0.1213, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.817889213562012, "rewards/margins": 6.062017917633057, "rewards/rejected": -1.2441282272338867, "step": 5200 }, { "epoch": 2.37, "eval_logits/chosen": -4.933072566986084, "eval_logits/rejected": -4.866455554962158, "eval_logps/chosen": -402.8253173828125, "eval_logps/rejected": -267.4176940917969, "eval_loss": 0.1474716067314148, "eval_rewards/accuracies": 0.916201114654541, "eval_rewards/chosen": 4.539218425750732, "eval_rewards/margins": 5.604598045349121, "eval_rewards/rejected": -1.0653802156448364, "eval_runtime": 212.8053, "eval_samples_per_second": 13.449, "eval_steps_per_second": 0.841, "step": 5200 }, { "epoch": 2.38, "learning_rate": 6.900050735667174e-08, "logits/chosen": -4.9929986000061035, "logits/rejected": -4.919256687164307, "logps/chosen": -409.965576171875, "logps/rejected": -276.0762634277344, "loss": 0.1193, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.851831436157227, "rewards/margins": 6.132826805114746, "rewards/rejected": -1.2809956073760986, "step": 5210 }, { "epoch": 2.38, "learning_rate": 6.84931506849315e-08, "logits/chosen": -4.972918510437012, "logits/rejected": -4.911537170410156, "logps/chosen": -388.3201904296875, "logps/rejected": -291.0181884765625, "loss": 0.1161, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.178013801574707, "rewards/margins": 5.637327671051025, "rewards/rejected": -0.4593137800693512, "step": 5220 }, { "epoch": 2.39, "learning_rate": 6.798579401319127e-08, "logits/chosen": -4.959627151489258, "logits/rejected": -4.897181510925293, "logps/chosen": -419.61761474609375, "logps/rejected": -291.63226318359375, "loss": 0.1144, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.961781978607178, "rewards/margins": 6.368288993835449, "rewards/rejected": -0.4065069258213043, "step": 5230 }, { "epoch": 2.39, "learning_rate": 6.747843734145104e-08, "logits/chosen": -4.9389519691467285, "logits/rejected": -4.871059417724609, "logps/chosen": -386.10595703125, "logps/rejected": -257.45196533203125, "loss": 0.1104, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 4.882128715515137, "rewards/margins": 5.749431610107422, "rewards/rejected": -0.8673030734062195, "step": 5240 }, { "epoch": 2.4, "learning_rate": 6.69710806697108e-08, "logits/chosen": -4.975493907928467, "logits/rejected": -4.8933796882629395, "logps/chosen": -429.8955993652344, "logps/rejected": -275.78204345703125, "loss": 0.1294, "rewards/accuracies": 0.9375, "rewards/chosen": 5.631252288818359, "rewards/margins": 6.5381364822387695, "rewards/rejected": -0.9068845510482788, "step": 5250 }, { "epoch": 2.4, "learning_rate": 6.646372399797057e-08, "logits/chosen": -4.975050926208496, "logits/rejected": -4.8958740234375, "logps/chosen": -392.5599670410156, "logps/rejected": -250.8602752685547, "loss": 0.1026, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.6310811042785645, "rewards/margins": 5.290577411651611, "rewards/rejected": -0.6594969034194946, "step": 5260 }, { "epoch": 2.41, "learning_rate": 6.595636732623034e-08, "logits/chosen": -4.960488319396973, "logits/rejected": -4.890093803405762, "logps/chosen": -410.0890197753906, "logps/rejected": -278.31829833984375, "loss": 0.1051, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.8370361328125, "rewards/margins": 5.981797218322754, "rewards/rejected": -1.1447603702545166, "step": 5270 }, { "epoch": 2.41, "learning_rate": 6.54490106544901e-08, "logits/chosen": -4.961228370666504, "logits/rejected": -4.8835906982421875, "logps/chosen": -413.35595703125, "logps/rejected": -274.21234130859375, "loss": 0.1078, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.265295505523682, "rewards/margins": 6.561466217041016, "rewards/rejected": -1.2961704730987549, "step": 5280 }, { "epoch": 2.41, "learning_rate": 6.494165398274987e-08, "logits/chosen": -4.9524312019348145, "logits/rejected": -4.86895227432251, "logps/chosen": -428.94073486328125, "logps/rejected": -281.37200927734375, "loss": 0.1085, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.9870829582214355, "rewards/margins": 5.82766056060791, "rewards/rejected": -0.8405774235725403, "step": 5290 }, { "epoch": 2.42, "learning_rate": 6.443429731100964e-08, "logits/chosen": -4.969119071960449, "logits/rejected": -4.8904876708984375, "logps/chosen": -431.27734375, "logps/rejected": -287.9958801269531, "loss": 0.1193, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.379242420196533, "rewards/margins": 6.459146976470947, "rewards/rejected": -1.079904317855835, "step": 5300 }, { "epoch": 2.42, "eval_logits/chosen": -4.900157928466797, "eval_logits/rejected": -4.835659027099609, "eval_logps/chosen": -402.1291809082031, "eval_logps/rejected": -266.9618835449219, "eval_loss": 0.14745400846004486, "eval_rewards/accuracies": 0.924580991268158, "eval_rewards/chosen": 4.887284278869629, "eval_rewards/margins": 5.724755764007568, "eval_rewards/rejected": -0.8374713659286499, "eval_runtime": 316.686, "eval_samples_per_second": 9.037, "eval_steps_per_second": 0.565, "step": 5300 }, { "epoch": 2.42, "learning_rate": 6.39269406392694e-08, "logits/chosen": -4.965531349182129, "logits/rejected": -4.895511627197266, "logps/chosen": -407.11553955078125, "logps/rejected": -281.6117248535156, "loss": 0.1023, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.846563339233398, "rewards/margins": 5.87354040145874, "rewards/rejected": -1.0269768238067627, "step": 5310 }, { "epoch": 2.43, "learning_rate": 6.341958396752917e-08, "logits/chosen": -4.973780155181885, "logits/rejected": -4.89611291885376, "logps/chosen": -422.46435546875, "logps/rejected": -273.09124755859375, "loss": 0.1182, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.556332111358643, "rewards/margins": 6.558829307556152, "rewards/rejected": -1.0024974346160889, "step": 5320 }, { "epoch": 2.43, "learning_rate": 6.291222729578894e-08, "logits/chosen": -4.963789939880371, "logits/rejected": -4.8901238441467285, "logps/chosen": -406.53155517578125, "logps/rejected": -271.52783203125, "loss": 0.1037, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.972264289855957, "rewards/margins": 5.207543849945068, "rewards/rejected": -0.23527908325195312, "step": 5330 }, { "epoch": 2.44, "learning_rate": 6.24048706240487e-08, "logits/chosen": -4.944197177886963, "logits/rejected": -4.8659281730651855, "logps/chosen": -425.10589599609375, "logps/rejected": -270.50213623046875, "loss": 0.1128, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.5039496421813965, "rewards/margins": 5.7906622886657715, "rewards/rejected": -0.2867124676704407, "step": 5340 }, { "epoch": 2.44, "learning_rate": 6.189751395230847e-08, "logits/chosen": -4.947718620300293, "logits/rejected": -4.868649005889893, "logps/chosen": -441.27728271484375, "logps/rejected": -282.53460693359375, "loss": 0.1027, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 5.473177433013916, "rewards/margins": 6.102600574493408, "rewards/rejected": -0.6294231414794922, "step": 5350 }, { "epoch": 2.45, "learning_rate": 6.139015728056824e-08, "logits/chosen": -4.957098960876465, "logits/rejected": -4.88673210144043, "logps/chosen": -407.8499450683594, "logps/rejected": -267.80670166015625, "loss": 0.1045, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.3863525390625, "rewards/margins": 6.200811862945557, "rewards/rejected": -0.8144596219062805, "step": 5360 }, { "epoch": 2.45, "learning_rate": 6.0882800608828e-08, "logits/chosen": -4.947127342224121, "logits/rejected": -4.883804798126221, "logps/chosen": -394.66302490234375, "logps/rejected": -266.9524841308594, "loss": 0.1145, "rewards/accuracies": 0.9375, "rewards/chosen": 4.919065952301025, "rewards/margins": 5.902022361755371, "rewards/rejected": -0.9829570651054382, "step": 5370 }, { "epoch": 2.46, "learning_rate": 6.037544393708777e-08, "logits/chosen": -4.983623027801514, "logits/rejected": -4.911072731018066, "logps/chosen": -418.916748046875, "logps/rejected": -283.73876953125, "loss": 0.0938, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.571389675140381, "rewards/margins": 6.318342685699463, "rewards/rejected": -0.746953010559082, "step": 5380 }, { "epoch": 2.46, "learning_rate": 5.986808726534754e-08, "logits/chosen": -4.970729827880859, "logits/rejected": -4.888007164001465, "logps/chosen": -437.43194580078125, "logps/rejected": -271.5250244140625, "loss": 0.1342, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.516953468322754, "rewards/margins": 6.336077690124512, "rewards/rejected": -0.8191248178482056, "step": 5390 }, { "epoch": 2.46, "learning_rate": 5.93607305936073e-08, "logits/chosen": -4.959385871887207, "logits/rejected": -4.888867378234863, "logps/chosen": -413.42218017578125, "logps/rejected": -270.72247314453125, "loss": 0.1084, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.037797927856445, "rewards/margins": 5.420968055725098, "rewards/rejected": -0.383169949054718, "step": 5400 }, { "epoch": 2.46, "eval_logits/chosen": -4.938099384307861, "eval_logits/rejected": -4.87076997756958, "eval_logps/chosen": -402.3046875, "eval_logps/rejected": -267.1296081542969, "eval_loss": 0.1477077156305313, "eval_rewards/accuracies": 0.916201114654541, "eval_rewards/chosen": 4.799520015716553, "eval_rewards/margins": 5.720855236053467, "eval_rewards/rejected": -0.9213350415229797, "eval_runtime": 395.4676, "eval_samples_per_second": 7.237, "eval_steps_per_second": 0.453, "step": 5400 }, { "epoch": 2.47, "learning_rate": 5.8853373921867065e-08, "logits/chosen": -5.001718997955322, "logits/rejected": -4.934907913208008, "logps/chosen": -420.31329345703125, "logps/rejected": -301.7361145019531, "loss": 0.0944, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.106191635131836, "rewards/margins": 5.415187835693359, "rewards/rejected": -0.30899596214294434, "step": 5410 }, { "epoch": 2.47, "learning_rate": 5.834601725012683e-08, "logits/chosen": -4.9787421226501465, "logits/rejected": -4.9013142585754395, "logps/chosen": -408.85003662109375, "logps/rejected": -270.8046875, "loss": 0.1252, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.473206996917725, "rewards/margins": 6.279058456420898, "rewards/rejected": -0.8058512806892395, "step": 5420 }, { "epoch": 2.48, "learning_rate": 5.78386605783866e-08, "logits/chosen": -4.959836006164551, "logits/rejected": -4.878729343414307, "logps/chosen": -417.4236755371094, "logps/rejected": -265.082275390625, "loss": 0.1029, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.735897064208984, "rewards/margins": 6.742075443267822, "rewards/rejected": -1.0061780214309692, "step": 5430 }, { "epoch": 2.48, "learning_rate": 5.7331303906646365e-08, "logits/chosen": -4.958975791931152, "logits/rejected": -4.8921217918396, "logps/chosen": -402.37310791015625, "logps/rejected": -279.6372375488281, "loss": 0.1202, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 5.42415714263916, "rewards/margins": 6.3349432945251465, "rewards/rejected": -0.9107854962348938, "step": 5440 }, { "epoch": 2.49, "learning_rate": 5.682394723490613e-08, "logits/chosen": -4.968079566955566, "logits/rejected": -4.881294250488281, "logps/chosen": -424.2637634277344, "logps/rejected": -271.9144287109375, "loss": 0.0975, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 6.050034999847412, "rewards/margins": 6.865139961242676, "rewards/rejected": -0.8151048421859741, "step": 5450 }, { "epoch": 2.49, "learning_rate": 5.63165905631659e-08, "logits/chosen": -4.966390132904053, "logits/rejected": -4.891523838043213, "logps/chosen": -437.28350830078125, "logps/rejected": -289.5374450683594, "loss": 0.1079, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.479045391082764, "rewards/margins": 6.213299751281738, "rewards/rejected": -0.7342538833618164, "step": 5460 }, { "epoch": 2.5, "learning_rate": 5.5809233891425665e-08, "logits/chosen": -4.956564426422119, "logits/rejected": -4.872923851013184, "logps/chosen": -402.48114013671875, "logps/rejected": -252.98526000976562, "loss": 0.1148, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.949686050415039, "rewards/margins": 5.65186882019043, "rewards/rejected": -0.702182948589325, "step": 5470 }, { "epoch": 2.5, "learning_rate": 5.530187721968543e-08, "logits/chosen": -4.92812442779541, "logits/rejected": -4.847708225250244, "logps/chosen": -441.1371154785156, "logps/rejected": -278.5785827636719, "loss": 0.1136, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.350710868835449, "rewards/margins": 5.618605136871338, "rewards/rejected": -0.2678944170475006, "step": 5480 }, { "epoch": 2.51, "learning_rate": 5.47945205479452e-08, "logits/chosen": -4.9335246086120605, "logits/rejected": -4.854302883148193, "logps/chosen": -442.29498291015625, "logps/rejected": -281.7381896972656, "loss": 0.1285, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.096883296966553, "rewards/margins": 5.944703578948975, "rewards/rejected": -0.8478206396102905, "step": 5490 }, { "epoch": 2.51, "learning_rate": 5.4287163876204964e-08, "logits/chosen": -4.93631649017334, "logits/rejected": -4.85787296295166, "logps/chosen": -409.46722412109375, "logps/rejected": -280.10858154296875, "loss": 0.103, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.503188133239746, "rewards/margins": 6.457062721252441, "rewards/rejected": -0.9538741111755371, "step": 5500 }, { "epoch": 2.51, "eval_logits/chosen": -4.8687028884887695, "eval_logits/rejected": -4.8037109375, "eval_logps/chosen": -401.94757080078125, "eval_logps/rejected": -266.9614562988281, "eval_loss": 0.14503486454486847, "eval_rewards/accuracies": 0.924580991268158, "eval_rewards/chosen": 4.978078365325928, "eval_rewards/margins": 5.815342903137207, "eval_rewards/rejected": -0.8372648358345032, "eval_runtime": 220.297, "eval_samples_per_second": 12.992, "eval_steps_per_second": 0.813, "step": 5500 }, { "epoch": 2.52, "learning_rate": 5.377980720446473e-08, "logits/chosen": -4.932882785797119, "logits/rejected": -4.865015506744385, "logps/chosen": -395.84515380859375, "logps/rejected": -263.9732666015625, "loss": 0.1068, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 4.530322074890137, "rewards/margins": 5.451108455657959, "rewards/rejected": -0.9207857251167297, "step": 5510 }, { "epoch": 2.52, "learning_rate": 5.32724505327245e-08, "logits/chosen": -4.9130330085754395, "logits/rejected": -4.8336992263793945, "logps/chosen": -427.09429931640625, "logps/rejected": -282.5550537109375, "loss": 0.1302, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 6.095369338989258, "rewards/margins": 6.372256278991699, "rewards/rejected": -0.2768869400024414, "step": 5520 }, { "epoch": 2.52, "learning_rate": 5.2765093860984264e-08, "logits/chosen": -4.922102451324463, "logits/rejected": -4.859912395477295, "logps/chosen": -412.48236083984375, "logps/rejected": -288.0726013183594, "loss": 0.1168, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.046185493469238, "rewards/margins": 5.882688999176025, "rewards/rejected": -0.8365031480789185, "step": 5530 }, { "epoch": 2.53, "learning_rate": 5.225773718924403e-08, "logits/chosen": -4.9300947189331055, "logits/rejected": -4.848336219787598, "logps/chosen": -421.8499450683594, "logps/rejected": -263.25823974609375, "loss": 0.1126, "rewards/accuracies": 0.9375, "rewards/chosen": 5.161808490753174, "rewards/margins": 6.059567451477051, "rewards/rejected": -0.8977592587471008, "step": 5540 }, { "epoch": 2.53, "learning_rate": 5.17503805175038e-08, "logits/chosen": -4.886312961578369, "logits/rejected": -4.823468208312988, "logps/chosen": -404.383544921875, "logps/rejected": -279.4756774902344, "loss": 0.1209, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.301919460296631, "rewards/margins": 6.089094638824463, "rewards/rejected": -0.7871745824813843, "step": 5550 }, { "epoch": 2.54, "learning_rate": 5.1243023845763564e-08, "logits/chosen": -4.916286468505859, "logits/rejected": -4.837094306945801, "logps/chosen": -438.546630859375, "logps/rejected": -281.37890625, "loss": 0.1276, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.071647644042969, "rewards/margins": 6.549139499664307, "rewards/rejected": -1.4774912595748901, "step": 5560 }, { "epoch": 2.54, "learning_rate": 5.073566717402333e-08, "logits/chosen": -4.9400153160095215, "logits/rejected": -4.875535488128662, "logps/chosen": -398.88922119140625, "logps/rejected": -273.6789245605469, "loss": 0.1357, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.947793006896973, "rewards/margins": 6.04646635055542, "rewards/rejected": -1.0986732244491577, "step": 5570 }, { "epoch": 2.55, "learning_rate": 5.02283105022831e-08, "logits/chosen": -4.936364650726318, "logits/rejected": -4.851689338684082, "logps/chosen": -442.80694580078125, "logps/rejected": -274.3759460449219, "loss": 0.1153, "rewards/accuracies": 0.9375, "rewards/chosen": 4.710545539855957, "rewards/margins": 5.9929609298706055, "rewards/rejected": -1.2824147939682007, "step": 5580 }, { "epoch": 2.55, "learning_rate": 4.9720953830542864e-08, "logits/chosen": -4.929666042327881, "logits/rejected": -4.867574691772461, "logps/chosen": -394.205810546875, "logps/rejected": -287.29443359375, "loss": 0.1011, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.268439292907715, "rewards/margins": 5.93007755279541, "rewards/rejected": -1.6616379022598267, "step": 5590 }, { "epoch": 2.56, "learning_rate": 4.921359715880263e-08, "logits/chosen": -4.90685510635376, "logits/rejected": -4.830386161804199, "logps/chosen": -415.8175354003906, "logps/rejected": -262.47979736328125, "loss": 0.1032, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.596839427947998, "rewards/margins": 6.292741298675537, "rewards/rejected": -0.6959022283554077, "step": 5600 }, { "epoch": 2.56, "eval_logits/chosen": -4.857320308685303, "eval_logits/rejected": -4.7928385734558105, "eval_logps/chosen": -402.0453796386719, "eval_logps/rejected": -267.193603515625, "eval_loss": 0.14494411647319794, "eval_rewards/accuracies": 0.9217877388000488, "eval_rewards/chosen": 4.929175853729248, "eval_rewards/margins": 5.882509708404541, "eval_rewards/rejected": -0.9533332586288452, "eval_runtime": 312.7265, "eval_samples_per_second": 9.152, "eval_steps_per_second": 0.572, "step": 5600 }, { "epoch": 2.56, "learning_rate": 4.87062404870624e-08, "logits/chosen": -4.920735836029053, "logits/rejected": -4.844926357269287, "logps/chosen": -416.06134033203125, "logps/rejected": -265.2876892089844, "loss": 0.1033, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.746376991271973, "rewards/margins": 6.716986656188965, "rewards/rejected": -0.9706104397773743, "step": 5610 }, { "epoch": 2.57, "learning_rate": 4.8198883815322164e-08, "logits/chosen": -4.943557262420654, "logits/rejected": -4.8767476081848145, "logps/chosen": -409.99713134765625, "logps/rejected": -282.8511047363281, "loss": 0.1296, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.584543704986572, "rewards/margins": 6.185892581939697, "rewards/rejected": -1.6013485193252563, "step": 5620 }, { "epoch": 2.57, "learning_rate": 4.769152714358193e-08, "logits/chosen": -4.94488000869751, "logits/rejected": -4.87455940246582, "logps/chosen": -401.20867919921875, "logps/rejected": -279.72503662109375, "loss": 0.087, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 5.61879825592041, "rewards/margins": 6.583451271057129, "rewards/rejected": -0.9646530151367188, "step": 5630 }, { "epoch": 2.57, "learning_rate": 4.71841704718417e-08, "logits/chosen": -4.935222625732422, "logits/rejected": -4.860478401184082, "logps/chosen": -412.70147705078125, "logps/rejected": -268.69183349609375, "loss": 0.1155, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.908216953277588, "rewards/margins": 6.139021873474121, "rewards/rejected": -1.2308051586151123, "step": 5640 }, { "epoch": 2.58, "learning_rate": 4.6676813800101464e-08, "logits/chosen": -4.93255090713501, "logits/rejected": -4.851061820983887, "logps/chosen": -420.4076232910156, "logps/rejected": -278.3027648925781, "loss": 0.1041, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.2539520263671875, "rewards/margins": 5.82532262802124, "rewards/rejected": -0.5713706016540527, "step": 5650 }, { "epoch": 2.58, "learning_rate": 4.616945712836123e-08, "logits/chosen": -4.932182312011719, "logits/rejected": -4.848535060882568, "logps/chosen": -425.7295837402344, "logps/rejected": -274.24737548828125, "loss": 0.1067, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.0354838371276855, "rewards/margins": 6.304403781890869, "rewards/rejected": -1.2689199447631836, "step": 5660 }, { "epoch": 2.59, "learning_rate": 4.5662100456621e-08, "logits/chosen": -4.925627708435059, "logits/rejected": -4.847448825836182, "logps/chosen": -428.55914306640625, "logps/rejected": -278.5158386230469, "loss": 0.0993, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.9800214767456055, "rewards/margins": 5.693356990814209, "rewards/rejected": -0.7133355736732483, "step": 5670 }, { "epoch": 2.59, "learning_rate": 4.5154743784880764e-08, "logits/chosen": -4.920600414276123, "logits/rejected": -4.8634185791015625, "logps/chosen": -384.6602478027344, "logps/rejected": -289.63226318359375, "loss": 0.1246, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.5014824867248535, "rewards/margins": 6.372673034667969, "rewards/rejected": -0.8711902499198914, "step": 5680 }, { "epoch": 2.6, "learning_rate": 4.464738711314053e-08, "logits/chosen": -4.920612812042236, "logits/rejected": -4.844324588775635, "logps/chosen": -405.53350830078125, "logps/rejected": -262.6418762207031, "loss": 0.1035, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.387974739074707, "rewards/margins": 6.786190986633301, "rewards/rejected": -1.3982162475585938, "step": 5690 }, { "epoch": 2.6, "learning_rate": 4.41400304414003e-08, "logits/chosen": -4.894991397857666, "logits/rejected": -4.81160831451416, "logps/chosen": -431.01336669921875, "logps/rejected": -267.3921813964844, "loss": 0.1076, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.905381679534912, "rewards/margins": 7.270198822021484, "rewards/rejected": -1.3648170232772827, "step": 5700 }, { "epoch": 2.6, "eval_logits/chosen": -4.836541652679443, "eval_logits/rejected": -4.771533966064453, "eval_logps/chosen": -401.5494384765625, "eval_logps/rejected": -266.7731628417969, "eval_loss": 0.14462918043136597, "eval_rewards/accuracies": 0.9134078025817871, "eval_rewards/chosen": 5.177152156829834, "eval_rewards/margins": 5.920259475708008, "eval_rewards/rejected": -0.7431077361106873, "eval_runtime": 402.7678, "eval_samples_per_second": 7.106, "eval_steps_per_second": 0.444, "step": 5700 }, { "epoch": 2.61, "learning_rate": 4.3632673769660064e-08, "logits/chosen": -4.9254231452941895, "logits/rejected": -4.837775230407715, "logps/chosen": -443.3357849121094, "logps/rejected": -287.3525695800781, "loss": 0.1073, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 6.151810646057129, "rewards/margins": 6.514415740966797, "rewards/rejected": -0.3626047670841217, "step": 5710 }, { "epoch": 2.61, "learning_rate": 4.312531709791983e-08, "logits/chosen": -4.93767786026001, "logits/rejected": -4.873133182525635, "logps/chosen": -424.30621337890625, "logps/rejected": -294.3512878417969, "loss": 0.1316, "rewards/accuracies": 0.9375, "rewards/chosen": 5.4539055824279785, "rewards/margins": 5.966094970703125, "rewards/rejected": -0.5121897459030151, "step": 5720 }, { "epoch": 2.62, "learning_rate": 4.26179604261796e-08, "logits/chosen": -4.926243782043457, "logits/rejected": -4.848898887634277, "logps/chosen": -392.4992370605469, "logps/rejected": -257.875, "loss": 0.1342, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.8231072425842285, "rewards/margins": 5.60843563079834, "rewards/rejected": -0.7853285670280457, "step": 5730 }, { "epoch": 2.62, "learning_rate": 4.2110603754439363e-08, "logits/chosen": -4.899616241455078, "logits/rejected": -4.8267107009887695, "logps/chosen": -401.23779296875, "logps/rejected": -265.3388671875, "loss": 0.1116, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.27294921875, "rewards/margins": 6.229853630065918, "rewards/rejected": -0.9569042921066284, "step": 5740 }, { "epoch": 2.62, "learning_rate": 4.160324708269913e-08, "logits/chosen": -4.9103875160217285, "logits/rejected": -4.834478855133057, "logps/chosen": -424.2123107910156, "logps/rejected": -290.844482421875, "loss": 0.1207, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.291749000549316, "rewards/margins": 6.304253578186035, "rewards/rejected": -1.0125043392181396, "step": 5750 }, { "epoch": 2.63, "learning_rate": 4.10958904109589e-08, "logits/chosen": -4.9463348388671875, "logits/rejected": -4.874329566955566, "logps/chosen": -398.1748962402344, "logps/rejected": -274.3821105957031, "loss": 0.1322, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.786447048187256, "rewards/margins": 5.323861598968506, "rewards/rejected": -0.5374141335487366, "step": 5760 }, { "epoch": 2.63, "learning_rate": 4.0588533739218663e-08, "logits/chosen": -4.921504020690918, "logits/rejected": -4.838692665100098, "logps/chosen": -407.69598388671875, "logps/rejected": -244.59878540039062, "loss": 0.0942, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.630707740783691, "rewards/margins": 6.862261772155762, "rewards/rejected": -1.2315542697906494, "step": 5770 }, { "epoch": 2.64, "learning_rate": 4.0081177067478437e-08, "logits/chosen": -4.943502902984619, "logits/rejected": -4.860692977905273, "logps/chosen": -428.2420959472656, "logps/rejected": -271.67279052734375, "loss": 0.1281, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.6189165115356445, "rewards/margins": 6.388761043548584, "rewards/rejected": -0.7698448896408081, "step": 5780 }, { "epoch": 2.64, "learning_rate": 3.95738203957382e-08, "logits/chosen": -4.926566123962402, "logits/rejected": -4.880103588104248, "logps/chosen": -365.5216064453125, "logps/rejected": -281.39166259765625, "loss": 0.1381, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.30989933013916, "rewards/margins": 5.030462741851807, "rewards/rejected": -0.7205637097358704, "step": 5790 }, { "epoch": 2.65, "learning_rate": 3.906646372399797e-08, "logits/chosen": -4.919816017150879, "logits/rejected": -4.836727619171143, "logps/chosen": -431.1061096191406, "logps/rejected": -268.39898681640625, "loss": 0.1048, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 5.920688629150391, "rewards/margins": 6.9955949783325195, "rewards/rejected": -1.074906587600708, "step": 5800 }, { "epoch": 2.65, "eval_logits/chosen": -4.882045745849609, "eval_logits/rejected": -4.8149495124816895, "eval_logps/chosen": -402.25494384765625, "eval_logps/rejected": -267.357666015625, "eval_loss": 0.14384929835796356, "eval_rewards/accuracies": 0.9217877388000488, "eval_rewards/chosen": 4.824397563934326, "eval_rewards/margins": 5.8597588539123535, "eval_rewards/rejected": -1.035361647605896, "eval_runtime": 511.6331, "eval_samples_per_second": 5.594, "eval_steps_per_second": 0.35, "step": 5800 }, { "epoch": 2.65, "learning_rate": 3.8559107052257736e-08, "logits/chosen": -4.932694435119629, "logits/rejected": -4.868044853210449, "logps/chosen": -369.7104187011719, "logps/rejected": -263.9983825683594, "loss": 0.118, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.549539089202881, "rewards/margins": 5.914671421051025, "rewards/rejected": -1.3651320934295654, "step": 5810 }, { "epoch": 2.66, "learning_rate": 3.80517503805175e-08, "logits/chosen": -4.9247589111328125, "logits/rejected": -4.842148303985596, "logps/chosen": -451.47613525390625, "logps/rejected": -299.9072570800781, "loss": 0.1147, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.102829933166504, "rewards/margins": 6.0603227615356445, "rewards/rejected": -0.957493007183075, "step": 5820 }, { "epoch": 2.66, "learning_rate": 3.754439370877727e-08, "logits/chosen": -4.9485979080200195, "logits/rejected": -4.8723297119140625, "logps/chosen": -414.8936462402344, "logps/rejected": -272.13006591796875, "loss": 0.1107, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 4.821213722229004, "rewards/margins": 6.178685188293457, "rewards/rejected": -1.3574720621109009, "step": 5830 }, { "epoch": 2.67, "learning_rate": 3.7037037037037036e-08, "logits/chosen": -4.985118865966797, "logits/rejected": -4.914344787597656, "logps/chosen": -398.11767578125, "logps/rejected": -280.87689208984375, "loss": 0.0689, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 5.160405158996582, "rewards/margins": 5.994385242462158, "rewards/rejected": -0.8339805603027344, "step": 5840 }, { "epoch": 2.67, "learning_rate": 3.65296803652968e-08, "logits/chosen": -4.962017059326172, "logits/rejected": -4.886102676391602, "logps/chosen": -402.51251220703125, "logps/rejected": -269.8755187988281, "loss": 0.1096, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.846424579620361, "rewards/margins": 5.706874847412109, "rewards/rejected": -0.8604499697685242, "step": 5850 }, { "epoch": 2.67, "learning_rate": 3.602232369355657e-08, "logits/chosen": -4.986819267272949, "logits/rejected": -4.910802364349365, "logps/chosen": -417.13238525390625, "logps/rejected": -273.75091552734375, "loss": 0.0969, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 5.2972612380981445, "rewards/margins": 6.706035614013672, "rewards/rejected": -1.4087746143341064, "step": 5860 }, { "epoch": 2.68, "learning_rate": 3.5514967021816336e-08, "logits/chosen": -4.986905097961426, "logits/rejected": -4.905413627624512, "logps/chosen": -434.11175537109375, "logps/rejected": -291.5127868652344, "loss": 0.1526, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.724877834320068, "rewards/margins": 6.2800164222717285, "rewards/rejected": -0.5551384687423706, "step": 5870 }, { "epoch": 2.68, "learning_rate": 3.50076103500761e-08, "logits/chosen": -4.9583234786987305, "logits/rejected": -4.886194229125977, "logps/chosen": -429.7208557128906, "logps/rejected": -291.4436950683594, "loss": 0.0906, "rewards/accuracies": 0.9375, "rewards/chosen": 5.845227241516113, "rewards/margins": 6.951490879058838, "rewards/rejected": -1.1062636375427246, "step": 5880 }, { "epoch": 2.69, "learning_rate": 3.450025367833587e-08, "logits/chosen": -4.956194877624512, "logits/rejected": -4.8850250244140625, "logps/chosen": -412.7322692871094, "logps/rejected": -281.173583984375, "loss": 0.1224, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.013502597808838, "rewards/margins": 6.392571449279785, "rewards/rejected": -1.3790686130523682, "step": 5890 }, { "epoch": 2.69, "learning_rate": 3.3992897006595636e-08, "logits/chosen": -4.961340427398682, "logits/rejected": -4.886641025543213, "logps/chosen": -420.67578125, "logps/rejected": -287.6502380371094, "loss": 0.0975, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.895220756530762, "rewards/margins": 6.38983678817749, "rewards/rejected": -1.4946151971817017, "step": 5900 }, { "epoch": 2.69, "eval_logits/chosen": -4.89227819442749, "eval_logits/rejected": -4.824686050415039, "eval_logps/chosen": -402.537841796875, "eval_logps/rejected": -267.58331298828125, "eval_loss": 0.14460624754428864, "eval_rewards/accuracies": 0.9134078025817871, "eval_rewards/chosen": 4.682952880859375, "eval_rewards/margins": 5.831155776977539, "eval_rewards/rejected": -1.1482025384902954, "eval_runtime": 212.6503, "eval_samples_per_second": 13.459, "eval_steps_per_second": 0.842, "step": 5900 }, { "epoch": 2.7, "learning_rate": 3.34855403348554e-08, "logits/chosen": -4.941916465759277, "logits/rejected": -4.857578277587891, "logps/chosen": -428.84588623046875, "logps/rejected": -268.8080139160156, "loss": 0.1051, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.618954658508301, "rewards/margins": 7.224569797515869, "rewards/rejected": -1.605615258216858, "step": 5910 }, { "epoch": 2.7, "learning_rate": 3.297818366311517e-08, "logits/chosen": -4.932914733886719, "logits/rejected": -4.868758201599121, "logps/chosen": -410.5572204589844, "logps/rejected": -283.00396728515625, "loss": 0.1044, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.094294548034668, "rewards/margins": 5.818046569824219, "rewards/rejected": -0.7237521409988403, "step": 5920 }, { "epoch": 2.71, "learning_rate": 3.2470826991374936e-08, "logits/chosen": -4.950967788696289, "logits/rejected": -4.868668079376221, "logps/chosen": -438.07470703125, "logps/rejected": -281.27191162109375, "loss": 0.1106, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.914918899536133, "rewards/margins": 6.703428745269775, "rewards/rejected": -0.7885104417800903, "step": 5930 }, { "epoch": 2.71, "learning_rate": 3.19634703196347e-08, "logits/chosen": -4.9339470863342285, "logits/rejected": -4.853811264038086, "logps/chosen": -437.03143310546875, "logps/rejected": -292.36798095703125, "loss": 0.1152, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.951041221618652, "rewards/margins": 5.891929626464844, "rewards/rejected": -0.9408879280090332, "step": 5940 }, { "epoch": 2.72, "learning_rate": 3.145611364789447e-08, "logits/chosen": -4.921416282653809, "logits/rejected": -4.844716548919678, "logps/chosen": -398.63458251953125, "logps/rejected": -273.3787536621094, "loss": 0.1185, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.037073612213135, "rewards/margins": 6.602618217468262, "rewards/rejected": -1.5655443668365479, "step": 5950 }, { "epoch": 2.72, "learning_rate": 3.0948756976154236e-08, "logits/chosen": -4.922245025634766, "logits/rejected": -4.844263553619385, "logps/chosen": -409.8789978027344, "logps/rejected": -256.9624938964844, "loss": 0.1101, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.812539577484131, "rewards/margins": 6.806302547454834, "rewards/rejected": -0.9937637448310852, "step": 5960 }, { "epoch": 2.73, "learning_rate": 3.0441400304414e-08, "logits/chosen": -4.91012716293335, "logits/rejected": -4.8456315994262695, "logps/chosen": -412.77874755859375, "logps/rejected": -283.9595947265625, "loss": 0.1102, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 5.214759826660156, "rewards/margins": 5.565724849700928, "rewards/rejected": -0.35096487402915955, "step": 5970 }, { "epoch": 2.73, "learning_rate": 2.993404363267377e-08, "logits/chosen": -4.929327487945557, "logits/rejected": -4.869770050048828, "logps/chosen": -374.94427490234375, "logps/rejected": -274.50811767578125, "loss": 0.1096, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.515448570251465, "rewards/margins": 5.815885543823242, "rewards/rejected": -1.3004369735717773, "step": 5980 }, { "epoch": 2.73, "learning_rate": 2.9426686960933532e-08, "logits/chosen": -4.940583229064941, "logits/rejected": -4.855121612548828, "logps/chosen": -434.57171630859375, "logps/rejected": -282.553466796875, "loss": 0.1149, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.8109283447265625, "rewards/margins": 6.7603020668029785, "rewards/rejected": -0.9493740797042847, "step": 5990 }, { "epoch": 2.74, "learning_rate": 2.89193302891933e-08, "logits/chosen": -4.943351745605469, "logits/rejected": -4.8761796951293945, "logps/chosen": -403.87689208984375, "logps/rejected": -282.23089599609375, "loss": 0.1251, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.785261154174805, "rewards/margins": 5.84893274307251, "rewards/rejected": -1.0636717081069946, "step": 6000 }, { "epoch": 2.74, "eval_logits/chosen": -4.871729850769043, "eval_logits/rejected": -4.805281162261963, "eval_logps/chosen": -402.243408203125, "eval_logps/rejected": -267.36798095703125, "eval_loss": 0.14327535033226013, "eval_rewards/accuracies": 0.9134078025817871, "eval_rewards/chosen": 4.83017635345459, "eval_rewards/margins": 5.870711326599121, "eval_rewards/rejected": -1.0405347347259521, "eval_runtime": 406.7339, "eval_samples_per_second": 7.037, "eval_steps_per_second": 0.44, "step": 6000 }, { "epoch": 2.74, "learning_rate": 2.8411973617453066e-08, "logits/chosen": -4.950901031494141, "logits/rejected": -4.88382625579834, "logps/chosen": -407.19677734375, "logps/rejected": -277.3570251464844, "loss": 0.1248, "rewards/accuracies": 1.0, "rewards/chosen": 5.3168745040893555, "rewards/margins": 6.530680179595947, "rewards/rejected": -1.2138051986694336, "step": 6010 }, { "epoch": 2.75, "learning_rate": 2.7904616945712832e-08, "logits/chosen": -4.936407566070557, "logits/rejected": -4.8455891609191895, "logps/chosen": -465.6396484375, "logps/rejected": -271.69903564453125, "loss": 0.1036, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 6.2963385581970215, "rewards/margins": 7.286116600036621, "rewards/rejected": -0.989777684211731, "step": 6020 }, { "epoch": 2.75, "learning_rate": 2.73972602739726e-08, "logits/chosen": -4.963925838470459, "logits/rejected": -4.882255554199219, "logps/chosen": -419.179443359375, "logps/rejected": -271.31817626953125, "loss": 0.1257, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.638518333435059, "rewards/margins": 5.843934059143066, "rewards/rejected": -1.2054158449172974, "step": 6030 }, { "epoch": 2.76, "learning_rate": 2.6889903602232366e-08, "logits/chosen": -4.962245941162109, "logits/rejected": -4.885477542877197, "logps/chosen": -441.6734313964844, "logps/rejected": -305.0368957519531, "loss": 0.1065, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.994645595550537, "rewards/margins": 6.821511268615723, "rewards/rejected": -0.8268654942512512, "step": 6040 }, { "epoch": 2.76, "learning_rate": 2.6382546930492132e-08, "logits/chosen": -4.948982238769531, "logits/rejected": -4.863287925720215, "logps/chosen": -420.83221435546875, "logps/rejected": -266.15130615234375, "loss": 0.0994, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.330599308013916, "rewards/margins": 6.343254089355469, "rewards/rejected": -1.0126545429229736, "step": 6050 }, { "epoch": 2.77, "learning_rate": 2.58751902587519e-08, "logits/chosen": -4.966174602508545, "logits/rejected": -4.880353927612305, "logps/chosen": -411.3734436035156, "logps/rejected": -261.46917724609375, "loss": 0.1492, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.58519983291626, "rewards/margins": 6.655205726623535, "rewards/rejected": -1.0700066089630127, "step": 6060 }, { "epoch": 2.77, "learning_rate": 2.5367833587011665e-08, "logits/chosen": -4.961338996887207, "logits/rejected": -4.87571382522583, "logps/chosen": -423.657958984375, "logps/rejected": -262.57769775390625, "loss": 0.0939, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.414395809173584, "rewards/margins": 6.490010738372803, "rewards/rejected": -1.0756146907806396, "step": 6070 }, { "epoch": 2.78, "learning_rate": 2.4860476915271432e-08, "logits/chosen": -4.93961238861084, "logits/rejected": -4.838839530944824, "logps/chosen": -459.1656188964844, "logps/rejected": -264.1505432128906, "loss": 0.1014, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 6.627755165100098, "rewards/margins": 7.168264865875244, "rewards/rejected": -0.5405099987983704, "step": 6080 }, { "epoch": 2.78, "learning_rate": 2.43531202435312e-08, "logits/chosen": -4.917438507080078, "logits/rejected": -4.850402355194092, "logps/chosen": -378.2708435058594, "logps/rejected": -267.6152648925781, "loss": 0.1027, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.5027642250061035, "rewards/margins": 5.241448402404785, "rewards/rejected": -0.738683819770813, "step": 6090 }, { "epoch": 2.78, "learning_rate": 2.3845763571790965e-08, "logits/chosen": -4.967135906219482, "logits/rejected": -4.892510414123535, "logps/chosen": -413.3404846191406, "logps/rejected": -268.9817810058594, "loss": 0.1279, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 6.038172721862793, "rewards/margins": 7.014352321624756, "rewards/rejected": -0.9761795997619629, "step": 6100 }, { "epoch": 2.78, "eval_logits/chosen": -4.890483856201172, "eval_logits/rejected": -4.823766708374023, "eval_logps/chosen": -402.0884704589844, "eval_logps/rejected": -267.2005920410156, "eval_loss": 0.14332087337970734, "eval_rewards/accuracies": 0.9134078025817871, "eval_rewards/chosen": 4.907647132873535, "eval_rewards/margins": 5.864484786987305, "eval_rewards/rejected": -0.9568384289741516, "eval_runtime": 465.1982, "eval_samples_per_second": 6.152, "eval_steps_per_second": 0.385, "step": 6100 }, { "epoch": 2.79, "learning_rate": 2.3338406900050732e-08, "logits/chosen": -4.957682132720947, "logits/rejected": -4.8741559982299805, "logps/chosen": -414.59796142578125, "logps/rejected": -250.1722412109375, "loss": 0.1035, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.5447821617126465, "rewards/margins": 6.044272422790527, "rewards/rejected": -0.4994896352291107, "step": 6110 }, { "epoch": 2.79, "learning_rate": 2.28310502283105e-08, "logits/chosen": -4.9550580978393555, "logits/rejected": -4.884500026702881, "logps/chosen": -410.00360107421875, "logps/rejected": -281.7815246582031, "loss": 0.1131, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.904871940612793, "rewards/margins": 5.838001728057861, "rewards/rejected": -0.9331291317939758, "step": 6120 }, { "epoch": 2.8, "learning_rate": 2.2323693556570265e-08, "logits/chosen": -4.958915710449219, "logits/rejected": -4.893693447113037, "logps/chosen": -384.3592529296875, "logps/rejected": -272.64056396484375, "loss": 0.1198, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.516094207763672, "rewards/margins": 5.1204118728637695, "rewards/rejected": -0.6043173670768738, "step": 6130 }, { "epoch": 2.8, "learning_rate": 2.1816336884830032e-08, "logits/chosen": -4.941610813140869, "logits/rejected": -4.865566730499268, "logps/chosen": -419.16534423828125, "logps/rejected": -273.712646484375, "loss": 0.1349, "rewards/accuracies": 0.9375, "rewards/chosen": 5.3687849044799805, "rewards/margins": 6.0056071281433105, "rewards/rejected": -0.6368213891983032, "step": 6140 }, { "epoch": 2.81, "learning_rate": 2.13089802130898e-08, "logits/chosen": -4.950615882873535, "logits/rejected": -4.850595474243164, "logps/chosen": -437.6183166503906, "logps/rejected": -259.6206359863281, "loss": 0.1141, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.8536810874938965, "rewards/margins": 6.972989559173584, "rewards/rejected": -1.1193089485168457, "step": 6150 }, { "epoch": 2.81, "learning_rate": 2.0801623541349565e-08, "logits/chosen": -4.938338279724121, "logits/rejected": -4.873866081237793, "logps/chosen": -410.7518005371094, "logps/rejected": -286.6542663574219, "loss": 0.1002, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 5.8696184158325195, "rewards/margins": 6.2553606033325195, "rewards/rejected": -0.3857421278953552, "step": 6160 }, { "epoch": 2.82, "learning_rate": 2.0294266869609332e-08, "logits/chosen": -4.963109016418457, "logits/rejected": -4.884449481964111, "logps/chosen": -418.2764587402344, "logps/rejected": -277.1029052734375, "loss": 0.1087, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.219601631164551, "rewards/margins": 5.753309726715088, "rewards/rejected": -0.5337079763412476, "step": 6170 }, { "epoch": 2.82, "learning_rate": 1.97869101978691e-08, "logits/chosen": -4.974201679229736, "logits/rejected": -4.894139766693115, "logps/chosen": -438.12738037109375, "logps/rejected": -282.75201416015625, "loss": 0.0963, "rewards/accuracies": 1.0, "rewards/chosen": 5.24376106262207, "rewards/margins": 6.206584930419922, "rewards/rejected": -0.9628242254257202, "step": 6180 }, { "epoch": 2.83, "learning_rate": 1.9279553526128868e-08, "logits/chosen": -4.9838948249816895, "logits/rejected": -4.890003204345703, "logps/chosen": -428.54010009765625, "logps/rejected": -273.41033935546875, "loss": 0.1032, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.8647871017456055, "rewards/margins": 7.1772565841674805, "rewards/rejected": -1.3124698400497437, "step": 6190 }, { "epoch": 2.83, "learning_rate": 1.8772196854388635e-08, "logits/chosen": -4.967891216278076, "logits/rejected": -4.892298698425293, "logps/chosen": -408.5283203125, "logps/rejected": -272.31829833984375, "loss": 0.1334, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.83841609954834, "rewards/margins": 5.935000419616699, "rewards/rejected": -1.0965845584869385, "step": 6200 }, { "epoch": 2.83, "eval_logits/chosen": -4.906968593597412, "eval_logits/rejected": -4.840149879455566, "eval_logps/chosen": -402.0962829589844, "eval_logps/rejected": -267.22344970703125, "eval_loss": 0.14336001873016357, "eval_rewards/accuracies": 0.910614550113678, "eval_rewards/chosen": 4.903751850128174, "eval_rewards/margins": 5.87202262878418, "eval_rewards/rejected": -0.9682710766792297, "eval_runtime": 363.2044, "eval_samples_per_second": 7.88, "eval_steps_per_second": 0.493, "step": 6200 }, { "epoch": 2.83, "learning_rate": 1.82648401826484e-08, "logits/chosen": -4.962432861328125, "logits/rejected": -4.882079124450684, "logps/chosen": -414.3349609375, "logps/rejected": -269.98541259765625, "loss": 0.1236, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 5.156820774078369, "rewards/margins": 5.846132755279541, "rewards/rejected": -0.6893118023872375, "step": 6210 }, { "epoch": 2.84, "learning_rate": 1.7757483510908168e-08, "logits/chosen": -4.958737850189209, "logits/rejected": -4.890353202819824, "logps/chosen": -403.20306396484375, "logps/rejected": -276.6078796386719, "loss": 0.1176, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.5241193771362305, "rewards/margins": 6.465207576751709, "rewards/rejected": -0.9410883188247681, "step": 6220 }, { "epoch": 2.84, "learning_rate": 1.7250126839167935e-08, "logits/chosen": -4.961437702178955, "logits/rejected": -4.907729148864746, "logps/chosen": -374.70599365234375, "logps/rejected": -289.2994079589844, "loss": 0.1045, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.346192359924316, "rewards/margins": 5.849404335021973, "rewards/rejected": -0.503212571144104, "step": 6230 }, { "epoch": 2.85, "learning_rate": 1.67427701674277e-08, "logits/chosen": -4.945562839508057, "logits/rejected": -4.864018440246582, "logps/chosen": -416.59637451171875, "logps/rejected": -267.43536376953125, "loss": 0.1098, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.590190887451172, "rewards/margins": 6.956812858581543, "rewards/rejected": -1.3666220903396606, "step": 6240 }, { "epoch": 2.85, "learning_rate": 1.6235413495687468e-08, "logits/chosen": -4.9468231201171875, "logits/rejected": -4.873073577880859, "logps/chosen": -416.75311279296875, "logps/rejected": -284.90924072265625, "loss": 0.1106, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 6.271980285644531, "rewards/margins": 6.698386192321777, "rewards/rejected": -0.4264054298400879, "step": 6250 }, { "epoch": 2.86, "learning_rate": 1.5728056823947235e-08, "logits/chosen": -4.954755783081055, "logits/rejected": -4.8815717697143555, "logps/chosen": -423.7811584472656, "logps/rejected": -274.4423828125, "loss": 0.0981, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 6.242772102355957, "rewards/margins": 7.047906398773193, "rewards/rejected": -0.8051339983940125, "step": 6260 }, { "epoch": 2.86, "learning_rate": 1.5220700152207e-08, "logits/chosen": -4.950672149658203, "logits/rejected": -4.884222984313965, "logps/chosen": -403.21783447265625, "logps/rejected": -279.2829895019531, "loss": 0.1043, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.099035263061523, "rewards/margins": 6.5117902755737305, "rewards/rejected": -1.4127554893493652, "step": 6270 }, { "epoch": 2.87, "learning_rate": 1.4713343480466766e-08, "logits/chosen": -4.972266674041748, "logits/rejected": -4.899314880371094, "logps/chosen": -410.87835693359375, "logps/rejected": -279.7785949707031, "loss": 0.0971, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.641701698303223, "rewards/margins": 5.523205280303955, "rewards/rejected": -0.8815029859542847, "step": 6280 }, { "epoch": 2.87, "learning_rate": 1.4205986808726533e-08, "logits/chosen": -4.977224826812744, "logits/rejected": -4.885519504547119, "logps/chosen": -434.43017578125, "logps/rejected": -260.52691650390625, "loss": 0.0982, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 5.612240791320801, "rewards/margins": 6.317038536071777, "rewards/rejected": -0.7047973871231079, "step": 6290 }, { "epoch": 2.88, "learning_rate": 1.36986301369863e-08, "logits/chosen": -4.960092544555664, "logits/rejected": -4.884345054626465, "logps/chosen": -407.2979736328125, "logps/rejected": -275.28997802734375, "loss": 0.111, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.398829460144043, "rewards/margins": 6.124081611633301, "rewards/rejected": -0.7252525091171265, "step": 6300 }, { "epoch": 2.88, "eval_logits/chosen": -4.9000654220581055, "eval_logits/rejected": -4.832996368408203, "eval_logps/chosen": -402.1418151855469, "eval_logps/rejected": -267.266845703125, "eval_loss": 0.1432226449251175, "eval_rewards/accuracies": 0.916201114654541, "eval_rewards/chosen": 4.880965232849121, "eval_rewards/margins": 5.870927333831787, "eval_rewards/rejected": -0.989962637424469, "eval_runtime": 235.0792, "eval_samples_per_second": 12.175, "eval_steps_per_second": 0.761, "step": 6300 }, { "epoch": 2.88, "learning_rate": 1.3191273465246066e-08, "logits/chosen": -4.972257137298584, "logits/rejected": -4.8879475593566895, "logps/chosen": -444.9588928222656, "logps/rejected": -279.5383605957031, "loss": 0.1151, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.959007263183594, "rewards/margins": 5.655477046966553, "rewards/rejected": -0.6964699625968933, "step": 6310 }, { "epoch": 2.88, "learning_rate": 1.2683916793505833e-08, "logits/chosen": -4.967973709106445, "logits/rejected": -4.894082069396973, "logps/chosen": -444.32696533203125, "logps/rejected": -301.29400634765625, "loss": 0.1558, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.60256814956665, "rewards/margins": 6.356666564941406, "rewards/rejected": -0.7540984153747559, "step": 6320 }, { "epoch": 2.89, "learning_rate": 1.21765601217656e-08, "logits/chosen": -4.959591865539551, "logits/rejected": -4.883488655090332, "logps/chosen": -404.097412109375, "logps/rejected": -265.98504638671875, "loss": 0.1156, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.701376438140869, "rewards/margins": 6.108952522277832, "rewards/rejected": -1.4075767993927002, "step": 6330 }, { "epoch": 2.89, "learning_rate": 1.1669203450025366e-08, "logits/chosen": -4.9669928550720215, "logits/rejected": -4.891518592834473, "logps/chosen": -410.58416748046875, "logps/rejected": -279.4032287597656, "loss": 0.1255, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.344937801361084, "rewards/margins": 6.024382591247559, "rewards/rejected": -0.6794454455375671, "step": 6340 }, { "epoch": 2.9, "learning_rate": 1.1161846778285133e-08, "logits/chosen": -4.958381175994873, "logits/rejected": -4.883745193481445, "logps/chosen": -388.25, "logps/rejected": -258.48004150390625, "loss": 0.1151, "rewards/accuracies": 0.9375, "rewards/chosen": 4.789067268371582, "rewards/margins": 5.552566051483154, "rewards/rejected": -0.7634989619255066, "step": 6350 }, { "epoch": 2.9, "learning_rate": 1.06544901065449e-08, "logits/chosen": -4.954177379608154, "logits/rejected": -4.878336429595947, "logps/chosen": -415.78521728515625, "logps/rejected": -276.9290771484375, "loss": 0.118, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.801333427429199, "rewards/margins": 6.955748081207275, "rewards/rejected": -1.1544137001037598, "step": 6360 }, { "epoch": 2.91, "learning_rate": 1.0147133434804666e-08, "logits/chosen": -4.955416679382324, "logits/rejected": -4.871819019317627, "logps/chosen": -425.78363037109375, "logps/rejected": -262.81903076171875, "loss": 0.1127, "rewards/accuracies": 0.9375, "rewards/chosen": 5.30413293838501, "rewards/margins": 6.716325283050537, "rewards/rejected": -1.4121922254562378, "step": 6370 }, { "epoch": 2.91, "learning_rate": 9.639776763064434e-09, "logits/chosen": -4.95179557800293, "logits/rejected": -4.871451377868652, "logps/chosen": -413.1435546875, "logps/rejected": -263.28179931640625, "loss": 0.1059, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 5.233330249786377, "rewards/margins": 6.52274227142334, "rewards/rejected": -1.2894119024276733, "step": 6380 }, { "epoch": 2.92, "learning_rate": 9.1324200913242e-09, "logits/chosen": -4.956809043884277, "logits/rejected": -4.887081146240234, "logps/chosen": -398.07232666015625, "logps/rejected": -265.42474365234375, "loss": 0.1085, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.9278059005737305, "rewards/margins": 5.913747310638428, "rewards/rejected": -0.9859414100646973, "step": 6390 }, { "epoch": 2.92, "learning_rate": 8.625063419583967e-09, "logits/chosen": -4.961661338806152, "logits/rejected": -4.884273529052734, "logps/chosen": -404.3682861328125, "logps/rejected": -266.78619384765625, "loss": 0.1204, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.485603332519531, "rewards/margins": 6.3795366287231445, "rewards/rejected": -0.8939326405525208, "step": 6400 }, { "epoch": 2.92, "eval_logits/chosen": -4.896713733673096, "eval_logits/rejected": -4.829170227050781, "eval_logps/chosen": -402.0501403808594, "eval_logps/rejected": -267.19134521484375, "eval_loss": 0.14334213733673096, "eval_rewards/accuracies": 0.9134078025817871, "eval_rewards/chosen": 4.926786422729492, "eval_rewards/margins": 5.878993511199951, "eval_rewards/rejected": -0.9522069096565247, "eval_runtime": 417.5356, "eval_samples_per_second": 6.855, "eval_steps_per_second": 0.429, "step": 6400 }, { "epoch": 2.93, "learning_rate": 8.117706747843734e-09, "logits/chosen": -4.939919471740723, "logits/rejected": -4.862345218658447, "logps/chosen": -412.582763671875, "logps/rejected": -273.4056701660156, "loss": 0.1091, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.892035961151123, "rewards/margins": 7.217637062072754, "rewards/rejected": -1.3256006240844727, "step": 6410 }, { "epoch": 2.93, "learning_rate": 7.6103500761035e-09, "logits/chosen": -4.963162422180176, "logits/rejected": -4.877333641052246, "logps/chosen": -422.65673828125, "logps/rejected": -266.286376953125, "loss": 0.1153, "rewards/accuracies": 0.9375, "rewards/chosen": 5.283825874328613, "rewards/margins": 6.1046528816223145, "rewards/rejected": -0.820827841758728, "step": 6420 }, { "epoch": 2.94, "learning_rate": 7.1029934043632664e-09, "logits/chosen": -4.9887871742248535, "logits/rejected": -4.90464973449707, "logps/chosen": -438.92449951171875, "logps/rejected": -273.2255554199219, "loss": 0.1012, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.667872428894043, "rewards/margins": 6.802371025085449, "rewards/rejected": -1.134499430656433, "step": 6430 }, { "epoch": 2.94, "learning_rate": 6.595636732623033e-09, "logits/chosen": -4.958290100097656, "logits/rejected": -4.889904975891113, "logps/chosen": -420.11163330078125, "logps/rejected": -286.64349365234375, "loss": 0.1154, "rewards/accuracies": 0.9375, "rewards/chosen": 5.804112434387207, "rewards/margins": 6.268657207489014, "rewards/rejected": -0.46454495191574097, "step": 6440 }, { "epoch": 2.94, "learning_rate": 6.0882800608828e-09, "logits/chosen": -4.961657524108887, "logits/rejected": -4.90249490737915, "logps/chosen": -410.54913330078125, "logps/rejected": -298.6160583496094, "loss": 0.0972, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.818478584289551, "rewards/margins": 5.917497634887695, "rewards/rejected": -1.0990185737609863, "step": 6450 }, { "epoch": 2.95, "learning_rate": 5.580923389142566e-09, "logits/chosen": -4.964001178741455, "logits/rejected": -4.883702278137207, "logps/chosen": -419.25213623046875, "logps/rejected": -283.06866455078125, "loss": 0.1095, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 5.641483306884766, "rewards/margins": 6.6281304359436035, "rewards/rejected": -0.9866467714309692, "step": 6460 }, { "epoch": 2.95, "learning_rate": 5.073566717402333e-09, "logits/chosen": -4.980655193328857, "logits/rejected": -4.896926403045654, "logps/chosen": -436.05438232421875, "logps/rejected": -285.7574157714844, "loss": 0.1075, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.490670204162598, "rewards/margins": 6.687801361083984, "rewards/rejected": -1.1971312761306763, "step": 6470 }, { "epoch": 2.96, "learning_rate": 4.5662100456621e-09, "logits/chosen": -4.963423728942871, "logits/rejected": -4.883721351623535, "logps/chosen": -420.724609375, "logps/rejected": -274.7815856933594, "loss": 0.1325, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.362826347351074, "rewards/margins": 6.2329840660095215, "rewards/rejected": -0.8701577186584473, "step": 6480 }, { "epoch": 2.96, "learning_rate": 4.058853373921867e-09, "logits/chosen": -4.977153778076172, "logits/rejected": -4.882737159729004, "logps/chosen": -426.246826171875, "logps/rejected": -259.0433044433594, "loss": 0.1068, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.8855180740356445, "rewards/margins": 7.137026309967041, "rewards/rejected": -1.2515079975128174, "step": 6490 }, { "epoch": 2.97, "learning_rate": 3.5514967021816332e-09, "logits/chosen": -4.969964504241943, "logits/rejected": -4.900024890899658, "logps/chosen": -403.82952880859375, "logps/rejected": -288.90118408203125, "loss": 0.12, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 5.317718505859375, "rewards/margins": 5.362739086151123, "rewards/rejected": -0.04502048343420029, "step": 6500 }, { "epoch": 2.97, "eval_logits/chosen": -4.902114391326904, "eval_logits/rejected": -4.83406400680542, "eval_logps/chosen": -402.2420959472656, "eval_logps/rejected": -267.3595275878906, "eval_loss": 0.1431497484445572, "eval_rewards/accuracies": 0.916201114654541, "eval_rewards/chosen": 4.830814838409424, "eval_rewards/margins": 5.867116451263428, "eval_rewards/rejected": -1.0363017320632935, "eval_runtime": 307.6936, "eval_samples_per_second": 9.301, "eval_steps_per_second": 0.582, "step": 6500 }, { "epoch": 2.97, "learning_rate": 3.0441400304414e-09, "logits/chosen": -4.977284908294678, "logits/rejected": -4.893540382385254, "logps/chosen": -429.06060791015625, "logps/rejected": -278.15020751953125, "loss": 0.1127, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.552095890045166, "rewards/margins": 6.157132148742676, "rewards/rejected": -0.6050364971160889, "step": 6510 }, { "epoch": 2.98, "learning_rate": 2.5367833587011665e-09, "logits/chosen": -4.980680465698242, "logits/rejected": -4.888558864593506, "logps/chosen": -431.88226318359375, "logps/rejected": -272.1903381347656, "loss": 0.1114, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.5667829513549805, "rewards/margins": 6.514996528625488, "rewards/rejected": -0.9482134580612183, "step": 6520 }, { "epoch": 2.98, "learning_rate": 2.0294266869609335e-09, "logits/chosen": -4.959232807159424, "logits/rejected": -4.899811744689941, "logps/chosen": -396.2096252441406, "logps/rejected": -281.13470458984375, "loss": 0.1145, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 5.085214138031006, "rewards/margins": 6.39267110824585, "rewards/rejected": -1.3074569702148438, "step": 6530 }, { "epoch": 2.99, "learning_rate": 1.5220700152207e-09, "logits/chosen": -4.9475860595703125, "logits/rejected": -4.870965957641602, "logps/chosen": -424.00897216796875, "logps/rejected": -270.4258117675781, "loss": 0.1258, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.14150857925415, "rewards/margins": 5.961328029632568, "rewards/rejected": -0.8198191523551941, "step": 6540 }, { "epoch": 2.99, "learning_rate": 1.0147133434804667e-09, "logits/chosen": -4.968661308288574, "logits/rejected": -4.865440368652344, "logps/chosen": -452.23052978515625, "logps/rejected": -270.912109375, "loss": 0.1091, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 5.628878116607666, "rewards/margins": 6.277059555053711, "rewards/rejected": -0.6481815576553345, "step": 6550 }, { "epoch": 2.99, "learning_rate": 5.073566717402334e-10, "logits/chosen": -4.97634744644165, "logits/rejected": -4.9013237953186035, "logps/chosen": -412.3560485839844, "logps/rejected": -271.295166015625, "loss": 0.1044, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.285799980163574, "rewards/margins": 6.230406284332275, "rewards/rejected": -0.9446064233779907, "step": 6560 }, { "epoch": 3.0, "learning_rate": 0.0, "logits/chosen": -4.957233905792236, "logits/rejected": -4.878509044647217, "logps/chosen": -408.51611328125, "logps/rejected": -269.8291015625, "loss": 0.1045, "rewards/accuracies": 0.9375, "rewards/chosen": 4.21966028213501, "rewards/margins": 5.19924783706665, "rewards/rejected": -0.9795879125595093, "step": 6570 }, { "epoch": 3.0, "step": 6570, "total_flos": 0.0, "train_loss": 0.15624731566807995, "train_runtime": 68304.4343, "train_samples_per_second": 6.158, "train_steps_per_second": 0.096 } ], "logging_steps": 10, "max_steps": 6570, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }