{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8888888888888888, "eval_steps": 500, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.309734513274336e-07, "logits/chosen": -2.1858699321746826, "logits/rejected": -2.25400972366333, "logps/chosen": -292.4839172363281, "logps/rejected": -334.2861633300781, "loss": 0.6928, "rewards/accuracies": 0.625, "rewards/chosen": -0.0007014082511886954, "rewards/margins": 0.0008057593367993832, "rewards/rejected": -0.001507167937234044, "step": 10 }, { "epoch": 0.01, "learning_rate": 1.415929203539823e-06, "logits/chosen": -2.2499358654022217, "logits/rejected": -2.2245681285858154, "logps/chosen": -323.0448303222656, "logps/rejected": -341.9175109863281, "loss": 0.6941, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.0023523904383182526, "rewards/margins": -0.0019129945430904627, "rewards/rejected": -0.00043939598253928125, "step": 20 }, { "epoch": 0.01, "learning_rate": 2.3008849557522127e-06, "logits/chosen": -2.2502377033233643, "logits/rejected": -2.235426425933838, "logps/chosen": -309.60076904296875, "logps/rejected": -354.3961181640625, "loss": 0.6933, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.005819912068545818, "rewards/margins": -0.00021856316016055644, "rewards/rejected": -0.0056013488210737705, "step": 30 }, { "epoch": 0.02, "learning_rate": 3.185840707964602e-06, "logits/chosen": -2.2594857215881348, "logits/rejected": -2.231959819793701, "logps/chosen": -342.497802734375, "logps/rejected": -361.927734375, "loss": 0.6939, "rewards/accuracies": 0.5, "rewards/chosen": -0.017969723790884018, "rewards/margins": -0.001382522750645876, "rewards/rejected": -0.01658720150589943, "step": 40 }, { "epoch": 0.02, "learning_rate": 4.070796460176992e-06, "logits/chosen": -2.278099775314331, "logits/rejected": -2.2154829502105713, "logps/chosen": -334.44879150390625, "logps/rejected": -324.4710998535156, "loss": 0.6928, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.03501028195023537, "rewards/margins": 0.000740527524612844, "rewards/rejected": -0.03575081005692482, "step": 50 }, { "epoch": 0.03, "learning_rate": 4.955752212389381e-06, "logits/chosen": -2.28778338432312, "logits/rejected": -2.1848011016845703, "logps/chosen": -329.4461669921875, "logps/rejected": -304.49163818359375, "loss": 0.693, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0479893684387207, "rewards/margins": 0.0005268483073450625, "rewards/rejected": -0.04851621761918068, "step": 60 }, { "epoch": 0.03, "learning_rate": 5.840707964601771e-06, "logits/chosen": -2.118542194366455, "logits/rejected": -2.1866343021392822, "logps/chosen": -299.27447509765625, "logps/rejected": -326.0574645996094, "loss": 0.6925, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.049522001296281815, "rewards/margins": 0.001656264765188098, "rewards/rejected": -0.05117826536297798, "step": 70 }, { "epoch": 0.04, "learning_rate": 6.72566371681416e-06, "logits/chosen": -2.17606782913208, "logits/rejected": -2.1878247261047363, "logps/chosen": -327.1267395019531, "logps/rejected": -323.746337890625, "loss": 0.6897, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.050967562943696976, "rewards/margins": 0.0074631692841649055, "rewards/rejected": -0.05843073129653931, "step": 80 }, { "epoch": 0.04, "learning_rate": 7.610619469026549e-06, "logits/chosen": -2.3069217205047607, "logits/rejected": -2.1109657287597656, "logps/chosen": -359.98150634765625, "logps/rejected": -322.8603820800781, "loss": 0.7002, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.05969462916254997, "rewards/margins": -0.013257542625069618, "rewards/rejected": -0.0464370921254158, "step": 90 }, { "epoch": 0.04, "learning_rate": 8.495575221238938e-06, "logits/chosen": -2.2969472408294678, "logits/rejected": -2.2404885292053223, "logps/chosen": -328.78704833984375, "logps/rejected": -334.45281982421875, "loss": 0.6917, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.005047931335866451, "rewards/margins": 0.0031173895113170147, "rewards/rejected": 0.0019305419409647584, "step": 100 }, { "epoch": 0.05, "learning_rate": 9.380530973451329e-06, "logits/chosen": -2.2497150897979736, "logits/rejected": -2.220237970352173, "logps/chosen": -333.1561584472656, "logps/rejected": -314.7790832519531, "loss": 0.6961, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": 0.015577316284179688, "rewards/margins": -0.005401826463639736, "rewards/rejected": 0.02097914181649685, "step": 110 }, { "epoch": 0.05, "learning_rate": 9.999951373555555e-06, "logits/chosen": -2.301379442214966, "logits/rejected": -2.223681926727295, "logps/chosen": -351.5559387207031, "logps/rejected": -326.63287353515625, "loss": 0.6889, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.012599905952811241, "rewards/margins": 0.009558334946632385, "rewards/rejected": 0.003041572170332074, "step": 120 }, { "epoch": 0.06, "learning_rate": 9.999086929743288e-06, "logits/chosen": -2.2710835933685303, "logits/rejected": -2.227280855178833, "logps/chosen": -321.6353454589844, "logps/rejected": -332.7576599121094, "loss": 0.692, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.040375690907239914, "rewards/margins": 0.004883688408881426, "rewards/rejected": 0.03549199923872948, "step": 130 }, { "epoch": 0.06, "learning_rate": 9.997142113313472e-06, "logits/chosen": -2.212054491043091, "logits/rejected": -2.202702283859253, "logps/chosen": -322.11651611328125, "logps/rejected": -309.7989501953125, "loss": 0.6871, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0492350198328495, "rewards/margins": 0.013679656200110912, "rewards/rejected": 0.035555362701416016, "step": 140 }, { "epoch": 0.07, "learning_rate": 9.994117344568142e-06, "logits/chosen": -2.2250020503997803, "logits/rejected": -2.2318902015686035, "logps/chosen": -317.3855895996094, "logps/rejected": -337.94805908203125, "loss": 0.6924, "rewards/accuracies": 0.5, "rewards/chosen": 0.03159898519515991, "rewards/margins": 0.0040366738103330135, "rewards/rejected": 0.02756231650710106, "step": 150 }, { "epoch": 0.07, "learning_rate": 9.990013277202137e-06, "logits/chosen": -2.2112176418304443, "logits/rejected": -2.3512566089630127, "logps/chosen": -337.7769775390625, "logps/rejected": -419.68450927734375, "loss": 0.7042, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.02137443795800209, "rewards/margins": -0.019264575093984604, "rewards/rejected": -0.00210986053571105, "step": 160 }, { "epoch": 0.08, "learning_rate": 9.984830798161828e-06, "logits/chosen": -2.2544150352478027, "logits/rejected": -2.1911208629608154, "logps/chosen": -384.29718017578125, "logps/rejected": -362.35308837890625, "loss": 0.6841, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.11389386653900146, "rewards/margins": 0.022078361362218857, "rewards/rejected": -0.13597223162651062, "step": 170 }, { "epoch": 0.08, "learning_rate": 9.978571027453433e-06, "logits/chosen": -2.3017163276672363, "logits/rejected": -2.12226939201355, "logps/chosen": -364.39837646484375, "logps/rejected": -286.0245361328125, "loss": 0.6959, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.12149347364902496, "rewards/margins": 0.000388662883779034, "rewards/rejected": -0.12188214063644409, "step": 180 }, { "epoch": 0.08, "learning_rate": 9.971235317900968e-06, "logits/chosen": -2.1424872875213623, "logits/rejected": -2.239366054534912, "logps/chosen": -283.5924377441406, "logps/rejected": -315.69586181640625, "loss": 0.6965, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.04944513365626335, "rewards/margins": -0.0020489301532506943, "rewards/rejected": -0.04739619791507721, "step": 190 }, { "epoch": 0.09, "learning_rate": 9.962825254853888e-06, "logits/chosen": -2.3371052742004395, "logits/rejected": -2.248575210571289, "logps/chosen": -406.6221923828125, "logps/rejected": -363.2230529785156, "loss": 0.6814, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.03307682275772095, "rewards/margins": 0.028196487575769424, "rewards/rejected": 0.004880332853645086, "step": 200 }, { "epoch": 0.09, "learning_rate": 9.953342655844465e-06, "logits/chosen": -2.281076669692993, "logits/rejected": -2.124605655670166, "logps/chosen": -329.9849548339844, "logps/rejected": -306.5705261230469, "loss": 0.6745, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.04553469270467758, "rewards/margins": 0.04089093953371048, "rewards/rejected": 0.004643745254725218, "step": 210 }, { "epoch": 0.1, "learning_rate": 9.942789570194995e-06, "logits/chosen": -2.2274227142333984, "logits/rejected": -2.195772409439087, "logps/chosen": -353.90277099609375, "logps/rejected": -329.13055419921875, "loss": 0.6709, "rewards/accuracies": 0.5, "rewards/chosen": 0.03678930178284645, "rewards/margins": 0.05163549259305, "rewards/rejected": -0.014846190810203552, "step": 220 }, { "epoch": 0.1, "learning_rate": 9.931168278574916e-06, "logits/chosen": -2.290721893310547, "logits/rejected": -2.233349561691284, "logps/chosen": -384.83087158203125, "logps/rejected": -375.786376953125, "loss": 0.6738, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.10011746734380722, "rewards/margins": 0.0485808365046978, "rewards/rejected": 0.05153663828969002, "step": 230 }, { "epoch": 0.11, "learning_rate": 9.919797871024877e-06, "logits/chosen": -2.3008508682250977, "logits/rejected": -2.2407467365264893, "logps/chosen": -341.2652587890625, "logps/rejected": -304.7622985839844, "loss": 0.6569, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.061227262020111084, "rewards/margins": 0.07875394821166992, "rewards/rejected": -0.01752668246626854, "step": 240 }, { "epoch": 0.11, "learning_rate": 9.906154097672858e-06, "logits/chosen": -2.3239777088165283, "logits/rejected": -2.2359421253204346, "logps/chosen": -357.5738220214844, "logps/rejected": -333.55389404296875, "loss": 0.6474, "rewards/accuracies": 0.625, "rewards/chosen": 0.16475871205329895, "rewards/margins": 0.10896603018045425, "rewards/rejected": 0.05579269677400589, "step": 250 }, { "epoch": 0.12, "learning_rate": 9.89145003578833e-06, "logits/chosen": -2.3069913387298584, "logits/rejected": -2.225893497467041, "logps/chosen": -337.88299560546875, "logps/rejected": -319.11016845703125, "loss": 0.6595, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.17844273149967194, "rewards/margins": 0.08880583941936493, "rewards/rejected": 0.08963687717914581, "step": 260 }, { "epoch": 0.12, "learning_rate": 9.875688863124766e-06, "logits/chosen": -2.22190260887146, "logits/rejected": -2.2968430519104004, "logps/chosen": -391.5494384765625, "logps/rejected": -404.64178466796875, "loss": 0.6949, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.26371732354164124, "rewards/margins": 0.03252069279551506, "rewards/rejected": 0.23119667172431946, "step": 270 }, { "epoch": 0.12, "learning_rate": 9.858873985892058e-06, "logits/chosen": -2.2825188636779785, "logits/rejected": -2.19154691696167, "logps/chosen": -354.6551818847656, "logps/rejected": -353.5287170410156, "loss": 0.6837, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.13825781643390656, "rewards/margins": 0.03303035721182823, "rewards/rejected": 0.10522744804620743, "step": 280 }, { "epoch": 0.13, "learning_rate": 9.841009038020401e-06, "logits/chosen": -2.2181854248046875, "logits/rejected": -2.2422547340393066, "logps/chosen": -332.62322998046875, "logps/rejected": -351.6214599609375, "loss": 0.6746, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.17069143056869507, "rewards/margins": 0.07059639692306519, "rewards/rejected": 0.10009505599737167, "step": 290 }, { "epoch": 0.13, "learning_rate": 9.82209788037494e-06, "logits/chosen": -2.283082962036133, "logits/rejected": -2.3039584159851074, "logps/chosen": -367.76708984375, "logps/rejected": -379.209228515625, "loss": 0.7081, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.12761621177196503, "rewards/margins": 0.001193371368572116, "rewards/rejected": 0.12642285227775574, "step": 300 }, { "epoch": 0.14, "learning_rate": 9.80214459992139e-06, "logits/chosen": -2.297591209411621, "logits/rejected": -2.2650771141052246, "logps/chosen": -359.8524169921875, "logps/rejected": -390.44195556640625, "loss": 0.6335, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.07557342946529388, "rewards/margins": 0.15836670994758606, "rewards/rejected": -0.08279327303171158, "step": 310 }, { "epoch": 0.14, "learning_rate": 9.781153508842785e-06, "logits/chosen": -2.2711267471313477, "logits/rejected": -2.2797353267669678, "logps/chosen": -329.4121398925781, "logps/rejected": -345.84393310546875, "loss": 0.675, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.09101514518260956, "rewards/margins": 0.05387691408395767, "rewards/rejected": -0.14489206671714783, "step": 320 }, { "epoch": 0.15, "learning_rate": 9.759129143607547e-06, "logits/chosen": -2.2991256713867188, "logits/rejected": -2.1713948249816895, "logps/chosen": -373.73992919921875, "logps/rejected": -298.8330993652344, "loss": 0.6611, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.07734711468219757, "rewards/margins": 0.09413515031337738, "rewards/rejected": -0.17148226499557495, "step": 330 }, { "epoch": 0.15, "learning_rate": 9.736076263989103e-06, "logits/chosen": -2.333172559738159, "logits/rejected": -2.2931008338928223, "logps/chosen": -384.9156188964844, "logps/rejected": -363.5679016113281, "loss": 0.6449, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.04307403415441513, "rewards/margins": 0.1350451558828354, "rewards/rejected": -0.09197112172842026, "step": 340 }, { "epoch": 0.16, "learning_rate": 9.711999852037226e-06, "logits/chosen": -2.3204524517059326, "logits/rejected": -2.256392240524292, "logps/chosen": -376.4149169921875, "logps/rejected": -337.93402099609375, "loss": 0.681, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.2001509964466095, "rewards/margins": 0.0656595304608345, "rewards/rejected": 0.1344914734363556, "step": 350 }, { "epoch": 0.16, "learning_rate": 9.68690511100134e-06, "logits/chosen": -2.222977638244629, "logits/rejected": -2.2059781551361084, "logps/chosen": -326.2198181152344, "logps/rejected": -322.86907958984375, "loss": 0.7169, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.12599822878837585, "rewards/margins": -0.01360931433737278, "rewards/rejected": 0.13960754871368408, "step": 360 }, { "epoch": 0.16, "learning_rate": 9.660797464206035e-06, "logits/chosen": -2.2420578002929688, "logits/rejected": -2.23136568069458, "logps/chosen": -338.4748840332031, "logps/rejected": -357.66705322265625, "loss": 0.6701, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.26268088817596436, "rewards/margins": 0.08094726502895355, "rewards/rejected": 0.181733638048172, "step": 370 }, { "epoch": 0.17, "learning_rate": 9.633682553879e-06, "logits/chosen": -2.276688814163208, "logits/rejected": -2.234923839569092, "logps/chosen": -317.33599853515625, "logps/rejected": -312.63897705078125, "loss": 0.6804, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.23342204093933105, "rewards/margins": 0.0563444122672081, "rewards/rejected": 0.17707762122154236, "step": 380 }, { "epoch": 0.17, "learning_rate": 9.605566239931666e-06, "logits/chosen": -2.3001625537872314, "logits/rejected": -2.2134346961975098, "logps/chosen": -357.8388977050781, "logps/rejected": -349.38995361328125, "loss": 0.6357, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.3796467185020447, "rewards/margins": 0.167959064245224, "rewards/rejected": 0.21168763935565948, "step": 390 }, { "epoch": 0.18, "learning_rate": 9.576454598692797e-06, "logits/chosen": -2.296462297439575, "logits/rejected": -2.226733922958374, "logps/chosen": -358.35015869140625, "logps/rejected": -326.0476989746094, "loss": 0.6382, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.36672210693359375, "rewards/margins": 0.16903677582740784, "rewards/rejected": 0.19768527150154114, "step": 400 }, { "epoch": 0.18, "learning_rate": 9.546353921595306e-06, "logits/chosen": -2.289577007293701, "logits/rejected": -2.279940128326416, "logps/chosen": -337.85699462890625, "logps/rejected": -340.87261962890625, "loss": 0.7079, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.3491610884666443, "rewards/margins": 0.029783133417367935, "rewards/rejected": 0.31937795877456665, "step": 410 }, { "epoch": 0.19, "learning_rate": 9.515270713816589e-06, "logits/chosen": -2.314282178878784, "logits/rejected": -2.1990668773651123, "logps/chosen": -380.24554443359375, "logps/rejected": -341.0552062988281, "loss": 0.6806, "rewards/accuracies": 0.625, "rewards/chosen": 0.3242203891277313, "rewards/margins": 0.10805626213550568, "rewards/rejected": 0.21616414189338684, "step": 420 }, { "epoch": 0.19, "learning_rate": 9.483211692872669e-06, "logits/chosen": -2.2007763385772705, "logits/rejected": -2.1976895332336426, "logps/chosen": -307.19464111328125, "logps/rejected": -318.5234069824219, "loss": 0.7432, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.22343340516090393, "rewards/margins": -0.031447634100914, "rewards/rejected": 0.25488102436065674, "step": 430 }, { "epoch": 0.2, "learning_rate": 9.450183787166447e-06, "logits/chosen": -2.1776084899902344, "logits/rejected": -2.263404369354248, "logps/chosen": -275.30230712890625, "logps/rejected": -335.97637939453125, "loss": 0.6777, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.10852464288473129, "rewards/margins": 0.05528046935796738, "rewards/rejected": 0.053244173526763916, "step": 440 }, { "epoch": 0.2, "learning_rate": 9.41619413449037e-06, "logits/chosen": -2.341031551361084, "logits/rejected": -2.2925498485565186, "logps/chosen": -387.17315673828125, "logps/rejected": -408.65350341796875, "loss": 0.6467, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.17925263941287994, "rewards/margins": 0.12131942808628082, "rewards/rejected": 0.057933200150728226, "step": 450 }, { "epoch": 0.2, "learning_rate": 9.381250080483864e-06, "logits/chosen": -2.305234432220459, "logits/rejected": -2.29388689994812, "logps/chosen": -353.67547607421875, "logps/rejected": -365.3600769042969, "loss": 0.6973, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.020944729447364807, "rewards/margins": 0.018988361582159996, "rewards/rejected": 0.001956367399543524, "step": 460 }, { "epoch": 0.21, "learning_rate": 9.345359177045827e-06, "logits/chosen": -2.2121920585632324, "logits/rejected": -2.1668667793273926, "logps/chosen": -318.66827392578125, "logps/rejected": -298.404052734375, "loss": 0.672, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.009936051443219185, "rewards/margins": 0.07024586200714111, "rewards/rejected": -0.08018191158771515, "step": 470 }, { "epoch": 0.21, "learning_rate": 9.308529180702568e-06, "logits/chosen": -2.2666916847229004, "logits/rejected": -2.283783435821533, "logps/chosen": -359.14825439453125, "logps/rejected": -384.94073486328125, "loss": 0.6523, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.030093509703874588, "rewards/margins": 0.1233237162232399, "rewards/rejected": -0.1534171998500824, "step": 480 }, { "epoch": 0.22, "learning_rate": 9.270768050931515e-06, "logits/chosen": -2.2703680992126465, "logits/rejected": -2.3521008491516113, "logps/chosen": -336.55548095703125, "logps/rejected": -373.68902587890625, "loss": 0.6823, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.19773463904857635, "rewards/margins": 0.06342988461256027, "rewards/rejected": -0.2611645460128784, "step": 490 }, { "epoch": 0.22, "learning_rate": 9.232083948441046e-06, "logits/chosen": -2.263096332550049, "logits/rejected": -2.1758663654327393, "logps/chosen": -366.25714111328125, "logps/rejected": -322.65081787109375, "loss": 0.6963, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.1464572548866272, "rewards/margins": 0.025711650028824806, "rewards/rejected": -0.17216889560222626, "step": 500 }, { "epoch": 0.23, "learning_rate": 9.192485233406862e-06, "logits/chosen": -2.3192970752716064, "logits/rejected": -2.3752708435058594, "logps/chosen": -383.03753662109375, "logps/rejected": -406.3360900878906, "loss": 0.6627, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.13319934904575348, "rewards/margins": 0.0994311273097992, "rewards/rejected": -0.23263044655323029, "step": 510 }, { "epoch": 0.23, "learning_rate": 9.151980463665227e-06, "logits/chosen": -2.2946994304656982, "logits/rejected": -2.2120468616485596, "logps/chosen": -390.14434814453125, "logps/rejected": -344.0641174316406, "loss": 0.6981, "rewards/accuracies": 0.5, "rewards/chosen": -0.12970566749572754, "rewards/margins": 0.05780962109565735, "rewards/rejected": -0.1875152885913849, "step": 520 }, { "epoch": 0.24, "learning_rate": 9.1105783928635e-06, "logits/chosen": -2.2886319160461426, "logits/rejected": -2.2648708820343018, "logps/chosen": -373.304931640625, "logps/rejected": -381.96368408203125, "loss": 0.7022, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.011115001514554024, "rewards/margins": 0.03471168875694275, "rewards/rejected": -0.04582669958472252, "step": 530 }, { "epoch": 0.24, "learning_rate": 9.068287968568355e-06, "logits/chosen": -2.2562637329101562, "logits/rejected": -2.2379746437072754, "logps/chosen": -336.50640869140625, "logps/rejected": -369.13037109375, "loss": 0.6873, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.1720694601535797, "rewards/margins": 0.060669075697660446, "rewards/rejected": 0.11140035092830658, "step": 540 }, { "epoch": 0.24, "learning_rate": 9.02511833033208e-06, "logits/chosen": -2.145764112472534, "logits/rejected": -2.1581664085388184, "logps/chosen": -320.99456787109375, "logps/rejected": -324.7594299316406, "loss": 0.6312, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.02777281031012535, "rewards/margins": 0.16521447896957397, "rewards/rejected": -0.19298730790615082, "step": 550 }, { "epoch": 0.25, "learning_rate": 8.981078807717396e-06, "logits/chosen": -2.316991090774536, "logits/rejected": -2.169630765914917, "logps/chosen": -417.3232421875, "logps/rejected": -368.39617919921875, "loss": 0.6415, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.12395425885915756, "rewards/margins": 0.15927435457706451, "rewards/rejected": -0.03532009571790695, "step": 560 }, { "epoch": 0.25, "learning_rate": 8.936178918281209e-06, "logits/chosen": -2.3391947746276855, "logits/rejected": -2.359314441680908, "logps/chosen": -379.1593322753906, "logps/rejected": -404.58868408203125, "loss": 0.7159, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.0663943886756897, "rewards/margins": -0.007743087597191334, "rewards/rejected": 0.07413747161626816, "step": 570 }, { "epoch": 0.26, "learning_rate": 8.890428365517728e-06, "logits/chosen": -2.3254919052124023, "logits/rejected": -2.2909200191497803, "logps/chosen": -377.60736083984375, "logps/rejected": -365.9610595703125, "loss": 0.6832, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.23426219820976257, "rewards/margins": 0.05870335176587105, "rewards/rejected": 0.17555885016918182, "step": 580 }, { "epoch": 0.26, "learning_rate": 8.843837036761404e-06, "logits/chosen": -2.247920513153076, "logits/rejected": -2.1772730350494385, "logps/chosen": -299.9126892089844, "logps/rejected": -299.29736328125, "loss": 0.6474, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.20609867572784424, "rewards/margins": 0.12030823528766632, "rewards/rejected": 0.08579044044017792, "step": 590 }, { "epoch": 0.27, "learning_rate": 8.796415001050154e-06, "logits/chosen": -2.295091152191162, "logits/rejected": -2.246346950531006, "logps/chosen": -389.3216552734375, "logps/rejected": -371.30157470703125, "loss": 0.6311, "rewards/accuracies": 0.625, "rewards/chosen": 0.25310301780700684, "rewards/margins": 0.18558058142662048, "rewards/rejected": 0.06752243638038635, "step": 600 }, { "epoch": 0.27, "learning_rate": 8.748172506949274e-06, "logits/chosen": -2.27669358253479, "logits/rejected": -2.1988024711608887, "logps/chosen": -326.1456298828125, "logps/rejected": -309.17266845703125, "loss": 0.6849, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.05873417109251022, "rewards/margins": 0.07899702340364456, "rewards/rejected": -0.020262856036424637, "step": 610 }, { "epoch": 0.28, "learning_rate": 8.699119980336602e-06, "logits/chosen": -2.302666187286377, "logits/rejected": -2.2827186584472656, "logps/chosen": -364.0043640136719, "logps/rejected": -372.041748046875, "loss": 0.693, "rewards/accuracies": 0.5, "rewards/chosen": 0.23366883397102356, "rewards/margins": 0.0409202054142952, "rewards/rejected": 0.19274859130382538, "step": 620 }, { "epoch": 0.28, "learning_rate": 8.649268022149333e-06, "logits/chosen": -2.282480001449585, "logits/rejected": -2.2400062084198, "logps/chosen": -333.30194091796875, "logps/rejected": -321.1686096191406, "loss": 0.6733, "rewards/accuracies": 0.625, "rewards/chosen": 0.3089759349822998, "rewards/margins": 0.10994930565357208, "rewards/rejected": 0.19902662932872772, "step": 630 }, { "epoch": 0.28, "learning_rate": 8.59862740609301e-06, "logits/chosen": -2.3325583934783936, "logits/rejected": -2.396918535232544, "logps/chosen": -383.0022888183594, "logps/rejected": -438.1582946777344, "loss": 0.6617, "rewards/accuracies": 0.625, "rewards/chosen": 0.38308969140052795, "rewards/margins": 0.12253421545028687, "rewards/rejected": 0.2605554461479187, "step": 640 }, { "epoch": 0.29, "learning_rate": 8.547209076313172e-06, "logits/chosen": -2.3535332679748535, "logits/rejected": -2.3711094856262207, "logps/chosen": -378.6181640625, "logps/rejected": -433.81005859375, "loss": 0.6061, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.33381205797195435, "rewards/margins": 0.23929791152477264, "rewards/rejected": 0.09451412409543991, "step": 650 }, { "epoch": 0.29, "learning_rate": 8.495024145030174e-06, "logits/chosen": -2.195960283279419, "logits/rejected": -2.2019705772399902, "logps/chosen": -330.01177978515625, "logps/rejected": -352.898193359375, "loss": 0.6155, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.02304258942604065, "rewards/margins": 0.21534284949302673, "rewards/rejected": -0.23838546872138977, "step": 660 }, { "epoch": 0.3, "learning_rate": 8.442083890137678e-06, "logits/chosen": -2.2961819171905518, "logits/rejected": -2.2526700496673584, "logps/chosen": -343.67987060546875, "logps/rejected": -348.7483825683594, "loss": 0.7059, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.05699265003204346, "rewards/margins": 0.0024743645917624235, "rewards/rejected": -0.05946701765060425, "step": 670 }, { "epoch": 0.3, "learning_rate": 8.388399752765344e-06, "logits/chosen": -2.2721645832061768, "logits/rejected": -2.2435104846954346, "logps/chosen": -377.99664306640625, "logps/rejected": -376.784912109375, "loss": 0.6238, "rewards/accuracies": 0.625, "rewards/chosen": -0.22479982674121857, "rewards/margins": 0.24280264973640442, "rewards/rejected": -0.4676024317741394, "step": 680 }, { "epoch": 0.31, "learning_rate": 8.333983334806248e-06, "logits/chosen": -2.2859599590301514, "logits/rejected": -2.241246461868286, "logps/chosen": -367.9365234375, "logps/rejected": -335.52740478515625, "loss": 0.6717, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.09134645760059357, "rewards/margins": 0.09670265018939972, "rewards/rejected": -0.1880491077899933, "step": 690 }, { "epoch": 0.31, "learning_rate": 8.278846396409534e-06, "logits/chosen": -2.306518077850342, "logits/rejected": -2.265807867050171, "logps/chosen": -370.5268249511719, "logps/rejected": -363.80718994140625, "loss": 0.6953, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.02050144411623478, "rewards/margins": 0.0992293506860733, "rewards/rejected": -0.07872792333364487, "step": 700 }, { "epoch": 0.32, "learning_rate": 8.223000853438904e-06, "logits/chosen": -2.3641223907470703, "logits/rejected": -2.272670269012451, "logps/chosen": -395.78509521484375, "logps/rejected": -397.1343994140625, "loss": 0.6263, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.156661841785535e-05, "rewards/margins": 0.2341923713684082, "rewards/rejected": -0.23422393202781677, "step": 710 }, { "epoch": 0.32, "learning_rate": 8.166458774897413e-06, "logits/chosen": -2.30328631401062, "logits/rejected": -2.234039306640625, "logps/chosen": -379.7477722167969, "logps/rejected": -355.75677490234375, "loss": 0.605, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.024682385846972466, "rewards/margins": 0.26411938667297363, "rewards/rejected": -0.28880181908607483, "step": 720 }, { "epoch": 0.32, "learning_rate": 8.109232380319194e-06, "logits/chosen": -2.2999930381774902, "logits/rejected": -2.2953243255615234, "logps/chosen": -407.1230163574219, "logps/rejected": -414.182373046875, "loss": 0.7166, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.06269857287406921, "rewards/margins": 0.01421293430030346, "rewards/rejected": 0.0484856478869915, "step": 730 }, { "epoch": 0.33, "learning_rate": 8.051334037128661e-06, "logits/chosen": -2.2836692333221436, "logits/rejected": -2.2380998134613037, "logps/chosen": -332.956787109375, "logps/rejected": -330.85601806640625, "loss": 0.7164, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.043525341898202896, "rewards/margins": 0.0009438946726731956, "rewards/rejected": 0.04258145019412041, "step": 740 }, { "epoch": 0.33, "learning_rate": 7.99277625796771e-06, "logits/chosen": -2.200336217880249, "logits/rejected": -2.1876537799835205, "logps/chosen": -325.07611083984375, "logps/rejected": -318.10784912109375, "loss": 0.7158, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.05763138085603714, "rewards/margins": 0.018384983763098717, "rewards/rejected": -0.0760163813829422, "step": 750 }, { "epoch": 0.34, "learning_rate": 7.933571697991582e-06, "logits/chosen": -2.3422603607177734, "logits/rejected": -2.2664551734924316, "logps/chosen": -401.63275146484375, "logps/rejected": -351.42767333984375, "loss": 0.6953, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.008800688199698925, "rewards/margins": 0.04444243758916855, "rewards/rejected": -0.05324311926960945, "step": 760 }, { "epoch": 0.34, "learning_rate": 7.873733152133898e-06, "logits/chosen": -2.2099857330322266, "logits/rejected": -2.236807346343994, "logps/chosen": -311.5889892578125, "logps/rejected": -328.91033935546875, "loss": 0.6992, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.11291439831256866, "rewards/margins": 0.023546913638710976, "rewards/rejected": 0.08936748653650284, "step": 770 }, { "epoch": 0.35, "learning_rate": 7.813273552341496e-06, "logits/chosen": -2.3059163093566895, "logits/rejected": -2.280585527420044, "logps/chosen": -330.9400329589844, "logps/rejected": -347.64056396484375, "loss": 0.6812, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.044351525604724884, "rewards/margins": 0.0922970399260521, "rewards/rejected": -0.047945525497198105, "step": 780 }, { "epoch": 0.35, "learning_rate": 7.75220596477966e-06, "logits/chosen": -2.263115644454956, "logits/rejected": -2.2254600524902344, "logps/chosen": -325.22198486328125, "logps/rejected": -316.40875244140625, "loss": 0.6262, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.08847782015800476, "rewards/margins": 0.24120266735553741, "rewards/rejected": -0.15272484719753265, "step": 790 }, { "epoch": 0.36, "learning_rate": 7.690543587008332e-06, "logits/chosen": -2.2187132835388184, "logits/rejected": -2.2646350860595703, "logps/chosen": -401.48687744140625, "logps/rejected": -387.3714294433594, "loss": 0.6596, "rewards/accuracies": 0.5, "rewards/chosen": 0.21927690505981445, "rewards/margins": 0.1301509588956833, "rewards/rejected": 0.08912594616413116, "step": 800 }, { "epoch": 0.36, "learning_rate": 7.628299745129943e-06, "logits/chosen": -2.2820262908935547, "logits/rejected": -2.2334964275360107, "logps/chosen": -403.6439208984375, "logps/rejected": -374.96270751953125, "loss": 0.7398, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.02083228901028633, "rewards/margins": -0.01027420163154602, "rewards/rejected": -0.010558092966675758, "step": 810 }, { "epoch": 0.36, "learning_rate": 7.565487890909448e-06, "logits/chosen": -2.322711229324341, "logits/rejected": -2.225168466567993, "logps/chosen": -337.26605224609375, "logps/rejected": -304.8133544921875, "loss": 0.6559, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.2134527862071991, "rewards/margins": 0.1341491937637329, "rewards/rejected": 0.07930360734462738, "step": 820 }, { "epoch": 0.37, "learning_rate": 7.502121598867218e-06, "logits/chosen": -2.2647910118103027, "logits/rejected": -2.2931771278381348, "logps/chosen": -353.45660400390625, "logps/rejected": -313.0556945800781, "loss": 0.6721, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.16318438947200775, "rewards/margins": 0.0658370777964592, "rewards/rejected": 0.09734731912612915, "step": 830 }, { "epoch": 0.37, "learning_rate": 7.438214563345389e-06, "logits/chosen": -2.352962017059326, "logits/rejected": -2.329225778579712, "logps/chosen": -387.4513244628906, "logps/rejected": -395.37347412109375, "loss": 0.6693, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.19145812094211578, "rewards/margins": 0.13347746431827545, "rewards/rejected": 0.05798065662384033, "step": 840 }, { "epoch": 0.38, "learning_rate": 7.373780595548334e-06, "logits/chosen": -2.310203790664673, "logits/rejected": -2.2119874954223633, "logps/chosen": -377.05657958984375, "logps/rejected": -357.82525634765625, "loss": 0.7045, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.4518454670906067, "rewards/margins": 0.05518758296966553, "rewards/rejected": 0.3966578543186188, "step": 850 }, { "epoch": 0.38, "learning_rate": 7.3088336205578565e-06, "logits/chosen": -2.240410327911377, "logits/rejected": -2.215846538543701, "logps/chosen": -350.8703308105469, "logps/rejected": -368.5628662109375, "loss": 0.6456, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.1876397579908371, "rewards/margins": 0.16387517750263214, "rewards/rejected": 0.0237645972520113, "step": 860 }, { "epoch": 0.39, "learning_rate": 7.243387674323794e-06, "logits/chosen": -2.2497904300689697, "logits/rejected": -2.232779026031494, "logps/chosen": -339.3749084472656, "logps/rejected": -359.62493896484375, "loss": 0.6597, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.22228892147541046, "rewards/margins": 0.12651677429676056, "rewards/rejected": 0.09577211737632751, "step": 870 }, { "epoch": 0.39, "learning_rate": 7.177456900630645e-06, "logits/chosen": -2.256024122238159, "logits/rejected": -2.2079262733459473, "logps/chosen": -340.0914611816406, "logps/rejected": -322.68011474609375, "loss": 0.5837, "rewards/accuracies": 0.75, "rewards/chosen": 0.16695842146873474, "rewards/margins": 0.28337720036506653, "rewards/rejected": -0.1164187639951706, "step": 880 }, { "epoch": 0.4, "learning_rate": 7.111055548040911e-06, "logits/chosen": -2.3057608604431152, "logits/rejected": -2.2699170112609863, "logps/chosen": -389.82611083984375, "logps/rejected": -382.68829345703125, "loss": 0.5632, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.19717621803283691, "rewards/margins": 0.36688321828842163, "rewards/rejected": -0.1697070151567459, "step": 890 }, { "epoch": 0.4, "learning_rate": 7.044197966815773e-06, "logits/chosen": -2.25697922706604, "logits/rejected": -2.107326030731201, "logps/chosen": -320.3851623535156, "logps/rejected": -288.55108642578125, "loss": 0.6459, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.034488920122385025, "rewards/margins": 0.16461703181266785, "rewards/rejected": -0.13012811541557312, "step": 900 }, { "epoch": 0.4, "learning_rate": 6.976898605813798e-06, "logits/chosen": -2.269026041030884, "logits/rejected": -2.305229663848877, "logps/chosen": -335.22540283203125, "logps/rejected": -391.1421813964844, "loss": 0.7153, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.16712869703769684, "rewards/margins": 0.05030643194913864, "rewards/rejected": 0.1168222427368164, "step": 910 }, { "epoch": 0.41, "learning_rate": 6.90917200936835e-06, "logits/chosen": -2.202436923980713, "logits/rejected": -2.1774916648864746, "logps/chosen": -308.787109375, "logps/rejected": -331.55987548828125, "loss": 0.7907, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.020061034709215164, "rewards/margins": -0.09425730258226395, "rewards/rejected": 0.07419625669717789, "step": 920 }, { "epoch": 0.41, "learning_rate": 6.841032814144345e-06, "logits/chosen": -2.2023422718048096, "logits/rejected": -2.215259552001953, "logps/chosen": -303.5005187988281, "logps/rejected": -343.00146484375, "loss": 0.6898, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.026828575879335403, "rewards/margins": 0.07769123464822769, "rewards/rejected": -0.05086265876889229, "step": 930 }, { "epoch": 0.42, "learning_rate": 6.772495745975067e-06, "logits/chosen": -2.268451452255249, "logits/rejected": -2.1880459785461426, "logps/chosen": -352.240966796875, "logps/rejected": -341.0179748535156, "loss": 0.6377, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.02395152859389782, "rewards/margins": 0.17894978821277618, "rewards/rejected": -0.20290131866931915, "step": 940 }, { "epoch": 0.42, "learning_rate": 6.703575616679709e-06, "logits/chosen": -2.382638454437256, "logits/rejected": -2.3348803520202637, "logps/chosen": -395.88372802734375, "logps/rejected": -385.67364501953125, "loss": 0.6581, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.00013340116129256785, "rewards/margins": 0.17107084393501282, "rewards/rejected": -0.1709374487400055, "step": 950 }, { "epoch": 0.43, "learning_rate": 6.634287320862334e-06, "logits/chosen": -2.332146644592285, "logits/rejected": -2.196887254714966, "logps/chosen": -367.8846435546875, "logps/rejected": -342.24224853515625, "loss": 0.6204, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.21408149600028992, "rewards/margins": 0.27911919355392456, "rewards/rejected": -0.06503769755363464, "step": 960 }, { "epoch": 0.43, "learning_rate": 6.564645832692938e-06, "logits/chosen": -2.2684531211853027, "logits/rejected": -2.2622039318084717, "logps/chosen": -356.80859375, "logps/rejected": -363.5769958496094, "loss": 0.7088, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.05582839250564575, "rewards/margins": 0.07232120633125305, "rewards/rejected": -0.016492802649736404, "step": 970 }, { "epoch": 0.44, "learning_rate": 6.494666202671329e-06, "logits/chosen": -2.297036647796631, "logits/rejected": -2.165566921234131, "logps/chosen": -358.6860046386719, "logps/rejected": -317.59002685546875, "loss": 0.5921, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.30202144384384155, "rewards/margins": 0.3027155101299286, "rewards/rejected": -0.0006940944003872573, "step": 980 }, { "epoch": 0.44, "learning_rate": 6.424363554374496e-06, "logits/chosen": -2.3090157508850098, "logits/rejected": -2.232266902923584, "logps/chosen": -363.88226318359375, "logps/rejected": -358.2498779296875, "loss": 0.6638, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.47291359305381775, "rewards/margins": 0.14908143877983093, "rewards/rejected": 0.3238321542739868, "step": 990 }, { "epoch": 0.44, "learning_rate": 6.353753081188194e-06, "logits/chosen": -2.2434115409851074, "logits/rejected": -2.301614284515381, "logps/chosen": -314.8789978027344, "logps/rejected": -350.7088928222656, "loss": 0.727, "rewards/accuracies": 0.5, "rewards/chosen": 0.19925834238529205, "rewards/margins": 0.04580863565206528, "rewards/rejected": 0.15344971418380737, "step": 1000 }, { "epoch": 0.45, "learning_rate": 6.28285004302345e-06, "logits/chosen": -2.266707420349121, "logits/rejected": -2.236722469329834, "logps/chosen": -321.0040283203125, "logps/rejected": -336.6592102050781, "loss": 0.6677, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.1193336620926857, "rewards/margins": 0.17302492260932922, "rewards/rejected": -0.053691256791353226, "step": 1010 }, { "epoch": 0.45, "learning_rate": 6.2116697630186685e-06, "logits/chosen": -2.303358554840088, "logits/rejected": -2.149106740951538, "logps/chosen": -351.23590087890625, "logps/rejected": -350.1204833984375, "loss": 0.6293, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.17574825882911682, "rewards/margins": 0.1902790516614914, "rewards/rejected": -0.014530802145600319, "step": 1020 }, { "epoch": 0.46, "learning_rate": 6.140227624228098e-06, "logits/chosen": -2.375432252883911, "logits/rejected": -2.297983169555664, "logps/chosen": -366.21368408203125, "logps/rejected": -378.6297912597656, "loss": 0.6357, "rewards/accuracies": 0.625, "rewards/chosen": 0.3224946856498718, "rewards/margins": 0.2146320790052414, "rewards/rejected": 0.10786261409521103, "step": 1030 }, { "epoch": 0.46, "learning_rate": 6.068539066297331e-06, "logits/chosen": -2.318620204925537, "logits/rejected": -2.2646164894104004, "logps/chosen": -367.49298095703125, "logps/rejected": -360.1875305175781, "loss": 0.6089, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.14375829696655273, "rewards/margins": 0.2504768967628479, "rewards/rejected": -0.10671859979629517, "step": 1040 }, { "epoch": 0.47, "learning_rate": 5.996619582126586e-06, "logits/chosen": -2.322288751602173, "logits/rejected": -2.3236374855041504, "logps/chosen": -367.33343505859375, "logps/rejected": -372.8912658691406, "loss": 0.7435, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.08361749351024628, "rewards/margins": 0.028830815106630325, "rewards/rejected": 0.054786670953035355, "step": 1050 }, { "epoch": 0.47, "learning_rate": 5.924484714522473e-06, "logits/chosen": -2.2468628883361816, "logits/rejected": -2.2435338497161865, "logps/chosen": -354.2232666015625, "logps/rejected": -318.03851318359375, "loss": 0.6024, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.1713722050189972, "rewards/margins": 0.26503580808639526, "rewards/rejected": -0.09366358816623688, "step": 1060 }, { "epoch": 0.48, "learning_rate": 5.8521500528389685e-06, "logits/chosen": -2.2789225578308105, "logits/rejected": -2.250373125076294, "logps/chosen": -337.760986328125, "logps/rejected": -343.9210510253906, "loss": 0.6352, "rewards/accuracies": 0.625, "rewards/chosen": -0.043280668556690216, "rewards/margins": 0.275790274143219, "rewards/rejected": -0.3190709352493286, "step": 1070 }, { "epoch": 0.48, "learning_rate": 5.779631229608352e-06, "logits/chosen": -2.3031325340270996, "logits/rejected": -2.2297275066375732, "logps/chosen": -345.22265625, "logps/rejected": -361.78680419921875, "loss": 0.6227, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.18280446529388428, "rewards/margins": 0.2825770378112793, "rewards/rejected": -0.09977257996797562, "step": 1080 }, { "epoch": 0.48, "learning_rate": 5.706943917162786e-06, "logits/chosen": -2.3648791313171387, "logits/rejected": -2.2548999786376953, "logps/chosen": -348.91815185546875, "logps/rejected": -315.13653564453125, "loss": 0.6339, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.2526555061340332, "rewards/margins": 0.18896642327308655, "rewards/rejected": 0.06368909776210785, "step": 1090 }, { "epoch": 0.49, "learning_rate": 5.634103824247312e-06, "logits/chosen": -2.241288900375366, "logits/rejected": -2.208639621734619, "logps/chosen": -335.605224609375, "logps/rejected": -334.7170715332031, "loss": 0.632, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.13065436482429504, "rewards/margins": 0.2133084535598755, "rewards/rejected": -0.34396281838417053, "step": 1100 }, { "epoch": 0.49, "learning_rate": 5.561126692624963e-06, "logits/chosen": -2.2892165184020996, "logits/rejected": -2.253537178039551, "logps/chosen": -380.8193054199219, "logps/rejected": -344.45684814453125, "loss": 0.677, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.10464837402105331, "rewards/margins": 0.13998612761497498, "rewards/rejected": -0.035337746143341064, "step": 1110 }, { "epoch": 0.5, "learning_rate": 5.488028293674759e-06, "logits/chosen": -2.1598775386810303, "logits/rejected": -2.3442585468292236, "logps/chosen": -295.97161865234375, "logps/rejected": -376.0238952636719, "loss": 0.6603, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.1714015007019043, "rewards/margins": 0.1644255667924881, "rewards/rejected": 0.006975936703383923, "step": 1120 }, { "epoch": 0.5, "learning_rate": 5.414824424983282e-06, "logits/chosen": -2.253049373626709, "logits/rejected": -2.313413143157959, "logps/chosen": -350.61126708984375, "logps/rejected": -394.3390197753906, "loss": 0.7526, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.3026641309261322, "rewards/margins": 0.06349755823612213, "rewards/rejected": -0.3661617040634155, "step": 1130 }, { "epoch": 0.51, "learning_rate": 5.341530906930604e-06, "logits/chosen": -2.32954740524292, "logits/rejected": -2.2630321979522705, "logps/chosen": -389.9427185058594, "logps/rejected": -338.2027893066406, "loss": 0.6504, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0287860669195652, "rewards/margins": 0.21246078610420227, "rewards/rejected": -0.18367469310760498, "step": 1140 }, { "epoch": 0.51, "learning_rate": 5.268163579271276e-06, "logits/chosen": -2.249337673187256, "logits/rejected": -2.19362473487854, "logps/chosen": -330.29559326171875, "logps/rejected": -327.573486328125, "loss": 0.644, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0009714558837004006, "rewards/margins": 0.18328654766082764, "rewards/rejected": -0.18425801396369934, "step": 1150 }, { "epoch": 0.52, "learning_rate": 5.1947382977111374e-06, "logits/chosen": -2.2833094596862793, "logits/rejected": -2.203212261199951, "logps/chosen": -360.40142822265625, "logps/rejected": -346.81927490234375, "loss": 0.5783, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.05404200404882431, "rewards/margins": 0.3619672656059265, "rewards/rejected": -0.3079253137111664, "step": 1160 }, { "epoch": 0.52, "learning_rate": 5.128619108610792e-06, "logits/chosen": -2.310303211212158, "logits/rejected": -2.28350567817688, "logps/chosen": -339.02398681640625, "logps/rejected": -357.2115173339844, "loss": 0.6813, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.07422615587711334, "rewards/margins": 0.14461743831634521, "rewards/rejected": -0.07039125263690948, "step": 1170 }, { "epoch": 0.52, "learning_rate": 5.055127439202268e-06, "logits/chosen": -2.309981107711792, "logits/rejected": -2.2555365562438965, "logps/chosen": -339.52301025390625, "logps/rejected": -363.42657470703125, "loss": 0.5414, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.13452677428722382, "rewards/margins": 0.5444313287734985, "rewards/rejected": -0.4099045693874359, "step": 1180 }, { "epoch": 0.53, "learning_rate": 4.9816238559829586e-06, "logits/chosen": -2.371007204055786, "logits/rejected": -2.2399134635925293, "logps/chosen": -387.3955383300781, "logps/rejected": -373.3992919921875, "loss": 0.6567, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.04877791926264763, "rewards/margins": 0.20018813014030457, "rewards/rejected": -0.2489660531282425, "step": 1190 }, { "epoch": 0.53, "learning_rate": 4.908124244105435e-06, "logits/chosen": -2.1801342964172363, "logits/rejected": -2.1720447540283203, "logps/chosen": -307.62103271484375, "logps/rejected": -318.6053771972656, "loss": 0.6848, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.10059481859207153, "rewards/margins": 0.0915575847029686, "rewards/rejected": -0.19215238094329834, "step": 1200 }, { "epoch": 0.54, "learning_rate": 4.834644487864005e-06, "logits/chosen": -2.299656391143799, "logits/rejected": -2.1939291954040527, "logps/chosen": -364.6031188964844, "logps/rejected": -340.6778259277344, "loss": 0.7091, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.1433655321598053, "rewards/margins": 0.1040463000535965, "rewards/rejected": 0.039319224655628204, "step": 1210 }, { "epoch": 0.54, "learning_rate": 4.7612004672619e-06, "logits/chosen": -2.2465157508850098, "logits/rejected": -2.142528533935547, "logps/chosen": -314.5583801269531, "logps/rejected": -273.532470703125, "loss": 0.7121, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.00577303022146225, "rewards/margins": 0.04662410169839859, "rewards/rejected": -0.05239715054631233, "step": 1220 }, { "epoch": 0.55, "learning_rate": 4.6878080545793765e-06, "logits/chosen": -2.275844097137451, "logits/rejected": -2.2758145332336426, "logps/chosen": -288.96905517578125, "logps/rejected": -304.53265380859375, "loss": 0.6788, "rewards/accuracies": 0.625, "rewards/chosen": 0.18407993018627167, "rewards/margins": 0.13483914732933044, "rewards/rejected": 0.049240779131650925, "step": 1230 }, { "epoch": 0.55, "learning_rate": 4.614483110943502e-06, "logits/chosen": -2.275071620941162, "logits/rejected": -2.2525486946105957, "logps/chosen": -338.60357666015625, "logps/rejected": -337.8529357910156, "loss": 0.6597, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.18449342250823975, "rewards/margins": 0.14333459734916687, "rewards/rejected": 0.04115881025791168, "step": 1240 }, { "epoch": 0.56, "learning_rate": 4.54124148290033e-06, "logits/chosen": -2.2469890117645264, "logits/rejected": -2.2963757514953613, "logps/chosen": -312.69677734375, "logps/rejected": -368.51220703125, "loss": 0.7698, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.211637943983078, "rewards/margins": -0.05843405798077583, "rewards/rejected": 0.27007198333740234, "step": 1250 }, { "epoch": 0.56, "learning_rate": 4.46809899899026e-06, "logits/chosen": -2.20833683013916, "logits/rejected": -2.240799903869629, "logps/chosen": -326.4002380371094, "logps/rejected": -338.2776794433594, "loss": 0.6814, "rewards/accuracies": 0.625, "rewards/chosen": 0.15935859084129333, "rewards/margins": 0.09861962497234344, "rewards/rejected": 0.060738980770111084, "step": 1260 }, { "epoch": 0.56, "learning_rate": 4.395071466327251e-06, "logits/chosen": -2.2200393676757812, "logits/rejected": -2.200827121734619, "logps/chosen": -341.48321533203125, "logps/rejected": -320.7751770019531, "loss": 0.7204, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.03819073364138603, "rewards/margins": 0.06163903325796127, "rewards/rejected": -0.023448294028639793, "step": 1270 }, { "epoch": 0.57, "learning_rate": 4.322174667182689e-06, "logits/chosen": -2.3112101554870605, "logits/rejected": -2.1941494941711426, "logps/chosen": -381.0555114746094, "logps/rejected": -349.847900390625, "loss": 0.6448, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.2540966272354126, "rewards/margins": 0.23512430489063263, "rewards/rejected": 0.018972331658005714, "step": 1280 }, { "epoch": 0.57, "learning_rate": 4.249424355574621e-06, "logits/chosen": -2.361945629119873, "logits/rejected": -2.2803444862365723, "logps/chosen": -397.45330810546875, "logps/rejected": -377.0959777832031, "loss": 0.662, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.18188154697418213, "rewards/margins": 0.205190509557724, "rewards/rejected": -0.023308951407670975, "step": 1290 }, { "epoch": 0.58, "learning_rate": 4.176836253863087e-06, "logits/chosen": -2.3127691745758057, "logits/rejected": -2.185509443283081, "logps/chosen": -359.801025390625, "logps/rejected": -326.85382080078125, "loss": 0.5858, "rewards/accuracies": 0.75, "rewards/chosen": 0.27552157640457153, "rewards/margins": 0.30210158228874207, "rewards/rejected": -0.02657998725771904, "step": 1300 }, { "epoch": 0.58, "learning_rate": 4.1044260493523005e-06, "logits/chosen": -2.219707727432251, "logits/rejected": -2.2081971168518066, "logps/chosen": -325.4644775390625, "logps/rejected": -309.3984069824219, "loss": 0.6114, "rewards/accuracies": 0.625, "rewards/chosen": 0.29091349244117737, "rewards/margins": 0.2537664771080017, "rewards/rejected": 0.037146955728530884, "step": 1310 }, { "epoch": 0.59, "learning_rate": 4.0322093909003965e-06, "logits/chosen": -2.3424715995788574, "logits/rejected": -2.328320026397705, "logps/chosen": -369.3295593261719, "logps/rejected": -420.92987060546875, "loss": 0.5449, "rewards/accuracies": 0.75, "rewards/chosen": 0.4485898017883301, "rewards/margins": 0.42458122968673706, "rewards/rejected": 0.02400858886539936, "step": 1320 }, { "epoch": 0.59, "learning_rate": 3.960201885537504e-06, "logits/chosen": -2.341200113296509, "logits/rejected": -2.253725528717041, "logps/chosen": -372.42633056640625, "logps/rejected": -400.0967102050781, "loss": 0.642, "rewards/accuracies": 0.625, "rewards/chosen": 0.38997507095336914, "rewards/margins": 0.22686178982257843, "rewards/rejected": 0.16311326622962952, "step": 1330 }, { "epoch": 0.6, "learning_rate": 3.888419095092843e-06, "logits/chosen": -2.2710628509521484, "logits/rejected": -2.3019535541534424, "logps/chosen": -336.04296875, "logps/rejected": -364.0369873046875, "loss": 0.6737, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.08392750471830368, "rewards/margins": 0.1348566859960556, "rewards/rejected": -0.05092918127775192, "step": 1340 }, { "epoch": 0.6, "learning_rate": 3.816876532831595e-06, "logits/chosen": -2.1317548751831055, "logits/rejected": -2.1478095054626465, "logps/chosen": -297.4999084472656, "logps/rejected": -314.2597961425781, "loss": 0.6427, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.07686291635036469, "rewards/margins": 0.2395528256893158, "rewards/rejected": -0.16268989443778992, "step": 1350 }, { "epoch": 0.6, "learning_rate": 3.7455896601022677e-06, "logits/chosen": -2.2463011741638184, "logits/rejected": -2.130866527557373, "logps/chosen": -333.47564697265625, "logps/rejected": -299.1008605957031, "loss": 0.6625, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.01470687985420227, "rewards/margins": 0.15500742197036743, "rewards/rejected": -0.14030054211616516, "step": 1360 }, { "epoch": 0.61, "learning_rate": 3.6745738829952928e-06, "logits/chosen": -2.3302998542785645, "logits/rejected": -2.3339765071868896, "logps/chosen": -378.35498046875, "logps/rejected": -410.18035888671875, "loss": 0.6558, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.3636806905269623, "rewards/margins": 0.19609752297401428, "rewards/rejected": 0.1675831824541092, "step": 1370 }, { "epoch": 0.61, "learning_rate": 3.6038445490135354e-06, "logits/chosen": -2.3522942066192627, "logits/rejected": -2.3492603302001953, "logps/chosen": -387.61297607421875, "logps/rejected": -422.75054931640625, "loss": 0.6802, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.4821054935455322, "rewards/margins": 0.14728207886219025, "rewards/rejected": 0.3348234295845032, "step": 1380 }, { "epoch": 0.62, "learning_rate": 3.5334169437555e-06, "logits/chosen": -2.2042317390441895, "logits/rejected": -2.272881507873535, "logps/chosen": -345.3319396972656, "logps/rejected": -344.2694091796875, "loss": 0.6958, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.11976834386587143, "rewards/margins": 0.10411565005779266, "rewards/rejected": 0.015652697533369064, "step": 1390 }, { "epoch": 0.62, "learning_rate": 3.4633062876118915e-06, "logits/chosen": -2.310586452484131, "logits/rejected": -2.2318148612976074, "logps/chosen": -339.42095947265625, "logps/rejected": -339.2361755371094, "loss": 0.6279, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.20746886730194092, "rewards/margins": 0.2241462916135788, "rewards/rejected": -0.01667742058634758, "step": 1400 }, { "epoch": 0.63, "learning_rate": 3.3935277324762807e-06, "logits/chosen": -2.2938754558563232, "logits/rejected": -2.3304316997528076, "logps/chosen": -349.04547119140625, "logps/rejected": -412.75042724609375, "loss": 0.6163, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.04044444486498833, "rewards/margins": 0.25089383125305176, "rewards/rejected": -0.21044941246509552, "step": 1410 }, { "epoch": 0.63, "learning_rate": 3.324096358470559e-06, "logits/chosen": -2.298367977142334, "logits/rejected": -2.2703890800476074, "logps/chosen": -365.79571533203125, "logps/rejected": -372.6152038574219, "loss": 0.6579, "rewards/accuracies": 0.625, "rewards/chosen": 0.16013844311237335, "rewards/margins": 0.23061330616474152, "rewards/rejected": -0.07047487795352936, "step": 1420 }, { "epoch": 0.64, "learning_rate": 3.255027170685922e-06, "logits/chosen": -2.271730422973633, "logits/rejected": -2.305053234100342, "logps/chosen": -379.85321044921875, "logps/rejected": -405.1103515625, "loss": 0.717, "rewards/accuracies": 0.625, "rewards/chosen": 0.14956679940223694, "rewards/margins": 0.06124185770750046, "rewards/rejected": 0.08832494169473648, "step": 1430 }, { "epoch": 0.64, "learning_rate": 3.186335095940058e-06, "logits/chosen": -2.3461501598358154, "logits/rejected": -2.1821436882019043, "logps/chosen": -382.0367736816406, "logps/rejected": -337.6816101074219, "loss": 0.6432, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.07416114211082458, "rewards/margins": 0.22856464982032776, "rewards/rejected": -0.15440352261066437, "step": 1440 }, { "epoch": 0.64, "learning_rate": 3.1180349795512478e-06, "logits/chosen": -2.333571434020996, "logits/rejected": -2.2108778953552246, "logps/chosen": -364.5013427734375, "logps/rejected": -348.1319580078125, "loss": 0.6229, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.042986027896404266, "rewards/margins": 0.23882392048835754, "rewards/rejected": -0.19583788514137268, "step": 1450 }, { "epoch": 0.65, "learning_rate": 3.050141582130093e-06, "logits/chosen": -2.19138765335083, "logits/rejected": -2.2427496910095215, "logps/chosen": -337.9814147949219, "logps/rejected": -330.500244140625, "loss": 0.7066, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.053582824766635895, "rewards/margins": 0.07893103361129761, "rewards/rejected": -0.025348205119371414, "step": 1460 }, { "epoch": 0.65, "learning_rate": 2.982669576389533e-06, "logits/chosen": -2.296982526779175, "logits/rejected": -2.2845733165740967, "logps/chosen": -310.3797912597656, "logps/rejected": -309.05975341796875, "loss": 0.6881, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.22653362154960632, "rewards/margins": 0.06024567037820816, "rewards/rejected": 0.16628797352313995, "step": 1470 }, { "epoch": 0.66, "learning_rate": 2.9156335439738705e-06, "logits/chosen": -2.285391092300415, "logits/rejected": -2.3086414337158203, "logps/chosen": -373.62628173828125, "logps/rejected": -386.38092041015625, "loss": 0.7604, "rewards/accuracies": 0.5, "rewards/chosen": 0.023256815969944, "rewards/margins": -0.014189457520842552, "rewards/rejected": 0.037446290254592896, "step": 1480 }, { "epoch": 0.66, "learning_rate": 2.84904797230748e-06, "logits/chosen": -2.2920703887939453, "logits/rejected": -2.2386538982391357, "logps/chosen": -336.79888916015625, "logps/rejected": -377.5654296875, "loss": 0.5965, "rewards/accuracies": 0.75, "rewards/chosen": 0.3767642080783844, "rewards/margins": 0.3530300557613373, "rewards/rejected": 0.02373412624001503, "step": 1490 }, { "epoch": 0.67, "learning_rate": 2.782927251463854e-06, "logits/chosen": -2.2349536418914795, "logits/rejected": -2.246170997619629, "logps/chosen": -326.43084716796875, "logps/rejected": -355.4977111816406, "loss": 0.6291, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.12856540083885193, "rewards/margins": 0.2676551938056946, "rewards/rejected": -0.13908980786800385, "step": 1500 }, { "epoch": 0.67, "learning_rate": 2.717285671055733e-06, "logits/chosen": -2.2831931114196777, "logits/rejected": -2.2716732025146484, "logps/chosen": -339.9261474609375, "logps/rejected": -372.9583740234375, "loss": 0.6354, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0657893493771553, "rewards/margins": 0.23048046231269836, "rewards/rejected": -0.29626980423927307, "step": 1510 }, { "epoch": 0.68, "learning_rate": 2.652137417146897e-06, "logits/chosen": -2.327761173248291, "logits/rejected": -2.1530885696411133, "logps/chosen": -351.7103271484375, "logps/rejected": -321.41046142578125, "loss": 0.7653, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.15571951866149902, "rewards/margins": -0.01952260732650757, "rewards/rejected": -0.13619689643383026, "step": 1520 }, { "epoch": 0.68, "learning_rate": 2.587496569186378e-06, "logits/chosen": -2.283737897872925, "logits/rejected": -2.2826638221740723, "logps/chosen": -369.670654296875, "logps/rejected": -380.65460205078125, "loss": 0.6403, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.06505627185106277, "rewards/margins": 0.1945182979106903, "rewards/rejected": -0.12946203351020813, "step": 1530 }, { "epoch": 0.68, "learning_rate": 2.5233770969656703e-06, "logits/chosen": -2.2368502616882324, "logits/rejected": -2.2161707878112793, "logps/chosen": -331.0984191894531, "logps/rejected": -336.10040283203125, "loss": 0.6759, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.1922999769449234, "rewards/margins": 0.1443178653717041, "rewards/rejected": 0.04798208177089691, "step": 1540 }, { "epoch": 0.69, "learning_rate": 2.4597928575996917e-06, "logits/chosen": -2.2817587852478027, "logits/rejected": -2.2737059593200684, "logps/chosen": -369.1025085449219, "logps/rejected": -396.74005126953125, "loss": 0.652, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.08208617568016052, "rewards/margins": 0.25736135244369507, "rewards/rejected": -0.17527517676353455, "step": 1550 }, { "epoch": 0.69, "learning_rate": 2.3967575925320417e-06, "logits/chosen": -2.362391471862793, "logits/rejected": -2.2530202865600586, "logps/chosen": -362.65301513671875, "logps/rejected": -345.89776611328125, "loss": 0.6438, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.2820921540260315, "rewards/margins": 0.289219468832016, "rewards/rejected": -0.007127317134290934, "step": 1560 }, { "epoch": 0.7, "learning_rate": 2.334284924565307e-06, "logits/chosen": -2.2167036533355713, "logits/rejected": -2.187164068222046, "logps/chosen": -322.52410888671875, "logps/rejected": -329.1834716796875, "loss": 0.6609, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.04885543882846832, "rewards/margins": 0.21874144673347473, "rewards/rejected": -0.16988599300384521, "step": 1570 }, { "epoch": 0.7, "learning_rate": 2.2723883549169546e-06, "logits/chosen": -2.2128214836120605, "logits/rejected": -2.1517386436462402, "logps/chosen": -297.3167419433594, "logps/rejected": -319.46826171875, "loss": 0.6695, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.1534380465745926, "rewards/margins": 0.17669746279716492, "rewards/rejected": -0.02325943298637867, "step": 1580 }, { "epoch": 0.71, "learning_rate": 2.211081260301559e-06, "logits/chosen": -2.2383880615234375, "logits/rejected": -2.152236223220825, "logps/chosen": -321.6248474121094, "logps/rejected": -295.3414001464844, "loss": 0.6343, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.2368522584438324, "rewards/margins": 0.21582520008087158, "rewards/rejected": 0.021027065813541412, "step": 1590 }, { "epoch": 0.71, "learning_rate": 2.150376890039888e-06, "logits/chosen": -2.142472743988037, "logits/rejected": -2.2683558464050293, "logps/chosen": -304.2695007324219, "logps/rejected": -381.8064270019531, "loss": 0.6457, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.05382103472948074, "rewards/margins": 0.231169655919075, "rewards/rejected": -0.17734862864017487, "step": 1600 }, { "epoch": 0.72, "learning_rate": 2.090288363195546e-06, "logits/chosen": -2.301752805709839, "logits/rejected": -2.245049476623535, "logps/chosen": -360.19940185546875, "logps/rejected": -344.0550231933594, "loss": 0.6983, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.1819063127040863, "rewards/margins": 0.07504500448703766, "rewards/rejected": 0.10686129331588745, "step": 1610 }, { "epoch": 0.72, "learning_rate": 2.0308286657397586e-06, "logits/chosen": -2.1193668842315674, "logits/rejected": -2.279275417327881, "logps/chosen": -290.13494873046875, "logps/rejected": -304.28460693359375, "loss": 0.6585, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.014680122956633568, "rewards/margins": 0.12039873749017715, "rewards/rejected": -0.13507884740829468, "step": 1620 }, { "epoch": 0.72, "learning_rate": 1.972010647744929e-06, "logits/chosen": -2.2673816680908203, "logits/rejected": -2.23976731300354, "logps/chosen": -359.8111267089844, "logps/rejected": -389.32012939453125, "loss": 0.6393, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1408449113368988, "rewards/margins": 0.20818281173706055, "rewards/rejected": -0.34902772307395935, "step": 1630 }, { "epoch": 0.73, "learning_rate": 1.9138470206075468e-06, "logits/chosen": -2.260129690170288, "logits/rejected": -2.1876485347747803, "logps/chosen": -349.2674560546875, "logps/rejected": -373.29351806640625, "loss": 0.6647, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08648316562175751, "rewards/margins": 0.13312320411205292, "rewards/rejected": -0.21960635483264923, "step": 1640 }, { "epoch": 0.73, "learning_rate": 1.8563503543010847e-06, "logits/chosen": -2.2733237743377686, "logits/rejected": -2.239638090133667, "logps/chosen": -357.602294921875, "logps/rejected": -358.47900390625, "loss": 0.6549, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.038054175674915314, "rewards/margins": 0.20417292416095734, "rewards/rejected": -0.1661187708377838, "step": 1650 }, { "epoch": 0.74, "learning_rate": 1.7995330746594492e-06, "logits/chosen": -2.2824442386627197, "logits/rejected": -2.319239854812622, "logps/chosen": -335.9391174316406, "logps/rejected": -376.009765625, "loss": 0.7277, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.06412219256162643, "rewards/margins": 0.0529680959880352, "rewards/rejected": -0.11709029972553253, "step": 1660 }, { "epoch": 0.74, "learning_rate": 1.7434074606915908e-06, "logits/chosen": -2.2410006523132324, "logits/rejected": -2.2910315990448, "logps/chosen": -366.2132263183594, "logps/rejected": -422.5810546875, "loss": 0.6248, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.1538994014263153, "rewards/margins": 0.33952516317367554, "rewards/rejected": -0.185625821352005, "step": 1670 }, { "epoch": 0.75, "learning_rate": 1.687985641927819e-06, "logits/chosen": -2.3636865615844727, "logits/rejected": -2.2147748470306396, "logps/chosen": -360.6214294433594, "logps/rejected": -323.7347412109375, "loss": 0.622, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.18724626302719116, "rewards/margins": 0.2453027069568634, "rewards/rejected": -0.05805645138025284, "step": 1680 }, { "epoch": 0.75, "learning_rate": 1.6332795957984688e-06, "logits/chosen": -2.2480177879333496, "logits/rejected": -2.2115044593811035, "logps/chosen": -352.7060852050781, "logps/rejected": -360.0293884277344, "loss": 0.6535, "rewards/accuracies": 0.625, "rewards/chosen": -0.05413411930203438, "rewards/margins": 0.20770862698554993, "rewards/rejected": -0.2618427276611328, "step": 1690 }, { "epoch": 0.76, "learning_rate": 1.5793011450453854e-06, "logits/chosen": -2.230503797531128, "logits/rejected": -2.2694289684295654, "logps/chosen": -292.1943664550781, "logps/rejected": -338.814453125, "loss": 0.7215, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.112449049949646, "rewards/margins": 0.08113683760166168, "rewards/rejected": -0.19358590245246887, "step": 1700 }, { "epoch": 0.76, "learning_rate": 1.5260619551668842e-06, "logits/chosen": -2.186260461807251, "logits/rejected": -2.3265433311462402, "logps/chosen": -315.28778076171875, "logps/rejected": -363.6637268066406, "loss": 0.7206, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10357306897640228, "rewards/margins": 0.0456685833632946, "rewards/rejected": -0.14924165606498718, "step": 1710 }, { "epoch": 0.76, "learning_rate": 1.4735735318966521e-06, "logits/chosen": -2.338139772415161, "logits/rejected": -2.1299831867218018, "logps/chosen": -330.22796630859375, "logps/rejected": -297.2973327636719, "loss": 0.644, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.05218541622161865, "rewards/margins": 0.25528091192245483, "rewards/rejected": -0.20309551060199738, "step": 1720 }, { "epoch": 0.77, "learning_rate": 1.4218472187172212e-06, "logits/chosen": -2.1943013668060303, "logits/rejected": -2.255190372467041, "logps/chosen": -294.1163024902344, "logps/rejected": -333.59228515625, "loss": 0.6324, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.09838038682937622, "rewards/margins": 0.21084125339984894, "rewards/rejected": -0.30922168493270874, "step": 1730 }, { "epoch": 0.77, "learning_rate": 1.3708941944084636e-06, "logits/chosen": -2.3456058502197266, "logits/rejected": -2.361806631088257, "logps/chosen": -406.53094482421875, "logps/rejected": -447.1785583496094, "loss": 0.6449, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.2680016756057739, "rewards/margins": 0.210541769862175, "rewards/rejected": 0.057459909468889236, "step": 1740 }, { "epoch": 0.78, "learning_rate": 1.3207254706317174e-06, "logits/chosen": -2.276390790939331, "logits/rejected": -2.296130657196045, "logps/chosen": -342.73260498046875, "logps/rejected": -360.7101135253906, "loss": 0.6452, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.1815967857837677, "rewards/margins": 0.2110695093870163, "rewards/rejected": -0.029472723603248596, "step": 1750 }, { "epoch": 0.78, "learning_rate": 1.2713518895499932e-06, "logits/chosen": -2.2506117820739746, "logits/rejected": -2.1966030597686768, "logps/chosen": -328.85302734375, "logps/rejected": -346.61041259765625, "loss": 0.6133, "rewards/accuracies": 0.625, "rewards/chosen": -0.16987404227256775, "rewards/margins": 0.2849760055541992, "rewards/rejected": -0.45485004782676697, "step": 1760 }, { "epoch": 0.79, "learning_rate": 1.2227841214848519e-06, "logits/chosen": -2.3572230339050293, "logits/rejected": -2.241999387741089, "logps/chosen": -386.4831237792969, "logps/rejected": -324.4116516113281, "loss": 0.623, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.19086837768554688, "rewards/margins": 0.23254597187042236, "rewards/rejected": -0.04167759045958519, "step": 1770 }, { "epoch": 0.79, "learning_rate": 1.175032662610383e-06, "logits/chosen": -2.384322166442871, "logits/rejected": -2.372183322906494, "logps/chosen": -379.8897705078125, "logps/rejected": -384.9872131347656, "loss": 0.6472, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.13912078738212585, "rewards/margins": 0.1807091236114502, "rewards/rejected": -0.041588325053453445, "step": 1780 }, { "epoch": 0.8, "learning_rate": 1.1281078326848438e-06, "logits/chosen": -2.2786386013031006, "logits/rejected": -2.2903854846954346, "logps/chosen": -342.7461242675781, "logps/rejected": -383.90411376953125, "loss": 0.5802, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.15179908275604248, "rewards/margins": 0.4043292999267578, "rewards/rejected": -0.25253021717071533, "step": 1790 }, { "epoch": 0.8, "learning_rate": 1.0820197728204085e-06, "logits/chosen": -2.235412120819092, "logits/rejected": -2.1771421432495117, "logps/chosen": -336.4576721191406, "logps/rejected": -346.7325744628906, "loss": 0.6903, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0932273119688034, "rewards/margins": 0.12401854991912842, "rewards/rejected": -0.03079124726355076, "step": 1800 }, { "epoch": 0.8, "learning_rate": 1.0367784432915407e-06, "logits/chosen": -2.2605013847351074, "logits/rejected": -2.2363693714141846, "logps/chosen": -337.21728515625, "logps/rejected": -330.5986633300781, "loss": 0.6855, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.2193947285413742, "rewards/margins": 0.11095724254846573, "rewards/rejected": 0.10843745619058609, "step": 1810 }, { "epoch": 0.81, "learning_rate": 9.923936213824297e-07, "logits/chosen": -2.387052297592163, "logits/rejected": -2.2252724170684814, "logps/chosen": -368.71881103515625, "logps/rejected": -402.789794921875, "loss": 0.6306, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.32878467440605164, "rewards/margins": 0.26738250255584717, "rewards/rejected": 0.06140219047665596, "step": 1820 }, { "epoch": 0.81, "learning_rate": 9.488748992739877e-07, "logits/chosen": -2.2936031818389893, "logits/rejected": -2.3079075813293457, "logps/chosen": -340.22430419921875, "logps/rejected": -390.5373840332031, "loss": 0.653, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.11492462456226349, "rewards/margins": 0.22790834307670593, "rewards/rejected": -0.11298371851444244, "step": 1830 }, { "epoch": 0.82, "learning_rate": 9.062316819708427e-07, "logits/chosen": -2.26062273979187, "logits/rejected": -2.2534077167510986, "logps/chosen": -323.99554443359375, "logps/rejected": -343.2364501953125, "loss": 0.6792, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07681259512901306, "rewards/margins": 0.13750119507312775, "rewards/rejected": -0.2143137902021408, "step": 1840 }, { "epoch": 0.82, "learning_rate": 8.644731852687904e-07, "logits/chosen": -2.1960670948028564, "logits/rejected": -2.252990245819092, "logps/chosen": -338.3189392089844, "logps/rejected": -409.77947998046875, "loss": 0.6429, "rewards/accuracies": 0.625, "rewards/chosen": -0.027236556634306908, "rewards/margins": 0.200010746717453, "rewards/rejected": -0.22724728286266327, "step": 1850 }, { "epoch": 0.83, "learning_rate": 8.236084337631256e-07, "logits/chosen": -2.269155502319336, "logits/rejected": -2.2297897338867188, "logps/chosen": -350.8360900878906, "logps/rejected": -349.05743408203125, "loss": 0.618, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.03905141353607178, "rewards/margins": 0.2928921580314636, "rewards/rejected": -0.25384077429771423, "step": 1860 }, { "epoch": 0.83, "learning_rate": 7.836462588983029e-07, "logits/chosen": -2.2999212741851807, "logits/rejected": -2.2945046424865723, "logps/chosen": -331.8743896484375, "logps/rejected": -358.82427978515625, "loss": 0.6854, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.007713166065514088, "rewards/margins": 0.15347187221050262, "rewards/rejected": -0.1457587033510208, "step": 1870 }, { "epoch": 0.84, "learning_rate": 7.445952970593401e-07, "logits/chosen": -2.292762517929077, "logits/rejected": -2.2326605319976807, "logps/chosen": -368.51123046875, "logps/rejected": -343.02362060546875, "loss": 0.7055, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.138399139046669, "rewards/margins": 0.1448075920343399, "rewards/rejected": -0.2832067608833313, "step": 1880 }, { "epoch": 0.84, "learning_rate": 7.064639877053753e-07, "logits/chosen": -2.2553770542144775, "logits/rejected": -2.1739296913146973, "logps/chosen": -347.02081298828125, "logps/rejected": -329.3694152832031, "loss": 0.7244, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0695744976401329, "rewards/margins": 0.0453006774187088, "rewards/rejected": -0.1148751750588417, "step": 1890 }, { "epoch": 0.84, "learning_rate": 6.692605715457734e-07, "logits/chosen": -2.296112537384033, "logits/rejected": -2.2227189540863037, "logps/chosen": -335.0692443847656, "logps/rejected": -342.08563232421875, "loss": 0.6636, "rewards/accuracies": 0.5, "rewards/chosen": 0.09152424335479736, "rewards/margins": 0.14057905972003937, "rewards/rejected": -0.049054812639951706, "step": 1900 }, { "epoch": 0.85, "learning_rate": 6.329930887592067e-07, "logits/chosen": -2.277210235595703, "logits/rejected": -2.263932704925537, "logps/chosen": -373.13623046875, "logps/rejected": -398.31329345703125, "loss": 0.6006, "rewards/accuracies": 0.625, "rewards/chosen": -0.10486678779125214, "rewards/margins": 0.2877123951911926, "rewards/rejected": -0.39257916808128357, "step": 1910 }, { "epoch": 0.85, "learning_rate": 5.976693772560487e-07, "logits/chosen": -2.3237483501434326, "logits/rejected": -2.283463716506958, "logps/chosen": -397.46905517578125, "logps/rejected": -399.92266845703125, "loss": 0.6716, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.03626967594027519, "rewards/margins": 0.20212960243225098, "rewards/rejected": -0.23839926719665527, "step": 1920 }, { "epoch": 0.86, "learning_rate": 5.632970709844976e-07, "logits/chosen": -2.2484121322631836, "logits/rejected": -2.2332425117492676, "logps/chosen": -352.50372314453125, "logps/rejected": -382.41290283203125, "loss": 0.7484, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.20201630890369415, "rewards/margins": 0.06586066633462906, "rewards/rejected": -0.2678769528865814, "step": 1930 }, { "epoch": 0.86, "learning_rate": 5.298835982807704e-07, "logits/chosen": -2.324031352996826, "logits/rejected": -2.2343201637268066, "logps/chosen": -387.73211669921875, "logps/rejected": -355.9978942871094, "loss": 0.7038, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.11137855052947998, "rewards/margins": 0.1095174103975296, "rewards/rejected": 0.0018611550331115723, "step": 1940 }, { "epoch": 0.87, "learning_rate": 4.974361802637395e-07, "logits/chosen": -2.3116257190704346, "logits/rejected": -2.272489070892334, "logps/chosen": -362.0296325683594, "logps/rejected": -368.2407531738281, "loss": 0.6696, "rewards/accuracies": 0.625, "rewards/chosen": -0.055863846093416214, "rewards/margins": 0.1546260416507721, "rewards/rejected": -0.2104898989200592, "step": 1950 }, { "epoch": 0.87, "learning_rate": 4.6596182927434395e-07, "logits/chosen": -2.2587242126464844, "logits/rejected": -2.2343411445617676, "logps/chosen": -326.1157531738281, "logps/rejected": -347.9691162109375, "loss": 0.652, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.12903109192848206, "rewards/margins": 0.13124972581863403, "rewards/rejected": -0.2602807879447937, "step": 1960 }, { "epoch": 0.88, "learning_rate": 4.354673473601251e-07, "logits/chosen": -2.249849557876587, "logits/rejected": -2.1856112480163574, "logps/chosen": -350.0913391113281, "logps/rejected": -357.5381774902344, "loss": 0.6364, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.04396069794893265, "rewards/margins": 0.2554934620857239, "rewards/rejected": -0.21153274178504944, "step": 1970 }, { "epoch": 0.88, "learning_rate": 4.059593248052107e-07, "logits/chosen": -2.270174741744995, "logits/rejected": -2.214571475982666, "logps/chosen": -363.952880859375, "logps/rejected": -363.4573974609375, "loss": 0.5834, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.0837903842329979, "rewards/margins": 0.3650640845298767, "rewards/rejected": -0.2812737226486206, "step": 1980 }, { "epoch": 0.88, "learning_rate": 3.774441387060634e-07, "logits/chosen": -2.3621678352355957, "logits/rejected": -2.304919719696045, "logps/chosen": -411.8487854003906, "logps/rejected": -410.06671142578125, "loss": 0.6429, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.14407065510749817, "rewards/margins": 0.2489246129989624, "rewards/rejected": -0.10485398769378662, "step": 1990 }, { "epoch": 0.89, "learning_rate": 3.4992795159329516e-07, "logits/chosen": -2.3314731121063232, "logits/rejected": -2.2980003356933594, "logps/chosen": -380.12017822265625, "logps/rejected": -410.78887939453125, "loss": 0.6918, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.10951967537403107, "rewards/margins": 0.07033322751522064, "rewards/rejected": 0.03918645530939102, "step": 2000 } ], "logging_steps": 10, "max_steps": 2250, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }