bboeun's picture
Upload folder using huggingface_hub
5e272c1 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.8888888888888888,
"eval_steps": 500,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5.309734513274336e-07,
"logits/chosen": -2.1858699321746826,
"logits/rejected": -2.25400972366333,
"logps/chosen": -292.4839172363281,
"logps/rejected": -334.2861633300781,
"loss": 0.6928,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.0007014082511886954,
"rewards/margins": 0.0008057593367993832,
"rewards/rejected": -0.001507167937234044,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 1.415929203539823e-06,
"logits/chosen": -2.2499358654022217,
"logits/rejected": -2.2245681285858154,
"logps/chosen": -323.0448303222656,
"logps/rejected": -341.9175109863281,
"loss": 0.6941,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -0.0023523904383182526,
"rewards/margins": -0.0019129945430904627,
"rewards/rejected": -0.00043939598253928125,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 2.3008849557522127e-06,
"logits/chosen": -2.2502377033233643,
"logits/rejected": -2.235426425933838,
"logps/chosen": -309.60076904296875,
"logps/rejected": -354.3961181640625,
"loss": 0.6933,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -0.005819912068545818,
"rewards/margins": -0.00021856316016055644,
"rewards/rejected": -0.0056013488210737705,
"step": 30
},
{
"epoch": 0.02,
"learning_rate": 3.185840707964602e-06,
"logits/chosen": -2.2594857215881348,
"logits/rejected": -2.231959819793701,
"logps/chosen": -342.497802734375,
"logps/rejected": -361.927734375,
"loss": 0.6939,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.017969723790884018,
"rewards/margins": -0.001382522750645876,
"rewards/rejected": -0.01658720150589943,
"step": 40
},
{
"epoch": 0.02,
"learning_rate": 4.070796460176992e-06,
"logits/chosen": -2.278099775314331,
"logits/rejected": -2.2154829502105713,
"logps/chosen": -334.44879150390625,
"logps/rejected": -324.4710998535156,
"loss": 0.6928,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.03501028195023537,
"rewards/margins": 0.000740527524612844,
"rewards/rejected": -0.03575081005692482,
"step": 50
},
{
"epoch": 0.03,
"learning_rate": 4.955752212389381e-06,
"logits/chosen": -2.28778338432312,
"logits/rejected": -2.1848011016845703,
"logps/chosen": -329.4461669921875,
"logps/rejected": -304.49163818359375,
"loss": 0.693,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.0479893684387207,
"rewards/margins": 0.0005268483073450625,
"rewards/rejected": -0.04851621761918068,
"step": 60
},
{
"epoch": 0.03,
"learning_rate": 5.840707964601771e-06,
"logits/chosen": -2.118542194366455,
"logits/rejected": -2.1866343021392822,
"logps/chosen": -299.27447509765625,
"logps/rejected": -326.0574645996094,
"loss": 0.6925,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.049522001296281815,
"rewards/margins": 0.001656264765188098,
"rewards/rejected": -0.05117826536297798,
"step": 70
},
{
"epoch": 0.04,
"learning_rate": 6.72566371681416e-06,
"logits/chosen": -2.17606782913208,
"logits/rejected": -2.1878247261047363,
"logps/chosen": -327.1267395019531,
"logps/rejected": -323.746337890625,
"loss": 0.6897,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.050967562943696976,
"rewards/margins": 0.0074631692841649055,
"rewards/rejected": -0.05843073129653931,
"step": 80
},
{
"epoch": 0.04,
"learning_rate": 7.610619469026549e-06,
"logits/chosen": -2.3069217205047607,
"logits/rejected": -2.1109657287597656,
"logps/chosen": -359.98150634765625,
"logps/rejected": -322.8603820800781,
"loss": 0.7002,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.05969462916254997,
"rewards/margins": -0.013257542625069618,
"rewards/rejected": -0.0464370921254158,
"step": 90
},
{
"epoch": 0.04,
"learning_rate": 8.495575221238938e-06,
"logits/chosen": -2.2969472408294678,
"logits/rejected": -2.2404885292053223,
"logps/chosen": -328.78704833984375,
"logps/rejected": -334.45281982421875,
"loss": 0.6917,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.005047931335866451,
"rewards/margins": 0.0031173895113170147,
"rewards/rejected": 0.0019305419409647584,
"step": 100
},
{
"epoch": 0.05,
"learning_rate": 9.380530973451329e-06,
"logits/chosen": -2.2497150897979736,
"logits/rejected": -2.220237970352173,
"logps/chosen": -333.1561584472656,
"logps/rejected": -314.7790832519531,
"loss": 0.6961,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": 0.015577316284179688,
"rewards/margins": -0.005401826463639736,
"rewards/rejected": 0.02097914181649685,
"step": 110
},
{
"epoch": 0.05,
"learning_rate": 9.999951373555555e-06,
"logits/chosen": -2.301379442214966,
"logits/rejected": -2.223681926727295,
"logps/chosen": -351.5559387207031,
"logps/rejected": -326.63287353515625,
"loss": 0.6889,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.012599905952811241,
"rewards/margins": 0.009558334946632385,
"rewards/rejected": 0.003041572170332074,
"step": 120
},
{
"epoch": 0.06,
"learning_rate": 9.999086929743288e-06,
"logits/chosen": -2.2710835933685303,
"logits/rejected": -2.227280855178833,
"logps/chosen": -321.6353454589844,
"logps/rejected": -332.7576599121094,
"loss": 0.692,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.040375690907239914,
"rewards/margins": 0.004883688408881426,
"rewards/rejected": 0.03549199923872948,
"step": 130
},
{
"epoch": 0.06,
"learning_rate": 9.997142113313472e-06,
"logits/chosen": -2.212054491043091,
"logits/rejected": -2.202702283859253,
"logps/chosen": -322.11651611328125,
"logps/rejected": -309.7989501953125,
"loss": 0.6871,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.0492350198328495,
"rewards/margins": 0.013679656200110912,
"rewards/rejected": 0.035555362701416016,
"step": 140
},
{
"epoch": 0.07,
"learning_rate": 9.994117344568142e-06,
"logits/chosen": -2.2250020503997803,
"logits/rejected": -2.2318902015686035,
"logps/chosen": -317.3855895996094,
"logps/rejected": -337.94805908203125,
"loss": 0.6924,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.03159898519515991,
"rewards/margins": 0.0040366738103330135,
"rewards/rejected": 0.02756231650710106,
"step": 150
},
{
"epoch": 0.07,
"learning_rate": 9.990013277202137e-06,
"logits/chosen": -2.2112176418304443,
"logits/rejected": -2.3512566089630127,
"logps/chosen": -337.7769775390625,
"logps/rejected": -419.68450927734375,
"loss": 0.7042,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -0.02137443795800209,
"rewards/margins": -0.019264575093984604,
"rewards/rejected": -0.00210986053571105,
"step": 160
},
{
"epoch": 0.08,
"learning_rate": 9.984830798161828e-06,
"logits/chosen": -2.2544150352478027,
"logits/rejected": -2.1911208629608154,
"logps/chosen": -384.29718017578125,
"logps/rejected": -362.35308837890625,
"loss": 0.6841,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.11389386653900146,
"rewards/margins": 0.022078361362218857,
"rewards/rejected": -0.13597223162651062,
"step": 170
},
{
"epoch": 0.08,
"learning_rate": 9.978571027453433e-06,
"logits/chosen": -2.3017163276672363,
"logits/rejected": -2.12226939201355,
"logps/chosen": -364.39837646484375,
"logps/rejected": -286.0245361328125,
"loss": 0.6959,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": -0.12149347364902496,
"rewards/margins": 0.000388662883779034,
"rewards/rejected": -0.12188214063644409,
"step": 180
},
{
"epoch": 0.08,
"learning_rate": 9.971235317900968e-06,
"logits/chosen": -2.1424872875213623,
"logits/rejected": -2.239366054534912,
"logps/chosen": -283.5924377441406,
"logps/rejected": -315.69586181640625,
"loss": 0.6965,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": -0.04944513365626335,
"rewards/margins": -0.0020489301532506943,
"rewards/rejected": -0.04739619791507721,
"step": 190
},
{
"epoch": 0.09,
"learning_rate": 9.962825254853888e-06,
"logits/chosen": -2.3371052742004395,
"logits/rejected": -2.248575210571289,
"logps/chosen": -406.6221923828125,
"logps/rejected": -363.2230529785156,
"loss": 0.6814,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.03307682275772095,
"rewards/margins": 0.028196487575769424,
"rewards/rejected": 0.004880332853645086,
"step": 200
},
{
"epoch": 0.09,
"learning_rate": 9.953342655844465e-06,
"logits/chosen": -2.281076669692993,
"logits/rejected": -2.124605655670166,
"logps/chosen": -329.9849548339844,
"logps/rejected": -306.5705261230469,
"loss": 0.6745,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.04553469270467758,
"rewards/margins": 0.04089093953371048,
"rewards/rejected": 0.004643745254725218,
"step": 210
},
{
"epoch": 0.1,
"learning_rate": 9.942789570194995e-06,
"logits/chosen": -2.2274227142333984,
"logits/rejected": -2.195772409439087,
"logps/chosen": -353.90277099609375,
"logps/rejected": -329.13055419921875,
"loss": 0.6709,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.03678930178284645,
"rewards/margins": 0.05163549259305,
"rewards/rejected": -0.014846190810203552,
"step": 220
},
{
"epoch": 0.1,
"learning_rate": 9.931168278574916e-06,
"logits/chosen": -2.290721893310547,
"logits/rejected": -2.233349561691284,
"logps/chosen": -384.83087158203125,
"logps/rejected": -375.786376953125,
"loss": 0.6738,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.10011746734380722,
"rewards/margins": 0.0485808365046978,
"rewards/rejected": 0.05153663828969002,
"step": 230
},
{
"epoch": 0.11,
"learning_rate": 9.919797871024877e-06,
"logits/chosen": -2.3008508682250977,
"logits/rejected": -2.2407467365264893,
"logps/chosen": -341.2652587890625,
"logps/rejected": -304.7622985839844,
"loss": 0.6569,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.061227262020111084,
"rewards/margins": 0.07875394821166992,
"rewards/rejected": -0.01752668246626854,
"step": 240
},
{
"epoch": 0.11,
"learning_rate": 9.906154097672858e-06,
"logits/chosen": -2.3239777088165283,
"logits/rejected": -2.2359421253204346,
"logps/chosen": -357.5738220214844,
"logps/rejected": -333.55389404296875,
"loss": 0.6474,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.16475871205329895,
"rewards/margins": 0.10896603018045425,
"rewards/rejected": 0.05579269677400589,
"step": 250
},
{
"epoch": 0.12,
"learning_rate": 9.89145003578833e-06,
"logits/chosen": -2.3069913387298584,
"logits/rejected": -2.225893497467041,
"logps/chosen": -337.88299560546875,
"logps/rejected": -319.11016845703125,
"loss": 0.6595,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.17844273149967194,
"rewards/margins": 0.08880583941936493,
"rewards/rejected": 0.08963687717914581,
"step": 260
},
{
"epoch": 0.12,
"learning_rate": 9.875688863124766e-06,
"logits/chosen": -2.22190260887146,
"logits/rejected": -2.2968430519104004,
"logps/chosen": -391.5494384765625,
"logps/rejected": -404.64178466796875,
"loss": 0.6949,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.26371732354164124,
"rewards/margins": 0.03252069279551506,
"rewards/rejected": 0.23119667172431946,
"step": 270
},
{
"epoch": 0.12,
"learning_rate": 9.858873985892058e-06,
"logits/chosen": -2.2825188636779785,
"logits/rejected": -2.19154691696167,
"logps/chosen": -354.6551818847656,
"logps/rejected": -353.5287170410156,
"loss": 0.6837,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.13825781643390656,
"rewards/margins": 0.03303035721182823,
"rewards/rejected": 0.10522744804620743,
"step": 280
},
{
"epoch": 0.13,
"learning_rate": 9.841009038020401e-06,
"logits/chosen": -2.2181854248046875,
"logits/rejected": -2.2422547340393066,
"logps/chosen": -332.62322998046875,
"logps/rejected": -351.6214599609375,
"loss": 0.6746,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": 0.17069143056869507,
"rewards/margins": 0.07059639692306519,
"rewards/rejected": 0.10009505599737167,
"step": 290
},
{
"epoch": 0.13,
"learning_rate": 9.82209788037494e-06,
"logits/chosen": -2.283082962036133,
"logits/rejected": -2.3039584159851074,
"logps/chosen": -367.76708984375,
"logps/rejected": -379.209228515625,
"loss": 0.7081,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.12761621177196503,
"rewards/margins": 0.001193371368572116,
"rewards/rejected": 0.12642285227775574,
"step": 300
},
{
"epoch": 0.14,
"learning_rate": 9.80214459992139e-06,
"logits/chosen": -2.297591209411621,
"logits/rejected": -2.2650771141052246,
"logps/chosen": -359.8524169921875,
"logps/rejected": -390.44195556640625,
"loss": 0.6335,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.07557342946529388,
"rewards/margins": 0.15836670994758606,
"rewards/rejected": -0.08279327303171158,
"step": 310
},
{
"epoch": 0.14,
"learning_rate": 9.781153508842785e-06,
"logits/chosen": -2.2711267471313477,
"logits/rejected": -2.2797353267669678,
"logps/chosen": -329.4121398925781,
"logps/rejected": -345.84393310546875,
"loss": 0.675,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.09101514518260956,
"rewards/margins": 0.05387691408395767,
"rewards/rejected": -0.14489206671714783,
"step": 320
},
{
"epoch": 0.15,
"learning_rate": 9.759129143607547e-06,
"logits/chosen": -2.2991256713867188,
"logits/rejected": -2.1713948249816895,
"logps/chosen": -373.73992919921875,
"logps/rejected": -298.8330993652344,
"loss": 0.6611,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.07734711468219757,
"rewards/margins": 0.09413515031337738,
"rewards/rejected": -0.17148226499557495,
"step": 330
},
{
"epoch": 0.15,
"learning_rate": 9.736076263989103e-06,
"logits/chosen": -2.333172559738159,
"logits/rejected": -2.2931008338928223,
"logps/chosen": -384.9156188964844,
"logps/rejected": -363.5679016113281,
"loss": 0.6449,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.04307403415441513,
"rewards/margins": 0.1350451558828354,
"rewards/rejected": -0.09197112172842026,
"step": 340
},
{
"epoch": 0.16,
"learning_rate": 9.711999852037226e-06,
"logits/chosen": -2.3204524517059326,
"logits/rejected": -2.256392240524292,
"logps/chosen": -376.4149169921875,
"logps/rejected": -337.93402099609375,
"loss": 0.681,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.2001509964466095,
"rewards/margins": 0.0656595304608345,
"rewards/rejected": 0.1344914734363556,
"step": 350
},
{
"epoch": 0.16,
"learning_rate": 9.68690511100134e-06,
"logits/chosen": -2.222977638244629,
"logits/rejected": -2.2059781551361084,
"logps/chosen": -326.2198181152344,
"logps/rejected": -322.86907958984375,
"loss": 0.7169,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.12599822878837585,
"rewards/margins": -0.01360931433737278,
"rewards/rejected": 0.13960754871368408,
"step": 360
},
{
"epoch": 0.16,
"learning_rate": 9.660797464206035e-06,
"logits/chosen": -2.2420578002929688,
"logits/rejected": -2.23136568069458,
"logps/chosen": -338.4748840332031,
"logps/rejected": -357.66705322265625,
"loss": 0.6701,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.26268088817596436,
"rewards/margins": 0.08094726502895355,
"rewards/rejected": 0.181733638048172,
"step": 370
},
{
"epoch": 0.17,
"learning_rate": 9.633682553879e-06,
"logits/chosen": -2.276688814163208,
"logits/rejected": -2.234923839569092,
"logps/chosen": -317.33599853515625,
"logps/rejected": -312.63897705078125,
"loss": 0.6804,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.23342204093933105,
"rewards/margins": 0.0563444122672081,
"rewards/rejected": 0.17707762122154236,
"step": 380
},
{
"epoch": 0.17,
"learning_rate": 9.605566239931666e-06,
"logits/chosen": -2.3001625537872314,
"logits/rejected": -2.2134346961975098,
"logps/chosen": -357.8388977050781,
"logps/rejected": -349.38995361328125,
"loss": 0.6357,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.3796467185020447,
"rewards/margins": 0.167959064245224,
"rewards/rejected": 0.21168763935565948,
"step": 390
},
{
"epoch": 0.18,
"learning_rate": 9.576454598692797e-06,
"logits/chosen": -2.296462297439575,
"logits/rejected": -2.226733922958374,
"logps/chosen": -358.35015869140625,
"logps/rejected": -326.0476989746094,
"loss": 0.6382,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.36672210693359375,
"rewards/margins": 0.16903677582740784,
"rewards/rejected": 0.19768527150154114,
"step": 400
},
{
"epoch": 0.18,
"learning_rate": 9.546353921595306e-06,
"logits/chosen": -2.289577007293701,
"logits/rejected": -2.279940128326416,
"logps/chosen": -337.85699462890625,
"logps/rejected": -340.87261962890625,
"loss": 0.7079,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.3491610884666443,
"rewards/margins": 0.029783133417367935,
"rewards/rejected": 0.31937795877456665,
"step": 410
},
{
"epoch": 0.19,
"learning_rate": 9.515270713816589e-06,
"logits/chosen": -2.314282178878784,
"logits/rejected": -2.1990668773651123,
"logps/chosen": -380.24554443359375,
"logps/rejected": -341.0552062988281,
"loss": 0.6806,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.3242203891277313,
"rewards/margins": 0.10805626213550568,
"rewards/rejected": 0.21616414189338684,
"step": 420
},
{
"epoch": 0.19,
"learning_rate": 9.483211692872669e-06,
"logits/chosen": -2.2007763385772705,
"logits/rejected": -2.1976895332336426,
"logps/chosen": -307.19464111328125,
"logps/rejected": -318.5234069824219,
"loss": 0.7432,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.22343340516090393,
"rewards/margins": -0.031447634100914,
"rewards/rejected": 0.25488102436065674,
"step": 430
},
{
"epoch": 0.2,
"learning_rate": 9.450183787166447e-06,
"logits/chosen": -2.1776084899902344,
"logits/rejected": -2.263404369354248,
"logps/chosen": -275.30230712890625,
"logps/rejected": -335.97637939453125,
"loss": 0.6777,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.10852464288473129,
"rewards/margins": 0.05528046935796738,
"rewards/rejected": 0.053244173526763916,
"step": 440
},
{
"epoch": 0.2,
"learning_rate": 9.41619413449037e-06,
"logits/chosen": -2.341031551361084,
"logits/rejected": -2.2925498485565186,
"logps/chosen": -387.17315673828125,
"logps/rejected": -408.65350341796875,
"loss": 0.6467,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.17925263941287994,
"rewards/margins": 0.12131942808628082,
"rewards/rejected": 0.057933200150728226,
"step": 450
},
{
"epoch": 0.2,
"learning_rate": 9.381250080483864e-06,
"logits/chosen": -2.305234432220459,
"logits/rejected": -2.29388689994812,
"logps/chosen": -353.67547607421875,
"logps/rejected": -365.3600769042969,
"loss": 0.6973,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.020944729447364807,
"rewards/margins": 0.018988361582159996,
"rewards/rejected": 0.001956367399543524,
"step": 460
},
{
"epoch": 0.21,
"learning_rate": 9.345359177045827e-06,
"logits/chosen": -2.2121920585632324,
"logits/rejected": -2.1668667793273926,
"logps/chosen": -318.66827392578125,
"logps/rejected": -298.404052734375,
"loss": 0.672,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.009936051443219185,
"rewards/margins": 0.07024586200714111,
"rewards/rejected": -0.08018191158771515,
"step": 470
},
{
"epoch": 0.21,
"learning_rate": 9.308529180702568e-06,
"logits/chosen": -2.2666916847229004,
"logits/rejected": -2.283783435821533,
"logps/chosen": -359.14825439453125,
"logps/rejected": -384.94073486328125,
"loss": 0.6523,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.030093509703874588,
"rewards/margins": 0.1233237162232399,
"rewards/rejected": -0.1534171998500824,
"step": 480
},
{
"epoch": 0.22,
"learning_rate": 9.270768050931515e-06,
"logits/chosen": -2.2703680992126465,
"logits/rejected": -2.3521008491516113,
"logps/chosen": -336.55548095703125,
"logps/rejected": -373.68902587890625,
"loss": 0.6823,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.19773463904857635,
"rewards/margins": 0.06342988461256027,
"rewards/rejected": -0.2611645460128784,
"step": 490
},
{
"epoch": 0.22,
"learning_rate": 9.232083948441046e-06,
"logits/chosen": -2.263096332550049,
"logits/rejected": -2.1758663654327393,
"logps/chosen": -366.25714111328125,
"logps/rejected": -322.65081787109375,
"loss": 0.6963,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.1464572548866272,
"rewards/margins": 0.025711650028824806,
"rewards/rejected": -0.17216889560222626,
"step": 500
},
{
"epoch": 0.23,
"learning_rate": 9.192485233406862e-06,
"logits/chosen": -2.3192970752716064,
"logits/rejected": -2.3752708435058594,
"logps/chosen": -383.03753662109375,
"logps/rejected": -406.3360900878906,
"loss": 0.6627,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.13319934904575348,
"rewards/margins": 0.0994311273097992,
"rewards/rejected": -0.23263044655323029,
"step": 510
},
{
"epoch": 0.23,
"learning_rate": 9.151980463665227e-06,
"logits/chosen": -2.2946994304656982,
"logits/rejected": -2.2120468616485596,
"logps/chosen": -390.14434814453125,
"logps/rejected": -344.0641174316406,
"loss": 0.6981,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.12970566749572754,
"rewards/margins": 0.05780962109565735,
"rewards/rejected": -0.1875152885913849,
"step": 520
},
{
"epoch": 0.24,
"learning_rate": 9.1105783928635e-06,
"logits/chosen": -2.2886319160461426,
"logits/rejected": -2.2648708820343018,
"logps/chosen": -373.304931640625,
"logps/rejected": -381.96368408203125,
"loss": 0.7022,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.011115001514554024,
"rewards/margins": 0.03471168875694275,
"rewards/rejected": -0.04582669958472252,
"step": 530
},
{
"epoch": 0.24,
"learning_rate": 9.068287968568355e-06,
"logits/chosen": -2.2562637329101562,
"logits/rejected": -2.2379746437072754,
"logps/chosen": -336.50640869140625,
"logps/rejected": -369.13037109375,
"loss": 0.6873,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.1720694601535797,
"rewards/margins": 0.060669075697660446,
"rewards/rejected": 0.11140035092830658,
"step": 540
},
{
"epoch": 0.24,
"learning_rate": 9.02511833033208e-06,
"logits/chosen": -2.145764112472534,
"logits/rejected": -2.1581664085388184,
"logps/chosen": -320.99456787109375,
"logps/rejected": -324.7594299316406,
"loss": 0.6312,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.02777281031012535,
"rewards/margins": 0.16521447896957397,
"rewards/rejected": -0.19298730790615082,
"step": 550
},
{
"epoch": 0.25,
"learning_rate": 8.981078807717396e-06,
"logits/chosen": -2.316991090774536,
"logits/rejected": -2.169630765914917,
"logps/chosen": -417.3232421875,
"logps/rejected": -368.39617919921875,
"loss": 0.6415,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.12395425885915756,
"rewards/margins": 0.15927435457706451,
"rewards/rejected": -0.03532009571790695,
"step": 560
},
{
"epoch": 0.25,
"learning_rate": 8.936178918281209e-06,
"logits/chosen": -2.3391947746276855,
"logits/rejected": -2.359314441680908,
"logps/chosen": -379.1593322753906,
"logps/rejected": -404.58868408203125,
"loss": 0.7159,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": 0.0663943886756897,
"rewards/margins": -0.007743087597191334,
"rewards/rejected": 0.07413747161626816,
"step": 570
},
{
"epoch": 0.26,
"learning_rate": 8.890428365517728e-06,
"logits/chosen": -2.3254919052124023,
"logits/rejected": -2.2909200191497803,
"logps/chosen": -377.60736083984375,
"logps/rejected": -365.9610595703125,
"loss": 0.6832,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.23426219820976257,
"rewards/margins": 0.05870335176587105,
"rewards/rejected": 0.17555885016918182,
"step": 580
},
{
"epoch": 0.26,
"learning_rate": 8.843837036761404e-06,
"logits/chosen": -2.247920513153076,
"logits/rejected": -2.1772730350494385,
"logps/chosen": -299.9126892089844,
"logps/rejected": -299.29736328125,
"loss": 0.6474,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.20609867572784424,
"rewards/margins": 0.12030823528766632,
"rewards/rejected": 0.08579044044017792,
"step": 590
},
{
"epoch": 0.27,
"learning_rate": 8.796415001050154e-06,
"logits/chosen": -2.295091152191162,
"logits/rejected": -2.246346950531006,
"logps/chosen": -389.3216552734375,
"logps/rejected": -371.30157470703125,
"loss": 0.6311,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.25310301780700684,
"rewards/margins": 0.18558058142662048,
"rewards/rejected": 0.06752243638038635,
"step": 600
},
{
"epoch": 0.27,
"learning_rate": 8.748172506949274e-06,
"logits/chosen": -2.27669358253479,
"logits/rejected": -2.1988024711608887,
"logps/chosen": -326.1456298828125,
"logps/rejected": -309.17266845703125,
"loss": 0.6849,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.05873417109251022,
"rewards/margins": 0.07899702340364456,
"rewards/rejected": -0.020262856036424637,
"step": 610
},
{
"epoch": 0.28,
"learning_rate": 8.699119980336602e-06,
"logits/chosen": -2.302666187286377,
"logits/rejected": -2.2827186584472656,
"logps/chosen": -364.0043640136719,
"logps/rejected": -372.041748046875,
"loss": 0.693,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.23366883397102356,
"rewards/margins": 0.0409202054142952,
"rewards/rejected": 0.19274859130382538,
"step": 620
},
{
"epoch": 0.28,
"learning_rate": 8.649268022149333e-06,
"logits/chosen": -2.282480001449585,
"logits/rejected": -2.2400062084198,
"logps/chosen": -333.30194091796875,
"logps/rejected": -321.1686096191406,
"loss": 0.6733,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.3089759349822998,
"rewards/margins": 0.10994930565357208,
"rewards/rejected": 0.19902662932872772,
"step": 630
},
{
"epoch": 0.28,
"learning_rate": 8.59862740609301e-06,
"logits/chosen": -2.3325583934783936,
"logits/rejected": -2.396918535232544,
"logps/chosen": -383.0022888183594,
"logps/rejected": -438.1582946777344,
"loss": 0.6617,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.38308969140052795,
"rewards/margins": 0.12253421545028687,
"rewards/rejected": 0.2605554461479187,
"step": 640
},
{
"epoch": 0.29,
"learning_rate": 8.547209076313172e-06,
"logits/chosen": -2.3535332679748535,
"logits/rejected": -2.3711094856262207,
"logps/chosen": -378.6181640625,
"logps/rejected": -433.81005859375,
"loss": 0.6061,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.33381205797195435,
"rewards/margins": 0.23929791152477264,
"rewards/rejected": 0.09451412409543991,
"step": 650
},
{
"epoch": 0.29,
"learning_rate": 8.495024145030174e-06,
"logits/chosen": -2.195960283279419,
"logits/rejected": -2.2019705772399902,
"logps/chosen": -330.01177978515625,
"logps/rejected": -352.898193359375,
"loss": 0.6155,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.02304258942604065,
"rewards/margins": 0.21534284949302673,
"rewards/rejected": -0.23838546872138977,
"step": 660
},
{
"epoch": 0.3,
"learning_rate": 8.442083890137678e-06,
"logits/chosen": -2.2961819171905518,
"logits/rejected": -2.2526700496673584,
"logps/chosen": -343.67987060546875,
"logps/rejected": -348.7483825683594,
"loss": 0.7059,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.05699265003204346,
"rewards/margins": 0.0024743645917624235,
"rewards/rejected": -0.05946701765060425,
"step": 670
},
{
"epoch": 0.3,
"learning_rate": 8.388399752765344e-06,
"logits/chosen": -2.2721645832061768,
"logits/rejected": -2.2435104846954346,
"logps/chosen": -377.99664306640625,
"logps/rejected": -376.784912109375,
"loss": 0.6238,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.22479982674121857,
"rewards/margins": 0.24280264973640442,
"rewards/rejected": -0.4676024317741394,
"step": 680
},
{
"epoch": 0.31,
"learning_rate": 8.333983334806248e-06,
"logits/chosen": -2.2859599590301514,
"logits/rejected": -2.241246461868286,
"logps/chosen": -367.9365234375,
"logps/rejected": -335.52740478515625,
"loss": 0.6717,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.09134645760059357,
"rewards/margins": 0.09670265018939972,
"rewards/rejected": -0.1880491077899933,
"step": 690
},
{
"epoch": 0.31,
"learning_rate": 8.278846396409534e-06,
"logits/chosen": -2.306518077850342,
"logits/rejected": -2.265807867050171,
"logps/chosen": -370.5268249511719,
"logps/rejected": -363.80718994140625,
"loss": 0.6953,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.02050144411623478,
"rewards/margins": 0.0992293506860733,
"rewards/rejected": -0.07872792333364487,
"step": 700
},
{
"epoch": 0.32,
"learning_rate": 8.223000853438904e-06,
"logits/chosen": -2.3641223907470703,
"logits/rejected": -2.272670269012451,
"logps/chosen": -395.78509521484375,
"logps/rejected": -397.1343994140625,
"loss": 0.6263,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -3.156661841785535e-05,
"rewards/margins": 0.2341923713684082,
"rewards/rejected": -0.23422393202781677,
"step": 710
},
{
"epoch": 0.32,
"learning_rate": 8.166458774897413e-06,
"logits/chosen": -2.30328631401062,
"logits/rejected": -2.234039306640625,
"logps/chosen": -379.7477722167969,
"logps/rejected": -355.75677490234375,
"loss": 0.605,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.024682385846972466,
"rewards/margins": 0.26411938667297363,
"rewards/rejected": -0.28880181908607483,
"step": 720
},
{
"epoch": 0.32,
"learning_rate": 8.109232380319194e-06,
"logits/chosen": -2.2999930381774902,
"logits/rejected": -2.2953243255615234,
"logps/chosen": -407.1230163574219,
"logps/rejected": -414.182373046875,
"loss": 0.7166,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": 0.06269857287406921,
"rewards/margins": 0.01421293430030346,
"rewards/rejected": 0.0484856478869915,
"step": 730
},
{
"epoch": 0.33,
"learning_rate": 8.051334037128661e-06,
"logits/chosen": -2.2836692333221436,
"logits/rejected": -2.2380998134613037,
"logps/chosen": -332.956787109375,
"logps/rejected": -330.85601806640625,
"loss": 0.7164,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.043525341898202896,
"rewards/margins": 0.0009438946726731956,
"rewards/rejected": 0.04258145019412041,
"step": 740
},
{
"epoch": 0.33,
"learning_rate": 7.99277625796771e-06,
"logits/chosen": -2.200336217880249,
"logits/rejected": -2.1876537799835205,
"logps/chosen": -325.07611083984375,
"logps/rejected": -318.10784912109375,
"loss": 0.7158,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.05763138085603714,
"rewards/margins": 0.018384983763098717,
"rewards/rejected": -0.0760163813829422,
"step": 750
},
{
"epoch": 0.34,
"learning_rate": 7.933571697991582e-06,
"logits/chosen": -2.3422603607177734,
"logits/rejected": -2.2664551734924316,
"logps/chosen": -401.63275146484375,
"logps/rejected": -351.42767333984375,
"loss": 0.6953,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.008800688199698925,
"rewards/margins": 0.04444243758916855,
"rewards/rejected": -0.05324311926960945,
"step": 760
},
{
"epoch": 0.34,
"learning_rate": 7.873733152133898e-06,
"logits/chosen": -2.2099857330322266,
"logits/rejected": -2.236807346343994,
"logps/chosen": -311.5889892578125,
"logps/rejected": -328.91033935546875,
"loss": 0.6992,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.11291439831256866,
"rewards/margins": 0.023546913638710976,
"rewards/rejected": 0.08936748653650284,
"step": 770
},
{
"epoch": 0.35,
"learning_rate": 7.813273552341496e-06,
"logits/chosen": -2.3059163093566895,
"logits/rejected": -2.280585527420044,
"logps/chosen": -330.9400329589844,
"logps/rejected": -347.64056396484375,
"loss": 0.6812,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.044351525604724884,
"rewards/margins": 0.0922970399260521,
"rewards/rejected": -0.047945525497198105,
"step": 780
},
{
"epoch": 0.35,
"learning_rate": 7.75220596477966e-06,
"logits/chosen": -2.263115644454956,
"logits/rejected": -2.2254600524902344,
"logps/chosen": -325.22198486328125,
"logps/rejected": -316.40875244140625,
"loss": 0.6262,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.08847782015800476,
"rewards/margins": 0.24120266735553741,
"rewards/rejected": -0.15272484719753265,
"step": 790
},
{
"epoch": 0.36,
"learning_rate": 7.690543587008332e-06,
"logits/chosen": -2.2187132835388184,
"logits/rejected": -2.2646350860595703,
"logps/chosen": -401.48687744140625,
"logps/rejected": -387.3714294433594,
"loss": 0.6596,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.21927690505981445,
"rewards/margins": 0.1301509588956833,
"rewards/rejected": 0.08912594616413116,
"step": 800
},
{
"epoch": 0.36,
"learning_rate": 7.628299745129943e-06,
"logits/chosen": -2.2820262908935547,
"logits/rejected": -2.2334964275360107,
"logps/chosen": -403.6439208984375,
"logps/rejected": -374.96270751953125,
"loss": 0.7398,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.02083228901028633,
"rewards/margins": -0.01027420163154602,
"rewards/rejected": -0.010558092966675758,
"step": 810
},
{
"epoch": 0.36,
"learning_rate": 7.565487890909448e-06,
"logits/chosen": -2.322711229324341,
"logits/rejected": -2.225168466567993,
"logps/chosen": -337.26605224609375,
"logps/rejected": -304.8133544921875,
"loss": 0.6559,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.2134527862071991,
"rewards/margins": 0.1341491937637329,
"rewards/rejected": 0.07930360734462738,
"step": 820
},
{
"epoch": 0.37,
"learning_rate": 7.502121598867218e-06,
"logits/chosen": -2.2647910118103027,
"logits/rejected": -2.2931771278381348,
"logps/chosen": -353.45660400390625,
"logps/rejected": -313.0556945800781,
"loss": 0.6721,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.16318438947200775,
"rewards/margins": 0.0658370777964592,
"rewards/rejected": 0.09734731912612915,
"step": 830
},
{
"epoch": 0.37,
"learning_rate": 7.438214563345389e-06,
"logits/chosen": -2.352962017059326,
"logits/rejected": -2.329225778579712,
"logps/chosen": -387.4513244628906,
"logps/rejected": -395.37347412109375,
"loss": 0.6693,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.19145812094211578,
"rewards/margins": 0.13347746431827545,
"rewards/rejected": 0.05798065662384033,
"step": 840
},
{
"epoch": 0.38,
"learning_rate": 7.373780595548334e-06,
"logits/chosen": -2.310203790664673,
"logits/rejected": -2.2119874954223633,
"logps/chosen": -377.05657958984375,
"logps/rejected": -357.82525634765625,
"loss": 0.7045,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.4518454670906067,
"rewards/margins": 0.05518758296966553,
"rewards/rejected": 0.3966578543186188,
"step": 850
},
{
"epoch": 0.38,
"learning_rate": 7.3088336205578565e-06,
"logits/chosen": -2.240410327911377,
"logits/rejected": -2.215846538543701,
"logps/chosen": -350.8703308105469,
"logps/rejected": -368.5628662109375,
"loss": 0.6456,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.1876397579908371,
"rewards/margins": 0.16387517750263214,
"rewards/rejected": 0.0237645972520113,
"step": 860
},
{
"epoch": 0.39,
"learning_rate": 7.243387674323794e-06,
"logits/chosen": -2.2497904300689697,
"logits/rejected": -2.232779026031494,
"logps/chosen": -339.3749084472656,
"logps/rejected": -359.62493896484375,
"loss": 0.6597,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.22228892147541046,
"rewards/margins": 0.12651677429676056,
"rewards/rejected": 0.09577211737632751,
"step": 870
},
{
"epoch": 0.39,
"learning_rate": 7.177456900630645e-06,
"logits/chosen": -2.256024122238159,
"logits/rejected": -2.2079262733459473,
"logps/chosen": -340.0914611816406,
"logps/rejected": -322.68011474609375,
"loss": 0.5837,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.16695842146873474,
"rewards/margins": 0.28337720036506653,
"rewards/rejected": -0.1164187639951706,
"step": 880
},
{
"epoch": 0.4,
"learning_rate": 7.111055548040911e-06,
"logits/chosen": -2.3057608604431152,
"logits/rejected": -2.2699170112609863,
"logps/chosen": -389.82611083984375,
"logps/rejected": -382.68829345703125,
"loss": 0.5632,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": 0.19717621803283691,
"rewards/margins": 0.36688321828842163,
"rewards/rejected": -0.1697070151567459,
"step": 890
},
{
"epoch": 0.4,
"learning_rate": 7.044197966815773e-06,
"logits/chosen": -2.25697922706604,
"logits/rejected": -2.107326030731201,
"logps/chosen": -320.3851623535156,
"logps/rejected": -288.55108642578125,
"loss": 0.6459,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.034488920122385025,
"rewards/margins": 0.16461703181266785,
"rewards/rejected": -0.13012811541557312,
"step": 900
},
{
"epoch": 0.4,
"learning_rate": 6.976898605813798e-06,
"logits/chosen": -2.269026041030884,
"logits/rejected": -2.305229663848877,
"logps/chosen": -335.22540283203125,
"logps/rejected": -391.1421813964844,
"loss": 0.7153,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.16712869703769684,
"rewards/margins": 0.05030643194913864,
"rewards/rejected": 0.1168222427368164,
"step": 910
},
{
"epoch": 0.41,
"learning_rate": 6.90917200936835e-06,
"logits/chosen": -2.202436923980713,
"logits/rejected": -2.1774916648864746,
"logps/chosen": -308.787109375,
"logps/rejected": -331.55987548828125,
"loss": 0.7907,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": -0.020061034709215164,
"rewards/margins": -0.09425730258226395,
"rewards/rejected": 0.07419625669717789,
"step": 920
},
{
"epoch": 0.41,
"learning_rate": 6.841032814144345e-06,
"logits/chosen": -2.2023422718048096,
"logits/rejected": -2.215259552001953,
"logps/chosen": -303.5005187988281,
"logps/rejected": -343.00146484375,
"loss": 0.6898,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": 0.026828575879335403,
"rewards/margins": 0.07769123464822769,
"rewards/rejected": -0.05086265876889229,
"step": 930
},
{
"epoch": 0.42,
"learning_rate": 6.772495745975067e-06,
"logits/chosen": -2.268451452255249,
"logits/rejected": -2.1880459785461426,
"logps/chosen": -352.240966796875,
"logps/rejected": -341.0179748535156,
"loss": 0.6377,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.02395152859389782,
"rewards/margins": 0.17894978821277618,
"rewards/rejected": -0.20290131866931915,
"step": 940
},
{
"epoch": 0.42,
"learning_rate": 6.703575616679709e-06,
"logits/chosen": -2.382638454437256,
"logits/rejected": -2.3348803520202637,
"logps/chosen": -395.88372802734375,
"logps/rejected": -385.67364501953125,
"loss": 0.6581,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.00013340116129256785,
"rewards/margins": 0.17107084393501282,
"rewards/rejected": -0.1709374487400055,
"step": 950
},
{
"epoch": 0.43,
"learning_rate": 6.634287320862334e-06,
"logits/chosen": -2.332146644592285,
"logits/rejected": -2.196887254714966,
"logps/chosen": -367.8846435546875,
"logps/rejected": -342.24224853515625,
"loss": 0.6204,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.21408149600028992,
"rewards/margins": 0.27911919355392456,
"rewards/rejected": -0.06503769755363464,
"step": 960
},
{
"epoch": 0.43,
"learning_rate": 6.564645832692938e-06,
"logits/chosen": -2.2684531211853027,
"logits/rejected": -2.2622039318084717,
"logps/chosen": -356.80859375,
"logps/rejected": -363.5769958496094,
"loss": 0.7088,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.05582839250564575,
"rewards/margins": 0.07232120633125305,
"rewards/rejected": -0.016492802649736404,
"step": 970
},
{
"epoch": 0.44,
"learning_rate": 6.494666202671329e-06,
"logits/chosen": -2.297036647796631,
"logits/rejected": -2.165566921234131,
"logps/chosen": -358.6860046386719,
"logps/rejected": -317.59002685546875,
"loss": 0.5921,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.30202144384384155,
"rewards/margins": 0.3027155101299286,
"rewards/rejected": -0.0006940944003872573,
"step": 980
},
{
"epoch": 0.44,
"learning_rate": 6.424363554374496e-06,
"logits/chosen": -2.3090157508850098,
"logits/rejected": -2.232266902923584,
"logps/chosen": -363.88226318359375,
"logps/rejected": -358.2498779296875,
"loss": 0.6638,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.47291359305381775,
"rewards/margins": 0.14908143877983093,
"rewards/rejected": 0.3238321542739868,
"step": 990
},
{
"epoch": 0.44,
"learning_rate": 6.353753081188194e-06,
"logits/chosen": -2.2434115409851074,
"logits/rejected": -2.301614284515381,
"logps/chosen": -314.8789978027344,
"logps/rejected": -350.7088928222656,
"loss": 0.727,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.19925834238529205,
"rewards/margins": 0.04580863565206528,
"rewards/rejected": 0.15344971418380737,
"step": 1000
},
{
"epoch": 0.45,
"learning_rate": 6.28285004302345e-06,
"logits/chosen": -2.266707420349121,
"logits/rejected": -2.236722469329834,
"logps/chosen": -321.0040283203125,
"logps/rejected": -336.6592102050781,
"loss": 0.6677,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.1193336620926857,
"rewards/margins": 0.17302492260932922,
"rewards/rejected": -0.053691256791353226,
"step": 1010
},
{
"epoch": 0.45,
"learning_rate": 6.2116697630186685e-06,
"logits/chosen": -2.303358554840088,
"logits/rejected": -2.149106740951538,
"logps/chosen": -351.23590087890625,
"logps/rejected": -350.1204833984375,
"loss": 0.6293,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.17574825882911682,
"rewards/margins": 0.1902790516614914,
"rewards/rejected": -0.014530802145600319,
"step": 1020
},
{
"epoch": 0.46,
"learning_rate": 6.140227624228098e-06,
"logits/chosen": -2.375432252883911,
"logits/rejected": -2.297983169555664,
"logps/chosen": -366.21368408203125,
"logps/rejected": -378.6297912597656,
"loss": 0.6357,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.3224946856498718,
"rewards/margins": 0.2146320790052414,
"rewards/rejected": 0.10786261409521103,
"step": 1030
},
{
"epoch": 0.46,
"learning_rate": 6.068539066297331e-06,
"logits/chosen": -2.318620204925537,
"logits/rejected": -2.2646164894104004,
"logps/chosen": -367.49298095703125,
"logps/rejected": -360.1875305175781,
"loss": 0.6089,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.14375829696655273,
"rewards/margins": 0.2504768967628479,
"rewards/rejected": -0.10671859979629517,
"step": 1040
},
{
"epoch": 0.47,
"learning_rate": 5.996619582126586e-06,
"logits/chosen": -2.322288751602173,
"logits/rejected": -2.3236374855041504,
"logps/chosen": -367.33343505859375,
"logps/rejected": -372.8912658691406,
"loss": 0.7435,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": 0.08361749351024628,
"rewards/margins": 0.028830815106630325,
"rewards/rejected": 0.054786670953035355,
"step": 1050
},
{
"epoch": 0.47,
"learning_rate": 5.924484714522473e-06,
"logits/chosen": -2.2468628883361816,
"logits/rejected": -2.2435338497161865,
"logps/chosen": -354.2232666015625,
"logps/rejected": -318.03851318359375,
"loss": 0.6024,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.1713722050189972,
"rewards/margins": 0.26503580808639526,
"rewards/rejected": -0.09366358816623688,
"step": 1060
},
{
"epoch": 0.48,
"learning_rate": 5.8521500528389685e-06,
"logits/chosen": -2.2789225578308105,
"logits/rejected": -2.250373125076294,
"logps/chosen": -337.760986328125,
"logps/rejected": -343.9210510253906,
"loss": 0.6352,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.043280668556690216,
"rewards/margins": 0.275790274143219,
"rewards/rejected": -0.3190709352493286,
"step": 1070
},
{
"epoch": 0.48,
"learning_rate": 5.779631229608352e-06,
"logits/chosen": -2.3031325340270996,
"logits/rejected": -2.2297275066375732,
"logps/chosen": -345.22265625,
"logps/rejected": -361.78680419921875,
"loss": 0.6227,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.18280446529388428,
"rewards/margins": 0.2825770378112793,
"rewards/rejected": -0.09977257996797562,
"step": 1080
},
{
"epoch": 0.48,
"learning_rate": 5.706943917162786e-06,
"logits/chosen": -2.3648791313171387,
"logits/rejected": -2.2548999786376953,
"logps/chosen": -348.91815185546875,
"logps/rejected": -315.13653564453125,
"loss": 0.6339,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.2526555061340332,
"rewards/margins": 0.18896642327308655,
"rewards/rejected": 0.06368909776210785,
"step": 1090
},
{
"epoch": 0.49,
"learning_rate": 5.634103824247312e-06,
"logits/chosen": -2.241288900375366,
"logits/rejected": -2.208639621734619,
"logps/chosen": -335.605224609375,
"logps/rejected": -334.7170715332031,
"loss": 0.632,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.13065436482429504,
"rewards/margins": 0.2133084535598755,
"rewards/rejected": -0.34396281838417053,
"step": 1100
},
{
"epoch": 0.49,
"learning_rate": 5.561126692624963e-06,
"logits/chosen": -2.2892165184020996,
"logits/rejected": -2.253537178039551,
"logps/chosen": -380.8193054199219,
"logps/rejected": -344.45684814453125,
"loss": 0.677,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.10464837402105331,
"rewards/margins": 0.13998612761497498,
"rewards/rejected": -0.035337746143341064,
"step": 1110
},
{
"epoch": 0.5,
"learning_rate": 5.488028293674759e-06,
"logits/chosen": -2.1598775386810303,
"logits/rejected": -2.3442585468292236,
"logps/chosen": -295.97161865234375,
"logps/rejected": -376.0238952636719,
"loss": 0.6603,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.1714015007019043,
"rewards/margins": 0.1644255667924881,
"rewards/rejected": 0.006975936703383923,
"step": 1120
},
{
"epoch": 0.5,
"learning_rate": 5.414824424983282e-06,
"logits/chosen": -2.253049373626709,
"logits/rejected": -2.313413143157959,
"logps/chosen": -350.61126708984375,
"logps/rejected": -394.3390197753906,
"loss": 0.7526,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.3026641309261322,
"rewards/margins": 0.06349755823612213,
"rewards/rejected": -0.3661617040634155,
"step": 1130
},
{
"epoch": 0.51,
"learning_rate": 5.341530906930604e-06,
"logits/chosen": -2.32954740524292,
"logits/rejected": -2.2630321979522705,
"logps/chosen": -389.9427185058594,
"logps/rejected": -338.2027893066406,
"loss": 0.6504,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.0287860669195652,
"rewards/margins": 0.21246078610420227,
"rewards/rejected": -0.18367469310760498,
"step": 1140
},
{
"epoch": 0.51,
"learning_rate": 5.268163579271276e-06,
"logits/chosen": -2.249337673187256,
"logits/rejected": -2.19362473487854,
"logps/chosen": -330.29559326171875,
"logps/rejected": -327.573486328125,
"loss": 0.644,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.0009714558837004006,
"rewards/margins": 0.18328654766082764,
"rewards/rejected": -0.18425801396369934,
"step": 1150
},
{
"epoch": 0.52,
"learning_rate": 5.1947382977111374e-06,
"logits/chosen": -2.2833094596862793,
"logits/rejected": -2.203212261199951,
"logps/chosen": -360.40142822265625,
"logps/rejected": -346.81927490234375,
"loss": 0.5783,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.05404200404882431,
"rewards/margins": 0.3619672656059265,
"rewards/rejected": -0.3079253137111664,
"step": 1160
},
{
"epoch": 0.52,
"learning_rate": 5.128619108610792e-06,
"logits/chosen": -2.310303211212158,
"logits/rejected": -2.28350567817688,
"logps/chosen": -339.02398681640625,
"logps/rejected": -357.2115173339844,
"loss": 0.6813,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.07422615587711334,
"rewards/margins": 0.14461743831634521,
"rewards/rejected": -0.07039125263690948,
"step": 1170
},
{
"epoch": 0.52,
"learning_rate": 5.055127439202268e-06,
"logits/chosen": -2.309981107711792,
"logits/rejected": -2.2555365562438965,
"logps/chosen": -339.52301025390625,
"logps/rejected": -363.42657470703125,
"loss": 0.5414,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": 0.13452677428722382,
"rewards/margins": 0.5444313287734985,
"rewards/rejected": -0.4099045693874359,
"step": 1180
},
{
"epoch": 0.53,
"learning_rate": 4.9816238559829586e-06,
"logits/chosen": -2.371007204055786,
"logits/rejected": -2.2399134635925293,
"logps/chosen": -387.3955383300781,
"logps/rejected": -373.3992919921875,
"loss": 0.6567,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.04877791926264763,
"rewards/margins": 0.20018813014030457,
"rewards/rejected": -0.2489660531282425,
"step": 1190
},
{
"epoch": 0.53,
"learning_rate": 4.908124244105435e-06,
"logits/chosen": -2.1801342964172363,
"logits/rejected": -2.1720447540283203,
"logps/chosen": -307.62103271484375,
"logps/rejected": -318.6053771972656,
"loss": 0.6848,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.10059481859207153,
"rewards/margins": 0.0915575847029686,
"rewards/rejected": -0.19215238094329834,
"step": 1200
},
{
"epoch": 0.54,
"learning_rate": 4.834644487864005e-06,
"logits/chosen": -2.299656391143799,
"logits/rejected": -2.1939291954040527,
"logps/chosen": -364.6031188964844,
"logps/rejected": -340.6778259277344,
"loss": 0.7091,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.1433655321598053,
"rewards/margins": 0.1040463000535965,
"rewards/rejected": 0.039319224655628204,
"step": 1210
},
{
"epoch": 0.54,
"learning_rate": 4.7612004672619e-06,
"logits/chosen": -2.2465157508850098,
"logits/rejected": -2.142528533935547,
"logps/chosen": -314.5583801269531,
"logps/rejected": -273.532470703125,
"loss": 0.7121,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.00577303022146225,
"rewards/margins": 0.04662410169839859,
"rewards/rejected": -0.05239715054631233,
"step": 1220
},
{
"epoch": 0.55,
"learning_rate": 4.6878080545793765e-06,
"logits/chosen": -2.275844097137451,
"logits/rejected": -2.2758145332336426,
"logps/chosen": -288.96905517578125,
"logps/rejected": -304.53265380859375,
"loss": 0.6788,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.18407993018627167,
"rewards/margins": 0.13483914732933044,
"rewards/rejected": 0.049240779131650925,
"step": 1230
},
{
"epoch": 0.55,
"learning_rate": 4.614483110943502e-06,
"logits/chosen": -2.275071620941162,
"logits/rejected": -2.2525486946105957,
"logps/chosen": -338.60357666015625,
"logps/rejected": -337.8529357910156,
"loss": 0.6597,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.18449342250823975,
"rewards/margins": 0.14333459734916687,
"rewards/rejected": 0.04115881025791168,
"step": 1240
},
{
"epoch": 0.56,
"learning_rate": 4.54124148290033e-06,
"logits/chosen": -2.2469890117645264,
"logits/rejected": -2.2963757514953613,
"logps/chosen": -312.69677734375,
"logps/rejected": -368.51220703125,
"loss": 0.7698,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": 0.211637943983078,
"rewards/margins": -0.05843405798077583,
"rewards/rejected": 0.27007198333740234,
"step": 1250
},
{
"epoch": 0.56,
"learning_rate": 4.46809899899026e-06,
"logits/chosen": -2.20833683013916,
"logits/rejected": -2.240799903869629,
"logps/chosen": -326.4002380371094,
"logps/rejected": -338.2776794433594,
"loss": 0.6814,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.15935859084129333,
"rewards/margins": 0.09861962497234344,
"rewards/rejected": 0.060738980770111084,
"step": 1260
},
{
"epoch": 0.56,
"learning_rate": 4.395071466327251e-06,
"logits/chosen": -2.2200393676757812,
"logits/rejected": -2.200827121734619,
"logps/chosen": -341.48321533203125,
"logps/rejected": -320.7751770019531,
"loss": 0.7204,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": 0.03819073364138603,
"rewards/margins": 0.06163903325796127,
"rewards/rejected": -0.023448294028639793,
"step": 1270
},
{
"epoch": 0.57,
"learning_rate": 4.322174667182689e-06,
"logits/chosen": -2.3112101554870605,
"logits/rejected": -2.1941494941711426,
"logps/chosen": -381.0555114746094,
"logps/rejected": -349.847900390625,
"loss": 0.6448,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.2540966272354126,
"rewards/margins": 0.23512430489063263,
"rewards/rejected": 0.018972331658005714,
"step": 1280
},
{
"epoch": 0.57,
"learning_rate": 4.249424355574621e-06,
"logits/chosen": -2.361945629119873,
"logits/rejected": -2.2803444862365723,
"logps/chosen": -397.45330810546875,
"logps/rejected": -377.0959777832031,
"loss": 0.662,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.18188154697418213,
"rewards/margins": 0.205190509557724,
"rewards/rejected": -0.023308951407670975,
"step": 1290
},
{
"epoch": 0.58,
"learning_rate": 4.176836253863087e-06,
"logits/chosen": -2.3127691745758057,
"logits/rejected": -2.185509443283081,
"logps/chosen": -359.801025390625,
"logps/rejected": -326.85382080078125,
"loss": 0.5858,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.27552157640457153,
"rewards/margins": 0.30210158228874207,
"rewards/rejected": -0.02657998725771904,
"step": 1300
},
{
"epoch": 0.58,
"learning_rate": 4.1044260493523005e-06,
"logits/chosen": -2.219707727432251,
"logits/rejected": -2.2081971168518066,
"logps/chosen": -325.4644775390625,
"logps/rejected": -309.3984069824219,
"loss": 0.6114,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.29091349244117737,
"rewards/margins": 0.2537664771080017,
"rewards/rejected": 0.037146955728530884,
"step": 1310
},
{
"epoch": 0.59,
"learning_rate": 4.0322093909003965e-06,
"logits/chosen": -2.3424715995788574,
"logits/rejected": -2.328320026397705,
"logps/chosen": -369.3295593261719,
"logps/rejected": -420.92987060546875,
"loss": 0.5449,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.4485898017883301,
"rewards/margins": 0.42458122968673706,
"rewards/rejected": 0.02400858886539936,
"step": 1320
},
{
"epoch": 0.59,
"learning_rate": 3.960201885537504e-06,
"logits/chosen": -2.341200113296509,
"logits/rejected": -2.253725528717041,
"logps/chosen": -372.42633056640625,
"logps/rejected": -400.0967102050781,
"loss": 0.642,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.38997507095336914,
"rewards/margins": 0.22686178982257843,
"rewards/rejected": 0.16311326622962952,
"step": 1330
},
{
"epoch": 0.6,
"learning_rate": 3.888419095092843e-06,
"logits/chosen": -2.2710628509521484,
"logits/rejected": -2.3019535541534424,
"logps/chosen": -336.04296875,
"logps/rejected": -364.0369873046875,
"loss": 0.6737,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.08392750471830368,
"rewards/margins": 0.1348566859960556,
"rewards/rejected": -0.05092918127775192,
"step": 1340
},
{
"epoch": 0.6,
"learning_rate": 3.816876532831595e-06,
"logits/chosen": -2.1317548751831055,
"logits/rejected": -2.1478095054626465,
"logps/chosen": -297.4999084472656,
"logps/rejected": -314.2597961425781,
"loss": 0.6427,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.07686291635036469,
"rewards/margins": 0.2395528256893158,
"rewards/rejected": -0.16268989443778992,
"step": 1350
},
{
"epoch": 0.6,
"learning_rate": 3.7455896601022677e-06,
"logits/chosen": -2.2463011741638184,
"logits/rejected": -2.130866527557373,
"logps/chosen": -333.47564697265625,
"logps/rejected": -299.1008605957031,
"loss": 0.6625,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.01470687985420227,
"rewards/margins": 0.15500742197036743,
"rewards/rejected": -0.14030054211616516,
"step": 1360
},
{
"epoch": 0.61,
"learning_rate": 3.6745738829952928e-06,
"logits/chosen": -2.3302998542785645,
"logits/rejected": -2.3339765071868896,
"logps/chosen": -378.35498046875,
"logps/rejected": -410.18035888671875,
"loss": 0.6558,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.3636806905269623,
"rewards/margins": 0.19609752297401428,
"rewards/rejected": 0.1675831824541092,
"step": 1370
},
{
"epoch": 0.61,
"learning_rate": 3.6038445490135354e-06,
"logits/chosen": -2.3522942066192627,
"logits/rejected": -2.3492603302001953,
"logps/chosen": -387.61297607421875,
"logps/rejected": -422.75054931640625,
"loss": 0.6802,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.4821054935455322,
"rewards/margins": 0.14728207886219025,
"rewards/rejected": 0.3348234295845032,
"step": 1380
},
{
"epoch": 0.62,
"learning_rate": 3.5334169437555e-06,
"logits/chosen": -2.2042317390441895,
"logits/rejected": -2.272881507873535,
"logps/chosen": -345.3319396972656,
"logps/rejected": -344.2694091796875,
"loss": 0.6958,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.11976834386587143,
"rewards/margins": 0.10411565005779266,
"rewards/rejected": 0.015652697533369064,
"step": 1390
},
{
"epoch": 0.62,
"learning_rate": 3.4633062876118915e-06,
"logits/chosen": -2.310586452484131,
"logits/rejected": -2.2318148612976074,
"logps/chosen": -339.42095947265625,
"logps/rejected": -339.2361755371094,
"loss": 0.6279,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.20746886730194092,
"rewards/margins": 0.2241462916135788,
"rewards/rejected": -0.01667742058634758,
"step": 1400
},
{
"epoch": 0.63,
"learning_rate": 3.3935277324762807e-06,
"logits/chosen": -2.2938754558563232,
"logits/rejected": -2.3304316997528076,
"logps/chosen": -349.04547119140625,
"logps/rejected": -412.75042724609375,
"loss": 0.6163,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.04044444486498833,
"rewards/margins": 0.25089383125305176,
"rewards/rejected": -0.21044941246509552,
"step": 1410
},
{
"epoch": 0.63,
"learning_rate": 3.324096358470559e-06,
"logits/chosen": -2.298367977142334,
"logits/rejected": -2.2703890800476074,
"logps/chosen": -365.79571533203125,
"logps/rejected": -372.6152038574219,
"loss": 0.6579,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.16013844311237335,
"rewards/margins": 0.23061330616474152,
"rewards/rejected": -0.07047487795352936,
"step": 1420
},
{
"epoch": 0.64,
"learning_rate": 3.255027170685922e-06,
"logits/chosen": -2.271730422973633,
"logits/rejected": -2.305053234100342,
"logps/chosen": -379.85321044921875,
"logps/rejected": -405.1103515625,
"loss": 0.717,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.14956679940223694,
"rewards/margins": 0.06124185770750046,
"rewards/rejected": 0.08832494169473648,
"step": 1430
},
{
"epoch": 0.64,
"learning_rate": 3.186335095940058e-06,
"logits/chosen": -2.3461501598358154,
"logits/rejected": -2.1821436882019043,
"logps/chosen": -382.0367736816406,
"logps/rejected": -337.6816101074219,
"loss": 0.6432,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.07416114211082458,
"rewards/margins": 0.22856464982032776,
"rewards/rejected": -0.15440352261066437,
"step": 1440
},
{
"epoch": 0.64,
"learning_rate": 3.1180349795512478e-06,
"logits/chosen": -2.333571434020996,
"logits/rejected": -2.2108778953552246,
"logps/chosen": -364.5013427734375,
"logps/rejected": -348.1319580078125,
"loss": 0.6229,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.042986027896404266,
"rewards/margins": 0.23882392048835754,
"rewards/rejected": -0.19583788514137268,
"step": 1450
},
{
"epoch": 0.65,
"learning_rate": 3.050141582130093e-06,
"logits/chosen": -2.19138765335083,
"logits/rejected": -2.2427496910095215,
"logps/chosen": -337.9814147949219,
"logps/rejected": -330.500244140625,
"loss": 0.7066,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.053582824766635895,
"rewards/margins": 0.07893103361129761,
"rewards/rejected": -0.025348205119371414,
"step": 1460
},
{
"epoch": 0.65,
"learning_rate": 2.982669576389533e-06,
"logits/chosen": -2.296982526779175,
"logits/rejected": -2.2845733165740967,
"logps/chosen": -310.3797912597656,
"logps/rejected": -309.05975341796875,
"loss": 0.6881,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": 0.22653362154960632,
"rewards/margins": 0.06024567037820816,
"rewards/rejected": 0.16628797352313995,
"step": 1470
},
{
"epoch": 0.66,
"learning_rate": 2.9156335439738705e-06,
"logits/chosen": -2.285391092300415,
"logits/rejected": -2.3086414337158203,
"logps/chosen": -373.62628173828125,
"logps/rejected": -386.38092041015625,
"loss": 0.7604,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.023256815969944,
"rewards/margins": -0.014189457520842552,
"rewards/rejected": 0.037446290254592896,
"step": 1480
},
{
"epoch": 0.66,
"learning_rate": 2.84904797230748e-06,
"logits/chosen": -2.2920703887939453,
"logits/rejected": -2.2386538982391357,
"logps/chosen": -336.79888916015625,
"logps/rejected": -377.5654296875,
"loss": 0.5965,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.3767642080783844,
"rewards/margins": 0.3530300557613373,
"rewards/rejected": 0.02373412624001503,
"step": 1490
},
{
"epoch": 0.67,
"learning_rate": 2.782927251463854e-06,
"logits/chosen": -2.2349536418914795,
"logits/rejected": -2.246170997619629,
"logps/chosen": -326.43084716796875,
"logps/rejected": -355.4977111816406,
"loss": 0.6291,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.12856540083885193,
"rewards/margins": 0.2676551938056946,
"rewards/rejected": -0.13908980786800385,
"step": 1500
},
{
"epoch": 0.67,
"learning_rate": 2.717285671055733e-06,
"logits/chosen": -2.2831931114196777,
"logits/rejected": -2.2716732025146484,
"logps/chosen": -339.9261474609375,
"logps/rejected": -372.9583740234375,
"loss": 0.6354,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.0657893493771553,
"rewards/margins": 0.23048046231269836,
"rewards/rejected": -0.29626980423927307,
"step": 1510
},
{
"epoch": 0.68,
"learning_rate": 2.652137417146897e-06,
"logits/chosen": -2.327761173248291,
"logits/rejected": -2.1530885696411133,
"logps/chosen": -351.7103271484375,
"logps/rejected": -321.41046142578125,
"loss": 0.7653,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.15571951866149902,
"rewards/margins": -0.01952260732650757,
"rewards/rejected": -0.13619689643383026,
"step": 1520
},
{
"epoch": 0.68,
"learning_rate": 2.587496569186378e-06,
"logits/chosen": -2.283737897872925,
"logits/rejected": -2.2826638221740723,
"logps/chosen": -369.670654296875,
"logps/rejected": -380.65460205078125,
"loss": 0.6403,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.06505627185106277,
"rewards/margins": 0.1945182979106903,
"rewards/rejected": -0.12946203351020813,
"step": 1530
},
{
"epoch": 0.68,
"learning_rate": 2.5233770969656703e-06,
"logits/chosen": -2.2368502616882324,
"logits/rejected": -2.2161707878112793,
"logps/chosen": -331.0984191894531,
"logps/rejected": -336.10040283203125,
"loss": 0.6759,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.1922999769449234,
"rewards/margins": 0.1443178653717041,
"rewards/rejected": 0.04798208177089691,
"step": 1540
},
{
"epoch": 0.69,
"learning_rate": 2.4597928575996917e-06,
"logits/chosen": -2.2817587852478027,
"logits/rejected": -2.2737059593200684,
"logps/chosen": -369.1025085449219,
"logps/rejected": -396.74005126953125,
"loss": 0.652,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.08208617568016052,
"rewards/margins": 0.25736135244369507,
"rewards/rejected": -0.17527517676353455,
"step": 1550
},
{
"epoch": 0.69,
"learning_rate": 2.3967575925320417e-06,
"logits/chosen": -2.362391471862793,
"logits/rejected": -2.2530202865600586,
"logps/chosen": -362.65301513671875,
"logps/rejected": -345.89776611328125,
"loss": 0.6438,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.2820921540260315,
"rewards/margins": 0.289219468832016,
"rewards/rejected": -0.007127317134290934,
"step": 1560
},
{
"epoch": 0.7,
"learning_rate": 2.334284924565307e-06,
"logits/chosen": -2.2167036533355713,
"logits/rejected": -2.187164068222046,
"logps/chosen": -322.52410888671875,
"logps/rejected": -329.1834716796875,
"loss": 0.6609,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.04885543882846832,
"rewards/margins": 0.21874144673347473,
"rewards/rejected": -0.16988599300384521,
"step": 1570
},
{
"epoch": 0.7,
"learning_rate": 2.2723883549169546e-06,
"logits/chosen": -2.2128214836120605,
"logits/rejected": -2.1517386436462402,
"logps/chosen": -297.3167419433594,
"logps/rejected": -319.46826171875,
"loss": 0.6695,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.1534380465745926,
"rewards/margins": 0.17669746279716492,
"rewards/rejected": -0.02325943298637867,
"step": 1580
},
{
"epoch": 0.71,
"learning_rate": 2.211081260301559e-06,
"logits/chosen": -2.2383880615234375,
"logits/rejected": -2.152236223220825,
"logps/chosen": -321.6248474121094,
"logps/rejected": -295.3414001464844,
"loss": 0.6343,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.2368522584438324,
"rewards/margins": 0.21582520008087158,
"rewards/rejected": 0.021027065813541412,
"step": 1590
},
{
"epoch": 0.71,
"learning_rate": 2.150376890039888e-06,
"logits/chosen": -2.142472743988037,
"logits/rejected": -2.2683558464050293,
"logps/chosen": -304.2695007324219,
"logps/rejected": -381.8064270019531,
"loss": 0.6457,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.05382103472948074,
"rewards/margins": 0.231169655919075,
"rewards/rejected": -0.17734862864017487,
"step": 1600
},
{
"epoch": 0.72,
"learning_rate": 2.090288363195546e-06,
"logits/chosen": -2.301752805709839,
"logits/rejected": -2.245049476623535,
"logps/chosen": -360.19940185546875,
"logps/rejected": -344.0550231933594,
"loss": 0.6983,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.1819063127040863,
"rewards/margins": 0.07504500448703766,
"rewards/rejected": 0.10686129331588745,
"step": 1610
},
{
"epoch": 0.72,
"learning_rate": 2.0308286657397586e-06,
"logits/chosen": -2.1193668842315674,
"logits/rejected": -2.279275417327881,
"logps/chosen": -290.13494873046875,
"logps/rejected": -304.28460693359375,
"loss": 0.6585,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.014680122956633568,
"rewards/margins": 0.12039873749017715,
"rewards/rejected": -0.13507884740829468,
"step": 1620
},
{
"epoch": 0.72,
"learning_rate": 1.972010647744929e-06,
"logits/chosen": -2.2673816680908203,
"logits/rejected": -2.23976731300354,
"logps/chosen": -359.8111267089844,
"logps/rejected": -389.32012939453125,
"loss": 0.6393,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.1408449113368988,
"rewards/margins": 0.20818281173706055,
"rewards/rejected": -0.34902772307395935,
"step": 1630
},
{
"epoch": 0.73,
"learning_rate": 1.9138470206075468e-06,
"logits/chosen": -2.260129690170288,
"logits/rejected": -2.1876485347747803,
"logps/chosen": -349.2674560546875,
"logps/rejected": -373.29351806640625,
"loss": 0.6647,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.08648316562175751,
"rewards/margins": 0.13312320411205292,
"rewards/rejected": -0.21960635483264923,
"step": 1640
},
{
"epoch": 0.73,
"learning_rate": 1.8563503543010847e-06,
"logits/chosen": -2.2733237743377686,
"logits/rejected": -2.239638090133667,
"logps/chosen": -357.602294921875,
"logps/rejected": -358.47900390625,
"loss": 0.6549,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.038054175674915314,
"rewards/margins": 0.20417292416095734,
"rewards/rejected": -0.1661187708377838,
"step": 1650
},
{
"epoch": 0.74,
"learning_rate": 1.7995330746594492e-06,
"logits/chosen": -2.2824442386627197,
"logits/rejected": -2.319239854812622,
"logps/chosen": -335.9391174316406,
"logps/rejected": -376.009765625,
"loss": 0.7277,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.06412219256162643,
"rewards/margins": 0.0529680959880352,
"rewards/rejected": -0.11709029972553253,
"step": 1660
},
{
"epoch": 0.74,
"learning_rate": 1.7434074606915908e-06,
"logits/chosen": -2.2410006523132324,
"logits/rejected": -2.2910315990448,
"logps/chosen": -366.2132263183594,
"logps/rejected": -422.5810546875,
"loss": 0.6248,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.1538994014263153,
"rewards/margins": 0.33952516317367554,
"rewards/rejected": -0.185625821352005,
"step": 1670
},
{
"epoch": 0.75,
"learning_rate": 1.687985641927819e-06,
"logits/chosen": -2.3636865615844727,
"logits/rejected": -2.2147748470306396,
"logps/chosen": -360.6214294433594,
"logps/rejected": -323.7347412109375,
"loss": 0.622,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.18724626302719116,
"rewards/margins": 0.2453027069568634,
"rewards/rejected": -0.05805645138025284,
"step": 1680
},
{
"epoch": 0.75,
"learning_rate": 1.6332795957984688e-06,
"logits/chosen": -2.2480177879333496,
"logits/rejected": -2.2115044593811035,
"logps/chosen": -352.7060852050781,
"logps/rejected": -360.0293884277344,
"loss": 0.6535,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.05413411930203438,
"rewards/margins": 0.20770862698554993,
"rewards/rejected": -0.2618427276611328,
"step": 1690
},
{
"epoch": 0.76,
"learning_rate": 1.5793011450453854e-06,
"logits/chosen": -2.230503797531128,
"logits/rejected": -2.2694289684295654,
"logps/chosen": -292.1943664550781,
"logps/rejected": -338.814453125,
"loss": 0.7215,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.112449049949646,
"rewards/margins": 0.08113683760166168,
"rewards/rejected": -0.19358590245246887,
"step": 1700
},
{
"epoch": 0.76,
"learning_rate": 1.5260619551668842e-06,
"logits/chosen": -2.186260461807251,
"logits/rejected": -2.3265433311462402,
"logps/chosen": -315.28778076171875,
"logps/rejected": -363.6637268066406,
"loss": 0.7206,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.10357306897640228,
"rewards/margins": 0.0456685833632946,
"rewards/rejected": -0.14924165606498718,
"step": 1710
},
{
"epoch": 0.76,
"learning_rate": 1.4735735318966521e-06,
"logits/chosen": -2.338139772415161,
"logits/rejected": -2.1299831867218018,
"logps/chosen": -330.22796630859375,
"logps/rejected": -297.2973327636719,
"loss": 0.644,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.05218541622161865,
"rewards/margins": 0.25528091192245483,
"rewards/rejected": -0.20309551060199738,
"step": 1720
},
{
"epoch": 0.77,
"learning_rate": 1.4218472187172212e-06,
"logits/chosen": -2.1943013668060303,
"logits/rejected": -2.255190372467041,
"logps/chosen": -294.1163024902344,
"logps/rejected": -333.59228515625,
"loss": 0.6324,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.09838038682937622,
"rewards/margins": 0.21084125339984894,
"rewards/rejected": -0.30922168493270874,
"step": 1730
},
{
"epoch": 0.77,
"learning_rate": 1.3708941944084636e-06,
"logits/chosen": -2.3456058502197266,
"logits/rejected": -2.361806631088257,
"logps/chosen": -406.53094482421875,
"logps/rejected": -447.1785583496094,
"loss": 0.6449,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.2680016756057739,
"rewards/margins": 0.210541769862175,
"rewards/rejected": 0.057459909468889236,
"step": 1740
},
{
"epoch": 0.78,
"learning_rate": 1.3207254706317174e-06,
"logits/chosen": -2.276390790939331,
"logits/rejected": -2.296130657196045,
"logps/chosen": -342.73260498046875,
"logps/rejected": -360.7101135253906,
"loss": 0.6452,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.1815967857837677,
"rewards/margins": 0.2110695093870163,
"rewards/rejected": -0.029472723603248596,
"step": 1750
},
{
"epoch": 0.78,
"learning_rate": 1.2713518895499932e-06,
"logits/chosen": -2.2506117820739746,
"logits/rejected": -2.1966030597686768,
"logps/chosen": -328.85302734375,
"logps/rejected": -346.61041259765625,
"loss": 0.6133,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.16987404227256775,
"rewards/margins": 0.2849760055541992,
"rewards/rejected": -0.45485004782676697,
"step": 1760
},
{
"epoch": 0.79,
"learning_rate": 1.2227841214848519e-06,
"logits/chosen": -2.3572230339050293,
"logits/rejected": -2.241999387741089,
"logps/chosen": -386.4831237792969,
"logps/rejected": -324.4116516113281,
"loss": 0.623,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.19086837768554688,
"rewards/margins": 0.23254597187042236,
"rewards/rejected": -0.04167759045958519,
"step": 1770
},
{
"epoch": 0.79,
"learning_rate": 1.175032662610383e-06,
"logits/chosen": -2.384322166442871,
"logits/rejected": -2.372183322906494,
"logps/chosen": -379.8897705078125,
"logps/rejected": -384.9872131347656,
"loss": 0.6472,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.13912078738212585,
"rewards/margins": 0.1807091236114502,
"rewards/rejected": -0.041588325053453445,
"step": 1780
},
{
"epoch": 0.8,
"learning_rate": 1.1281078326848438e-06,
"logits/chosen": -2.2786386013031006,
"logits/rejected": -2.2903854846954346,
"logps/chosen": -342.7461242675781,
"logps/rejected": -383.90411376953125,
"loss": 0.5802,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 0.15179908275604248,
"rewards/margins": 0.4043292999267578,
"rewards/rejected": -0.25253021717071533,
"step": 1790
},
{
"epoch": 0.8,
"learning_rate": 1.0820197728204085e-06,
"logits/chosen": -2.235412120819092,
"logits/rejected": -2.1771421432495117,
"logps/chosen": -336.4576721191406,
"logps/rejected": -346.7325744628906,
"loss": 0.6903,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.0932273119688034,
"rewards/margins": 0.12401854991912842,
"rewards/rejected": -0.03079124726355076,
"step": 1800
},
{
"epoch": 0.8,
"learning_rate": 1.0367784432915407e-06,
"logits/chosen": -2.2605013847351074,
"logits/rejected": -2.2363693714141846,
"logps/chosen": -337.21728515625,
"logps/rejected": -330.5986633300781,
"loss": 0.6855,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.2193947285413742,
"rewards/margins": 0.11095724254846573,
"rewards/rejected": 0.10843745619058609,
"step": 1810
},
{
"epoch": 0.81,
"learning_rate": 9.923936213824297e-07,
"logits/chosen": -2.387052297592163,
"logits/rejected": -2.2252724170684814,
"logps/chosen": -368.71881103515625,
"logps/rejected": -402.789794921875,
"loss": 0.6306,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.32878467440605164,
"rewards/margins": 0.26738250255584717,
"rewards/rejected": 0.06140219047665596,
"step": 1820
},
{
"epoch": 0.81,
"learning_rate": 9.488748992739877e-07,
"logits/chosen": -2.2936031818389893,
"logits/rejected": -2.3079075813293457,
"logps/chosen": -340.22430419921875,
"logps/rejected": -390.5373840332031,
"loss": 0.653,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.11492462456226349,
"rewards/margins": 0.22790834307670593,
"rewards/rejected": -0.11298371851444244,
"step": 1830
},
{
"epoch": 0.82,
"learning_rate": 9.062316819708427e-07,
"logits/chosen": -2.26062273979187,
"logits/rejected": -2.2534077167510986,
"logps/chosen": -323.99554443359375,
"logps/rejected": -343.2364501953125,
"loss": 0.6792,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.07681259512901306,
"rewards/margins": 0.13750119507312775,
"rewards/rejected": -0.2143137902021408,
"step": 1840
},
{
"epoch": 0.82,
"learning_rate": 8.644731852687904e-07,
"logits/chosen": -2.1960670948028564,
"logits/rejected": -2.252990245819092,
"logps/chosen": -338.3189392089844,
"logps/rejected": -409.77947998046875,
"loss": 0.6429,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.027236556634306908,
"rewards/margins": 0.200010746717453,
"rewards/rejected": -0.22724728286266327,
"step": 1850
},
{
"epoch": 0.83,
"learning_rate": 8.236084337631256e-07,
"logits/chosen": -2.269155502319336,
"logits/rejected": -2.2297897338867188,
"logps/chosen": -350.8360900878906,
"logps/rejected": -349.05743408203125,
"loss": 0.618,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.03905141353607178,
"rewards/margins": 0.2928921580314636,
"rewards/rejected": -0.25384077429771423,
"step": 1860
},
{
"epoch": 0.83,
"learning_rate": 7.836462588983029e-07,
"logits/chosen": -2.2999212741851807,
"logits/rejected": -2.2945046424865723,
"logps/chosen": -331.8743896484375,
"logps/rejected": -358.82427978515625,
"loss": 0.6854,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.007713166065514088,
"rewards/margins": 0.15347187221050262,
"rewards/rejected": -0.1457587033510208,
"step": 1870
},
{
"epoch": 0.84,
"learning_rate": 7.445952970593401e-07,
"logits/chosen": -2.292762517929077,
"logits/rejected": -2.2326605319976807,
"logps/chosen": -368.51123046875,
"logps/rejected": -343.02362060546875,
"loss": 0.7055,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.138399139046669,
"rewards/margins": 0.1448075920343399,
"rewards/rejected": -0.2832067608833313,
"step": 1880
},
{
"epoch": 0.84,
"learning_rate": 7.064639877053753e-07,
"logits/chosen": -2.2553770542144775,
"logits/rejected": -2.1739296913146973,
"logps/chosen": -347.02081298828125,
"logps/rejected": -329.3694152832031,
"loss": 0.7244,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.0695744976401329,
"rewards/margins": 0.0453006774187088,
"rewards/rejected": -0.1148751750588417,
"step": 1890
},
{
"epoch": 0.84,
"learning_rate": 6.692605715457734e-07,
"logits/chosen": -2.296112537384033,
"logits/rejected": -2.2227189540863037,
"logps/chosen": -335.0692443847656,
"logps/rejected": -342.08563232421875,
"loss": 0.6636,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.09152424335479736,
"rewards/margins": 0.14057905972003937,
"rewards/rejected": -0.049054812639951706,
"step": 1900
},
{
"epoch": 0.85,
"learning_rate": 6.329930887592067e-07,
"logits/chosen": -2.277210235595703,
"logits/rejected": -2.263932704925537,
"logps/chosen": -373.13623046875,
"logps/rejected": -398.31329345703125,
"loss": 0.6006,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.10486678779125214,
"rewards/margins": 0.2877123951911926,
"rewards/rejected": -0.39257916808128357,
"step": 1910
},
{
"epoch": 0.85,
"learning_rate": 5.976693772560487e-07,
"logits/chosen": -2.3237483501434326,
"logits/rejected": -2.283463716506958,
"logps/chosen": -397.46905517578125,
"logps/rejected": -399.92266845703125,
"loss": 0.6716,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.03626967594027519,
"rewards/margins": 0.20212960243225098,
"rewards/rejected": -0.23839926719665527,
"step": 1920
},
{
"epoch": 0.86,
"learning_rate": 5.632970709844976e-07,
"logits/chosen": -2.2484121322631836,
"logits/rejected": -2.2332425117492676,
"logps/chosen": -352.50372314453125,
"logps/rejected": -382.41290283203125,
"loss": 0.7484,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -0.20201630890369415,
"rewards/margins": 0.06586066633462906,
"rewards/rejected": -0.2678769528865814,
"step": 1930
},
{
"epoch": 0.86,
"learning_rate": 5.298835982807704e-07,
"logits/chosen": -2.324031352996826,
"logits/rejected": -2.2343201637268066,
"logps/chosen": -387.73211669921875,
"logps/rejected": -355.9978942871094,
"loss": 0.7038,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.11137855052947998,
"rewards/margins": 0.1095174103975296,
"rewards/rejected": 0.0018611550331115723,
"step": 1940
},
{
"epoch": 0.87,
"learning_rate": 4.974361802637395e-07,
"logits/chosen": -2.3116257190704346,
"logits/rejected": -2.272489070892334,
"logps/chosen": -362.0296325683594,
"logps/rejected": -368.2407531738281,
"loss": 0.6696,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.055863846093416214,
"rewards/margins": 0.1546260416507721,
"rewards/rejected": -0.2104898989200592,
"step": 1950
},
{
"epoch": 0.87,
"learning_rate": 4.6596182927434395e-07,
"logits/chosen": -2.2587242126464844,
"logits/rejected": -2.2343411445617676,
"logps/chosen": -326.1157531738281,
"logps/rejected": -347.9691162109375,
"loss": 0.652,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.12903109192848206,
"rewards/margins": 0.13124972581863403,
"rewards/rejected": -0.2602807879447937,
"step": 1960
},
{
"epoch": 0.88,
"learning_rate": 4.354673473601251e-07,
"logits/chosen": -2.249849557876587,
"logits/rejected": -2.1856112480163574,
"logps/chosen": -350.0913391113281,
"logps/rejected": -357.5381774902344,
"loss": 0.6364,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.04396069794893265,
"rewards/margins": 0.2554934620857239,
"rewards/rejected": -0.21153274178504944,
"step": 1970
},
{
"epoch": 0.88,
"learning_rate": 4.059593248052107e-07,
"logits/chosen": -2.270174741744995,
"logits/rejected": -2.214571475982666,
"logps/chosen": -363.952880859375,
"logps/rejected": -363.4573974609375,
"loss": 0.5834,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.0837903842329979,
"rewards/margins": 0.3650640845298767,
"rewards/rejected": -0.2812737226486206,
"step": 1980
},
{
"epoch": 0.88,
"learning_rate": 3.774441387060634e-07,
"logits/chosen": -2.3621678352355957,
"logits/rejected": -2.304919719696045,
"logps/chosen": -411.8487854003906,
"logps/rejected": -410.06671142578125,
"loss": 0.6429,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.14407065510749817,
"rewards/margins": 0.2489246129989624,
"rewards/rejected": -0.10485398769378662,
"step": 1990
},
{
"epoch": 0.89,
"learning_rate": 3.4992795159329516e-07,
"logits/chosen": -2.3314731121063232,
"logits/rejected": -2.2980003356933594,
"logps/chosen": -380.12017822265625,
"logps/rejected": -410.78887939453125,
"loss": 0.6918,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.10951967537403107,
"rewards/margins": 0.07033322751522064,
"rewards/rejected": 0.03918645530939102,
"step": 2000
}
],
"logging_steps": 10,
"max_steps": 2250,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}