statking's picture
Model save
d21fe82 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.999607072691552,
"eval_steps": 100,
"global_step": 1908,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.005239030779305829,
"grad_norm": 40.089039871992924,
"learning_rate": 3.6649214659685864e-07,
"log_odds_chosen": 0.40882301330566406,
"log_odds_ratio": -0.7743430137634277,
"logits/chosen": -1.251479148864746,
"logits/rejected": -1.348970651626587,
"logps/chosen": -1.8760229349136353,
"logps/rejected": -2.220275402069092,
"loss": 9.7709,
"nll_loss": 9.695734977722168,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.018760228529572487,
"rewards/margins": 0.00344252772629261,
"rewards/rejected": -0.022202756255865097,
"step": 10
},
{
"epoch": 0.010478061558611657,
"grad_norm": 41.48632386237846,
"learning_rate": 7.329842931937173e-07,
"log_odds_chosen": 0.2817743718624115,
"log_odds_ratio": -0.8707199096679688,
"logits/chosen": -1.3488132953643799,
"logits/rejected": -1.333653211593628,
"logps/chosen": -2.0333549976348877,
"logps/rejected": -2.2923362255096436,
"loss": 9.7365,
"nll_loss": 10.180292129516602,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.02033355087041855,
"rewards/margins": 0.00258981017395854,
"rewards/rejected": -0.022923361510038376,
"step": 20
},
{
"epoch": 0.015717092337917484,
"grad_norm": 38.86915439525821,
"learning_rate": 1.0994764397905759e-06,
"log_odds_chosen": 0.10666105896234512,
"log_odds_ratio": -0.879582405090332,
"logits/chosen": -1.411980390548706,
"logits/rejected": -1.4946210384368896,
"logps/chosen": -1.9859802722930908,
"logps/rejected": -2.066720724105835,
"loss": 9.2475,
"nll_loss": 9.394262313842773,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.019859803840517998,
"rewards/margins": 0.0008074056240729988,
"rewards/rejected": -0.02066720835864544,
"step": 30
},
{
"epoch": 0.020956123117223315,
"grad_norm": 64.36060245058518,
"learning_rate": 1.4659685863874346e-06,
"log_odds_chosen": 0.19943363964557648,
"log_odds_ratio": -0.9107095003128052,
"logits/chosen": -1.2995336055755615,
"logits/rejected": -1.3503811359405518,
"logps/chosen": -1.8726139068603516,
"logps/rejected": -2.0438897609710693,
"loss": 7.7077,
"nll_loss": 7.663236141204834,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.018726136535406113,
"rewards/margins": 0.0017127618193626404,
"rewards/rejected": -0.020438898354768753,
"step": 40
},
{
"epoch": 0.02619515389652914,
"grad_norm": 41.110372744682586,
"learning_rate": 1.8324607329842933e-06,
"log_odds_chosen": 0.06553123891353607,
"log_odds_ratio": -0.9510926008224487,
"logits/chosen": -0.8678807020187378,
"logits/rejected": -0.8854366540908813,
"logps/chosen": -2.0910544395446777,
"logps/rejected": -2.1262149810791016,
"loss": 4.4672,
"nll_loss": 4.56087589263916,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.020910542458295822,
"rewards/margins": 0.0003516075958032161,
"rewards/rejected": -0.02126215025782585,
"step": 50
},
{
"epoch": 0.03143418467583497,
"grad_norm": 16.651527410211266,
"learning_rate": 2.1989528795811517e-06,
"log_odds_chosen": 0.2586204707622528,
"log_odds_ratio": -0.7958043813705444,
"logits/chosen": -0.2105911523103714,
"logits/rejected": -0.19482675194740295,
"logps/chosen": -1.780350923538208,
"logps/rejected": -2.0165352821350098,
"loss": 2.0977,
"nll_loss": 2.121706008911133,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.017803508788347244,
"rewards/margins": 0.0023618421982973814,
"rewards/rejected": -0.020165350288152695,
"step": 60
},
{
"epoch": 0.0366732154551408,
"grad_norm": 0.6950743796393763,
"learning_rate": 2.5654450261780104e-06,
"log_odds_chosen": 0.40901675820350647,
"log_odds_ratio": -0.8085654973983765,
"logits/chosen": -0.22529537975788116,
"logits/rejected": -0.187855526804924,
"logps/chosen": -1.7872791290283203,
"logps/rejected": -2.1754517555236816,
"loss": 1.1033,
"nll_loss": 1.104498028755188,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.01787278801202774,
"rewards/margins": 0.003881725948303938,
"rewards/rejected": -0.021754514425992966,
"step": 70
},
{
"epoch": 0.04191224623444663,
"grad_norm": 0.5424625090833263,
"learning_rate": 2.931937172774869e-06,
"log_odds_chosen": 0.2518347501754761,
"log_odds_ratio": -0.8060741424560547,
"logits/chosen": -0.38638219237327576,
"logits/rejected": -0.37130922079086304,
"logps/chosen": -1.6561933755874634,
"logps/rejected": -1.8596878051757812,
"loss": 0.9622,
"nll_loss": 0.9561271667480469,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.016561932861804962,
"rewards/margins": 0.00203494424931705,
"rewards/rejected": -0.01859687827527523,
"step": 80
},
{
"epoch": 0.047151277013752456,
"grad_norm": 0.49692784970495385,
"learning_rate": 3.298429319371728e-06,
"log_odds_chosen": 0.25015050172805786,
"log_odds_ratio": -0.7168025970458984,
"logits/chosen": -0.46545910835266113,
"logits/rejected": -0.47551122307777405,
"logps/chosen": -1.434188723564148,
"logps/rejected": -1.617444634437561,
"loss": 0.9076,
"nll_loss": 0.9012999534606934,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.014341888949275017,
"rewards/margins": 0.0018325571436434984,
"rewards/rejected": -0.016174444928765297,
"step": 90
},
{
"epoch": 0.05239030779305828,
"grad_norm": 0.34409399893883913,
"learning_rate": 3.6649214659685865e-06,
"log_odds_chosen": 0.33899450302124023,
"log_odds_ratio": -0.6892405152320862,
"logits/chosen": -0.42995914816856384,
"logits/rejected": -0.46385449171066284,
"logps/chosen": -1.3845367431640625,
"logps/rejected": -1.6512413024902344,
"loss": 0.8633,
"nll_loss": 0.8542642593383789,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.013845366425812244,
"rewards/margins": 0.00266704591922462,
"rewards/rejected": -0.016512412577867508,
"step": 100
},
{
"epoch": 0.05239030779305828,
"eval_log_odds_chosen": 0.2912975251674652,
"eval_log_odds_ratio": -0.6964674592018127,
"eval_logits/chosen": -0.4465982913970947,
"eval_logits/rejected": -0.4503002464771271,
"eval_logps/chosen": -1.3476332426071167,
"eval_logps/rejected": -1.5779348611831665,
"eval_loss": 0.7180835008621216,
"eval_nll_loss": 0.7125721573829651,
"eval_rewards/accuracies": 0.6060000061988831,
"eval_rewards/chosen": -0.01347633171826601,
"eval_rewards/margins": 0.0023030168376863003,
"eval_rewards/rejected": -0.015779349952936172,
"eval_runtime": 269.9338,
"eval_samples_per_second": 7.406,
"eval_steps_per_second": 0.463,
"step": 100
},
{
"epoch": 0.05762933857236411,
"grad_norm": 0.2803398283753954,
"learning_rate": 4.031413612565445e-06,
"log_odds_chosen": 0.18521742522716522,
"log_odds_ratio": -0.7459183931350708,
"logits/chosen": -0.41053661704063416,
"logits/rejected": -0.4239114820957184,
"logps/chosen": -1.3022041320800781,
"logps/rejected": -1.4337875843048096,
"loss": 0.8606,
"nll_loss": 0.8364461064338684,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.013022040948271751,
"rewards/margins": 0.0013158348156139255,
"rewards/rejected": -0.014337876811623573,
"step": 110
},
{
"epoch": 0.06286836935166994,
"grad_norm": 0.27279868153391584,
"learning_rate": 4.3979057591623035e-06,
"log_odds_chosen": 0.3604566752910614,
"log_odds_ratio": -0.6311579942703247,
"logits/chosen": -0.42637091875076294,
"logits/rejected": -0.42720526456832886,
"logps/chosen": -1.2974519729614258,
"logps/rejected": -1.5656707286834717,
"loss": 0.8425,
"nll_loss": 0.8501418232917786,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.012974520213901997,
"rewards/margins": 0.0026821885257959366,
"rewards/rejected": -0.01565670594573021,
"step": 120
},
{
"epoch": 0.06810740013097577,
"grad_norm": 0.30807142716511443,
"learning_rate": 4.764397905759163e-06,
"log_odds_chosen": 0.35572677850723267,
"log_odds_ratio": -0.6256042718887329,
"logits/chosen": -0.4055427610874176,
"logits/rejected": -0.41731196641921997,
"logps/chosen": -1.1877421140670776,
"logps/rejected": -1.4562331438064575,
"loss": 0.9061,
"nll_loss": 0.9138392210006714,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -0.011877421289682388,
"rewards/margins": 0.0026849093846976757,
"rewards/rejected": -0.014562331140041351,
"step": 130
},
{
"epoch": 0.0733464309102816,
"grad_norm": 0.21901353529181275,
"learning_rate": 5.130890052356021e-06,
"log_odds_chosen": 0.20408260822296143,
"log_odds_ratio": -0.7225680351257324,
"logits/chosen": -0.3313930630683899,
"logits/rejected": -0.3637652099132538,
"logps/chosen": -1.1521110534667969,
"logps/rejected": -1.3134852647781372,
"loss": 0.8555,
"nll_loss": 0.8377349972724915,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.011521109379827976,
"rewards/margins": 0.0016137424390763044,
"rewards/rejected": -0.013134850189089775,
"step": 140
},
{
"epoch": 0.07858546168958742,
"grad_norm": 0.21331922770164838,
"learning_rate": 5.49738219895288e-06,
"log_odds_chosen": 0.28065016865730286,
"log_odds_ratio": -0.6753939390182495,
"logits/chosen": -0.3591609597206116,
"logits/rejected": -0.3576233983039856,
"logps/chosen": -1.0449597835540771,
"logps/rejected": -1.2448740005493164,
"loss": 0.7812,
"nll_loss": 0.7689298987388611,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.010449598543345928,
"rewards/margins": 0.001999142114073038,
"rewards/rejected": -0.012448740191757679,
"step": 150
},
{
"epoch": 0.08382449246889326,
"grad_norm": 0.23877612948281643,
"learning_rate": 5.863874345549738e-06,
"log_odds_chosen": 0.17812715470790863,
"log_odds_ratio": -0.7157899141311646,
"logits/chosen": -0.34616202116012573,
"logits/rejected": -0.32302820682525635,
"logps/chosen": -1.1274374723434448,
"logps/rejected": -1.2492964267730713,
"loss": 0.8319,
"nll_loss": 0.8008116483688354,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.011274375021457672,
"rewards/margins": 0.0012185879750177264,
"rewards/rejected": -0.012492964044213295,
"step": 160
},
{
"epoch": 0.08906352324819908,
"grad_norm": 0.16490503339477303,
"learning_rate": 6.230366492146597e-06,
"log_odds_chosen": 0.35694795846939087,
"log_odds_ratio": -0.6470257043838501,
"logits/chosen": -0.3833572566509247,
"logits/rejected": -0.3738633394241333,
"logps/chosen": -1.098602056503296,
"logps/rejected": -1.3455427885055542,
"loss": 0.8428,
"nll_loss": 0.777732253074646,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.010986020788550377,
"rewards/margins": 0.0024694064632058144,
"rewards/rejected": -0.013455428183078766,
"step": 170
},
{
"epoch": 0.09430255402750491,
"grad_norm": 0.19976560242149322,
"learning_rate": 6.596858638743456e-06,
"log_odds_chosen": 0.21126070618629456,
"log_odds_ratio": -0.6887334585189819,
"logits/chosen": -0.36508241295814514,
"logits/rejected": -0.35655850172042847,
"logps/chosen": -1.1599535942077637,
"logps/rejected": -1.3315963745117188,
"loss": 0.8363,
"nll_loss": 0.8872919082641602,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.01159953698515892,
"rewards/margins": 0.001716427505016327,
"rewards/rejected": -0.013315962627530098,
"step": 180
},
{
"epoch": 0.09954158480681075,
"grad_norm": 0.28624827514651513,
"learning_rate": 6.963350785340315e-06,
"log_odds_chosen": 0.23439832031726837,
"log_odds_ratio": -0.6879830956459045,
"logits/chosen": -0.3974788784980774,
"logits/rejected": -0.3820292353630066,
"logps/chosen": -1.1166422367095947,
"logps/rejected": -1.3017523288726807,
"loss": 0.817,
"nll_loss": 0.7972527146339417,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.011166421696543694,
"rewards/margins": 0.0018511017551645637,
"rewards/rejected": -0.013017524965107441,
"step": 190
},
{
"epoch": 0.10478061558611657,
"grad_norm": 0.2323297919875336,
"learning_rate": 6.999525460456016e-06,
"log_odds_chosen": 0.20863866806030273,
"log_odds_ratio": -0.6829880475997925,
"logits/chosen": -0.3505745530128479,
"logits/rejected": -0.37228649854660034,
"logps/chosen": -1.0812726020812988,
"logps/rejected": -1.2249776124954224,
"loss": 0.7831,
"nll_loss": 0.7905929684638977,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.010812724940478802,
"rewards/margins": 0.0014370509888976812,
"rewards/rejected": -0.012249777093529701,
"step": 200
},
{
"epoch": 0.10478061558611657,
"eval_log_odds_chosen": 0.2690645754337311,
"eval_log_odds_ratio": -0.6627397537231445,
"eval_logits/chosen": -0.36186957359313965,
"eval_logits/rejected": -0.3621442914009094,
"eval_logps/chosen": -1.0519521236419678,
"eval_logps/rejected": -1.2498859167099,
"eval_loss": 0.6487022042274475,
"eval_nll_loss": 0.6431540846824646,
"eval_rewards/accuracies": 0.6140000224113464,
"eval_rewards/chosen": -0.010519521310925484,
"eval_rewards/margins": 0.0019793356768786907,
"eval_rewards/rejected": -0.012498857453465462,
"eval_runtime": 268.3833,
"eval_samples_per_second": 7.448,
"eval_steps_per_second": 0.466,
"step": 200
},
{
"epoch": 0.1100196463654224,
"grad_norm": 0.22085594878634468,
"learning_rate": 6.997885242050564e-06,
"log_odds_chosen": 0.14821310341358185,
"log_odds_ratio": -0.7308140993118286,
"logits/chosen": -0.35621774196624756,
"logits/rejected": -0.34287530183792114,
"logps/chosen": -1.0816371440887451,
"logps/rejected": -1.1855530738830566,
"loss": 0.7606,
"nll_loss": 0.7189425826072693,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.010816371068358421,
"rewards/margins": 0.0010391605319455266,
"rewards/rejected": -0.0118555324152112,
"step": 210
},
{
"epoch": 0.11525867714472822,
"grad_norm": 0.20718906330906176,
"learning_rate": 6.9950740352365535e-06,
"log_odds_chosen": 0.13336250185966492,
"log_odds_ratio": -0.7418603897094727,
"logits/chosen": -0.31412142515182495,
"logits/rejected": -0.29769355058670044,
"logps/chosen": -1.0191659927368164,
"logps/rejected": -1.1312214136123657,
"loss": 0.7423,
"nll_loss": 0.7373214364051819,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.010191659443080425,
"rewards/margins": 0.0011205542832612991,
"rewards/rejected": -0.011312213726341724,
"step": 220
},
{
"epoch": 0.12049770792403405,
"grad_norm": 0.21517650420593173,
"learning_rate": 6.991092781122789e-06,
"log_odds_chosen": 0.21017885208129883,
"log_odds_ratio": -0.6870851516723633,
"logits/chosen": -0.34512776136398315,
"logits/rejected": -0.32372525334358215,
"logps/chosen": -1.1044729948043823,
"logps/rejected": -1.2339098453521729,
"loss": 0.7789,
"nll_loss": 0.7626355886459351,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.011044728569686413,
"rewards/margins": 0.0012943701585754752,
"rewards/rejected": -0.012339098379015923,
"step": 230
},
{
"epoch": 0.12573673870333987,
"grad_norm": 0.196338942404361,
"learning_rate": 6.985942812515264e-06,
"log_odds_chosen": 0.22509415447711945,
"log_odds_ratio": -0.6694994568824768,
"logits/chosen": -0.33054572343826294,
"logits/rejected": -0.34040743112564087,
"logps/chosen": -1.0536653995513916,
"logps/rejected": -1.2156822681427002,
"loss": 0.7989,
"nll_loss": 0.8106544613838196,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.010536652989685535,
"rewards/margins": 0.0016201699618250132,
"rewards/rejected": -0.01215682178735733,
"step": 240
},
{
"epoch": 0.13097576948264572,
"grad_norm": 0.19854612692373078,
"learning_rate": 6.9796258534709805e-06,
"log_odds_chosen": 0.26194605231285095,
"log_odds_ratio": -0.677527904510498,
"logits/chosen": -0.3183462917804718,
"logits/rejected": -0.3198302686214447,
"logps/chosen": -1.0683705806732178,
"logps/rejected": -1.2614667415618896,
"loss": 0.7328,
"nll_loss": 0.7110816240310669,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.010683706030249596,
"rewards/margins": 0.0019309620838612318,
"rewards/rejected": -0.012614667415618896,
"step": 250
},
{
"epoch": 0.13621480026195154,
"grad_norm": 0.28276448420310984,
"learning_rate": 6.972144018720786e-06,
"log_odds_chosen": 0.08540093898773193,
"log_odds_ratio": -0.762796938419342,
"logits/chosen": -0.33047622442245483,
"logits/rejected": -0.30570656061172485,
"logps/chosen": -1.079242467880249,
"logps/rejected": -1.140084981918335,
"loss": 0.763,
"nll_loss": 0.7699041366577148,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.010792424902319908,
"rewards/margins": 0.0006084250053390861,
"rewards/rejected": -0.011400849558413029,
"step": 260
},
{
"epoch": 0.14145383104125736,
"grad_norm": 0.17805276020084845,
"learning_rate": 6.96349981296142e-06,
"log_odds_chosen": 0.30828922986984253,
"log_odds_ratio": -0.6861331462860107,
"logits/chosen": -0.28029608726501465,
"logits/rejected": -0.3021364212036133,
"logps/chosen": -1.0115910768508911,
"logps/rejected": -1.246225118637085,
"loss": 0.7833,
"nll_loss": 0.7348794937133789,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.01011591125279665,
"rewards/margins": 0.0023463410325348377,
"rewards/rejected": -0.012462252750992775,
"step": 270
},
{
"epoch": 0.1466928618205632,
"grad_norm": 0.16178698706252137,
"learning_rate": 6.953696130017022e-06,
"log_odds_chosen": 0.15516668558120728,
"log_odds_ratio": -0.7157121896743774,
"logits/chosen": -0.31760409474372864,
"logits/rejected": -0.31049543619155884,
"logps/chosen": -1.0282506942749023,
"logps/rejected": -1.1352583169937134,
"loss": 0.7233,
"nll_loss": 0.7511364817619324,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.010282507166266441,
"rewards/margins": 0.0010700763668864965,
"rewards/rejected": -0.011352581903338432,
"step": 280
},
{
"epoch": 0.15193189259986903,
"grad_norm": 0.1924395788457037,
"learning_rate": 6.94273625187036e-06,
"log_odds_chosen": 0.33385169506073,
"log_odds_ratio": -0.6219146251678467,
"logits/chosen": -0.3122417628765106,
"logits/rejected": -0.3353222906589508,
"logps/chosen": -0.9765304327011108,
"logps/rejected": -1.193291425704956,
"loss": 0.7901,
"nll_loss": 0.7909854650497437,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.009765303693711758,
"rewards/margins": 0.0021676109172403812,
"rewards/rejected": -0.011932915076613426,
"step": 290
},
{
"epoch": 0.15717092337917485,
"grad_norm": 0.34259992418933566,
"learning_rate": 6.9306238475641205e-06,
"log_odds_chosen": 0.2912523150444031,
"log_odds_ratio": -0.6484606266021729,
"logits/chosen": -0.2717845141887665,
"logits/rejected": -0.3028518557548523,
"logps/chosen": -1.0336506366729736,
"logps/rejected": -1.2281897068023682,
"loss": 0.7146,
"nll_loss": 0.6968905329704285,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.010336506180465221,
"rewards/margins": 0.0019453916465863585,
"rewards/rejected": -0.012281898409128189,
"step": 300
},
{
"epoch": 0.15717092337917485,
"eval_log_odds_chosen": 0.27896934747695923,
"eval_log_odds_ratio": -0.659382164478302,
"eval_logits/chosen": -0.3169032633304596,
"eval_logits/rejected": -0.3196420669555664,
"eval_logps/chosen": -1.0173438787460327,
"eval_logps/rejected": -1.2193564176559448,
"eval_loss": 0.6237765550613403,
"eval_nll_loss": 0.6180580854415894,
"eval_rewards/accuracies": 0.6140000224113464,
"eval_rewards/chosen": -0.010173438116908073,
"eval_rewards/margins": 0.002020125975832343,
"eval_rewards/rejected": -0.01219356432557106,
"eval_runtime": 277.9482,
"eval_samples_per_second": 7.192,
"eval_steps_per_second": 0.45,
"step": 300
},
{
"epoch": 0.16240995415848067,
"grad_norm": 0.16276041420565796,
"learning_rate": 6.917362971972625e-06,
"log_odds_chosen": 0.2336260974407196,
"log_odds_ratio": -0.6989104151725769,
"logits/chosen": -0.28494778275489807,
"logits/rejected": -0.3021838068962097,
"logps/chosen": -1.0360081195831299,
"logps/rejected": -1.2258890867233276,
"loss": 0.7523,
"nll_loss": 0.7411647439002991,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.010360080748796463,
"rewards/margins": 0.0018988108495250344,
"rewards/rejected": -0.012258890084922314,
"step": 310
},
{
"epoch": 0.16764898493778652,
"grad_norm": 0.15636902004729206,
"learning_rate": 6.902958064444372e-06,
"log_odds_chosen": 0.25901222229003906,
"log_odds_ratio": -0.6495088338851929,
"logits/chosen": -0.3297797739505768,
"logits/rejected": -0.32505810260772705,
"logps/chosen": -1.0110734701156616,
"logps/rejected": -1.1903458833694458,
"loss": 0.785,
"nll_loss": 0.788983166217804,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.010110734961926937,
"rewards/margins": 0.0017927236622199416,
"rewards/rejected": -0.011903459206223488,
"step": 320
},
{
"epoch": 0.17288801571709234,
"grad_norm": 0.16465507987416486,
"learning_rate": 6.8874139473158825e-06,
"log_odds_chosen": 0.50341796875,
"log_odds_ratio": -0.5877692103385925,
"logits/chosen": -0.33102065324783325,
"logits/rejected": -0.33758553862571716,
"logps/chosen": -1.0253140926361084,
"logps/rejected": -1.4051698446273804,
"loss": 0.772,
"nll_loss": 0.7442210912704468,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.01025314163416624,
"rewards/margins": 0.0037985569797456264,
"rewards/rejected": -0.01405169814825058,
"step": 330
},
{
"epoch": 0.17812704649639816,
"grad_norm": 0.2553378662120776,
"learning_rate": 6.870735824297317e-06,
"log_odds_chosen": 0.0705319195985794,
"log_odds_ratio": -0.7645262479782104,
"logits/chosen": -0.2785702049732208,
"logits/rejected": -0.28481778502464294,
"logps/chosen": -1.1028659343719482,
"logps/rejected": -1.1618068218231201,
"loss": 0.7605,
"nll_loss": 0.7672589421272278,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.011028659529983997,
"rewards/margins": 0.0005894082714803517,
"rewards/rejected": -0.011618068441748619,
"step": 340
},
{
"epoch": 0.183366077275704,
"grad_norm": 0.18113875877671343,
"learning_rate": 6.852929278730433e-06,
"log_odds_chosen": 0.14474061131477356,
"log_odds_ratio": -0.7329230308532715,
"logits/chosen": -0.2613077461719513,
"logits/rejected": -0.2920432984828949,
"logps/chosen": -0.9968924522399902,
"logps/rejected": -1.098439335823059,
"loss": 0.7298,
"nll_loss": 0.6963182091712952,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.009968924336135387,
"rewards/margins": 0.0010154687333852053,
"rewards/rejected": -0.010984392836689949,
"step": 350
},
{
"epoch": 0.18860510805500982,
"grad_norm": 0.19400999564615143,
"learning_rate": 6.834000271719443e-06,
"log_odds_chosen": 0.2884977459907532,
"log_odds_ratio": -0.6588489413261414,
"logits/chosen": -0.3067547678947449,
"logits/rejected": -0.31717801094055176,
"logps/chosen": -1.0352160930633545,
"logps/rejected": -1.2498157024383545,
"loss": 0.7559,
"nll_loss": 0.7783513069152832,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.010352160781621933,
"rewards/margins": 0.002145996782928705,
"rewards/rejected": -0.012498157098889351,
"step": 360
},
{
"epoch": 0.19384413883431564,
"grad_norm": 0.1839111513162734,
"learning_rate": 6.813955140135418e-06,
"log_odds_chosen": 0.25216466188430786,
"log_odds_ratio": -0.6868494749069214,
"logits/chosen": -0.2936992347240448,
"logits/rejected": -0.2800842523574829,
"logps/chosen": -1.0396112203598022,
"logps/rejected": -1.2321968078613281,
"loss": 0.7315,
"nll_loss": 0.6916857957839966,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.010396112687885761,
"rewards/margins": 0.0019258556421846151,
"rewards/rejected": -0.01232196670025587,
"step": 370
},
{
"epoch": 0.1990831696136215,
"grad_norm": 0.20686182443960535,
"learning_rate": 6.7928005944948864e-06,
"log_odds_chosen": 0.18967841565608978,
"log_odds_ratio": -0.7045271992683411,
"logits/chosen": -0.27514809370040894,
"logits/rejected": -0.2739102840423584,
"logps/chosen": -1.0092417001724243,
"logps/rejected": -1.1560665369033813,
"loss": 0.74,
"nll_loss": 0.7480857968330383,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.010092416778206825,
"rewards/margins": 0.001468247384764254,
"rewards/rejected": -0.011560664512217045,
"step": 380
},
{
"epoch": 0.2043222003929273,
"grad_norm": 0.19077789339143533,
"learning_rate": 6.770543716713352e-06,
"log_odds_chosen": 0.2070658951997757,
"log_odds_ratio": -0.703337550163269,
"logits/chosen": -0.29966622591018677,
"logits/rejected": -0.29987436532974243,
"logps/chosen": -1.0798876285552979,
"logps/rejected": -1.2502596378326416,
"loss": 0.7795,
"nll_loss": 0.7723098993301392,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.01079887617379427,
"rewards/margins": 0.001703719375655055,
"rewards/rejected": -0.012502595782279968,
"step": 390
},
{
"epoch": 0.20956123117223313,
"grad_norm": 0.19361123064559435,
"learning_rate": 6.747191957734486e-06,
"log_odds_chosen": 0.15251222252845764,
"log_odds_ratio": -0.7213733196258545,
"logits/chosen": -0.2669166922569275,
"logits/rejected": -0.2855262756347656,
"logps/chosen": -1.0066086053848267,
"logps/rejected": -1.1335278749465942,
"loss": 0.7361,
"nll_loss": 0.6917256116867065,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.010066085495054722,
"rewards/margins": 0.0012691912706941366,
"rewards/rejected": -0.011335276998579502,
"step": 400
},
{
"epoch": 0.20956123117223313,
"eval_log_odds_chosen": 0.27702653408050537,
"eval_log_odds_ratio": -0.6617660522460938,
"eval_logits/chosen": -0.2810555696487427,
"eval_logits/rejected": -0.284096360206604,
"eval_logps/chosen": -1.0014485120773315,
"eval_logps/rejected": -1.2012392282485962,
"eval_loss": 0.6136931777000427,
"eval_nll_loss": 0.6077669858932495,
"eval_rewards/accuracies": 0.6140000224113464,
"eval_rewards/chosen": -0.010014484636485577,
"eval_rewards/margins": 0.001997907180339098,
"eval_rewards/rejected": -0.012012392282485962,
"eval_runtime": 269.6649,
"eval_samples_per_second": 7.413,
"eval_steps_per_second": 0.464,
"step": 400
},
{
"epoch": 0.21480026195153898,
"grad_norm": 0.24070277239270585,
"learning_rate": 6.7227531350357585e-06,
"log_odds_chosen": 0.48085784912109375,
"log_odds_ratio": -0.5908285975456238,
"logits/chosen": -0.27427297830581665,
"logits/rejected": -0.2949586510658264,
"logps/chosen": -0.972133994102478,
"logps/rejected": -1.2912139892578125,
"loss": 0.7185,
"nll_loss": 0.7345937490463257,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.00972133968025446,
"rewards/margins": 0.0031908005475997925,
"rewards/rejected": -0.012912139296531677,
"step": 410
},
{
"epoch": 0.2200392927308448,
"grad_norm": 0.2315925815874692,
"learning_rate": 6.697235430011389e-06,
"log_odds_chosen": 0.2700832486152649,
"log_odds_ratio": -0.6809287667274475,
"logits/chosen": -0.2654404044151306,
"logits/rejected": -0.2597273290157318,
"logps/chosen": -0.9692566990852356,
"logps/rejected": -1.1582214832305908,
"loss": 0.7136,
"nll_loss": 0.6900116801261902,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.009692566469311714,
"rewards/margins": 0.0018896475667133927,
"rewards/rejected": -0.011582214385271072,
"step": 420
},
{
"epoch": 0.22527832351015062,
"grad_norm": 0.1877491685270872,
"learning_rate": 6.670647385233456e-06,
"log_odds_chosen": 0.2508837580680847,
"log_odds_ratio": -0.7000848054885864,
"logits/chosen": -0.2642936706542969,
"logits/rejected": -0.26817911863327026,
"logps/chosen": -1.0234228372573853,
"logps/rejected": -1.2276101112365723,
"loss": 0.7568,
"nll_loss": 0.7772089242935181,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.010234227403998375,
"rewards/margins": 0.002041872590780258,
"rewards/rejected": -0.012276100926101208,
"step": 430
},
{
"epoch": 0.23051735428945644,
"grad_norm": 0.31875972452356033,
"learning_rate": 6.642997901592093e-06,
"log_odds_chosen": 0.15048085153102875,
"log_odds_ratio": -0.703689694404602,
"logits/chosen": -0.2521992325782776,
"logits/rejected": -0.24312739074230194,
"logps/chosen": -1.0188218355178833,
"logps/rejected": -1.1303977966308594,
"loss": 0.7633,
"nll_loss": 0.7371809482574463,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.010188218206167221,
"rewards/margins": 0.0011157591361552477,
"rewards/rejected": -0.01130397617816925,
"step": 440
},
{
"epoch": 0.2357563850687623,
"grad_norm": 0.25010097086107463,
"learning_rate": 6.614296235315736e-06,
"log_odds_chosen": 0.3179778754711151,
"log_odds_ratio": -0.6758134365081787,
"logits/chosen": -0.3085033595561981,
"logits/rejected": -0.3148275315761566,
"logps/chosen": -1.0270577669143677,
"logps/rejected": -1.2768559455871582,
"loss": 0.7733,
"nll_loss": 0.788577675819397,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.010270575992763042,
"rewards/margins": 0.0024979840964078903,
"rewards/rejected": -0.012768561020493507,
"step": 450
},
{
"epoch": 0.2409954158480681,
"grad_norm": 0.2124434061946913,
"learning_rate": 6.584551994872414e-06,
"log_odds_chosen": 0.1259727030992508,
"log_odds_ratio": -0.7334306836128235,
"logits/chosen": -0.28063657879829407,
"logits/rejected": -0.26131364703178406,
"logps/chosen": -1.0509703159332275,
"logps/rejected": -1.145269513130188,
"loss": 0.7295,
"nll_loss": 0.6992601752281189,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.010509702377021313,
"rewards/margins": 0.000942991697229445,
"rewards/rejected": -0.011452694423496723,
"step": 460
},
{
"epoch": 0.24623444662737393,
"grad_norm": 0.21593583192713314,
"learning_rate": 6.553775137753117e-06,
"log_odds_chosen": 0.11253416538238525,
"log_odds_ratio": -0.7634598016738892,
"logits/chosen": -0.28993192315101624,
"logits/rejected": -0.28420716524124146,
"logps/chosen": -1.0563971996307373,
"logps/rejected": -1.1430349349975586,
"loss": 0.7283,
"nll_loss": 0.7201040983200073,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.010563971474766731,
"rewards/margins": 0.0008663767948746681,
"rewards/rejected": -0.0114303482696414,
"step": 470
},
{
"epoch": 0.25147347740667975,
"grad_norm": 0.2598288179745421,
"learning_rate": 6.521975967138322e-06,
"log_odds_chosen": 0.31320375204086304,
"log_odds_ratio": -0.671956479549408,
"logits/chosen": -0.251331627368927,
"logits/rejected": -0.25280171632766724,
"logps/chosen": -1.0279942750930786,
"logps/rejected": -1.2646806240081787,
"loss": 0.7066,
"nll_loss": 0.6571913361549377,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.01027994230389595,
"rewards/margins": 0.0023668636567890644,
"rewards/rejected": -0.012646806426346302,
"step": 480
},
{
"epoch": 0.25671250818598557,
"grad_norm": 0.24565992829114924,
"learning_rate": 6.4891651284487955e-06,
"log_odds_chosen": 0.0688483789563179,
"log_odds_ratio": -0.7365472912788391,
"logits/chosen": -0.27430278062820435,
"logits/rejected": -0.29375168681144714,
"logps/chosen": -1.028810977935791,
"logps/rejected": -1.071062684059143,
"loss": 0.7146,
"nll_loss": 0.7346883416175842,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.010288110002875328,
"rewards/margins": 0.000422515906393528,
"rewards/rejected": -0.01071062684059143,
"step": 490
},
{
"epoch": 0.26195153896529144,
"grad_norm": 0.2353402772652972,
"learning_rate": 6.455353605781819e-06,
"log_odds_chosen": 0.10149893909692764,
"log_odds_ratio": -0.7544985413551331,
"logits/chosen": -0.31464990973472595,
"logits/rejected": -0.2927784025669098,
"logps/chosen": -1.0012602806091309,
"logps/rejected": -1.084865689277649,
"loss": 0.7382,
"nll_loss": 0.7470039129257202,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.010012601502239704,
"rewards/margins": 0.0008360546198673546,
"rewards/rejected": -0.010848656296730042,
"step": 500
},
{
"epoch": 0.26195153896529144,
"eval_log_odds_chosen": 0.2812057435512543,
"eval_log_odds_ratio": -0.6602885127067566,
"eval_logits/chosen": -0.2982476055622101,
"eval_logits/rejected": -0.30226191878318787,
"eval_logps/chosen": -0.9868198037147522,
"eval_logps/rejected": -1.1884440183639526,
"eval_loss": 0.6065535545349121,
"eval_nll_loss": 0.6005536913871765,
"eval_rewards/accuracies": 0.6119999885559082,
"eval_rewards/chosen": -0.009868198074400425,
"eval_rewards/margins": 0.0020162416622042656,
"eval_rewards/rejected": -0.01188443973660469,
"eval_runtime": 268.8732,
"eval_samples_per_second": 7.435,
"eval_steps_per_second": 0.465,
"step": 500
},
{
"epoch": 0.26719056974459726,
"grad_norm": 0.27957826390810075,
"learning_rate": 6.420552718234041e-06,
"log_odds_chosen": 0.2998487651348114,
"log_odds_ratio": -0.679768443107605,
"logits/chosen": -0.30905863642692566,
"logits/rejected": -0.32363763451576233,
"logps/chosen": -0.9874080419540405,
"logps/rejected": -1.2168712615966797,
"loss": 0.7884,
"nll_loss": 0.7535444498062134,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.009874081239104271,
"rewards/margins": 0.002294632140547037,
"rewards/rejected": -0.01216871291399002,
"step": 510
},
{
"epoch": 0.2724296005239031,
"grad_norm": 0.2531949493918585,
"learning_rate": 6.384774116112176e-06,
"log_odds_chosen": 0.24868044257164001,
"log_odds_ratio": -0.6564691066741943,
"logits/chosen": -0.2581290304660797,
"logits/rejected": -0.27538132667541504,
"logps/chosen": -0.9462459683418274,
"logps/rejected": -1.1043087244033813,
"loss": 0.7477,
"nll_loss": 0.7717889547348022,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.00946245901286602,
"rewards/margins": 0.0015806274022907019,
"rewards/rejected": -0.011043086647987366,
"step": 520
},
{
"epoch": 0.2776686313032089,
"grad_norm": 0.2457887466034346,
"learning_rate": 6.348029777032831e-06,
"log_odds_chosen": 0.16035327315330505,
"log_odds_ratio": -0.738000750541687,
"logits/chosen": -0.2864713668823242,
"logits/rejected": -0.2864418029785156,
"logps/chosen": -1.08033287525177,
"logps/rejected": -1.2031466960906982,
"loss": 0.7423,
"nll_loss": 0.7512191534042358,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.010803327895700932,
"rewards/margins": 0.0012281389208510518,
"rewards/rejected": -0.012031466700136662,
"step": 530
},
{
"epoch": 0.2829076620825147,
"grad_norm": 0.20086958833949786,
"learning_rate": 6.310332001912748e-06,
"log_odds_chosen": 0.21620038151741028,
"log_odds_ratio": -0.7137210369110107,
"logits/chosen": -0.273650199174881,
"logits/rejected": -0.2783169150352478,
"logps/chosen": -0.9874860644340515,
"logps/rejected": -1.1352880001068115,
"loss": 0.7143,
"nll_loss": 0.6813681721687317,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.00987485982477665,
"rewards/margins": 0.0014780184719711542,
"rewards/rejected": -0.011352878995239735,
"step": 540
},
{
"epoch": 0.28814669286182054,
"grad_norm": 0.24303066124311232,
"learning_rate": 6.27169341085083e-06,
"log_odds_chosen": 0.22751787304878235,
"log_odds_ratio": -0.6631879210472107,
"logits/chosen": -0.27736982703208923,
"logits/rejected": -0.2964634299278259,
"logps/chosen": -0.9909104108810425,
"logps/rejected": -1.1427714824676514,
"loss": 0.729,
"nll_loss": 0.6827758550643921,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.00990910455584526,
"rewards/margins": 0.0015186098171398044,
"rewards/rejected": -0.011427713558077812,
"step": 550
},
{
"epoch": 0.2933857236411264,
"grad_norm": 0.25050011460137184,
"learning_rate": 6.232126938903292e-06,
"log_odds_chosen": 0.32530641555786133,
"log_odds_ratio": -0.6435776352882385,
"logits/chosen": -0.2863444983959198,
"logits/rejected": -0.2765114903450012,
"logps/chosen": -0.9766008257865906,
"logps/rejected": -1.2149832248687744,
"loss": 0.7096,
"nll_loss": 0.689509928226471,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.009766008704900742,
"rewards/margins": 0.0023838237393647432,
"rewards/rejected": -0.012149832211434841,
"step": 560
},
{
"epoch": 0.29862475442043224,
"grad_norm": 0.21891859191151827,
"learning_rate": 6.191645831753405e-06,
"log_odds_chosen": 0.38149961829185486,
"log_odds_ratio": -0.6493080854415894,
"logits/chosen": -0.2663540244102478,
"logits/rejected": -0.29600009322166443,
"logps/chosen": -1.027773380279541,
"logps/rejected": -1.317977786064148,
"loss": 0.7722,
"nll_loss": 0.8025256395339966,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.010277734138071537,
"rewards/margins": 0.0029020446818321943,
"rewards/rejected": -0.013179777190089226,
"step": 570
},
{
"epoch": 0.30386378519973806,
"grad_norm": 0.22254143966893244,
"learning_rate": 6.150263641277216e-06,
"log_odds_chosen": 0.3161838948726654,
"log_odds_ratio": -0.6515612602233887,
"logits/chosen": -0.24465902149677277,
"logits/rejected": -0.26124146580696106,
"logps/chosen": -0.9947541952133179,
"logps/rejected": -1.1926690340042114,
"loss": 0.7212,
"nll_loss": 0.7128957509994507,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.009947540238499641,
"rewards/margins": 0.0019791489467024803,
"rewards/rejected": -0.011926690116524696,
"step": 580
},
{
"epoch": 0.3091028159790439,
"grad_norm": 0.22077450440144003,
"learning_rate": 6.107994221006794e-06,
"log_odds_chosen": 0.31419774889945984,
"log_odds_ratio": -0.6574016213417053,
"logits/chosen": -0.22359177470207214,
"logits/rejected": -0.23112180829048157,
"logps/chosen": -0.9491780996322632,
"logps/rejected": -1.1917085647583008,
"loss": 0.6984,
"nll_loss": 0.6670723557472229,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.009491780772805214,
"rewards/margins": 0.0024253041483461857,
"rewards/rejected": -0.011917085386812687,
"step": 590
},
{
"epoch": 0.3143418467583497,
"grad_norm": 0.251407676620302,
"learning_rate": 6.064851721492469e-06,
"log_odds_chosen": 0.34587010741233826,
"log_odds_ratio": -0.6531810760498047,
"logits/chosen": -0.2380281388759613,
"logits/rejected": -0.2737070322036743,
"logps/chosen": -0.965602695941925,
"logps/rejected": -1.2065523862838745,
"loss": 0.7339,
"nll_loss": 0.7357583045959473,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.009656025096774101,
"rewards/margins": 0.00240949634462595,
"rewards/rejected": -0.012065522372722626,
"step": 600
},
{
"epoch": 0.3143418467583497,
"eval_log_odds_chosen": 0.28590938448905945,
"eval_log_odds_ratio": -0.6587470769882202,
"eval_logits/chosen": -0.24898304045200348,
"eval_logits/rejected": -0.2544224262237549,
"eval_logps/chosen": -0.9714497923851013,
"eval_logps/rejected": -1.1750952005386353,
"eval_loss": 0.6008986830711365,
"eval_nll_loss": 0.5948084592819214,
"eval_rewards/accuracies": 0.6100000143051147,
"eval_rewards/chosen": -0.009714496321976185,
"eval_rewards/margins": 0.0020364541560411453,
"eval_rewards/rejected": -0.011750951409339905,
"eval_runtime": 269.4545,
"eval_samples_per_second": 7.419,
"eval_steps_per_second": 0.464,
"step": 600
},
{
"epoch": 0.3195808775376555,
"grad_norm": 0.19175608334969543,
"learning_rate": 6.0208505855656546e-06,
"log_odds_chosen": 0.07260292023420334,
"log_odds_ratio": -0.7751378417015076,
"logits/chosen": -0.25565439462661743,
"logits/rejected": -0.2717348337173462,
"logps/chosen": -1.075731635093689,
"logps/rejected": -1.1411268711090088,
"loss": 0.7227,
"nll_loss": 0.7402042150497437,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.010757316835224628,
"rewards/margins": 0.0006539515452459455,
"rewards/rejected": -0.011411268264055252,
"step": 610
},
{
"epoch": 0.32481990831696134,
"grad_norm": 0.2578615657242334,
"learning_rate": 5.976005543503809e-06,
"log_odds_chosen": 0.1597108542919159,
"log_odds_ratio": -0.7176756858825684,
"logits/chosen": -0.25491657853126526,
"logits/rejected": -0.24470999836921692,
"logps/chosen": -0.9926729202270508,
"logps/rejected": -1.1365479230880737,
"loss": 0.6902,
"nll_loss": 0.69666588306427,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.009926728904247284,
"rewards/margins": 0.0014387497212737799,
"rewards/rejected": -0.011365478858351707,
"step": 620
},
{
"epoch": 0.3300589390962672,
"grad_norm": 0.2633633043041551,
"learning_rate": 5.930331608099176e-06,
"log_odds_chosen": 0.3888145685195923,
"log_odds_ratio": -0.6442294716835022,
"logits/chosen": -0.2764403223991394,
"logits/rejected": -0.287002295255661,
"logps/chosen": -0.9268707036972046,
"logps/rejected": -1.1958353519439697,
"loss": 0.7199,
"nll_loss": 0.7196196913719177,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.00926870759576559,
"rewards/margins": 0.0026896463241428137,
"rewards/rejected": -0.011958354152739048,
"step": 630
},
{
"epoch": 0.33529796987557303,
"grad_norm": 0.21089837015837296,
"learning_rate": 5.88384406963295e-06,
"log_odds_chosen": 0.146010160446167,
"log_odds_ratio": -0.7168859839439392,
"logits/chosen": -0.26726484298706055,
"logits/rejected": -0.25664839148521423,
"logps/chosen": -1.0535497665405273,
"logps/rejected": -1.1466712951660156,
"loss": 0.7069,
"nll_loss": 0.6863896250724792,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.010535497218370438,
"rewards/margins": 0.000931215938180685,
"rewards/rejected": -0.011466712690889835,
"step": 640
},
{
"epoch": 0.34053700065487885,
"grad_norm": 0.19092936543303227,
"learning_rate": 5.836558490756538e-06,
"log_odds_chosen": 0.15154734253883362,
"log_odds_ratio": -0.7028461694717407,
"logits/chosen": -0.3015301823616028,
"logits/rejected": -0.29784873127937317,
"logps/chosen": -1.0319565534591675,
"logps/rejected": -1.1262956857681274,
"loss": 0.6908,
"nll_loss": 0.714655876159668,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.010319565422832966,
"rewards/margins": 0.000943391933105886,
"rewards/rejected": -0.011262957938015461,
"step": 650
},
{
"epoch": 0.34577603143418467,
"grad_norm": 0.26193122829170745,
"learning_rate": 5.788490701281647e-06,
"log_odds_chosen": 0.16511467099189758,
"log_odds_ratio": -0.6980458498001099,
"logits/chosen": -0.26230692863464355,
"logits/rejected": -0.2723314166069031,
"logps/chosen": -1.024036169052124,
"logps/rejected": -1.1353824138641357,
"loss": 0.7383,
"nll_loss": 0.7427499890327454,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.010240362025797367,
"rewards/margins": 0.001113462378270924,
"rewards/rejected": -0.011353823356330395,
"step": 660
},
{
"epoch": 0.3510150622134905,
"grad_norm": 0.23994917813654507,
"learning_rate": 5.739656792880934e-06,
"log_odds_chosen": 0.1396145075559616,
"log_odds_ratio": -0.7353970408439636,
"logits/chosen": -0.23678474128246307,
"logits/rejected": -0.22982017695903778,
"logps/chosen": -1.0073795318603516,
"logps/rejected": -1.132542371749878,
"loss": 0.7342,
"nll_loss": 0.7146428823471069,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.010073795914649963,
"rewards/margins": 0.0012516285059973598,
"rewards/rejected": -0.01132542360574007,
"step": 670
},
{
"epoch": 0.3562540929927963,
"grad_norm": 0.20221234021881487,
"learning_rate": 5.6900731137009834e-06,
"log_odds_chosen": 0.2808675765991211,
"log_odds_ratio": -0.6466277837753296,
"logits/chosen": -0.23304986953735352,
"logits/rejected": -0.24880293011665344,
"logps/chosen": -0.9442907571792603,
"logps/rejected": -1.1252390146255493,
"loss": 0.6538,
"nll_loss": 0.6462209224700928,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.009442906826734543,
"rewards/margins": 0.0018094830447807908,
"rewards/rejected": -0.011252389289438725,
"step": 680
},
{
"epoch": 0.3614931237721022,
"grad_norm": 0.21729386041077234,
"learning_rate": 5.639756262889441e-06,
"log_odds_chosen": 0.29118528962135315,
"log_odds_ratio": -0.6446735262870789,
"logits/chosen": -0.2575289011001587,
"logits/rejected": -0.2800835072994232,
"logps/chosen": -0.9227706789970398,
"logps/rejected": -1.119728684425354,
"loss": 0.7375,
"nll_loss": 0.7325758337974548,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.009227706119418144,
"rewards/margins": 0.0019695800729095936,
"rewards/rejected": -0.011197286657989025,
"step": 690
},
{
"epoch": 0.366732154551408,
"grad_norm": 0.24056294333924544,
"learning_rate": 5.588723085038102e-06,
"log_odds_chosen": 0.14757606387138367,
"log_odds_ratio": -0.7077519297599792,
"logits/chosen": -0.23453739285469055,
"logits/rejected": -0.24059641361236572,
"logps/chosen": -0.9751278758049011,
"logps/rejected": -1.0833594799041748,
"loss": 0.7133,
"nll_loss": 0.7068762183189392,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.009751277044415474,
"rewards/margins": 0.001082315924577415,
"rewards/rejected": -0.010833594016730785,
"step": 700
},
{
"epoch": 0.366732154551408,
"eval_log_odds_chosen": 0.2828400731086731,
"eval_log_odds_ratio": -0.6590429544448853,
"eval_logits/chosen": -0.2763667702674866,
"eval_logits/rejected": -0.2830006182193756,
"eval_logps/chosen": -0.95875483751297,
"eval_logps/rejected": -1.1589833498001099,
"eval_loss": 0.5967572331428528,
"eval_nll_loss": 0.5905880331993103,
"eval_rewards/accuracies": 0.6069999933242798,
"eval_rewards/chosen": -0.009587547741830349,
"eval_rewards/margins": 0.002002286957576871,
"eval_rewards/rejected": -0.011589834466576576,
"eval_runtime": 270.3826,
"eval_samples_per_second": 7.393,
"eval_steps_per_second": 0.462,
"step": 700
},
{
"epoch": 0.3719711853307138,
"grad_norm": 0.25075392464158525,
"learning_rate": 5.536990664543849e-06,
"log_odds_chosen": 0.25649288296699524,
"log_odds_ratio": -0.6813511252403259,
"logits/chosen": -0.26890888810157776,
"logits/rejected": -0.2766120135784149,
"logps/chosen": -0.9498542547225952,
"logps/rejected": -1.1336113214492798,
"loss": 0.7356,
"nll_loss": 0.7913224697113037,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.009498542174696922,
"rewards/margins": 0.0018375713843852282,
"rewards/rejected": -0.011336112394928932,
"step": 710
},
{
"epoch": 0.37721021611001965,
"grad_norm": 0.2560360768330474,
"learning_rate": 5.484576319889293e-06,
"log_odds_chosen": 0.35988372564315796,
"log_odds_ratio": -0.6421231031417847,
"logits/chosen": -0.2358725517988205,
"logits/rejected": -0.2683177888393402,
"logps/chosen": -0.9860288500785828,
"logps/rejected": -1.2582954168319702,
"loss": 0.6986,
"nll_loss": 0.7215532064437866,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.00986028928309679,
"rewards/margins": 0.0027226645033806562,
"rewards/rejected": -0.012582953087985516,
"step": 720
},
{
"epoch": 0.38244924688932547,
"grad_norm": 0.2389570992848936,
"learning_rate": 5.4314975978450645e-06,
"log_odds_chosen": 0.24539212882518768,
"log_odds_ratio": -0.666843831539154,
"logits/chosen": -0.23044565320014954,
"logits/rejected": -0.257163405418396,
"logps/chosen": -0.987472653388977,
"logps/rejected": -1.1630165576934814,
"loss": 0.724,
"nll_loss": 0.7047569751739502,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.009874725714325905,
"rewards/margins": 0.001755438162945211,
"rewards/rejected": -0.011630164459347725,
"step": 730
},
{
"epoch": 0.3876882776686313,
"grad_norm": 0.23349735339471042,
"learning_rate": 5.377772267595671e-06,
"log_odds_chosen": 0.1265036016702652,
"log_odds_ratio": -0.7426053881645203,
"logits/chosen": -0.22161659598350525,
"logits/rejected": -0.20148198306560516,
"logps/chosen": -0.987158477306366,
"logps/rejected": -1.084160566329956,
"loss": 0.6558,
"nll_loss": 0.6827282905578613,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.009871585294604301,
"rewards/margins": 0.0009700207156129181,
"rewards/rejected": -0.010841606184840202,
"step": 740
},
{
"epoch": 0.3929273084479371,
"grad_norm": 0.24740744827014474,
"learning_rate": 5.323418314790902e-06,
"log_odds_chosen": 0.180901437997818,
"log_odds_ratio": -0.6792001724243164,
"logits/chosen": -0.22566553950309753,
"logits/rejected": -0.2513706088066101,
"logps/chosen": -1.010834813117981,
"logps/rejected": -1.1412070989608765,
"loss": 0.7466,
"nll_loss": 0.7286490201950073,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.010108347050845623,
"rewards/margins": 0.001303724362514913,
"rewards/rejected": -0.01141207106411457,
"step": 750
},
{
"epoch": 0.398166339227243,
"grad_norm": 0.24398794037042804,
"learning_rate": 5.268453935524767e-06,
"log_odds_chosen": 0.27628079056739807,
"log_odds_ratio": -0.6668368577957153,
"logits/chosen": -0.23829717934131622,
"logits/rejected": -0.26499801874160767,
"logps/chosen": -1.0213868618011475,
"logps/rejected": -1.2219724655151367,
"loss": 0.6748,
"nll_loss": 0.6636958122253418,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.010213869623839855,
"rewards/margins": 0.0020058555528521538,
"rewards/rejected": -0.012219725176692009,
"step": 760
},
{
"epoch": 0.4034053700065488,
"grad_norm": 0.23509527899222843,
"learning_rate": 5.212897530243978e-06,
"log_odds_chosen": 0.29617372155189514,
"log_odds_ratio": -0.6592302322387695,
"logits/chosen": -0.2393266260623932,
"logits/rejected": -0.2519669830799103,
"logps/chosen": -0.9821060299873352,
"logps/rejected": -1.1928421258926392,
"loss": 0.7028,
"nll_loss": 0.7175225019454956,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.009821060113608837,
"rewards/margins": 0.002107360865920782,
"rewards/rejected": -0.011928422376513481,
"step": 770
},
{
"epoch": 0.4086444007858546,
"grad_norm": 0.23394947702424512,
"learning_rate": 5.156767697588029e-06,
"log_odds_chosen": 0.3425753116607666,
"log_odds_ratio": -0.6615421772003174,
"logits/chosen": -0.2409038543701172,
"logits/rejected": -0.2574775815010071,
"logps/chosen": -0.9479767084121704,
"logps/rejected": -1.1898690462112427,
"loss": 0.6941,
"nll_loss": 0.657213568687439,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.0094797657802701,
"rewards/margins": 0.002418924355879426,
"rewards/rejected": -0.011898690834641457,
"step": 780
},
{
"epoch": 0.41388343156516044,
"grad_norm": 0.21833827164567227,
"learning_rate": 5.100083228162918e-06,
"log_odds_chosen": 0.24498343467712402,
"log_odds_ratio": -0.6762871742248535,
"logits/chosen": -0.275887668132782,
"logits/rejected": -0.30585306882858276,
"logps/chosen": -0.9832944869995117,
"logps/rejected": -1.1611313819885254,
"loss": 0.7435,
"nll_loss": 0.7558133006095886,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.00983294378966093,
"rewards/margins": 0.001778370002284646,
"rewards/rejected": -0.011611313559114933,
"step": 790
},
{
"epoch": 0.41912246234446626,
"grad_norm": 0.23630257510340968,
"learning_rate": 5.042863098250613e-06,
"log_odds_chosen": 0.3097684979438782,
"log_odds_ratio": -0.669011652469635,
"logits/chosen": -0.2526930868625641,
"logits/rejected": -0.2613184154033661,
"logps/chosen": -0.975333034992218,
"logps/rejected": -1.203438401222229,
"loss": 0.6988,
"nll_loss": 0.6705261468887329,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.009753329679369926,
"rewards/margins": 0.002281052526086569,
"rewards/rejected": -0.012034382671117783,
"step": 800
},
{
"epoch": 0.41912246234446626,
"eval_log_odds_chosen": 0.2897730767726898,
"eval_log_odds_ratio": -0.6575716137886047,
"eval_logits/chosen": -0.27449238300323486,
"eval_logits/rejected": -0.28172942996025085,
"eval_logps/chosen": -0.9450584053993225,
"eval_logps/rejected": -1.1490689516067505,
"eval_loss": 0.5925648212432861,
"eval_nll_loss": 0.5863717198371887,
"eval_rewards/accuracies": 0.6069999933242798,
"eval_rewards/chosen": -0.009450582787394524,
"eval_rewards/margins": 0.0020401068031787872,
"eval_rewards/rejected": -0.011490690521895885,
"eval_runtime": 282.9062,
"eval_samples_per_second": 7.066,
"eval_steps_per_second": 0.442,
"step": 800
},
{
"epoch": 0.4243614931237721,
"grad_norm": 0.2488306854414885,
"learning_rate": 4.98512646345635e-06,
"log_odds_chosen": 0.28641897439956665,
"log_odds_ratio": -0.6547825336456299,
"logits/chosen": -0.22206516563892365,
"logits/rejected": -0.258362352848053,
"logps/chosen": -0.9418247938156128,
"logps/rejected": -1.1379072666168213,
"loss": 0.7665,
"nll_loss": 0.7691252827644348,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.009418248198926449,
"rewards/margins": 0.0019608228467404842,
"rewards/rejected": -0.011379070580005646,
"step": 810
},
{
"epoch": 0.42960052390307796,
"grad_norm": 0.23185879725371805,
"learning_rate": 4.92689265229591e-06,
"log_odds_chosen": 0.13744059205055237,
"log_odds_ratio": -0.7185770273208618,
"logits/chosen": -0.2039022445678711,
"logits/rejected": -0.19484642148017883,
"logps/chosen": -0.8942493200302124,
"logps/rejected": -0.9868205189704895,
"loss": 0.6985,
"nll_loss": 0.6817600727081299,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.008942494168877602,
"rewards/margins": 0.0009257116471417248,
"rewards/rejected": -0.009868205524981022,
"step": 820
},
{
"epoch": 0.4348395546823838,
"grad_norm": 0.26066946910165784,
"learning_rate": 4.8681811597249986e-06,
"log_odds_chosen": 0.31899920105934143,
"log_odds_ratio": -0.6595107316970825,
"logits/chosen": -0.2524524927139282,
"logits/rejected": -0.26533645391464233,
"logps/chosen": -0.9564129114151001,
"logps/rejected": -1.1792023181915283,
"loss": 0.7289,
"nll_loss": 0.736221194267273,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.009564128704369068,
"rewards/margins": 0.0022278937976807356,
"rewards/rejected": -0.011792022734880447,
"step": 830
},
{
"epoch": 0.4400785854616896,
"grad_norm": 0.2781863509449919,
"learning_rate": 4.80901164061291e-06,
"log_odds_chosen": 0.2759079933166504,
"log_odds_ratio": -0.6593595743179321,
"logits/chosen": -0.24136073887348175,
"logits/rejected": -0.24197664856910706,
"logps/chosen": -0.949256420135498,
"logps/rejected": -1.1427079439163208,
"loss": 0.7372,
"nll_loss": 0.6998056173324585,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.009492563083767891,
"rewards/margins": 0.001934514963068068,
"rewards/rejected": -0.011427078396081924,
"step": 840
},
{
"epoch": 0.4453176162409954,
"grad_norm": 0.24998305046902874,
"learning_rate": 4.7494039031626685e-06,
"log_odds_chosen": 0.20654296875,
"log_odds_ratio": -0.6940165162086487,
"logits/chosen": -0.23618969321250916,
"logits/rejected": -0.26105183362960815,
"logps/chosen": -0.949593186378479,
"logps/rejected": -1.0937670469284058,
"loss": 0.7206,
"nll_loss": 0.7645503282546997,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.0094959307461977,
"rewards/margins": 0.0014417401980608702,
"rewards/rejected": -0.01093767024576664,
"step": 850
},
{
"epoch": 0.45055664702030124,
"grad_norm": 0.20308732647543407,
"learning_rate": 4.689377902279818e-06,
"log_odds_chosen": 0.12388608604669571,
"log_odds_ratio": -0.7273297309875488,
"logits/chosen": -0.2292974442243576,
"logits/rejected": -0.22320961952209473,
"logps/chosen": -1.0063588619232178,
"logps/rejected": -1.0858452320098877,
"loss": 0.7091,
"nll_loss": 0.7223267555236816,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.010063587687909603,
"rewards/margins": 0.0007948653656058013,
"rewards/rejected": -0.010858452878892422,
"step": 860
},
{
"epoch": 0.45579567779960706,
"grad_norm": 0.3144056888989078,
"learning_rate": 4.628953732892118e-06,
"log_odds_chosen": 0.3134177327156067,
"log_odds_ratio": -0.6562970876693726,
"logits/chosen": -0.23619429767131805,
"logits/rejected": -0.25046223402023315,
"logps/chosen": -0.9373693466186523,
"logps/rejected": -1.1769232749938965,
"loss": 0.7338,
"nll_loss": 0.7179974913597107,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.009373693726956844,
"rewards/margins": 0.0023955402430146933,
"rewards/rejected": -0.011769233271479607,
"step": 870
},
{
"epoch": 0.4610347085789129,
"grad_norm": 0.20617728810621005,
"learning_rate": 4.568151623222352e-06,
"log_odds_chosen": 0.2670097351074219,
"log_odds_ratio": -0.6971144676208496,
"logits/chosen": -0.22974228858947754,
"logits/rejected": -0.25060245394706726,
"logps/chosen": -0.9138998985290527,
"logps/rejected": -1.1040019989013672,
"loss": 0.7077,
"nll_loss": 0.6301047801971436,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.009138999506831169,
"rewards/margins": 0.001901020877994597,
"rewards/rejected": -0.011040019802749157,
"step": 880
},
{
"epoch": 0.46627373935821875,
"grad_norm": 0.2652827403078759,
"learning_rate": 4.50699192801652e-06,
"log_odds_chosen": 0.3137222230434418,
"log_odds_ratio": -0.6815677285194397,
"logits/chosen": -0.26133638620376587,
"logits/rejected": -0.28535357117652893,
"logps/chosen": -0.9835060238838196,
"logps/rejected": -1.2045724391937256,
"loss": 0.7136,
"nll_loss": 0.7360855937004089,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.009835059754550457,
"rewards/margins": 0.0022106640972197056,
"rewards/rejected": -0.01204572431743145,
"step": 890
},
{
"epoch": 0.4715127701375246,
"grad_norm": 0.3207317475817683,
"learning_rate": 4.445495121729673e-06,
"log_odds_chosen": 0.2501332759857178,
"log_odds_ratio": -0.6778159141540527,
"logits/chosen": -0.24674773216247559,
"logits/rejected": -0.25447121262550354,
"logps/chosen": -0.9550365209579468,
"logps/rejected": -1.1225982904434204,
"loss": 0.7493,
"nll_loss": 0.70607590675354,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.009550364688038826,
"rewards/margins": 0.0016756162513047457,
"rewards/rejected": -0.011225981637835503,
"step": 900
},
{
"epoch": 0.4715127701375246,
"eval_log_odds_chosen": 0.2952045500278473,
"eval_log_odds_ratio": -0.6552044749259949,
"eval_logits/chosen": -0.2476220428943634,
"eval_logits/rejected": -0.2546870708465576,
"eval_logps/chosen": -0.9300667643547058,
"eval_logps/rejected": -1.1357324123382568,
"eval_loss": 0.5882026553153992,
"eval_nll_loss": 0.5820150971412659,
"eval_rewards/accuracies": 0.6079999804496765,
"eval_rewards/chosen": -0.009300666861236095,
"eval_rewards/margins": 0.002056657336652279,
"eval_rewards/rejected": -0.0113573232665658,
"eval_runtime": 272.7809,
"eval_samples_per_second": 7.328,
"eval_steps_per_second": 0.458,
"step": 900
},
{
"epoch": 0.4767518009168304,
"grad_norm": 0.23100558629186949,
"learning_rate": 4.3836817916716655e-06,
"log_odds_chosen": 0.33713942766189575,
"log_odds_ratio": -0.6442294120788574,
"logits/chosen": -0.25310784578323364,
"logits/rejected": -0.24791720509529114,
"logps/chosen": -0.913447380065918,
"logps/rejected": -1.1374478340148926,
"loss": 0.7021,
"nll_loss": 0.7241432666778564,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.009134473279118538,
"rewards/margins": 0.002240004250779748,
"rewards/rejected": -0.011374477297067642,
"step": 910
},
{
"epoch": 0.4819908316961362,
"grad_norm": 0.27311828272573985,
"learning_rate": 4.3215726311151454e-06,
"log_odds_chosen": 0.31479746103286743,
"log_odds_ratio": -0.6612164378166199,
"logits/chosen": -0.22212114930152893,
"logits/rejected": -0.23604507744312286,
"logps/chosen": -0.9464074969291687,
"logps/rejected": -1.160541296005249,
"loss": 0.7189,
"nll_loss": 0.7034357190132141,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.009464074857532978,
"rewards/margins": 0.0021413397043943405,
"rewards/rejected": -0.011605414561927319,
"step": 920
},
{
"epoch": 0.48722986247544203,
"grad_norm": 0.2251073682511537,
"learning_rate": 4.259188432368047e-06,
"log_odds_chosen": 0.2062271535396576,
"log_odds_ratio": -0.712655782699585,
"logits/chosen": -0.22355704009532928,
"logits/rejected": -0.21783855557441711,
"logps/chosen": -0.929416835308075,
"logps/rejected": -1.0706582069396973,
"loss": 0.7098,
"nll_loss": 0.6941349506378174,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.009294168092310429,
"rewards/margins": 0.0014124134322628379,
"rewards/rejected": -0.010706582106649876,
"step": 930
},
{
"epoch": 0.49246889325474785,
"grad_norm": 0.2896044869891697,
"learning_rate": 4.196550079812947e-06,
"log_odds_chosen": 0.14942023158073425,
"log_odds_ratio": -0.7291346788406372,
"logits/chosen": -0.20475438237190247,
"logits/rejected": -0.21312706172466278,
"logps/chosen": -0.9276836514472961,
"logps/rejected": -1.035669207572937,
"loss": 0.673,
"nll_loss": 0.6321808099746704,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.009276836179196835,
"rewards/margins": 0.0010798568837344646,
"rewards/rejected": -0.010356692597270012,
"step": 940
},
{
"epoch": 0.4977079240340537,
"grad_norm": 0.20608437650972947,
"learning_rate": 4.133678542915596e-06,
"log_odds_chosen": 0.21366234123706818,
"log_odds_ratio": -0.7176351547241211,
"logits/chosen": -0.1843259632587433,
"logits/rejected": -0.19231656193733215,
"logps/chosen": -0.9370969533920288,
"logps/rejected": -1.0868377685546875,
"loss": 0.7357,
"nll_loss": 0.689740777015686,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.009370969608426094,
"rewards/margins": 0.001497406861744821,
"rewards/rejected": -0.01086837612092495,
"step": 950
},
{
"epoch": 0.5029469548133595,
"grad_norm": 0.20440659461177407,
"learning_rate": 4.070594869204954e-06,
"log_odds_chosen": 0.31676673889160156,
"log_odds_ratio": -0.653446614742279,
"logits/chosen": -0.2184230387210846,
"logits/rejected": -0.232182115316391,
"logps/chosen": -0.8973399996757507,
"logps/rejected": -1.1070184707641602,
"loss": 0.6963,
"nll_loss": 0.6926812529563904,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.008973398245871067,
"rewards/margins": 0.0020967856980860233,
"rewards/rejected": -0.011070184409618378,
"step": 960
},
{
"epoch": 0.5081859855926654,
"grad_norm": 0.23363856927616253,
"learning_rate": 4.007320177227106e-06,
"log_odds_chosen": 0.18765881657600403,
"log_odds_ratio": -0.7033464908599854,
"logits/chosen": -0.22513191401958466,
"logits/rejected": -0.2241486757993698,
"logps/chosen": -0.9618891477584839,
"logps/rejected": -1.090888261795044,
"loss": 0.7067,
"nll_loss": 0.6825166940689087,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.009618892334401608,
"rewards/margins": 0.001289989217184484,
"rewards/rejected": -0.010908881202340126,
"step": 970
},
{
"epoch": 0.5134250163719711,
"grad_norm": 0.22410878238276724,
"learning_rate": 3.943875649475397e-06,
"log_odds_chosen": 0.2351769208908081,
"log_odds_ratio": -0.6929172277450562,
"logits/chosen": -0.23800428211688995,
"logits/rejected": -0.22524037957191467,
"logps/chosen": -0.9266209602355957,
"logps/rejected": -1.0913342237472534,
"loss": 0.7062,
"nll_loss": 0.7125518918037415,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.009266209788620472,
"rewards/margins": 0.0016471326816827059,
"rewards/rejected": -0.010913342237472534,
"step": 980
},
{
"epoch": 0.518664047151277,
"grad_norm": 0.2601248391316997,
"learning_rate": 3.880282525299161e-06,
"log_odds_chosen": 0.26979511976242065,
"log_odds_ratio": -0.6584133505821228,
"logits/chosen": -0.2896724343299866,
"logits/rejected": -0.27878767251968384,
"logps/chosen": -0.9138110280036926,
"logps/rejected": -1.0914112329483032,
"loss": 0.7303,
"nll_loss": 0.7162944078445435,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.009138109162449837,
"rewards/margins": 0.0017760014161467552,
"rewards/rejected": -0.010914110578596592,
"step": 990
},
{
"epoch": 0.5239030779305829,
"grad_norm": 0.2886836538722393,
"learning_rate": 3.816562093793414e-06,
"log_odds_chosen": 0.33312270045280457,
"log_odds_ratio": -0.675898551940918,
"logits/chosen": -0.23547939956188202,
"logits/rejected": -0.27134275436401367,
"logps/chosen": -0.9631227254867554,
"logps/rejected": -1.1864663362503052,
"loss": 0.7022,
"nll_loss": 0.701551079750061,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.00963122770190239,
"rewards/margins": 0.002233435632660985,
"rewards/rejected": -0.011864664033055305,
"step": 1000
},
{
"epoch": 0.5239030779305829,
"eval_log_odds_chosen": 0.2961971163749695,
"eval_log_odds_ratio": -0.6568813323974609,
"eval_logits/chosen": -0.25139424204826355,
"eval_logits/rejected": -0.25877639651298523,
"eval_logps/chosen": -0.9089908599853516,
"eval_logps/rejected": -1.110971450805664,
"eval_loss": 0.5842349529266357,
"eval_nll_loss": 0.5780314803123474,
"eval_rewards/accuracies": 0.6069999933242798,
"eval_rewards/chosen": -0.009089908562600613,
"eval_rewards/margins": 0.0020198060665279627,
"eval_rewards/rejected": -0.011109714396297932,
"eval_runtime": 268.1596,
"eval_samples_per_second": 7.455,
"eval_steps_per_second": 0.466,
"step": 1000
},
{
"epoch": 0.5291421087098886,
"grad_norm": 0.2409996471560838,
"learning_rate": 3.7527356866718955e-06,
"log_odds_chosen": 0.16977646946907043,
"log_odds_ratio": -0.718795120716095,
"logits/chosen": -0.234476238489151,
"logits/rejected": -0.24295465648174286,
"logps/chosen": -1.0260999202728271,
"logps/rejected": -1.149409532546997,
"loss": 0.7043,
"nll_loss": 0.6979072093963623,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.01026100106537342,
"rewards/margins": 0.0012330941390246153,
"rewards/rejected": -0.011494094505906105,
"step": 1010
},
{
"epoch": 0.5343811394891945,
"grad_norm": 0.24739341420079547,
"learning_rate": 3.6888246711258453e-06,
"log_odds_chosen": 0.36255502700805664,
"log_odds_ratio": -0.6197658777236938,
"logits/chosen": -0.24051399528980255,
"logits/rejected": -0.26482734084129333,
"logps/chosen": -0.875298798084259,
"logps/rejected": -1.1210336685180664,
"loss": 0.7153,
"nll_loss": 0.7133646607398987,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.008752988651394844,
"rewards/margins": 0.0024573481641709805,
"rewards/rejected": -0.011210335418581963,
"step": 1020
},
{
"epoch": 0.5396201702685003,
"grad_norm": 0.3154631649103451,
"learning_rate": 3.6248504426708986e-06,
"log_odds_chosen": 0.21956510841846466,
"log_odds_ratio": -0.693530261516571,
"logits/chosen": -0.2142527848482132,
"logits/rejected": -0.2322002351284027,
"logps/chosen": -1.004662275314331,
"logps/rejected": -1.155106782913208,
"loss": 0.7213,
"nll_loss": 0.7184774875640869,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.010046622715890408,
"rewards/margins": 0.0015044447500258684,
"rewards/rejected": -0.011551067233085632,
"step": 1030
},
{
"epoch": 0.5448592010478062,
"grad_norm": 0.3984212728138088,
"learning_rate": 3.5608344179844997e-06,
"log_odds_chosen": 0.07040030509233475,
"log_odds_ratio": -0.7465513348579407,
"logits/chosen": -0.26399117708206177,
"logits/rejected": -0.26066404581069946,
"logps/chosen": -0.9947193264961243,
"logps/rejected": -1.038916826248169,
"loss": 0.7643,
"nll_loss": 0.8070164918899536,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.009947193786501884,
"rewards/margins": 0.0004419738834258169,
"rewards/rejected": -0.010389168746769428,
"step": 1040
},
{
"epoch": 0.550098231827112,
"grad_norm": 0.24683028263597967,
"learning_rate": 3.4967980277362333e-06,
"log_odds_chosen": 0.26563113927841187,
"log_odds_ratio": -0.6936607956886292,
"logits/chosen": -0.24129600822925568,
"logits/rejected": -0.2225707471370697,
"logps/chosen": -0.9990348815917969,
"logps/rejected": -1.1718101501464844,
"loss": 0.6967,
"nll_loss": 0.6925816535949707,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.009990348480641842,
"rewards/margins": 0.001727753086015582,
"rewards/rejected": -0.011718102730810642,
"step": 1050
},
{
"epoch": 0.5553372626064178,
"grad_norm": 0.25916876482479656,
"learning_rate": 3.4327627094134725e-06,
"log_odds_chosen": 0.23857775330543518,
"log_odds_ratio": -0.6982980966567993,
"logits/chosen": -0.2231340855360031,
"logits/rejected": -0.23151478171348572,
"logps/chosen": -0.9416376352310181,
"logps/rejected": -1.1105958223342896,
"loss": 0.6977,
"nll_loss": 0.7219172716140747,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.009416376240551472,
"rewards/margins": 0.0016895814333111048,
"rewards/rejected": -0.011105956509709358,
"step": 1060
},
{
"epoch": 0.5605762933857237,
"grad_norm": 0.32629059742912764,
"learning_rate": 3.3687499001447395e-06,
"log_odds_chosen": 0.36377382278442383,
"log_odds_ratio": -0.6207367777824402,
"logits/chosen": -0.22005310654640198,
"logits/rejected": -0.2187117338180542,
"logps/chosen": -0.8868287801742554,
"logps/rejected": -1.1314175128936768,
"loss": 0.687,
"nll_loss": 0.6751260757446289,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.00886828824877739,
"rewards/margins": 0.0024458863772451878,
"rewards/rejected": -0.01131417416036129,
"step": 1070
},
{
"epoch": 0.5658153241650294,
"grad_norm": 0.29711247258862283,
"learning_rate": 3.304781029523195e-06,
"log_odds_chosen": 0.1744251698255539,
"log_odds_ratio": -0.7341504096984863,
"logits/chosen": -0.22395114600658417,
"logits/rejected": -0.2257882058620453,
"logps/chosen": -0.9004520177841187,
"logps/rejected": -1.040974497795105,
"loss": 0.6746,
"nll_loss": 0.6490055322647095,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.009004520252346992,
"rewards/margins": 0.0014052249025553465,
"rewards/rejected": -0.010409745387732983,
"step": 1080
},
{
"epoch": 0.5710543549443353,
"grad_norm": 0.22026906097892585,
"learning_rate": 3.240877512432638e-06,
"log_odds_chosen": 0.20723943412303925,
"log_odds_ratio": -0.7219654321670532,
"logits/chosen": -0.2135605365037918,
"logits/rejected": -0.2091299295425415,
"logps/chosen": -0.956895649433136,
"logps/rejected": -1.086503267288208,
"loss": 0.6855,
"nll_loss": 0.683186411857605,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.00956895761191845,
"rewards/margins": 0.0012960756430402398,
"rewards/rejected": -0.01086503267288208,
"step": 1090
},
{
"epoch": 0.5762933857236411,
"grad_norm": 0.22524737228540384,
"learning_rate": 3.1770607418784433e-06,
"log_odds_chosen": 0.25248509645462036,
"log_odds_ratio": -0.6578890085220337,
"logits/chosen": -0.2398686707019806,
"logits/rejected": -0.23859818279743195,
"logps/chosen": -0.9096572995185852,
"logps/rejected": -1.0772696733474731,
"loss": 0.6805,
"nll_loss": 0.6755146980285645,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.009096571244299412,
"rewards/margins": 0.0016761248698458076,
"rewards/rejected": -0.010772697627544403,
"step": 1100
},
{
"epoch": 0.5762933857236411,
"eval_log_odds_chosen": 0.2937109172344208,
"eval_log_odds_ratio": -0.660809338092804,
"eval_logits/chosen": -0.25185319781303406,
"eval_logits/rejected": -0.25904712080955505,
"eval_logps/chosen": -0.8865421414375305,
"eval_logps/rejected": -1.0833343267440796,
"eval_loss": 0.5807305574417114,
"eval_nll_loss": 0.5744020342826843,
"eval_rewards/accuracies": 0.6019999980926514,
"eval_rewards/chosen": -0.008865421637892723,
"eval_rewards/margins": 0.0019679218530654907,
"eval_rewards/rejected": -0.010833343490958214,
"eval_runtime": 271.1106,
"eval_samples_per_second": 7.373,
"eval_steps_per_second": 0.461,
"step": 1100
},
{
"epoch": 0.581532416502947,
"grad_norm": 0.45255186528770547,
"learning_rate": 3.1133520818258116e-06,
"log_odds_chosen": 0.4233662188053131,
"log_odds_ratio": -0.6200209856033325,
"logits/chosen": -0.24288010597229004,
"logits/rejected": -0.2747390866279602,
"logps/chosen": -0.8419440388679504,
"logps/rejected": -1.1101503372192383,
"loss": 0.7,
"nll_loss": 0.7085453271865845,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.008419441059231758,
"rewards/margins": 0.00268206256441772,
"rewards/rejected": -0.011101502925157547,
"step": 1110
},
{
"epoch": 0.5867714472822528,
"grad_norm": 0.2554458264274049,
"learning_rate": 3.0497728600477488e-06,
"log_odds_chosen": 0.2578720152378082,
"log_odds_ratio": -0.6819779276847839,
"logits/chosen": -0.226557657122612,
"logits/rejected": -0.2220630645751953,
"logps/chosen": -0.9298788905143738,
"logps/rejected": -1.101123571395874,
"loss": 0.7333,
"nll_loss": 0.6786555051803589,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.00929878931492567,
"rewards/margins": 0.0017124470323324203,
"rewards/rejected": -0.011011235415935516,
"step": 1120
},
{
"epoch": 0.5920104780615586,
"grad_norm": 0.26482758285065394,
"learning_rate": 2.986344360985162e-06,
"log_odds_chosen": 0.36092090606689453,
"log_odds_ratio": -0.6440589427947998,
"logits/chosen": -0.17206324636936188,
"logits/rejected": -0.19123123586177826,
"logps/chosen": -0.8286620378494263,
"logps/rejected": -1.0395228862762451,
"loss": 0.6597,
"nll_loss": 0.6818104982376099,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.008286619558930397,
"rewards/margins": 0.002108608838170767,
"rewards/rejected": -0.010395227000117302,
"step": 1130
},
{
"epoch": 0.5972495088408645,
"grad_norm": 0.24208228363944706,
"learning_rate": 2.923087818621452e-06,
"log_odds_chosen": 0.17570583522319794,
"log_odds_ratio": -0.7043822407722473,
"logits/chosen": -0.26782792806625366,
"logits/rejected": -0.26422086358070374,
"logps/chosen": -1.0224881172180176,
"logps/rejected": -1.1496175527572632,
"loss": 0.6784,
"nll_loss": 0.6882492899894714,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.01022487971931696,
"rewards/margins": 0.0012712948955595493,
"rewards/rejected": -0.011496175080537796,
"step": 1140
},
{
"epoch": 0.6024885396201702,
"grad_norm": 0.22840937868876304,
"learning_rate": 2.860024409374013e-06,
"log_odds_chosen": 0.19850441813468933,
"log_odds_ratio": -0.7063683271408081,
"logits/chosen": -0.21049292385578156,
"logits/rejected": -0.2100718915462494,
"logps/chosen": -0.9241889119148254,
"logps/rejected": -1.0648218393325806,
"loss": 0.7062,
"nll_loss": 0.676911473274231,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.009241889230906963,
"rewards/margins": 0.0014063273556530476,
"rewards/rejected": -0.010648216120898724,
"step": 1150
},
{
"epoch": 0.6077275703994761,
"grad_norm": 0.2640680842240368,
"learning_rate": 2.797175245004986e-06,
"log_odds_chosen": 0.26555079221725464,
"log_odds_ratio": -0.6697388887405396,
"logits/chosen": -0.2078789472579956,
"logits/rejected": -0.2241998165845871,
"logps/chosen": -0.9275274276733398,
"logps/rejected": -1.1062742471694946,
"loss": 0.7186,
"nll_loss": 0.7219773530960083,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.009275272488594055,
"rewards/margins": 0.0017874684417620301,
"rewards/rejected": -0.011062742210924625,
"step": 1160
},
{
"epoch": 0.6129666011787819,
"grad_norm": 0.24081971015397918,
"learning_rate": 2.734561365553671e-06,
"log_odds_chosen": 0.22908750176429749,
"log_odds_ratio": -0.7178616523742676,
"logits/chosen": -0.25099799036979675,
"logits/rejected": -0.24473123252391815,
"logps/chosen": -0.9188436269760132,
"logps/rejected": -1.0757328271865845,
"loss": 0.6875,
"nll_loss": 0.7033326625823975,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.009188435971736908,
"rewards/margins": 0.0015688911080360413,
"rewards/rejected": -0.01075732707977295,
"step": 1170
},
{
"epoch": 0.6182056319580878,
"grad_norm": 0.21690804460738192,
"learning_rate": 2.6722037322929485e-06,
"log_odds_chosen": 0.25051847100257874,
"log_odds_ratio": -0.6795316934585571,
"logits/chosen": -0.22288396954536438,
"logits/rejected": -0.2265399992465973,
"logps/chosen": -0.8900327682495117,
"logps/rejected": -1.0318130254745483,
"loss": 0.7016,
"nll_loss": 0.7159923911094666,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.008900327607989311,
"rewards/margins": 0.0014178026467561722,
"rewards/rejected": -0.010318130254745483,
"step": 1180
},
{
"epoch": 0.6234446627373936,
"grad_norm": 0.2480515943450666,
"learning_rate": 2.6101232207120546e-06,
"log_odds_chosen": 0.25469428300857544,
"log_odds_ratio": -0.7002100944519043,
"logits/chosen": -0.24988976120948792,
"logits/rejected": -0.25235018134117126,
"logps/chosen": -0.8989761471748352,
"logps/rejected": -1.0557796955108643,
"loss": 0.7201,
"nll_loss": 0.718826174736023,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.008989760652184486,
"rewards/margins": 0.0015680358046665788,
"rewards/rejected": -0.010557797737419605,
"step": 1190
},
{
"epoch": 0.6286836935166994,
"grad_norm": 0.34122416682838863,
"learning_rate": 2.5483406135281005e-06,
"log_odds_chosen": 0.41061514616012573,
"log_odds_ratio": -0.6271175146102905,
"logits/chosen": -0.20977671444416046,
"logits/rejected": -0.22667160630226135,
"logps/chosen": -0.7675566673278809,
"logps/rejected": -1.0119469165802002,
"loss": 0.6427,
"nll_loss": 0.6016801595687866,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.007675566710531712,
"rewards/margins": 0.002443903125822544,
"rewards/rejected": -0.010119469836354256,
"step": 1200
},
{
"epoch": 0.6286836935166994,
"eval_log_odds_chosen": 0.3024108111858368,
"eval_log_odds_ratio": -0.6608767509460449,
"eval_logits/chosen": -0.24303622543811798,
"eval_logits/rejected": -0.2482856959104538,
"eval_logps/chosen": -0.8682465553283691,
"eval_logps/rejected": -1.0670232772827148,
"eval_loss": 0.5780009031295776,
"eval_nll_loss": 0.5716609954833984,
"eval_rewards/accuracies": 0.6069999933242798,
"eval_rewards/chosen": -0.008682465180754662,
"eval_rewards/margins": 0.0019877671729773283,
"eval_rewards/rejected": -0.010670232586562634,
"eval_runtime": 278.9619,
"eval_samples_per_second": 7.166,
"eval_steps_per_second": 0.448,
"step": 1200
},
{
"epoch": 0.6339227242960053,
"grad_norm": 0.22680090574543296,
"learning_rate": 2.486876593728619e-06,
"log_odds_chosen": 0.3060600161552429,
"log_odds_ratio": -0.6400117874145508,
"logits/chosen": -0.20497791469097137,
"logits/rejected": -0.21911552548408508,
"logps/chosen": -0.8201937675476074,
"logps/rejected": -1.0039427280426025,
"loss": 0.6677,
"nll_loss": 0.6524402499198914,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.00820193625986576,
"rewards/margins": 0.0018374897772446275,
"rewards/rejected": -0.010039427317678928,
"step": 1210
},
{
"epoch": 0.639161755075311,
"grad_norm": 0.18697079192797328,
"learning_rate": 2.4257517376475235e-06,
"log_odds_chosen": 0.24433453381061554,
"log_odds_ratio": -0.7043715715408325,
"logits/chosen": -0.21600952744483948,
"logits/rejected": -0.21840915083885193,
"logps/chosen": -0.8748706579208374,
"logps/rejected": -1.038516879081726,
"loss": 0.689,
"nll_loss": 0.7082723379135132,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.008748706430196762,
"rewards/margins": 0.0016364629846066236,
"rewards/rejected": -0.010385168716311455,
"step": 1220
},
{
"epoch": 0.6444007858546169,
"grad_norm": 0.26043966163192633,
"learning_rate": 2.3649865080767573e-06,
"log_odds_chosen": 0.39842092990875244,
"log_odds_ratio": -0.6394789218902588,
"logits/chosen": -0.17968204617500305,
"logits/rejected": -0.22999629378318787,
"logps/chosen": -0.8687442541122437,
"logps/rejected": -1.1317379474639893,
"loss": 0.691,
"nll_loss": 0.6364887952804565,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.008687443099915981,
"rewards/margins": 0.0026299364399164915,
"rewards/rejected": -0.011317379772663116,
"step": 1230
},
{
"epoch": 0.6496398166339227,
"grad_norm": 0.2934035468939971,
"learning_rate": 2.3046012474159536e-06,
"log_odds_chosen": 0.18857654929161072,
"log_odds_ratio": -0.7218400835990906,
"logits/chosen": -0.22426645457744598,
"logits/rejected": -0.2606600522994995,
"logps/chosen": -0.8423234820365906,
"logps/rejected": -0.9672554135322571,
"loss": 0.6626,
"nll_loss": 0.6410557627677917,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.008423235267400742,
"rewards/margins": 0.0012493181275203824,
"rewards/rejected": -0.009672552347183228,
"step": 1240
},
{
"epoch": 0.6548788474132285,
"grad_norm": 0.2750280322287747,
"learning_rate": 2.2446161708624088e-06,
"log_odds_chosen": 0.35305264592170715,
"log_odds_ratio": -0.6322815418243408,
"logits/chosen": -0.22593221068382263,
"logits/rejected": -0.23385939002037048,
"logps/chosen": -0.8886737823486328,
"logps/rejected": -1.1158584356307983,
"loss": 0.7241,
"nll_loss": 0.7064876556396484,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -0.008886737748980522,
"rewards/margins": 0.0022718466352671385,
"rewards/rejected": -0.011158584617078304,
"step": 1250
},
{
"epoch": 0.6601178781925344,
"grad_norm": 0.2850170024015937,
"learning_rate": 2.1850513596436247e-06,
"log_odds_chosen": 0.2020442932844162,
"log_odds_ratio": -0.7057245373725891,
"logits/chosen": -0.2613026201725006,
"logits/rejected": -0.2628094553947449,
"logps/chosen": -0.8931059837341309,
"logps/rejected": -1.0006712675094604,
"loss": 0.6932,
"nll_loss": 0.6863256692886353,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.008931060321629047,
"rewards/margins": 0.001075651729479432,
"rewards/rejected": -0.010006711818277836,
"step": 1260
},
{
"epoch": 0.6653569089718402,
"grad_norm": 0.2334203645902276,
"learning_rate": 2.1259267542947185e-06,
"log_odds_chosen": 0.32196345925331116,
"log_odds_ratio": -0.6869245767593384,
"logits/chosen": -0.22752761840820312,
"logits/rejected": -0.22912946343421936,
"logps/chosen": -0.8861078023910522,
"logps/rejected": -1.070960283279419,
"loss": 0.6958,
"nll_loss": 0.7246706485748291,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.008861077949404716,
"rewards/margins": 0.0018485232722014189,
"rewards/rejected": -0.010709600523114204,
"step": 1270
},
{
"epoch": 0.6705959397511461,
"grad_norm": 0.2937841890124048,
"learning_rate": 2.067262147982912e-06,
"log_odds_chosen": 0.32801932096481323,
"log_odds_ratio": -0.6607939004898071,
"logits/chosen": -0.24211068451404572,
"logits/rejected": -0.2737501263618469,
"logps/chosen": -0.8753350377082825,
"logps/rejected": -1.0936840772628784,
"loss": 0.673,
"nll_loss": 0.7191804051399231,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.008753350004553795,
"rewards/margins": 0.002183489967137575,
"rewards/rejected": -0.010936839506030083,
"step": 1280
},
{
"epoch": 0.6758349705304518,
"grad_norm": 0.28448016420781497,
"learning_rate": 2.009077179881372e-06,
"log_odds_chosen": 0.17785023152828217,
"log_odds_ratio": -0.6940667033195496,
"logits/chosen": -0.20315225422382355,
"logits/rejected": -0.21491765975952148,
"logps/chosen": -0.9090517163276672,
"logps/rejected": -1.0145957469940186,
"loss": 0.7026,
"nll_loss": 0.6749101281166077,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.009090515784919262,
"rewards/margins": 0.0010554410982877016,
"rewards/rejected": -0.01014595665037632,
"step": 1290
},
{
"epoch": 0.6810740013097577,
"grad_norm": 0.24264184898863173,
"learning_rate": 1.9513913285945946e-06,
"log_odds_chosen": 0.42332401871681213,
"log_odds_ratio": -0.6192356944084167,
"logits/chosen": -0.1945888102054596,
"logits/rejected": -0.22977054119110107,
"logps/chosen": -0.8285413980484009,
"logps/rejected": -1.0920394659042358,
"loss": 0.6762,
"nll_loss": 0.6468032598495483,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.00828541349619627,
"rewards/margins": 0.0026349793188273907,
"rewards/rejected": -0.010920394212007523,
"step": 1300
},
{
"epoch": 0.6810740013097577,
"eval_log_odds_chosen": 0.3069436550140381,
"eval_log_odds_ratio": -0.6618441939353943,
"eval_logits/chosen": -0.23222360014915466,
"eval_logits/rejected": -0.23756533861160278,
"eval_logps/chosen": -0.8586292266845703,
"eval_logps/rejected": -1.0576375722885132,
"eval_loss": 0.5761923789978027,
"eval_nll_loss": 0.5697891712188721,
"eval_rewards/accuracies": 0.6069999933242798,
"eval_rewards/chosen": -0.008586292155086994,
"eval_rewards/margins": 0.0019900836050510406,
"eval_rewards/rejected": -0.01057637482881546,
"eval_runtime": 268.7533,
"eval_samples_per_second": 7.438,
"eval_steps_per_second": 0.465,
"step": 1300
},
{
"epoch": 0.6863130320890635,
"grad_norm": 0.22307221268147773,
"learning_rate": 1.8942239056375397e-06,
"log_odds_chosen": 0.18992213904857635,
"log_odds_ratio": -0.702189564704895,
"logits/chosen": -0.2199142426252365,
"logits/rejected": -0.20674149692058563,
"logps/chosen": -0.8908072710037231,
"logps/rejected": -1.0369397401809692,
"loss": 0.6973,
"nll_loss": 0.6958785057067871,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.008908072486519814,
"rewards/margins": 0.0014613252133131027,
"rewards/rejected": -0.010369397699832916,
"step": 1310
},
{
"epoch": 0.6915520628683693,
"grad_norm": 0.30978725818000513,
"learning_rate": 1.837594048970723e-06,
"log_odds_chosen": 0.14045199751853943,
"log_odds_ratio": -0.7219871878623962,
"logits/chosen": -0.2725422978401184,
"logits/rejected": -0.26409873366355896,
"logps/chosen": -0.9043842554092407,
"logps/rejected": -0.9981764554977417,
"loss": 0.6863,
"nll_loss": 0.7046749591827393,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.009043841622769833,
"rewards/margins": 0.0009379230323247612,
"rewards/rejected": -0.009981764480471611,
"step": 1320
},
{
"epoch": 0.6967910936476752,
"grad_norm": 0.22502876667437588,
"learning_rate": 1.7815207165933726e-06,
"log_odds_chosen": 0.24389496445655823,
"log_odds_ratio": -0.7243469953536987,
"logits/chosen": -0.22347286343574524,
"logits/rejected": -0.2305651158094406,
"logps/chosen": -0.8554804921150208,
"logps/rejected": -1.0290186405181885,
"loss": 0.6949,
"nll_loss": 0.7154419422149658,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.008554804138839245,
"rewards/margins": 0.0017353817820549011,
"rewards/rejected": -0.010290185920894146,
"step": 1330
},
{
"epoch": 0.702030124426981,
"grad_norm": 0.3089322594713111,
"learning_rate": 1.7260226801968695e-06,
"log_odds_chosen": 0.23613600432872772,
"log_odds_ratio": -0.691783607006073,
"logits/chosen": -0.21107229590415955,
"logits/rejected": -0.21636977791786194,
"logps/chosen": -0.9278246164321899,
"logps/rejected": -1.0566743612289429,
"loss": 0.6692,
"nll_loss": 0.6606216430664062,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.009278246201574802,
"rewards/margins": 0.0012884981697425246,
"rewards/rejected": -0.010566744022071362,
"step": 1340
},
{
"epoch": 0.7072691552062869,
"grad_norm": 0.2668190498643392,
"learning_rate": 1.671118518880532e-06,
"log_odds_chosen": 0.189827561378479,
"log_odds_ratio": -0.7072278261184692,
"logits/chosen": -0.2188723087310791,
"logits/rejected": -0.21698541939258575,
"logps/chosen": -0.896720290184021,
"logps/rejected": -1.0248229503631592,
"loss": 0.6859,
"nll_loss": 0.6656599640846252,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.008967203088104725,
"rewards/margins": 0.0012810255866497755,
"rewards/rejected": -0.010248229838907719,
"step": 1350
},
{
"epoch": 0.7125081859855926,
"grad_norm": 0.24259424337870872,
"learning_rate": 1.6168266129318865e-06,
"log_odds_chosen": 0.16265830397605896,
"log_odds_ratio": -0.7222410440444946,
"logits/chosen": -0.20469431579113007,
"logits/rejected": -0.21427664160728455,
"logps/chosen": -0.8708987236022949,
"logps/rejected": -0.9569934010505676,
"loss": 0.704,
"nll_loss": 0.6836594343185425,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.008708987385034561,
"rewards/margins": 0.0008609470096416771,
"rewards/rejected": -0.009569934569299221,
"step": 1360
},
{
"epoch": 0.7177472167648985,
"grad_norm": 0.2256184064459049,
"learning_rate": 1.5631651376734926e-06,
"log_odds_chosen": 0.22786390781402588,
"log_odds_ratio": -0.711846649646759,
"logits/chosen": -0.22860285639762878,
"logits/rejected": -0.24218401312828064,
"logps/chosen": -0.8583256602287292,
"logps/rejected": -0.9923291206359863,
"loss": 0.7127,
"nll_loss": 0.6810920238494873,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.008583256043493748,
"rewards/margins": 0.0013400347670540214,
"rewards/rejected": -0.009923290461301804,
"step": 1370
},
{
"epoch": 0.7229862475442044,
"grad_norm": 0.23503577068826886,
"learning_rate": 1.5101520573783751e-06,
"log_odds_chosen": 0.3741925358772278,
"log_odds_ratio": -0.6353830695152283,
"logits/chosen": -0.21928901970386505,
"logits/rejected": -0.24703797698020935,
"logps/chosen": -0.8672024011611938,
"logps/rejected": -1.1037095785140991,
"loss": 0.7124,
"nll_loss": 0.6939696073532104,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.008672022260725498,
"rewards/margins": 0.0023650729563087225,
"rewards/rejected": -0.011037096381187439,
"step": 1380
},
{
"epoch": 0.7282252783235101,
"grad_norm": 0.23280207331329258,
"learning_rate": 1.4578051192561342e-06,
"log_odds_chosen": 0.1830010563135147,
"log_odds_ratio": -0.7071625590324402,
"logits/chosen": -0.21185937523841858,
"logits/rejected": -0.20238959789276123,
"logps/chosen": -0.8515766263008118,
"logps/rejected": -0.9668426513671875,
"loss": 0.7134,
"nll_loss": 0.6686742901802063,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.008515766821801662,
"rewards/margins": 0.0011526598827913404,
"rewards/rejected": -0.009668425656855106,
"step": 1390
},
{
"epoch": 0.733464309102816,
"grad_norm": 0.2380029203431237,
"learning_rate": 1.4061418475116842e-06,
"log_odds_chosen": 0.39564546942710876,
"log_odds_ratio": -0.6270996332168579,
"logits/chosen": -0.24804405868053436,
"logits/rejected": -0.2720070779323578,
"logps/chosen": -0.8107814788818359,
"logps/rejected": -1.06538724899292,
"loss": 0.6944,
"nll_loss": 0.7024892568588257,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.008107814006507397,
"rewards/margins": 0.002546058502048254,
"rewards/rejected": -0.010653872042894363,
"step": 1400
},
{
"epoch": 0.733464309102816,
"eval_log_odds_chosen": 0.3101637065410614,
"eval_log_odds_ratio": -0.6609058976173401,
"eval_logits/chosen": -0.24197958409786224,
"eval_logits/rejected": -0.24680981040000916,
"eval_logps/chosen": -0.8542194962501526,
"eval_logps/rejected": -1.0547674894332886,
"eval_loss": 0.575017511844635,
"eval_nll_loss": 0.5686248540878296,
"eval_rewards/accuracies": 0.6069999933242798,
"eval_rewards/chosen": -0.008542194031178951,
"eval_rewards/margins": 0.0020054795313626528,
"eval_rewards/rejected": -0.010547674261033535,
"eval_runtime": 281.1827,
"eval_samples_per_second": 7.109,
"eval_steps_per_second": 0.445,
"step": 1400
},
{
"epoch": 0.7387033398821218,
"grad_norm": 0.2669416945991939,
"learning_rate": 1.3551795374786858e-06,
"log_odds_chosen": 0.25648033618927,
"log_odds_ratio": -0.6926220059394836,
"logits/chosen": -0.2435269057750702,
"logits/rejected": -0.2436356544494629,
"logps/chosen": -0.8806196451187134,
"logps/rejected": -1.0599586963653564,
"loss": 0.7048,
"nll_loss": 0.696427583694458,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.008806196041405201,
"rewards/margins": 0.0017933888593688607,
"rewards/rejected": -0.010599585250020027,
"step": 1410
},
{
"epoch": 0.7439423706614277,
"grad_norm": 0.24647473183287075,
"learning_rate": 1.3049352498295716e-06,
"log_odds_chosen": 0.38035115599632263,
"log_odds_ratio": -0.6361268162727356,
"logits/chosen": -0.24278739094734192,
"logits/rejected": -0.26014792919158936,
"logps/chosen": -0.8281903266906738,
"logps/rejected": -1.0844037532806396,
"loss": 0.6639,
"nll_loss": 0.6452184915542603,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.008281903341412544,
"rewards/margins": 0.0025621347595006227,
"rewards/rejected": -0.010844036936759949,
"step": 1420
},
{
"epoch": 0.7491814014407334,
"grad_norm": 0.3041558655657045,
"learning_rate": 1.2554258048641397e-06,
"log_odds_chosen": 0.19218070805072784,
"log_odds_ratio": -0.7099407911300659,
"logits/chosen": -0.2104649543762207,
"logits/rejected": -0.24154922366142273,
"logps/chosen": -0.8334420919418335,
"logps/rejected": -0.9641669988632202,
"loss": 0.6748,
"nll_loss": 0.6133008599281311,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.008334420621395111,
"rewards/margins": 0.0013072482543066144,
"rewards/rejected": -0.009641669690608978,
"step": 1430
},
{
"epoch": 0.7544204322200393,
"grad_norm": 0.25179642126636953,
"learning_rate": 1.2066677768786188e-06,
"log_odds_chosen": 0.2702215313911438,
"log_odds_ratio": -0.6887364387512207,
"logits/chosen": -0.2444891482591629,
"logits/rejected": -0.24194936454296112,
"logps/chosen": -0.9080901145935059,
"logps/rejected": -1.1055560111999512,
"loss": 0.6932,
"nll_loss": 0.6903983354568481,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.009080900810658932,
"rewards/margins": 0.001974658574908972,
"rewards/rejected": -0.01105555985122919,
"step": 1440
},
{
"epoch": 0.7596594629993452,
"grad_norm": 0.2566284168448171,
"learning_rate": 1.1586774886170772e-06,
"log_odds_chosen": 0.30707067251205444,
"log_odds_ratio": -0.6694291234016418,
"logits/chosen": -0.28426066040992737,
"logits/rejected": -0.26999443769454956,
"logps/chosen": -0.8766347765922546,
"logps/rejected": -1.0652177333831787,
"loss": 0.7163,
"nll_loss": 0.7289090752601624,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.008766347542405128,
"rewards/margins": 0.0018858297262340784,
"rewards/rejected": -0.010652177035808563,
"step": 1450
},
{
"epoch": 0.7648984937786509,
"grad_norm": 0.19726939440168126,
"learning_rate": 1.1114710058070592e-06,
"log_odds_chosen": 0.09831424057483673,
"log_odds_ratio": -0.7557646632194519,
"logits/chosen": -0.19436194002628326,
"logits/rejected": -0.19044212996959686,
"logps/chosen": -0.885438084602356,
"logps/rejected": -0.9487984776496887,
"loss": 0.6404,
"nll_loss": 0.634408712387085,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.008854379877448082,
"rewards/margins": 0.0006336040096357465,
"rewards/rejected": -0.009487984701991081,
"step": 1460
},
{
"epoch": 0.7701375245579568,
"grad_norm": 0.2560415712825397,
"learning_rate": 1.065064131781252e-06,
"log_odds_chosen": 0.13219106197357178,
"log_odds_ratio": -0.7492179870605469,
"logits/chosen": -0.22603929042816162,
"logits/rejected": -0.24617178738117218,
"logps/chosen": -0.9043794870376587,
"logps/rejected": -1.0165117979049683,
"loss": 0.7111,
"nll_loss": 0.6886984705924988,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.009043793193995953,
"rewards/margins": 0.0011213240213692188,
"rewards/rejected": -0.010165116749703884,
"step": 1470
},
{
"epoch": 0.7753765553372626,
"grad_norm": 0.2940452908279725,
"learning_rate": 1.0194724021869967e-06,
"log_odds_chosen": 0.26290208101272583,
"log_odds_ratio": -0.6962881684303284,
"logits/chosen": -0.23025016486644745,
"logits/rejected": -0.26146119832992554,
"logps/chosen": -0.889056384563446,
"logps/rejected": -1.077383279800415,
"loss": 0.7356,
"nll_loss": 0.7125190496444702,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.008890563622117043,
"rewards/margins": 0.0018832674250006676,
"rewards/rejected": -0.010773831978440285,
"step": 1480
},
{
"epoch": 0.7806155861165684,
"grad_norm": 0.30243858167930426,
"learning_rate": 9.747110797854164e-07,
"log_odds_chosen": 0.21796353161334991,
"log_odds_ratio": -0.7083435654640198,
"logits/chosen": -0.24902808666229248,
"logits/rejected": -0.2640915513038635,
"logps/chosen": -0.8834966421127319,
"logps/rejected": -1.0319344997406006,
"loss": 0.7424,
"nll_loss": 0.7267500162124634,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.008834966458380222,
"rewards/margins": 0.0014843789394944906,
"rewards/rejected": -0.010319346562027931,
"step": 1490
},
{
"epoch": 0.7858546168958742,
"grad_norm": 0.28561425999576384,
"learning_rate": 9.307951493418893e-07,
"log_odds_chosen": 0.43342700600624084,
"log_odds_ratio": -0.6266660690307617,
"logits/chosen": -0.20827969908714294,
"logits/rejected": -0.22024419903755188,
"logps/chosen": -0.8941072225570679,
"logps/rejected": -1.1734715700149536,
"loss": 0.6695,
"nll_loss": 0.6944609880447388,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.008941072039306164,
"rewards/margins": 0.002793641993775964,
"rewards/rejected": -0.01173471286892891,
"step": 1500
},
{
"epoch": 0.7858546168958742,
"eval_log_odds_chosen": 0.3134806752204895,
"eval_log_odds_ratio": -0.661631166934967,
"eval_logits/chosen": -0.2371899038553238,
"eval_logits/rejected": -0.2425604611635208,
"eval_logps/chosen": -0.8493260145187378,
"eval_logps/rejected": -1.0505434274673462,
"eval_loss": 0.5742121338844299,
"eval_nll_loss": 0.5677821040153503,
"eval_rewards/accuracies": 0.6079999804496765,
"eval_rewards/chosen": -0.008493260480463505,
"eval_rewards/margins": 0.002012175042182207,
"eval_rewards/rejected": -0.01050543412566185,
"eval_runtime": 270.8065,
"eval_samples_per_second": 7.382,
"eval_steps_per_second": 0.462,
"step": 1500
},
{
"epoch": 0.7910936476751801,
"grad_norm": 0.2254797688672913,
"learning_rate": 8.877393126096055e-07,
"log_odds_chosen": 0.24689963459968567,
"log_odds_ratio": -0.6949166059494019,
"logits/chosen": -0.2658199369907379,
"logits/rejected": -0.2810281813144684,
"logps/chosen": -0.8999541997909546,
"logps/rejected": -1.086625099182129,
"loss": 0.7423,
"nll_loss": 0.7465766668319702,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.008999543264508247,
"rewards/margins": 0.0018667096737772226,
"rewards/rejected": -0.010866251774132252,
"step": 1510
},
{
"epoch": 0.796332678454486,
"grad_norm": 0.24924822034612845,
"learning_rate": 8.455579834078397e-07,
"log_odds_chosen": 0.3433853089809418,
"log_odds_ratio": -0.6635347604751587,
"logits/chosen": -0.18545587360858917,
"logits/rejected": -0.200174480676651,
"logps/chosen": -0.8561753034591675,
"logps/rejected": -1.0463998317718506,
"loss": 0.6971,
"nll_loss": 0.6929630041122437,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.008561753667891026,
"rewards/margins": 0.0019022446358576417,
"rewards/rejected": -0.010463997721672058,
"step": 1520
},
{
"epoch": 0.8015717092337917,
"grad_norm": 0.2718986137429928,
"learning_rate": 8.042652827966437e-07,
"log_odds_chosen": 0.26018717885017395,
"log_odds_ratio": -0.6947474479675293,
"logits/chosen": -0.25021594762802124,
"logits/rejected": -0.24818949401378632,
"logps/chosen": -0.8872531652450562,
"logps/rejected": -1.0701402425765991,
"loss": 0.7235,
"nll_loss": 0.7240532636642456,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.008872531354427338,
"rewards/margins": 0.0018288707360625267,
"rewards/rejected": -0.010701403021812439,
"step": 1530
},
{
"epoch": 0.8068107400130976,
"grad_norm": 0.5108343233985189,
"learning_rate": 7.638750343495277e-07,
"log_odds_chosen": 0.3117789924144745,
"log_odds_ratio": -0.6702481508255005,
"logits/chosen": -0.23931124806404114,
"logits/rejected": -0.22990107536315918,
"logps/chosen": -0.8592001795768738,
"logps/rejected": -1.0367764234542847,
"loss": 0.6604,
"nll_loss": 0.6802663803100586,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.008592001162469387,
"rewards/margins": 0.00177576276473701,
"rewards/rejected": -0.01036776602268219,
"step": 1540
},
{
"epoch": 0.8120497707924034,
"grad_norm": 0.2781111416927778,
"learning_rate": 7.244007595257382e-07,
"log_odds_chosen": 0.27534064650535583,
"log_odds_ratio": -0.6695243716239929,
"logits/chosen": -0.20855948328971863,
"logits/rejected": -0.23724588751792908,
"logps/chosen": -0.8493406176567078,
"logps/rejected": -1.0317304134368896,
"loss": 0.6768,
"nll_loss": 0.6526767015457153,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.00849340669810772,
"rewards/margins": 0.0018238987540826201,
"rewards/rejected": -0.010317305102944374,
"step": 1550
},
{
"epoch": 0.8172888015717092,
"grad_norm": 0.3133123324918134,
"learning_rate": 6.858556731436754e-07,
"log_odds_chosen": 0.15951837599277496,
"log_odds_ratio": -0.7142313718795776,
"logits/chosen": -0.24038231372833252,
"logits/rejected": -0.23249582946300507,
"logps/chosen": -0.9233297109603882,
"logps/rejected": -1.008540391921997,
"loss": 0.6989,
"nll_loss": 0.7119780778884888,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.009233297780156136,
"rewards/margins": 0.0008521073614247143,
"rewards/rejected": -0.010085404850542545,
"step": 1560
},
{
"epoch": 0.822527832351015,
"grad_norm": 0.42996736238352995,
"learning_rate": 6.482526789569585e-07,
"log_odds_chosen": 0.2984713315963745,
"log_odds_ratio": -0.6555167436599731,
"logits/chosen": -0.23409931361675262,
"logits/rejected": -0.26614493131637573,
"logps/chosen": -0.8079819679260254,
"logps/rejected": -0.9996023178100586,
"loss": 0.6759,
"nll_loss": 0.6624296307563782,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.00807981938123703,
"rewards/margins": 0.001916204346343875,
"rewards/rejected": -0.009996023960411549,
"step": 1570
},
{
"epoch": 0.8277668631303209,
"grad_norm": 0.23537449504928848,
"learning_rate": 6.116043653346403e-07,
"log_odds_chosen": 0.38262271881103516,
"log_odds_ratio": -0.6627165675163269,
"logits/chosen": -0.22528938949108124,
"logits/rejected": -0.23289379477500916,
"logps/chosen": -0.903620719909668,
"logps/rejected": -1.1372332572937012,
"loss": 0.6809,
"nll_loss": 0.6387246251106262,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.009036206640303135,
"rewards/margins": 0.0023361260537058115,
"rewards/rejected": -0.011372332461178303,
"step": 1580
},
{
"epoch": 0.8330058939096268,
"grad_norm": 0.24050810385940227,
"learning_rate": 5.759230010469826e-07,
"log_odds_chosen": 0.11879537254571915,
"log_odds_ratio": -0.7360481023788452,
"logits/chosen": -0.23608234524726868,
"logits/rejected": -0.2152477204799652,
"logps/chosen": -0.8976501226425171,
"logps/rejected": -0.9821032285690308,
"loss": 0.6742,
"nll_loss": 0.6351466178894043,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.00897650234401226,
"rewards/margins": 0.0008445307612419128,
"rewards/rejected": -0.009821033105254173,
"step": 1590
},
{
"epoch": 0.8382449246889325,
"grad_norm": 0.2736382551857292,
"learning_rate": 5.412205311582433e-07,
"log_odds_chosen": 0.26674309372901917,
"log_odds_ratio": -0.7051304578781128,
"logits/chosen": -0.23347100615501404,
"logits/rejected": -0.24622151255607605,
"logps/chosen": -0.8361061811447144,
"logps/rejected": -1.024967908859253,
"loss": 0.7258,
"nll_loss": 0.7080037593841553,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.008361062034964561,
"rewards/margins": 0.0018886181060224771,
"rewards/rejected": -0.010249679908156395,
"step": 1600
},
{
"epoch": 0.8382449246889325,
"eval_log_odds_chosen": 0.3139868974685669,
"eval_log_odds_ratio": -0.6618570685386658,
"eval_logits/chosen": -0.23709820210933685,
"eval_logits/rejected": -0.241834819316864,
"eval_logps/chosen": -0.8485015034675598,
"eval_logps/rejected": -1.0496830940246582,
"eval_loss": 0.5737613439559937,
"eval_nll_loss": 0.5673460960388184,
"eval_rewards/accuracies": 0.6079999804496765,
"eval_rewards/chosen": -0.00848501455038786,
"eval_rewards/margins": 0.00201181648299098,
"eval_rewards/rejected": -0.010496831499040127,
"eval_runtime": 278.0418,
"eval_samples_per_second": 7.19,
"eval_steps_per_second": 0.45,
"step": 1600
},
{
"epoch": 0.8434839554682384,
"grad_norm": 0.2461154279813755,
"learning_rate": 5.075085730278202e-07,
"log_odds_chosen": 0.11390231549739838,
"log_odds_ratio": -0.7587698101997375,
"logits/chosen": -0.2537403702735901,
"logits/rejected": -0.26043227314949036,
"logps/chosen": -0.933973491191864,
"logps/rejected": -1.0090564489364624,
"loss": 0.6737,
"nll_loss": 0.6847957372665405,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.00933973491191864,
"rewards/margins": 0.000750830746255815,
"rewards/rejected": -0.01009056530892849,
"step": 1610
},
{
"epoch": 0.8487229862475442,
"grad_norm": 0.293725880646676,
"learning_rate": 4.747984124211031e-07,
"log_odds_chosen": 0.4481363296508789,
"log_odds_ratio": -0.6022178530693054,
"logits/chosen": -0.26498937606811523,
"logits/rejected": -0.29027503728866577,
"logps/chosen": -0.82249516248703,
"logps/rejected": -1.0860894918441772,
"loss": 0.682,
"nll_loss": 0.6935005784034729,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.008224951103329659,
"rewards/margins": 0.0026359441690146923,
"rewards/rejected": -0.010860895738005638,
"step": 1620
},
{
"epoch": 0.85396201702685,
"grad_norm": 0.28181497622786605,
"learning_rate": 4.4310099973133324e-07,
"log_odds_chosen": 0.1358368694782257,
"log_odds_ratio": -0.7588969469070435,
"logits/chosen": -0.25816676020622253,
"logits/rejected": -0.2658771574497223,
"logps/chosen": -0.9440135955810547,
"logps/rejected": -1.0287643671035767,
"loss": 0.7223,
"nll_loss": 0.7669018507003784,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.009440136142075062,
"rewards/margins": 0.0008475076174363494,
"rewards/rejected": -0.010287643410265446,
"step": 1630
},
{
"epoch": 0.8592010478061559,
"grad_norm": 0.24767085551640736,
"learning_rate": 4.124269463137341e-07,
"log_odds_chosen": 0.24949102103710175,
"log_odds_ratio": -0.6826614141464233,
"logits/chosen": -0.24843844771385193,
"logits/rejected": -0.2676991820335388,
"logps/chosen": -0.8724035024642944,
"logps/rejected": -1.0047967433929443,
"loss": 0.6895,
"nll_loss": 0.6859319806098938,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.008724034763872623,
"rewards/margins": 0.001323932665400207,
"rewards/rejected": -0.010047967545688152,
"step": 1640
},
{
"epoch": 0.8644400785854617,
"grad_norm": 0.273095979053061,
"learning_rate": 3.8278652093315045e-07,
"log_odds_chosen": 0.21245428919792175,
"log_odds_ratio": -0.6998537182807922,
"logits/chosen": -0.2235928326845169,
"logits/rejected": -0.2198958396911621,
"logps/chosen": -0.9110556840896606,
"logps/rejected": -1.014875888824463,
"loss": 0.6751,
"nll_loss": 0.6564816236495972,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.009110555984079838,
"rewards/margins": 0.0010382026666775346,
"rewards/rejected": -0.010148759000003338,
"step": 1650
},
{
"epoch": 0.8696791093647676,
"grad_norm": 0.21686983060047896,
"learning_rate": 3.5418964632636075e-07,
"log_odds_chosen": 0.3318633735179901,
"log_odds_ratio": -0.6623369455337524,
"logits/chosen": -0.24393992125988007,
"logits/rejected": -0.25989559292793274,
"logps/chosen": -0.8131970167160034,
"logps/rejected": -1.0291340351104736,
"loss": 0.6765,
"nll_loss": 0.627708375453949,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.008131968788802624,
"rewards/margins": 0.002159371506422758,
"rewards/rejected": -0.01029134076088667,
"step": 1660
},
{
"epoch": 0.8749181401440733,
"grad_norm": 0.23331375473591626,
"learning_rate": 3.266458958802463e-07,
"log_odds_chosen": 0.23905089497566223,
"log_odds_ratio": -0.7230226397514343,
"logits/chosen": -0.20989327132701874,
"logits/rejected": -0.2193128615617752,
"logps/chosen": -0.8955384492874146,
"logps/rejected": -1.0794498920440674,
"loss": 0.7067,
"nll_loss": 0.7193494439125061,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.00895538367331028,
"rewards/margins": 0.0018391149351373315,
"rewards/rejected": -0.010794498957693577,
"step": 1670
},
{
"epoch": 0.8801571709233792,
"grad_norm": 0.2388493711074595,
"learning_rate": 3.0016449042690057e-07,
"log_odds_chosen": 0.3502407968044281,
"log_odds_ratio": -0.6578859686851501,
"logits/chosen": -0.2545422613620758,
"logits/rejected": -0.24275808036327362,
"logps/chosen": -0.868048369884491,
"logps/rejected": -1.0792254209518433,
"loss": 0.6822,
"nll_loss": 0.7034357190132141,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.008680484257638454,
"rewards/margins": 0.0021117704454809427,
"rewards/rejected": -0.010792254470288754,
"step": 1680
},
{
"epoch": 0.885396201702685,
"grad_norm": 0.21583687175178848,
"learning_rate": 2.747542951567702e-07,
"log_odds_chosen": 0.2685350179672241,
"log_odds_ratio": -0.7099257707595825,
"logits/chosen": -0.2754712998867035,
"logits/rejected": -0.27395230531692505,
"logps/chosen": -0.8825618624687195,
"logps/rejected": -1.0439611673355103,
"loss": 0.736,
"nll_loss": 0.7420133948326111,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.008825617842376232,
"rewards/margins": 0.001613991567865014,
"rewards/rejected": -0.010439610108733177,
"step": 1690
},
{
"epoch": 0.8906352324819908,
"grad_norm": 0.2625664453357368,
"learning_rate": 2.5042381665084907e-07,
"log_odds_chosen": 0.2786557972431183,
"log_odds_ratio": -0.674875020980835,
"logits/chosen": -0.25832805037498474,
"logits/rejected": -0.2771572470664978,
"logps/chosen": -0.8848080635070801,
"logps/rejected": -1.0470314025878906,
"loss": 0.7193,
"nll_loss": 0.7683790326118469,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.008848080411553383,
"rewards/margins": 0.001622233772650361,
"rewards/rejected": -0.0104703139513731,
"step": 1700
},
{
"epoch": 0.8906352324819908,
"eval_log_odds_chosen": 0.3162248134613037,
"eval_log_odds_ratio": -0.6610245108604431,
"eval_logits/chosen": -0.2351589947938919,
"eval_logits/rejected": -0.24033093452453613,
"eval_logps/chosen": -0.8476623296737671,
"eval_logps/rejected": -1.0499019622802734,
"eval_loss": 0.5735238194465637,
"eval_nll_loss": 0.5670892596244812,
"eval_rewards/accuracies": 0.6050000190734863,
"eval_rewards/chosen": -0.008476623333990574,
"eval_rewards/margins": 0.0020223965402692556,
"eval_rewards/rejected": -0.010499019175767899,
"eval_runtime": 282.3911,
"eval_samples_per_second": 7.079,
"eval_steps_per_second": 0.443,
"step": 1700
},
{
"epoch": 0.8958742632612967,
"grad_norm": 0.2201709218551474,
"learning_rate": 2.2718120003292786e-07,
"log_odds_chosen": 0.1792387217283249,
"log_odds_ratio": -0.7307632565498352,
"logits/chosen": -0.2693052291870117,
"logits/rejected": -0.24535951018333435,
"logps/chosen": -0.9198406338691711,
"logps/rejected": -1.0340744256973267,
"loss": 0.6955,
"nll_loss": 0.6777786016464233,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.009198406711220741,
"rewards/margins": 0.0011423381511121988,
"rewards/rejected": -0.010340743698179722,
"step": 1710
},
{
"epoch": 0.9011132940406025,
"grad_norm": 0.25776105345582007,
"learning_rate": 2.0503422624285079e-07,
"log_odds_chosen": 0.11539041996002197,
"log_odds_ratio": -0.7499845623970032,
"logits/chosen": -0.2296096533536911,
"logits/rejected": -0.22232845425605774,
"logps/chosen": -0.9569549560546875,
"logps/rejected": -1.0448832511901855,
"loss": 0.7511,
"nll_loss": 0.7580893635749817,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.00956954900175333,
"rewards/margins": 0.0008792821317911148,
"rewards/rejected": -0.01044883020222187,
"step": 1720
},
{
"epoch": 0.9063523248199084,
"grad_norm": 0.2706279093106233,
"learning_rate": 1.8399030943168143e-07,
"log_odds_chosen": 0.3941977620124817,
"log_odds_ratio": -0.6337345242500305,
"logits/chosen": -0.24551761150360107,
"logits/rejected": -0.2374114990234375,
"logps/chosen": -0.9035981297492981,
"logps/rejected": -1.148105502128601,
"loss": 0.6806,
"nll_loss": 0.66960209608078,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.009035980328917503,
"rewards/margins": 0.002445075660943985,
"rewards/rejected": -0.011481055058538914,
"step": 1730
},
{
"epoch": 0.9115913555992141,
"grad_norm": 0.3609652737051337,
"learning_rate": 1.6405649447966974e-07,
"log_odds_chosen": 0.3195926547050476,
"log_odds_ratio": -0.6484790444374084,
"logits/chosen": -0.2420916110277176,
"logits/rejected": -0.2452610284090042,
"logps/chosen": -0.8236813545227051,
"logps/rejected": -1.0253655910491943,
"loss": 0.6678,
"nll_loss": 0.6674818396568298,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.008236813358962536,
"rewards/margins": 0.002016842132434249,
"rewards/rejected": -0.010253656655550003,
"step": 1740
},
{
"epoch": 0.91683038637852,
"grad_norm": 0.31053252175434054,
"learning_rate": 1.4523945463783188e-07,
"log_odds_chosen": 0.39518290758132935,
"log_odds_ratio": -0.6608596444129944,
"logits/chosen": -0.24014584720134735,
"logits/rejected": -0.2523557245731354,
"logps/chosen": -0.8841649293899536,
"logps/rejected": -1.1480939388275146,
"loss": 0.715,
"nll_loss": 0.7150102853775024,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.008841649629175663,
"rewards/margins": 0.00263928878121078,
"rewards/rejected": -0.011480937711894512,
"step": 1750
},
{
"epoch": 0.9220694171578258,
"grad_norm": 0.2695837646740879,
"learning_rate": 1.2754548929394504e-07,
"log_odds_chosen": 0.30153781175613403,
"log_odds_ratio": -0.6760331988334656,
"logits/chosen": -0.24449577927589417,
"logits/rejected": -0.26775243878364563,
"logps/chosen": -0.8484500646591187,
"logps/rejected": -1.0482269525527954,
"loss": 0.6887,
"nll_loss": 0.7010708451271057,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.008484501391649246,
"rewards/margins": 0.0019977684132754803,
"rewards/rejected": -0.01048226933926344,
"step": 1760
},
{
"epoch": 0.9273084479371316,
"grad_norm": 0.3245310485170031,
"learning_rate": 1.1098052186369816e-07,
"log_odds_chosen": 0.22715063393115997,
"log_odds_ratio": -0.709073543548584,
"logits/chosen": -0.2291758507490158,
"logits/rejected": -0.24192312359809875,
"logps/chosen": -0.9002648591995239,
"logps/rejected": -1.0433080196380615,
"loss": 0.6921,
"nll_loss": 0.6624379754066467,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.00900264736264944,
"rewards/margins": 0.0014304317301139235,
"rewards/rejected": -0.010433079674839973,
"step": 1770
},
{
"epoch": 0.9325474787164375,
"grad_norm": 0.3213633363643417,
"learning_rate": 9.555009780770584e-08,
"log_odds_chosen": 0.29082897305488586,
"log_odds_ratio": -0.6803869605064392,
"logits/chosen": -0.2208433598279953,
"logits/rejected": -0.25635868310928345,
"logps/chosen": -0.8286264538764954,
"logps/rejected": -1.0254353284835815,
"loss": 0.679,
"nll_loss": 0.6641760468482971,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.008286263793706894,
"rewards/margins": 0.0019680894911289215,
"rewards/rejected": -0.01025435421615839,
"step": 1780
},
{
"epoch": 0.9377865094957433,
"grad_norm": 0.306877632695617,
"learning_rate": 8.125938277505645e-08,
"log_odds_chosen": 0.31603384017944336,
"log_odds_ratio": -0.6547614336013794,
"logits/chosen": -0.2147616595029831,
"logits/rejected": -0.22782523930072784,
"logps/chosen": -0.8640085458755493,
"logps/rejected": -1.0608965158462524,
"loss": 0.6808,
"nll_loss": 0.6702268719673157,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.008640085346996784,
"rewards/margins": 0.0019688797183334827,
"rewards/rejected": -0.01060896459966898,
"step": 1790
},
{
"epoch": 0.9430255402750491,
"grad_norm": 0.20245915700115008,
"learning_rate": 6.81131608740026e-08,
"log_odds_chosen": 0.22377637028694153,
"log_odds_ratio": -0.7018457651138306,
"logits/chosen": -0.19310477375984192,
"logits/rejected": -0.2190311849117279,
"logps/chosen": -0.8574590682983398,
"logps/rejected": -0.9911470413208008,
"loss": 0.7038,
"nll_loss": 0.6608118414878845,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.008574591018259525,
"rewards/margins": 0.0013368797954171896,
"rewards/rejected": -0.009911470115184784,
"step": 1800
},
{
"epoch": 0.9430255402750491,
"eval_log_odds_chosen": 0.3163548707962036,
"eval_log_odds_ratio": -0.661532461643219,
"eval_logits/chosen": -0.23114821314811707,
"eval_logits/rejected": -0.23599176108837128,
"eval_logps/chosen": -0.8470891118049622,
"eval_logps/rejected": -1.049268364906311,
"eval_loss": 0.5734038949012756,
"eval_nll_loss": 0.5669639706611633,
"eval_rewards/accuracies": 0.609000027179718,
"eval_rewards/chosen": -0.008470890112221241,
"eval_rewards/margins": 0.00202179211191833,
"eval_rewards/rejected": -0.010492682456970215,
"eval_runtime": 269.8606,
"eval_samples_per_second": 7.408,
"eval_steps_per_second": 0.463,
"step": 1800
},
{
"epoch": 0.9482645710543549,
"grad_norm": 0.3628155654733571,
"learning_rate": 5.611583307038381e-08,
"log_odds_chosen": 0.24962477385997772,
"log_odds_ratio": -0.6831658482551575,
"logits/chosen": -0.23160485923290253,
"logits/rejected": -0.2222505509853363,
"logps/chosen": -0.8988336324691772,
"logps/rejected": -1.0403974056243896,
"loss": 0.7116,
"nll_loss": 0.6903770565986633,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.008988335728645325,
"rewards/margins": 0.0014156366232782602,
"rewards/rejected": -0.010403972119092941,
"step": 1810
},
{
"epoch": 0.9535036018336608,
"grad_norm": 0.2483018209845032,
"learning_rate": 4.527141571431498e-08,
"log_odds_chosen": 0.22445717453956604,
"log_odds_ratio": -0.6999959945678711,
"logits/chosen": -0.23954102396965027,
"logits/rejected": -0.24183711409568787,
"logps/chosen": -0.8445944786071777,
"logps/rejected": -0.9891592860221863,
"loss": 0.6892,
"nll_loss": 0.7019317746162415,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.00844594370573759,
"rewards/margins": 0.0014456479111686349,
"rewards/rejected": -0.009891592897474766,
"step": 1820
},
{
"epoch": 0.9587426326129665,
"grad_norm": 0.2131281074522212,
"learning_rate": 3.5583539195629285e-08,
"log_odds_chosen": 0.43377724289894104,
"log_odds_ratio": -0.6147680282592773,
"logits/chosen": -0.2520751655101776,
"logits/rejected": -0.2712712287902832,
"logps/chosen": -0.8412583470344543,
"logps/rejected": -1.1093076467514038,
"loss": 0.7113,
"nll_loss": 0.7085931897163391,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -0.00841258279979229,
"rewards/margins": 0.00268049375154078,
"rewards/rejected": -0.011093077249825,
"step": 1830
},
{
"epoch": 0.9639816633922724,
"grad_norm": 0.4078068154194888,
"learning_rate": 2.7055446728532382e-08,
"log_odds_chosen": 0.23464258015155792,
"log_odds_ratio": -0.707868218421936,
"logits/chosen": -0.25544843077659607,
"logits/rejected": -0.27141958475112915,
"logps/chosen": -0.8739751577377319,
"logps/rejected": -1.0185130834579468,
"loss": 0.7277,
"nll_loss": 0.7003148198127747,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.008739751763641834,
"rewards/margins": 0.0014453793410211802,
"rewards/rejected": -0.010185131803154945,
"step": 1840
},
{
"epoch": 0.9692206941715783,
"grad_norm": 0.2412577505445294,
"learning_rate": 1.9689993265870176e-08,
"log_odds_chosen": 0.23277541995048523,
"log_odds_ratio": -0.6795376539230347,
"logits/chosen": -0.2662840187549591,
"logits/rejected": -0.2727211117744446,
"logps/chosen": -0.895865261554718,
"logps/rejected": -1.037330150604248,
"loss": 0.6942,
"nll_loss": 0.7306576371192932,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.00895865261554718,
"rewards/margins": 0.0014146488392725587,
"rewards/rejected": -0.010373301804065704,
"step": 1850
},
{
"epoch": 0.9744597249508841,
"grad_norm": 0.3033721699704284,
"learning_rate": 1.3489644543374479e-08,
"log_odds_chosen": 0.29598861932754517,
"log_odds_ratio": -0.6771318912506104,
"logits/chosen": -0.21042628586292267,
"logits/rejected": -0.23291948437690735,
"logps/chosen": -0.8353894948959351,
"logps/rejected": -0.9999237060546875,
"loss": 0.6644,
"nll_loss": 0.6520312428474426,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.00835389643907547,
"rewards/margins": 0.001645339885726571,
"rewards/rejected": -0.009999236091971397,
"step": 1860
},
{
"epoch": 0.9796987557301899,
"grad_norm": 0.27851831050728887,
"learning_rate": 8.456476254209367e-09,
"log_odds_chosen": 0.25299039483070374,
"log_odds_ratio": -0.6810643076896667,
"logits/chosen": -0.20314674079418182,
"logits/rejected": -0.22378787398338318,
"logps/chosen": -0.8736406564712524,
"logps/rejected": -1.0240364074707031,
"loss": 0.7072,
"nll_loss": 0.6920244693756104,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.008736406452953815,
"rewards/margins": 0.0015039570862427354,
"rewards/rejected": -0.010240363888442516,
"step": 1870
},
{
"epoch": 0.9849377865094957,
"grad_norm": 0.24734569987361493,
"learning_rate": 4.592173354088291e-09,
"log_odds_chosen": 0.3224944472312927,
"log_odds_ratio": -0.678228497505188,
"logits/chosen": -0.2332799881696701,
"logits/rejected": -0.2256723940372467,
"logps/chosen": -0.8689178228378296,
"logps/rejected": -1.074220061302185,
"loss": 0.6902,
"nll_loss": 0.6795616745948792,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.00868917815387249,
"rewards/margins": 0.002053022850304842,
"rewards/rejected": -0.010742200538516045,
"step": 1880
},
{
"epoch": 0.9901768172888016,
"grad_norm": 0.2370498550298322,
"learning_rate": 1.8980294972025245e-09,
"log_odds_chosen": 0.29929059743881226,
"log_odds_ratio": -0.6822630763053894,
"logits/chosen": -0.21594195067882538,
"logits/rejected": -0.24053767323493958,
"logps/chosen": -0.8797448873519897,
"logps/rejected": -1.052920937538147,
"loss": 0.7147,
"nll_loss": 0.7171341180801392,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.008797448128461838,
"rewards/margins": 0.0017317605670541525,
"rewards/rejected": -0.010529209859669209,
"step": 1890
},
{
"epoch": 0.9954158480681073,
"grad_norm": 0.2673964952852329,
"learning_rate": 3.749466031427451e-10,
"log_odds_chosen": 0.35855141282081604,
"log_odds_ratio": -0.6542550325393677,
"logits/chosen": -0.20227336883544922,
"logits/rejected": -0.20370423793792725,
"logps/chosen": -0.8851898312568665,
"logps/rejected": -1.0999327898025513,
"loss": 0.6723,
"nll_loss": 0.662273108959198,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.008851898834109306,
"rewards/margins": 0.002147428225725889,
"rewards/rejected": -0.010999326594173908,
"step": 1900
},
{
"epoch": 0.9954158480681073,
"eval_log_odds_chosen": 0.316756933927536,
"eval_log_odds_ratio": -0.6615029573440552,
"eval_logits/chosen": -0.23195622861385345,
"eval_logits/rejected": -0.2369166910648346,
"eval_logps/chosen": -0.847005307674408,
"eval_logps/rejected": -1.0492846965789795,
"eval_loss": 0.5733689069747925,
"eval_nll_loss": 0.5669326186180115,
"eval_rewards/accuracies": 0.6069999933242798,
"eval_rewards/chosen": -0.008470052853226662,
"eval_rewards/margins": 0.002022792585194111,
"eval_rewards/rejected": -0.010492845438420773,
"eval_runtime": 280.6123,
"eval_samples_per_second": 7.124,
"eval_steps_per_second": 0.445,
"step": 1900
},
{
"epoch": 0.999607072691552,
"step": 1908,
"total_flos": 0.0,
"train_loss": 0.9266155345884759,
"train_runtime": 40145.686,
"train_samples_per_second": 1.521,
"train_steps_per_second": 0.048
}
],
"logging_steps": 10,
"max_steps": 1908,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}