{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.3783224024853298, "eval_steps": 500, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002761477390403866, "grad_norm": 0.5198726654052734, "learning_rate": 0.0, "log_odds_chosen": 0.4317269027233124, "log_odds_ratio": -0.5042418837547302, "logits/chosen": -0.5456271767616272, "logits/rejected": -0.10779725015163422, "logps/chosen": -1.998489260673523, "logps/rejected": -2.3799774646759033, "loss": 2.1834, "nll_loss": 2.1329703330993652, "rewards/accuracies": 1.0, "rewards/chosen": -0.199848935008049, "rewards/margins": 0.03814878687262535, "rewards/rejected": -0.23799772560596466, "step": 1 }, { "epoch": 0.005522954780807732, "grad_norm": 0.7381364107131958, "learning_rate": 4.587155963302753e-08, "log_odds_chosen": 0.4411306381225586, "log_odds_ratio": -0.4988963305950165, "logits/chosen": -0.6594648361206055, "logits/rejected": -0.07251911610364914, "logps/chosen": -2.0794589519500732, "logps/rejected": -2.473759889602661, "loss": 2.2463, "nll_loss": 2.196385383605957, "rewards/accuracies": 1.0, "rewards/chosen": -0.20794589817523956, "rewards/margins": 0.039430104196071625, "rewards/rejected": -0.24737600982189178, "step": 2 }, { "epoch": 0.008284432171211598, "grad_norm": 0.6377604603767395, "learning_rate": 9.174311926605506e-08, "log_odds_chosen": 0.5233978033065796, "log_odds_ratio": -0.4696895480155945, "logits/chosen": -0.7436237931251526, "logits/rejected": -0.04083387181162834, "logps/chosen": -2.0129284858703613, "logps/rejected": -2.4808387756347656, "loss": 2.1678, "nll_loss": 2.120811700820923, "rewards/accuracies": 1.0, "rewards/chosen": -0.20129284262657166, "rewards/margins": 0.04679100960493088, "rewards/rejected": -0.24808385968208313, "step": 3 }, { "epoch": 0.011045909561615464, "grad_norm": 0.5183601379394531, "learning_rate": 1.376146788990826e-07, "log_odds_chosen": 0.4738181531429291, "log_odds_ratio": -0.4919201731681824, "logits/chosen": -0.5707842111587524, "logits/rejected": -0.06899966299533844, "logps/chosen": -2.006629467010498, "logps/rejected": -2.429466724395752, "loss": 2.1541, "nll_loss": 2.104935646057129, "rewards/accuracies": 1.0, "rewards/chosen": -0.20066295564174652, "rewards/margins": 0.04228372871875763, "rewards/rejected": -0.24294668436050415, "step": 4 }, { "epoch": 0.013807386952019331, "grad_norm": 0.5362528562545776, "learning_rate": 1.8348623853211012e-07, "log_odds_chosen": 0.47211968898773193, "log_odds_ratio": -0.49660325050354004, "logits/chosen": -0.5831207633018494, "logits/rejected": -0.13220791518688202, "logps/chosen": -1.9650582075119019, "logps/rejected": -2.3817272186279297, "loss": 2.1187, "nll_loss": 2.0690417289733887, "rewards/accuracies": 1.0, "rewards/chosen": -0.1965058296918869, "rewards/margins": 0.04166688770055771, "rewards/rejected": -0.23817269504070282, "step": 5 }, { "epoch": 0.016568864342423197, "grad_norm": 0.5392005443572998, "learning_rate": 2.2935779816513764e-07, "log_odds_chosen": 0.3942825496196747, "log_odds_ratio": -0.518440306186676, "logits/chosen": -0.5348646640777588, "logits/rejected": -0.23154743015766144, "logps/chosen": -2.012246608734131, "logps/rejected": -2.361220359802246, "loss": 2.178, "nll_loss": 2.126155376434326, "rewards/accuracies": 1.0, "rewards/chosen": -0.2012246698141098, "rewards/margins": 0.03489736467599869, "rewards/rejected": -0.2361220419406891, "step": 6 }, { "epoch": 0.019330341732827064, "grad_norm": 0.5474534630775452, "learning_rate": 2.752293577981652e-07, "log_odds_chosen": 0.2622841000556946, "log_odds_ratio": -0.576733410358429, "logits/chosen": -0.5426779985427856, "logits/rejected": -0.2341059297323227, "logps/chosen": -2.074805736541748, "logps/rejected": -2.310166120529175, "loss": 2.2374, "nll_loss": 2.179717779159546, "rewards/accuracies": 0.875, "rewards/chosen": -0.2074805647134781, "rewards/margins": 0.023536043241620064, "rewards/rejected": -0.231016606092453, "step": 7 }, { "epoch": 0.022091819123230928, "grad_norm": 0.5940824747085571, "learning_rate": 3.211009174311927e-07, "log_odds_chosen": 0.41454389691352844, "log_odds_ratio": -0.5088675618171692, "logits/chosen": -0.45266416668891907, "logits/rejected": -0.03741999715566635, "logps/chosen": -2.081907033920288, "logps/rejected": -2.452606439590454, "loss": 2.2474, "nll_loss": 2.19650936126709, "rewards/accuracies": 1.0, "rewards/chosen": -0.2081906944513321, "rewards/margins": 0.037069931626319885, "rewards/rejected": -0.24526062607765198, "step": 8 }, { "epoch": 0.024853296513634795, "grad_norm": 0.5987945795059204, "learning_rate": 3.6697247706422023e-07, "log_odds_chosen": 0.666429340839386, "log_odds_ratio": -0.42349112033843994, "logits/chosen": -0.7504861950874329, "logits/rejected": -0.16368302702903748, "logps/chosen": -1.9702867269515991, "logps/rejected": -2.566361427307129, "loss": 2.1456, "nll_loss": 2.10321307182312, "rewards/accuracies": 1.0, "rewards/chosen": -0.19702866673469543, "rewards/margins": 0.05960746854543686, "rewards/rejected": -0.2566361427307129, "step": 9 }, { "epoch": 0.027614773904038662, "grad_norm": 0.7403894662857056, "learning_rate": 4.128440366972478e-07, "log_odds_chosen": 0.44253280758857727, "log_odds_ratio": -0.502151370048523, "logits/chosen": -0.7380926012992859, "logits/rejected": -0.23479697108268738, "logps/chosen": -2.0925681591033936, "logps/rejected": -2.4903366565704346, "loss": 2.2572, "nll_loss": 2.206953287124634, "rewards/accuracies": 1.0, "rewards/chosen": -0.20925681293010712, "rewards/margins": 0.03977686166763306, "rewards/rejected": -0.24903367459774017, "step": 10 }, { "epoch": 0.030376251294442526, "grad_norm": 0.6613215208053589, "learning_rate": 4.587155963302753e-07, "log_odds_chosen": 0.5335031151771545, "log_odds_ratio": -0.4685918688774109, "logits/chosen": -0.6707795262336731, "logits/rejected": -0.13829460740089417, "logps/chosen": -2.0549967288970947, "logps/rejected": -2.5354185104370117, "loss": 2.2247, "nll_loss": 2.1778128147125244, "rewards/accuracies": 1.0, "rewards/chosen": -0.20549967885017395, "rewards/margins": 0.04804220795631409, "rewards/rejected": -0.25354188680648804, "step": 11 }, { "epoch": 0.03313772868484639, "grad_norm": 0.5580596327781677, "learning_rate": 5.045871559633028e-07, "log_odds_chosen": 0.4023962616920471, "log_odds_ratio": -0.5220973491668701, "logits/chosen": -0.5904384851455688, "logits/rejected": 0.0007063774392008781, "logps/chosen": -1.958735704421997, "logps/rejected": -2.3157620429992676, "loss": 2.1455, "nll_loss": 2.09328293800354, "rewards/accuracies": 0.75, "rewards/chosen": -0.19587357342243195, "rewards/margins": 0.03570263832807541, "rewards/rejected": -0.23157618939876556, "step": 12 }, { "epoch": 0.03589920607525026, "grad_norm": 0.4629175662994385, "learning_rate": 5.504587155963304e-07, "log_odds_chosen": 0.4922281503677368, "log_odds_ratio": -0.4793952405452728, "logits/chosen": -0.489984929561615, "logits/rejected": -0.08446945250034332, "logps/chosen": -1.8884625434875488, "logps/rejected": -2.318817377090454, "loss": 2.0661, "nll_loss": 2.0181963443756104, "rewards/accuracies": 1.0, "rewards/chosen": -0.18884626030921936, "rewards/margins": 0.043035492300987244, "rewards/rejected": -0.2318817526102066, "step": 13 }, { "epoch": 0.03866068346565413, "grad_norm": 0.6970926523208618, "learning_rate": 5.963302752293579e-07, "log_odds_chosen": 0.46570533514022827, "log_odds_ratio": -0.49952778220176697, "logits/chosen": -0.6894131898880005, "logits/rejected": -0.010050175711512566, "logps/chosen": -2.0864779949188232, "logps/rejected": -2.503685235977173, "loss": 2.2329, "nll_loss": 2.182929277420044, "rewards/accuracies": 0.875, "rewards/chosen": -0.20864778757095337, "rewards/margins": 0.041720740497112274, "rewards/rejected": -0.25036853551864624, "step": 14 }, { "epoch": 0.04142216085605799, "grad_norm": 0.7035249471664429, "learning_rate": 6.422018348623854e-07, "log_odds_chosen": 0.5717816948890686, "log_odds_ratio": -0.44831815361976624, "logits/chosen": -0.7732019424438477, "logits/rejected": 0.13750173151493073, "logps/chosen": -2.0055058002471924, "logps/rejected": -2.513883590698242, "loss": 2.1667, "nll_loss": 2.1218552589416504, "rewards/accuracies": 1.0, "rewards/chosen": -0.20055057108402252, "rewards/margins": 0.05083777755498886, "rewards/rejected": -0.2513883411884308, "step": 15 }, { "epoch": 0.044183638246461855, "grad_norm": 0.489059180021286, "learning_rate": 6.880733944954129e-07, "log_odds_chosen": 0.45178472995758057, "log_odds_ratio": -0.501604437828064, "logits/chosen": -0.6046928763389587, "logits/rejected": 0.05104057490825653, "logps/chosen": -1.8943061828613281, "logps/rejected": -2.2864151000976562, "loss": 2.0731, "nll_loss": 2.022980213165283, "rewards/accuracies": 0.875, "rewards/chosen": -0.18943063914775848, "rewards/margins": 0.03921087831258774, "rewards/rejected": -0.22864152491092682, "step": 16 }, { "epoch": 0.04694511563686572, "grad_norm": 0.5798972249031067, "learning_rate": 7.339449541284405e-07, "log_odds_chosen": 0.521186113357544, "log_odds_ratio": -0.4737667441368103, "logits/chosen": -0.5957604646682739, "logits/rejected": 0.05874314904212952, "logps/chosen": -1.9864083528518677, "logps/rejected": -2.4512085914611816, "loss": 2.1331, "nll_loss": 2.085712194442749, "rewards/accuracies": 1.0, "rewards/chosen": -0.1986408233642578, "rewards/margins": 0.04648003727197647, "rewards/rejected": -0.24512089788913727, "step": 17 }, { "epoch": 0.04970659302726959, "grad_norm": 0.602294921875, "learning_rate": 7.79816513761468e-07, "log_odds_chosen": 0.6343832612037659, "log_odds_ratio": -0.42983177304267883, "logits/chosen": -0.6467751264572144, "logits/rejected": -0.0914345234632492, "logps/chosen": -2.020120143890381, "logps/rejected": -2.5913381576538086, "loss": 2.1654, "nll_loss": 2.1224629878997803, "rewards/accuracies": 1.0, "rewards/chosen": -0.2020120471715927, "rewards/margins": 0.05712177976965904, "rewards/rejected": -0.25913381576538086, "step": 18 }, { "epoch": 0.05246807041767346, "grad_norm": 0.6167095303535461, "learning_rate": 8.256880733944956e-07, "log_odds_chosen": 0.23837895691394806, "log_odds_ratio": -0.5851647853851318, "logits/chosen": -0.6367707848548889, "logits/rejected": -0.06072646751999855, "logps/chosen": -2.055112838745117, "logps/rejected": -2.2656140327453613, "loss": 2.2217, "nll_loss": 2.163205146789551, "rewards/accuracies": 1.0, "rewards/chosen": -0.20551128685474396, "rewards/margins": 0.021050114184617996, "rewards/rejected": -0.22656141221523285, "step": 19 }, { "epoch": 0.055229547808077324, "grad_norm": 0.5270997285842896, "learning_rate": 8.71559633027523e-07, "log_odds_chosen": 0.4399060010910034, "log_odds_ratio": -0.5041620135307312, "logits/chosen": -0.5500829815864563, "logits/rejected": 0.014720816165208817, "logps/chosen": -1.9720858335494995, "logps/rejected": -2.3600573539733887, "loss": 2.1571, "nll_loss": 2.1066458225250244, "rewards/accuracies": 1.0, "rewards/chosen": -0.19720861315727234, "rewards/margins": 0.038797151297330856, "rewards/rejected": -0.2360057532787323, "step": 20 }, { "epoch": 0.057991025198481184, "grad_norm": 0.639258086681366, "learning_rate": 9.174311926605506e-07, "log_odds_chosen": 0.35250768065452576, "log_odds_ratio": -0.5405789017677307, "logits/chosen": -0.45625555515289307, "logits/rejected": -0.03890611231327057, "logps/chosen": -2.1359739303588867, "logps/rejected": -2.454291820526123, "loss": 2.2807, "nll_loss": 2.2266433238983154, "rewards/accuracies": 0.75, "rewards/chosen": -0.21359741687774658, "rewards/margins": 0.0318317785859108, "rewards/rejected": -0.2454291731119156, "step": 21 }, { "epoch": 0.06075250258888505, "grad_norm": 0.5805932283401489, "learning_rate": 9.633027522935782e-07, "log_odds_chosen": 0.5011261105537415, "log_odds_ratio": -0.4887810945510864, "logits/chosen": -0.6073933839797974, "logits/rejected": -0.18206848204135895, "logps/chosen": -1.9807653427124023, "logps/rejected": -2.4255237579345703, "loss": 2.1351, "nll_loss": 2.0862059593200684, "rewards/accuracies": 0.875, "rewards/chosen": -0.1980765163898468, "rewards/margins": 0.044475845992565155, "rewards/rejected": -0.24255238473415375, "step": 22 }, { "epoch": 0.06351397997928893, "grad_norm": 0.5500138998031616, "learning_rate": 1.0091743119266057e-06, "log_odds_chosen": 0.3974594175815582, "log_odds_ratio": -0.5229506492614746, "logits/chosen": -0.5682663321495056, "logits/rejected": -0.3609941005706787, "logps/chosen": -1.963416576385498, "logps/rejected": -2.318310499191284, "loss": 2.1453, "nll_loss": 2.0929956436157227, "rewards/accuracies": 1.0, "rewards/chosen": -0.19634166359901428, "rewards/margins": 0.03548937663435936, "rewards/rejected": -0.23183104395866394, "step": 23 }, { "epoch": 0.06627545736969279, "grad_norm": 0.5702619552612305, "learning_rate": 1.055045871559633e-06, "log_odds_chosen": 0.522091805934906, "log_odds_ratio": -0.4669547379016876, "logits/chosen": -0.5776705741882324, "logits/rejected": 0.03815801814198494, "logps/chosen": -1.9660505056381226, "logps/rejected": -2.429518461227417, "loss": 2.1353, "nll_loss": 2.0886194705963135, "rewards/accuracies": 1.0, "rewards/chosen": -0.19660505652427673, "rewards/margins": 0.04634679853916168, "rewards/rejected": -0.24295185506343842, "step": 24 }, { "epoch": 0.06903693476009665, "grad_norm": 0.6908774375915527, "learning_rate": 1.1009174311926608e-06, "log_odds_chosen": 0.6555817723274231, "log_odds_ratio": -0.42638978362083435, "logits/chosen": -0.6369448304176331, "logits/rejected": -0.0680353045463562, "logps/chosen": -1.996623158454895, "logps/rejected": -2.585740327835083, "loss": 2.157, "nll_loss": 2.1143739223480225, "rewards/accuracies": 1.0, "rewards/chosen": -0.19966234266757965, "rewards/margins": 0.058911725878715515, "rewards/rejected": -0.25857406854629517, "step": 25 }, { "epoch": 0.07179841215050052, "grad_norm": 0.6247107982635498, "learning_rate": 1.1467889908256882e-06, "log_odds_chosen": 0.37967437505722046, "log_odds_ratio": -0.528645396232605, "logits/chosen": -0.5880488157272339, "logits/rejected": -0.11361770331859589, "logps/chosen": -2.0194289684295654, "logps/rejected": -2.3577256202697754, "loss": 2.1956, "nll_loss": 2.142758369445801, "rewards/accuracies": 1.0, "rewards/chosen": -0.20194292068481445, "rewards/margins": 0.03382965177297592, "rewards/rejected": -0.23577255010604858, "step": 26 }, { "epoch": 0.07455988954090438, "grad_norm": 0.44772788882255554, "learning_rate": 1.1926605504587159e-06, "log_odds_chosen": 0.4675738513469696, "log_odds_ratio": -0.4964170753955841, "logits/chosen": -0.434501588344574, "logits/rejected": -0.17547550797462463, "logps/chosen": -1.865759253501892, "logps/rejected": -2.272789239883423, "loss": 2.0462, "nll_loss": 1.996600866317749, "rewards/accuracies": 1.0, "rewards/chosen": -0.18657593429088593, "rewards/margins": 0.04070296883583069, "rewards/rejected": -0.2272789031267166, "step": 27 }, { "epoch": 0.07732136693130826, "grad_norm": 0.5303645730018616, "learning_rate": 1.2385321100917433e-06, "log_odds_chosen": 0.5267918109893799, "log_odds_ratio": -0.46872708201408386, "logits/chosen": -0.5293477773666382, "logits/rejected": -0.20712637901306152, "logps/chosen": -1.9557033777236938, "logps/rejected": -2.422588586807251, "loss": 2.123, "nll_loss": 2.0761377811431885, "rewards/accuracies": 1.0, "rewards/chosen": -0.19557033479213715, "rewards/margins": 0.04668852686882019, "rewards/rejected": -0.24225887656211853, "step": 28 }, { "epoch": 0.08008284432171212, "grad_norm": 0.558870792388916, "learning_rate": 1.2844036697247707e-06, "log_odds_chosen": 0.4761933982372284, "log_odds_ratio": -0.49172335863113403, "logits/chosen": -0.5614610314369202, "logits/rejected": -0.03958575427532196, "logps/chosen": -2.044667959213257, "logps/rejected": -2.4699392318725586, "loss": 2.2024, "nll_loss": 2.153231143951416, "rewards/accuracies": 1.0, "rewards/chosen": -0.2044668048620224, "rewards/margins": 0.042527101933956146, "rewards/rejected": -0.24699391424655914, "step": 29 }, { "epoch": 0.08284432171211598, "grad_norm": 0.6042240262031555, "learning_rate": 1.3302752293577984e-06, "log_odds_chosen": 0.6456740498542786, "log_odds_ratio": -0.4289007782936096, "logits/chosen": -0.5478062629699707, "logits/rejected": -0.0002663079649209976, "logps/chosen": -2.02502703666687, "logps/rejected": -2.60587477684021, "loss": 2.1737, "nll_loss": 2.1308515071868896, "rewards/accuracies": 1.0, "rewards/chosen": -0.20250271260738373, "rewards/margins": 0.058084748685359955, "rewards/rejected": -0.2605874538421631, "step": 30 }, { "epoch": 0.08560579910251985, "grad_norm": 0.6082971692085266, "learning_rate": 1.3761467889908258e-06, "log_odds_chosen": 0.5572243928909302, "log_odds_ratio": -0.45846420526504517, "logits/chosen": -0.5786283016204834, "logits/rejected": -0.06976839900016785, "logps/chosen": -2.0147552490234375, "logps/rejected": -2.5146052837371826, "loss": 2.1834, "nll_loss": 2.1375184059143066, "rewards/accuracies": 1.0, "rewards/chosen": -0.20147554576396942, "rewards/margins": 0.049984999001026154, "rewards/rejected": -0.2514605224132538, "step": 31 }, { "epoch": 0.08836727649292371, "grad_norm": 0.4792933762073517, "learning_rate": 1.4220183486238535e-06, "log_odds_chosen": 0.43500036001205444, "log_odds_ratio": -0.5046517252922058, "logits/chosen": -0.4258999824523926, "logits/rejected": 0.003258749842643738, "logps/chosen": -2.011561870574951, "logps/rejected": -2.398580312728882, "loss": 2.1831, "nll_loss": 2.132627010345459, "rewards/accuracies": 1.0, "rewards/chosen": -0.20115619897842407, "rewards/margins": 0.03870181739330292, "rewards/rejected": -0.239858016371727, "step": 32 }, { "epoch": 0.09112875388332758, "grad_norm": 0.5385621786117554, "learning_rate": 1.467889908256881e-06, "log_odds_chosen": 0.573083758354187, "log_odds_ratio": -0.4524250328540802, "logits/chosen": -0.4552776515483856, "logits/rejected": -0.06403280049562454, "logps/chosen": -1.9241302013397217, "logps/rejected": -2.4318630695343018, "loss": 2.0919, "nll_loss": 2.046658754348755, "rewards/accuracies": 1.0, "rewards/chosen": -0.19241301715373993, "rewards/margins": 0.05077328532934189, "rewards/rejected": -0.24318630993366241, "step": 33 }, { "epoch": 0.09389023127373144, "grad_norm": 0.4870143234729767, "learning_rate": 1.5137614678899084e-06, "log_odds_chosen": 0.6046110391616821, "log_odds_ratio": -0.4510130286216736, "logits/chosen": -0.40763065218925476, "logits/rejected": -0.08770006895065308, "logps/chosen": -1.922775387763977, "logps/rejected": -2.462465524673462, "loss": 2.0741, "nll_loss": 2.028975009918213, "rewards/accuracies": 1.0, "rewards/chosen": -0.19227753579616547, "rewards/margins": 0.053969040513038635, "rewards/rejected": -0.2462465912103653, "step": 34 }, { "epoch": 0.09665170866413532, "grad_norm": 0.6609561443328857, "learning_rate": 1.559633027522936e-06, "log_odds_chosen": 0.40986764430999756, "log_odds_ratio": -0.5126968026161194, "logits/chosen": -0.8081121444702148, "logits/rejected": -0.20409950613975525, "logps/chosen": -1.9795390367507935, "logps/rejected": -2.3391332626342773, "loss": 2.152, "nll_loss": 2.1007797718048096, "rewards/accuracies": 1.0, "rewards/chosen": -0.19795390963554382, "rewards/margins": 0.03595941513776779, "rewards/rejected": -0.23391331732273102, "step": 35 }, { "epoch": 0.09941318605453918, "grad_norm": 0.6118403673171997, "learning_rate": 1.6055045871559635e-06, "log_odds_chosen": 0.603810727596283, "log_odds_ratio": -0.4452260434627533, "logits/chosen": -0.6869708299636841, "logits/rejected": 0.01393081247806549, "logps/chosen": -1.9751290082931519, "logps/rejected": -2.515267848968506, "loss": 2.1348, "nll_loss": 2.0902533531188965, "rewards/accuracies": 1.0, "rewards/chosen": -0.1975128948688507, "rewards/margins": 0.05401391163468361, "rewards/rejected": -0.251526802778244, "step": 36 }, { "epoch": 0.10217466344494304, "grad_norm": 0.6636885404586792, "learning_rate": 1.6513761467889911e-06, "log_odds_chosen": 0.5960386991500854, "log_odds_ratio": -0.44556429982185364, "logits/chosen": -0.5324239134788513, "logits/rejected": -0.1962709128856659, "logps/chosen": -2.1164088249206543, "logps/rejected": -2.6588618755340576, "loss": 2.2904, "nll_loss": 2.2458741664886475, "rewards/accuracies": 1.0, "rewards/chosen": -0.2116408795118332, "rewards/margins": 0.054245319217443466, "rewards/rejected": -0.26588618755340576, "step": 37 }, { "epoch": 0.10493614083534691, "grad_norm": 0.5414693355560303, "learning_rate": 1.6972477064220186e-06, "log_odds_chosen": 0.4305243492126465, "log_odds_ratio": -0.5040389895439148, "logits/chosen": -0.5475431680679321, "logits/rejected": -0.15733303129673004, "logps/chosen": -1.9466073513031006, "logps/rejected": -2.3275275230407715, "loss": 2.1303, "nll_loss": 2.079880475997925, "rewards/accuracies": 1.0, "rewards/chosen": -0.1946607381105423, "rewards/margins": 0.03809202462434769, "rewards/rejected": -0.2327527403831482, "step": 38 }, { "epoch": 0.10769761822575077, "grad_norm": 0.7092136740684509, "learning_rate": 1.743119266055046e-06, "log_odds_chosen": 0.49458950757980347, "log_odds_ratio": -0.499477356672287, "logits/chosen": -0.47480347752571106, "logits/rejected": 0.009076721966266632, "logps/chosen": -2.077540159225464, "logps/rejected": -2.527543067932129, "loss": 2.2296, "nll_loss": 2.1796536445617676, "rewards/accuracies": 0.875, "rewards/chosen": -0.20775403082370758, "rewards/margins": 0.04500027373433113, "rewards/rejected": -0.2527543008327484, "step": 39 }, { "epoch": 0.11045909561615465, "grad_norm": 0.7840990424156189, "learning_rate": 1.7889908256880737e-06, "log_odds_chosen": 0.27720558643341064, "log_odds_ratio": -0.5738192796707153, "logits/chosen": -0.6754797101020813, "logits/rejected": -0.1666824221611023, "logps/chosen": -2.097449779510498, "logps/rejected": -2.346257448196411, "loss": 2.2706, "nll_loss": 2.2131741046905518, "rewards/accuracies": 0.75, "rewards/chosen": -0.20974498987197876, "rewards/margins": 0.02488076500594616, "rewards/rejected": -0.23462577164173126, "step": 40 }, { "epoch": 0.11322057300655851, "grad_norm": 0.49594300985336304, "learning_rate": 1.8348623853211011e-06, "log_odds_chosen": 0.4892931878566742, "log_odds_ratio": -0.4875307083129883, "logits/chosen": -0.5313754081726074, "logits/rejected": -0.11813461780548096, "logps/chosen": -1.9190640449523926, "logps/rejected": -2.3516781330108643, "loss": 2.0847, "nll_loss": 2.035940647125244, "rewards/accuracies": 1.0, "rewards/chosen": -0.1919064074754715, "rewards/margins": 0.043261416256427765, "rewards/rejected": -0.23516784608364105, "step": 41 }, { "epoch": 0.11598205039696237, "grad_norm": 0.6706519722938538, "learning_rate": 1.8807339449541288e-06, "log_odds_chosen": 0.3553638756275177, "log_odds_ratio": -0.5406981706619263, "logits/chosen": -0.6044270992279053, "logits/rejected": -0.1877760887145996, "logps/chosen": -2.110844850540161, "logps/rejected": -2.4292314052581787, "loss": 2.2811, "nll_loss": 2.2269935607910156, "rewards/accuracies": 0.875, "rewards/chosen": -0.2110844999551773, "rewards/margins": 0.031838610768318176, "rewards/rejected": -0.24292311072349548, "step": 42 }, { "epoch": 0.11874352778736624, "grad_norm": 0.569320023059845, "learning_rate": 1.9266055045871564e-06, "log_odds_chosen": 0.4219287633895874, "log_odds_ratio": -0.5136047005653381, "logits/chosen": -0.564371645450592, "logits/rejected": -0.1906774342060089, "logps/chosen": -1.9711859226226807, "logps/rejected": -2.341031312942505, "loss": 2.1533, "nll_loss": 2.101966381072998, "rewards/accuracies": 0.875, "rewards/chosen": -0.1971185803413391, "rewards/margins": 0.036984533071517944, "rewards/rejected": -0.23410312831401825, "step": 43 }, { "epoch": 0.1215050051777701, "grad_norm": 0.6019275188446045, "learning_rate": 1.9724770642201837e-06, "log_odds_chosen": 0.3504191040992737, "log_odds_ratio": -0.5448323488235474, "logits/chosen": -0.5221514105796814, "logits/rejected": -0.2111213058233261, "logps/chosen": -2.0630786418914795, "logps/rejected": -2.3758115768432617, "loss": 2.226, "nll_loss": 2.1714861392974854, "rewards/accuracies": 0.75, "rewards/chosen": -0.20630787312984467, "rewards/margins": 0.03127329424023628, "rewards/rejected": -0.23758116364479065, "step": 44 }, { "epoch": 0.12426648256817398, "grad_norm": 0.5997087955474854, "learning_rate": 2.0183486238532113e-06, "log_odds_chosen": 0.6440756916999817, "log_odds_ratio": -0.42918679118156433, "logits/chosen": -0.6537622213363647, "logits/rejected": -0.3867274224758148, "logps/chosen": -1.964981198310852, "logps/rejected": -2.541144371032715, "loss": 2.1164, "nll_loss": 2.073434352874756, "rewards/accuracies": 1.0, "rewards/chosen": -0.1964981108903885, "rewards/margins": 0.057616353034973145, "rewards/rejected": -0.25411444902420044, "step": 45 }, { "epoch": 0.12702795995857785, "grad_norm": 0.5181168913841248, "learning_rate": 2.064220183486239e-06, "log_odds_chosen": 0.564142107963562, "log_odds_ratio": -0.4582359790802002, "logits/chosen": -0.5351958870887756, "logits/rejected": -0.1501241773366928, "logps/chosen": -1.84321928024292, "logps/rejected": -2.331892967224121, "loss": 2.0016, "nll_loss": 1.9557558298110962, "rewards/accuracies": 1.0, "rewards/chosen": -0.18432192504405975, "rewards/margins": 0.04886738583445549, "rewards/rejected": -0.23318932950496674, "step": 46 }, { "epoch": 0.1297894373489817, "grad_norm": 0.5089595913887024, "learning_rate": 2.110091743119266e-06, "log_odds_chosen": 0.4004448652267456, "log_odds_ratio": -0.5262866616249084, "logits/chosen": -0.4425760507583618, "logits/rejected": -0.17353124916553497, "logps/chosen": -2.045093059539795, "logps/rejected": -2.4056942462921143, "loss": 2.2153, "nll_loss": 2.162646770477295, "rewards/accuracies": 0.875, "rewards/chosen": -0.20450931787490845, "rewards/margins": 0.03606009483337402, "rewards/rejected": -0.24056941270828247, "step": 47 }, { "epoch": 0.13255091473938557, "grad_norm": 0.6208946108818054, "learning_rate": 2.155963302752294e-06, "log_odds_chosen": 0.18348652124404907, "log_odds_ratio": -0.6307563781738281, "logits/chosen": -0.45777827501296997, "logits/rejected": -0.11396709084510803, "logps/chosen": -2.1183199882507324, "logps/rejected": -2.2930657863616943, "loss": 2.2819, "nll_loss": 2.21886944770813, "rewards/accuracies": 0.875, "rewards/chosen": -0.21183200180530548, "rewards/margins": 0.01747458055615425, "rewards/rejected": -0.22930656373500824, "step": 48 }, { "epoch": 0.13531239212978943, "grad_norm": 0.6294072866439819, "learning_rate": 2.2018348623853215e-06, "log_odds_chosen": 0.6346040964126587, "log_odds_ratio": -0.43739554286003113, "logits/chosen": -0.542172908782959, "logits/rejected": -0.039900042116642, "logps/chosen": -2.052166223526001, "logps/rejected": -2.628195285797119, "loss": 2.2026, "nll_loss": 2.1588120460510254, "rewards/accuracies": 1.0, "rewards/chosen": -0.205216646194458, "rewards/margins": 0.05760287865996361, "rewards/rejected": -0.2628195285797119, "step": 49 }, { "epoch": 0.1380738695201933, "grad_norm": 0.6679416298866272, "learning_rate": 2.2477064220183487e-06, "log_odds_chosen": 0.5639493465423584, "log_odds_ratio": -0.47183454036712646, "logits/chosen": -0.6221768856048584, "logits/rejected": -0.0222525242716074, "logps/chosen": -2.010540008544922, "logps/rejected": -2.51875901222229, "loss": 2.1569, "nll_loss": 2.1097309589385986, "rewards/accuracies": 0.875, "rewards/chosen": -0.20105400681495667, "rewards/margins": 0.05082192271947861, "rewards/rejected": -0.25187593698501587, "step": 50 }, { "epoch": 0.14083534691059718, "grad_norm": 0.6739494800567627, "learning_rate": 2.2935779816513764e-06, "log_odds_chosen": 0.4806385040283203, "log_odds_ratio": -0.486289381980896, "logits/chosen": -0.5340446829795837, "logits/rejected": -0.3175644278526306, "logps/chosen": -2.032139301300049, "logps/rejected": -2.4611167907714844, "loss": 2.1976, "nll_loss": 2.1489548683166504, "rewards/accuracies": 1.0, "rewards/chosen": -0.2032139152288437, "rewards/margins": 0.04289776086807251, "rewards/rejected": -0.2461116760969162, "step": 51 }, { "epoch": 0.14359682430100104, "grad_norm": 0.5641347765922546, "learning_rate": 2.339449541284404e-06, "log_odds_chosen": 0.4302893579006195, "log_odds_ratio": -0.5059407353401184, "logits/chosen": -0.4652354121208191, "logits/rejected": -0.14794279634952545, "logps/chosen": -1.98146390914917, "logps/rejected": -2.362124443054199, "loss": 2.1395, "nll_loss": 2.0888633728027344, "rewards/accuracies": 0.875, "rewards/chosen": -0.19814638793468475, "rewards/margins": 0.0380660817027092, "rewards/rejected": -0.23621246218681335, "step": 52 }, { "epoch": 0.1463583016914049, "grad_norm": 0.6226781606674194, "learning_rate": 2.3853211009174317e-06, "log_odds_chosen": 0.38262999057769775, "log_odds_ratio": -0.5264566540718079, "logits/chosen": -0.601564884185791, "logits/rejected": -0.23214676976203918, "logps/chosen": -1.9446452856063843, "logps/rejected": -2.2811543941497803, "loss": 2.1236, "nll_loss": 2.070988178253174, "rewards/accuracies": 0.875, "rewards/chosen": -0.1944645196199417, "rewards/margins": 0.03365091234445572, "rewards/rejected": -0.22811545431613922, "step": 53 }, { "epoch": 0.14911977908180876, "grad_norm": 0.576812207698822, "learning_rate": 2.431192660550459e-06, "log_odds_chosen": 0.3935718238353729, "log_odds_ratio": -0.5239484310150146, "logits/chosen": -0.5574474930763245, "logits/rejected": -0.1828337013721466, "logps/chosen": -1.9322319030761719, "logps/rejected": -2.280305862426758, "loss": 2.1135, "nll_loss": 2.061114549636841, "rewards/accuracies": 1.0, "rewards/chosen": -0.19322317838668823, "rewards/margins": 0.03480737283825874, "rewards/rejected": -0.22803056240081787, "step": 54 }, { "epoch": 0.15188125647221262, "grad_norm": 0.5661758184432983, "learning_rate": 2.4770642201834866e-06, "log_odds_chosen": 0.39002981781959534, "log_odds_ratio": -0.5207180380821228, "logits/chosen": -0.6281000971794128, "logits/rejected": -0.12741385400295258, "logps/chosen": -1.9887436628341675, "logps/rejected": -2.3331456184387207, "loss": 2.1681, "nll_loss": 2.115997314453125, "rewards/accuracies": 1.0, "rewards/chosen": -0.198874369263649, "rewards/margins": 0.03444019705057144, "rewards/rejected": -0.23331457376480103, "step": 55 }, { "epoch": 0.1546427338626165, "grad_norm": 0.6655378937721252, "learning_rate": 2.522935779816514e-06, "log_odds_chosen": 0.6584038138389587, "log_odds_ratio": -0.427642822265625, "logits/chosen": -0.6279682517051697, "logits/rejected": -0.12986338138580322, "logps/chosen": -1.9852056503295898, "logps/rejected": -2.5761494636535645, "loss": 2.1584, "nll_loss": 2.115626335144043, "rewards/accuracies": 1.0, "rewards/chosen": -0.19852055609226227, "rewards/margins": 0.059094399213790894, "rewards/rejected": -0.25761497020721436, "step": 56 }, { "epoch": 0.15740421125302037, "grad_norm": 0.48872971534729004, "learning_rate": 2.5688073394495415e-06, "log_odds_chosen": 0.42161181569099426, "log_odds_ratio": -0.514995276927948, "logits/chosen": -0.49338769912719727, "logits/rejected": -0.07346588373184204, "logps/chosen": -1.8429884910583496, "logps/rejected": -2.2116591930389404, "loss": 2.014, "nll_loss": 1.9625122547149658, "rewards/accuracies": 0.875, "rewards/chosen": -0.18429884314537048, "rewards/margins": 0.036867089569568634, "rewards/rejected": -0.22116592526435852, "step": 57 }, { "epoch": 0.16016568864342423, "grad_norm": 0.6582737565040588, "learning_rate": 2.6146788990825687e-06, "log_odds_chosen": 0.5138990879058838, "log_odds_ratio": -0.4847075939178467, "logits/chosen": -0.5234618186950684, "logits/rejected": -0.12736433744430542, "logps/chosen": -1.9835774898529053, "logps/rejected": -2.4381330013275146, "loss": 2.1515, "nll_loss": 2.1030287742614746, "rewards/accuracies": 0.875, "rewards/chosen": -0.19835779070854187, "rewards/margins": 0.045455530285835266, "rewards/rejected": -0.24381330609321594, "step": 58 }, { "epoch": 0.1629271660338281, "grad_norm": 0.5795738697052002, "learning_rate": 2.6605504587155968e-06, "log_odds_chosen": 0.2905232906341553, "log_odds_ratio": -0.5645147562026978, "logits/chosen": -0.47340482473373413, "logits/rejected": -0.21826684474945068, "logps/chosen": -1.9716663360595703, "logps/rejected": -2.227067470550537, "loss": 2.1514, "nll_loss": 2.0949792861938477, "rewards/accuracies": 1.0, "rewards/chosen": -0.19716663658618927, "rewards/margins": 0.025540094822645187, "rewards/rejected": -0.22270673513412476, "step": 59 }, { "epoch": 0.16568864342423195, "grad_norm": 0.5678736567497253, "learning_rate": 2.706422018348624e-06, "log_odds_chosen": 0.3714035749435425, "log_odds_ratio": -0.5299574136734009, "logits/chosen": -0.45862269401550293, "logits/rejected": 0.08986371755599976, "logps/chosen": -2.004216432571411, "logps/rejected": -2.333292245864868, "loss": 2.1781, "nll_loss": 2.125133514404297, "rewards/accuracies": 0.875, "rewards/chosen": -0.20042164623737335, "rewards/margins": 0.032907579094171524, "rewards/rejected": -0.23332923650741577, "step": 60 }, { "epoch": 0.16845012081463584, "grad_norm": 0.5007438063621521, "learning_rate": 2.7522935779816517e-06, "log_odds_chosen": 0.667113721370697, "log_odds_ratio": -0.4193596839904785, "logits/chosen": -0.5191382765769958, "logits/rejected": -0.19058826565742493, "logps/chosen": -1.917590856552124, "logps/rejected": -2.5098061561584473, "loss": 2.0632, "nll_loss": 2.021275281906128, "rewards/accuracies": 1.0, "rewards/chosen": -0.1917591094970703, "rewards/margins": 0.0592215433716774, "rewards/rejected": -0.2509806454181671, "step": 61 }, { "epoch": 0.1712115982050397, "grad_norm": 0.6429389119148254, "learning_rate": 2.798165137614679e-06, "log_odds_chosen": 0.41257914900779724, "log_odds_ratio": -0.5154306888580322, "logits/chosen": -0.45402857661247253, "logits/rejected": 0.11148527264595032, "logps/chosen": -2.081867218017578, "logps/rejected": -2.45068359375, "loss": 2.2481, "nll_loss": 2.1965107917785645, "rewards/accuracies": 1.0, "rewards/chosen": -0.20818671584129333, "rewards/margins": 0.03688164800405502, "rewards/rejected": -0.24506837129592896, "step": 62 }, { "epoch": 0.17397307559544356, "grad_norm": 0.5388195514678955, "learning_rate": 2.844036697247707e-06, "log_odds_chosen": 0.5105969905853271, "log_odds_ratio": -0.4755265414714813, "logits/chosen": -0.46547558903694153, "logits/rejected": -0.1431473195552826, "logps/chosen": -1.9948811531066895, "logps/rejected": -2.448573589324951, "loss": 2.1454, "nll_loss": 2.0978972911834717, "rewards/accuracies": 1.0, "rewards/chosen": -0.19948811829090118, "rewards/margins": 0.0453692302107811, "rewards/rejected": -0.24485734105110168, "step": 63 }, { "epoch": 0.17673455298584742, "grad_norm": 0.4911787211894989, "learning_rate": 2.8899082568807342e-06, "log_odds_chosen": 0.5747222304344177, "log_odds_ratio": -0.4552631676197052, "logits/chosen": -0.5521761178970337, "logits/rejected": -0.294252872467041, "logps/chosen": -1.9650574922561646, "logps/rejected": -2.4793307781219482, "loss": 2.1135, "nll_loss": 2.067988634109497, "rewards/accuracies": 1.0, "rewards/chosen": -0.19650575518608093, "rewards/margins": 0.051427341997623444, "rewards/rejected": -0.24793308973312378, "step": 64 }, { "epoch": 0.17949603037625128, "grad_norm": 0.533399760723114, "learning_rate": 2.935779816513762e-06, "log_odds_chosen": 0.3648831844329834, "log_odds_ratio": -0.5328130722045898, "logits/chosen": -0.4136759638786316, "logits/rejected": -0.046070147305727005, "logps/chosen": -1.8867706060409546, "logps/rejected": -2.202749729156494, "loss": 2.0708, "nll_loss": 2.0175259113311768, "rewards/accuracies": 1.0, "rewards/chosen": -0.18867707252502441, "rewards/margins": 0.031597916036844254, "rewards/rejected": -0.22027498483657837, "step": 65 }, { "epoch": 0.18225750776665517, "grad_norm": 0.5182326436042786, "learning_rate": 2.981651376146789e-06, "log_odds_chosen": 0.5400457382202148, "log_odds_ratio": -0.48501351475715637, "logits/chosen": -0.6035399436950684, "logits/rejected": -0.1548141986131668, "logps/chosen": -1.8529176712036133, "logps/rejected": -2.325399398803711, "loss": 2.022, "nll_loss": 1.973459243774414, "rewards/accuracies": 0.875, "rewards/chosen": -0.18529179692268372, "rewards/margins": 0.04724816977977753, "rewards/rejected": -0.23253995180130005, "step": 66 }, { "epoch": 0.18501898515705903, "grad_norm": 0.5337976813316345, "learning_rate": 3.0275229357798168e-06, "log_odds_chosen": 0.33741873502731323, "log_odds_ratio": -0.5426305532455444, "logits/chosen": -0.39523714780807495, "logits/rejected": 0.0008284337818622589, "logps/chosen": -1.997262716293335, "logps/rejected": -2.2943520545959473, "loss": 2.1433, "nll_loss": 2.0890326499938965, "rewards/accuracies": 1.0, "rewards/chosen": -0.19972628355026245, "rewards/margins": 0.029708925634622574, "rewards/rejected": -0.22943520545959473, "step": 67 }, { "epoch": 0.1877804625474629, "grad_norm": 0.5520625114440918, "learning_rate": 3.073394495412844e-06, "log_odds_chosen": 0.4403454065322876, "log_odds_ratio": -0.5091642141342163, "logits/chosen": -0.24377387762069702, "logits/rejected": 0.08138424903154373, "logps/chosen": -2.0106263160705566, "logps/rejected": -2.402209758758545, "loss": 2.2053, "nll_loss": 2.1543631553649902, "rewards/accuracies": 1.0, "rewards/chosen": -0.2010626494884491, "rewards/margins": 0.039158351719379425, "rewards/rejected": -0.24022099375724792, "step": 68 }, { "epoch": 0.19054193993786675, "grad_norm": 0.650439441204071, "learning_rate": 3.119266055045872e-06, "log_odds_chosen": 0.5253904461860657, "log_odds_ratio": -0.4741411507129669, "logits/chosen": -0.4677438735961914, "logits/rejected": -0.013416798785328865, "logps/chosen": -2.077601909637451, "logps/rejected": -2.5524415969848633, "loss": 2.2423, "nll_loss": 2.194887161254883, "rewards/accuracies": 1.0, "rewards/chosen": -0.20776019990444183, "rewards/margins": 0.04748394340276718, "rewards/rejected": -0.2552441358566284, "step": 69 }, { "epoch": 0.19330341732827064, "grad_norm": 0.48958319425582886, "learning_rate": 3.1651376146788993e-06, "log_odds_chosen": 0.5917057991027832, "log_odds_ratio": -0.45470088720321655, "logits/chosen": -0.36797553300857544, "logits/rejected": -0.3181126117706299, "logps/chosen": -2.022463321685791, "logps/rejected": -2.5544497966766357, "loss": 2.1697, "nll_loss": 2.124249219894409, "rewards/accuracies": 1.0, "rewards/chosen": -0.20224635303020477, "rewards/margins": 0.05319864675402641, "rewards/rejected": -0.2554450035095215, "step": 70 }, { "epoch": 0.1960648947186745, "grad_norm": 0.6293954253196716, "learning_rate": 3.211009174311927e-06, "log_odds_chosen": 0.44158488512039185, "log_odds_ratio": -0.5029429793357849, "logits/chosen": -0.4253910481929779, "logits/rejected": -0.14389574527740479, "logps/chosen": -1.9857234954833984, "logps/rejected": -2.379268169403076, "loss": 2.17, "nll_loss": 2.119678497314453, "rewards/accuracies": 1.0, "rewards/chosen": -0.19857235252857208, "rewards/margins": 0.03935447335243225, "rewards/rejected": -0.23792681097984314, "step": 71 }, { "epoch": 0.19882637210907836, "grad_norm": 0.6024268269538879, "learning_rate": 3.256880733944954e-06, "log_odds_chosen": 0.5992636680603027, "log_odds_ratio": -0.4583205580711365, "logits/chosen": -0.3624473810195923, "logits/rejected": -0.16807770729064941, "logps/chosen": -1.8936195373535156, "logps/rejected": -2.403137683868408, "loss": 2.0566, "nll_loss": 2.0107438564300537, "rewards/accuracies": 0.875, "rewards/chosen": -0.18936194479465485, "rewards/margins": 0.05095181241631508, "rewards/rejected": -0.24031376838684082, "step": 72 }, { "epoch": 0.20158784949948222, "grad_norm": 0.537467360496521, "learning_rate": 3.3027522935779823e-06, "log_odds_chosen": 0.38986673951148987, "log_odds_ratio": -0.518619179725647, "logits/chosen": -0.28752538561820984, "logits/rejected": -0.08893117308616638, "logps/chosen": -2.056885242462158, "logps/rejected": -2.4047675132751465, "loss": 2.2167, "nll_loss": 2.1648688316345215, "rewards/accuracies": 1.0, "rewards/chosen": -0.2056885063648224, "rewards/margins": 0.03478822112083435, "rewards/rejected": -0.24047674238681793, "step": 73 }, { "epoch": 0.20434932688988608, "grad_norm": 0.48359447717666626, "learning_rate": 3.3486238532110095e-06, "log_odds_chosen": 0.39413708448410034, "log_odds_ratio": -0.5191097259521484, "logits/chosen": -0.28818562626838684, "logits/rejected": -0.17117023468017578, "logps/chosen": -1.9585490226745605, "logps/rejected": -2.3046271800994873, "loss": 2.1453, "nll_loss": 2.093356132507324, "rewards/accuracies": 1.0, "rewards/chosen": -0.19585487246513367, "rewards/margins": 0.03460782766342163, "rewards/rejected": -0.2304627150297165, "step": 74 }, { "epoch": 0.20711080428028997, "grad_norm": 0.6197869777679443, "learning_rate": 3.394495412844037e-06, "log_odds_chosen": 0.4853059649467468, "log_odds_ratio": -0.487282931804657, "logits/chosen": -0.4553055763244629, "logits/rejected": -0.07368504256010056, "logps/chosen": -1.9564940929412842, "logps/rejected": -2.38771653175354, "loss": 2.1426, "nll_loss": 2.093902826309204, "rewards/accuracies": 1.0, "rewards/chosen": -0.1956494152545929, "rewards/margins": 0.04312223941087723, "rewards/rejected": -0.23877164721488953, "step": 75 }, { "epoch": 0.20987228167069383, "grad_norm": 0.5478652119636536, "learning_rate": 3.4403669724770644e-06, "log_odds_chosen": 0.4639507532119751, "log_odds_ratio": -0.4932023882865906, "logits/chosen": -0.30065810680389404, "logits/rejected": -0.25181248784065247, "logps/chosen": -2.044691562652588, "logps/rejected": -2.460970401763916, "loss": 2.1933, "nll_loss": 2.1440277099609375, "rewards/accuracies": 1.0, "rewards/chosen": -0.20446917414665222, "rewards/margins": 0.04162788391113281, "rewards/rejected": -0.24609704315662384, "step": 76 }, { "epoch": 0.2126337590610977, "grad_norm": 0.5865225195884705, "learning_rate": 3.486238532110092e-06, "log_odds_chosen": 0.40864938497543335, "log_odds_ratio": -0.5256964564323425, "logits/chosen": -0.414636492729187, "logits/rejected": -0.1841173619031906, "logps/chosen": -2.0234270095825195, "logps/rejected": -2.3900880813598633, "loss": 2.2079, "nll_loss": 2.155355215072632, "rewards/accuracies": 0.875, "rewards/chosen": -0.2023427039384842, "rewards/margins": 0.03666612505912781, "rewards/rejected": -0.239008828997612, "step": 77 }, { "epoch": 0.21539523645150155, "grad_norm": 0.5245475769042969, "learning_rate": 3.5321100917431193e-06, "log_odds_chosen": 0.3867124617099762, "log_odds_ratio": -0.5356163382530212, "logits/chosen": -0.22849449515342712, "logits/rejected": 0.04786547273397446, "logps/chosen": -2.047503709793091, "logps/rejected": -2.391859531402588, "loss": 2.2124, "nll_loss": 2.1587889194488525, "rewards/accuracies": 0.875, "rewards/chosen": -0.20475035905838013, "rewards/margins": 0.03443560749292374, "rewards/rejected": -0.23918597400188446, "step": 78 }, { "epoch": 0.2181567138419054, "grad_norm": 0.4416508972644806, "learning_rate": 3.5779816513761473e-06, "log_odds_chosen": 0.457964152097702, "log_odds_ratio": -0.49229007959365845, "logits/chosen": -0.24249830842018127, "logits/rejected": -0.1156221255660057, "logps/chosen": -1.9371097087860107, "logps/rejected": -2.3405327796936035, "loss": 2.1264, "nll_loss": 2.077136516571045, "rewards/accuracies": 1.0, "rewards/chosen": -0.19371098279953003, "rewards/margins": 0.0403422936797142, "rewards/rejected": -0.23405326902866364, "step": 79 }, { "epoch": 0.2209181912323093, "grad_norm": 0.511163055896759, "learning_rate": 3.6238532110091746e-06, "log_odds_chosen": 0.3918084502220154, "log_odds_ratio": -0.527028501033783, "logits/chosen": -0.3217644989490509, "logits/rejected": -0.12379680573940277, "logps/chosen": -1.9086521863937378, "logps/rejected": -2.250434160232544, "loss": 2.0875, "nll_loss": 2.034804105758667, "rewards/accuracies": 1.0, "rewards/chosen": -0.19086521863937378, "rewards/margins": 0.03417817875742912, "rewards/rejected": -0.2250434011220932, "step": 80 }, { "epoch": 0.22367966862271316, "grad_norm": 0.48532167077064514, "learning_rate": 3.6697247706422022e-06, "log_odds_chosen": 0.38070449233055115, "log_odds_ratio": -0.5242205858230591, "logits/chosen": -0.24639271199703217, "logits/rejected": -0.02295789122581482, "logps/chosen": -1.9235130548477173, "logps/rejected": -2.256152868270874, "loss": 2.0966, "nll_loss": 2.044215440750122, "rewards/accuracies": 1.0, "rewards/chosen": -0.192351296544075, "rewards/margins": 0.03326397389173508, "rewards/rejected": -0.22561527788639069, "step": 81 }, { "epoch": 0.22644114601311702, "grad_norm": 0.4871878921985626, "learning_rate": 3.7155963302752295e-06, "log_odds_chosen": 0.3706399202346802, "log_odds_ratio": -0.5302938222885132, "logits/chosen": -0.32461822032928467, "logits/rejected": -0.07178197801113129, "logps/chosen": -2.002980947494507, "logps/rejected": -2.3308701515197754, "loss": 2.1902, "nll_loss": 2.1371781826019287, "rewards/accuracies": 1.0, "rewards/chosen": -0.20029808580875397, "rewards/margins": 0.03278890624642372, "rewards/rejected": -0.2330869883298874, "step": 82 }, { "epoch": 0.22920262340352088, "grad_norm": 0.5049198865890503, "learning_rate": 3.7614678899082575e-06, "log_odds_chosen": 0.6436396241188049, "log_odds_ratio": -0.4284180700778961, "logits/chosen": -0.3081129789352417, "logits/rejected": -0.1997162252664566, "logps/chosen": -1.9448068141937256, "logps/rejected": -2.5192015171051025, "loss": 2.1169, "nll_loss": 2.0740115642547607, "rewards/accuracies": 1.0, "rewards/chosen": -0.19448068737983704, "rewards/margins": 0.057439468801021576, "rewards/rejected": -0.2519201636314392, "step": 83 }, { "epoch": 0.23196410079392474, "grad_norm": 0.5246726870536804, "learning_rate": 3.8073394495412848e-06, "log_odds_chosen": 0.43793147802352905, "log_odds_ratio": -0.5015705823898315, "logits/chosen": -0.29296931624412537, "logits/rejected": -0.13174469769001007, "logps/chosen": -1.941349744796753, "logps/rejected": -2.3253884315490723, "loss": 2.1333, "nll_loss": 2.08317232131958, "rewards/accuracies": 1.0, "rewards/chosen": -0.19413498044013977, "rewards/margins": 0.03840385749936104, "rewards/rejected": -0.2325388491153717, "step": 84 }, { "epoch": 0.23472557818432863, "grad_norm": 0.41432732343673706, "learning_rate": 3.853211009174313e-06, "log_odds_chosen": 0.5745496153831482, "log_odds_ratio": -0.4579155147075653, "logits/chosen": -0.3449418544769287, "logits/rejected": -0.19380588829517365, "logps/chosen": -1.8625164031982422, "logps/rejected": -2.3706510066986084, "loss": 2.0375, "nll_loss": 1.9917008876800537, "rewards/accuracies": 1.0, "rewards/chosen": -0.1862516552209854, "rewards/margins": 0.050813477486371994, "rewards/rejected": -0.2370651215314865, "step": 85 }, { "epoch": 0.2374870555747325, "grad_norm": 0.438418984413147, "learning_rate": 3.89908256880734e-06, "log_odds_chosen": 0.580481231212616, "log_odds_ratio": -0.4620014429092407, "logits/chosen": -0.14796528220176697, "logits/rejected": -0.32091909646987915, "logps/chosen": -1.9756391048431396, "logps/rejected": -2.4986729621887207, "loss": 2.1484, "nll_loss": 2.102167844772339, "rewards/accuracies": 1.0, "rewards/chosen": -0.19756391644477844, "rewards/margins": 0.052303388714790344, "rewards/rejected": -0.2498673051595688, "step": 86 }, { "epoch": 0.24024853296513635, "grad_norm": 0.4355674684047699, "learning_rate": 3.944954128440367e-06, "log_odds_chosen": 0.5157158970832825, "log_odds_ratio": -0.4764195382595062, "logits/chosen": -0.16625456511974335, "logits/rejected": -0.22959333658218384, "logps/chosen": -1.8700374364852905, "logps/rejected": -2.3208043575286865, "loss": 2.0401, "nll_loss": 1.9924601316452026, "rewards/accuracies": 0.875, "rewards/chosen": -0.18700376152992249, "rewards/margins": 0.04507668316364288, "rewards/rejected": -0.23208042979240417, "step": 87 }, { "epoch": 0.2430100103555402, "grad_norm": 0.4138505756855011, "learning_rate": 3.9908256880733945e-06, "log_odds_chosen": 0.5811448693275452, "log_odds_ratio": -0.48126012086868286, "logits/chosen": -0.3125801980495453, "logits/rejected": -0.16787581145763397, "logps/chosen": -1.79574453830719, "logps/rejected": -2.3057985305786133, "loss": 1.9834, "nll_loss": 1.9353020191192627, "rewards/accuracies": 0.75, "rewards/chosen": -0.17957444489002228, "rewards/margins": 0.051005423069000244, "rewards/rejected": -0.23057986795902252, "step": 88 }, { "epoch": 0.24577148774594407, "grad_norm": 0.3883982002735138, "learning_rate": 4.036697247706423e-06, "log_odds_chosen": 0.6218655705451965, "log_odds_ratio": -0.43868428468704224, "logits/chosen": -0.16164137423038483, "logits/rejected": -0.2850674092769623, "logps/chosen": -1.8971762657165527, "logps/rejected": -2.4469919204711914, "loss": 2.0667, "nll_loss": 2.0228123664855957, "rewards/accuracies": 1.0, "rewards/chosen": -0.189717635512352, "rewards/margins": 0.05498155951499939, "rewards/rejected": -0.24469918012619019, "step": 89 }, { "epoch": 0.24853296513634796, "grad_norm": 0.441839337348938, "learning_rate": 4.08256880733945e-06, "log_odds_chosen": 0.5872446894645691, "log_odds_ratio": -0.44401073455810547, "logits/chosen": -0.2670595347881317, "logits/rejected": -0.07647659629583359, "logps/chosen": -1.8952282667160034, "logps/rejected": -2.412400245666504, "loss": 2.066, "nll_loss": 2.021595001220703, "rewards/accuracies": 1.0, "rewards/chosen": -0.18952281773090363, "rewards/margins": 0.051717206835746765, "rewards/rejected": -0.24124003946781158, "step": 90 }, { "epoch": 0.2512944425267518, "grad_norm": 0.4077044427394867, "learning_rate": 4.128440366972478e-06, "log_odds_chosen": 0.4384881556034088, "log_odds_ratio": -0.5076763033866882, "logits/chosen": -0.15786704421043396, "logits/rejected": -0.090408094227314, "logps/chosen": -1.878911018371582, "logps/rejected": -2.262026786804199, "loss": 2.047, "nll_loss": 1.9962477684020996, "rewards/accuracies": 0.875, "rewards/chosen": -0.18789111077785492, "rewards/margins": 0.03831159323453903, "rewards/rejected": -0.22620268166065216, "step": 91 }, { "epoch": 0.2540559199171557, "grad_norm": 0.3839576244354248, "learning_rate": 4.174311926605505e-06, "log_odds_chosen": 0.42869681119918823, "log_odds_ratio": -0.5070521235466003, "logits/chosen": -0.08416657149791718, "logits/rejected": -0.4555465579032898, "logps/chosen": -1.9147799015045166, "logps/rejected": -2.292423725128174, "loss": 2.0814, "nll_loss": 2.0306570529937744, "rewards/accuracies": 1.0, "rewards/chosen": -0.19147798418998718, "rewards/margins": 0.03776439279317856, "rewards/rejected": -0.22924238443374634, "step": 92 }, { "epoch": 0.25681739730755954, "grad_norm": 0.42368394136428833, "learning_rate": 4.220183486238532e-06, "log_odds_chosen": 0.5149534344673157, "log_odds_ratio": -0.47629106044769287, "logits/chosen": -0.13718552887439728, "logits/rejected": -0.20033138990402222, "logps/chosen": -1.9013878107070923, "logps/rejected": -2.3537254333496094, "loss": 2.0789, "nll_loss": 2.0312626361846924, "rewards/accuracies": 1.0, "rewards/chosen": -0.1901387870311737, "rewards/margins": 0.04523376002907753, "rewards/rejected": -0.23537255823612213, "step": 93 }, { "epoch": 0.2595788746979634, "grad_norm": 0.38014668226242065, "learning_rate": 4.26605504587156e-06, "log_odds_chosen": 0.4552799165248871, "log_odds_ratio": -0.49941790103912354, "logits/chosen": -0.2649000883102417, "logits/rejected": -0.20182102918624878, "logps/chosen": -1.8007447719573975, "logps/rejected": -2.1948485374450684, "loss": 1.9637, "nll_loss": 1.9138000011444092, "rewards/accuracies": 1.0, "rewards/chosen": -0.18007448315620422, "rewards/margins": 0.03941037505865097, "rewards/rejected": -0.2194848358631134, "step": 94 }, { "epoch": 0.26234035208836726, "grad_norm": 0.3550948202610016, "learning_rate": 4.311926605504588e-06, "log_odds_chosen": 0.36030256748199463, "log_odds_ratio": -0.533312976360321, "logits/chosen": -0.0772688165307045, "logits/rejected": -0.16052654385566711, "logps/chosen": -1.8254213333129883, "logps/rejected": -2.136554002761841, "loss": 2.0005, "nll_loss": 1.9472124576568604, "rewards/accuracies": 1.0, "rewards/chosen": -0.1825421154499054, "rewards/margins": 0.03111325576901436, "rewards/rejected": -0.21365538239479065, "step": 95 }, { "epoch": 0.26510182947877114, "grad_norm": 0.40300413966178894, "learning_rate": 4.357798165137615e-06, "log_odds_chosen": 0.2818768620491028, "log_odds_ratio": -0.5676061511039734, "logits/chosen": -0.10504347085952759, "logits/rejected": -0.05843639373779297, "logps/chosen": -1.9888579845428467, "logps/rejected": -2.2374351024627686, "loss": 2.1718, "nll_loss": 2.11505126953125, "rewards/accuracies": 1.0, "rewards/chosen": -0.19888579845428467, "rewards/margins": 0.02485768496990204, "rewards/rejected": -0.2237434834241867, "step": 96 }, { "epoch": 0.26786330686917503, "grad_norm": 0.398783802986145, "learning_rate": 4.403669724770643e-06, "log_odds_chosen": 0.5546217560768127, "log_odds_ratio": -0.46683651208877563, "logits/chosen": -0.008073419332504272, "logits/rejected": -0.08634719252586365, "logps/chosen": -1.8501688241958618, "logps/rejected": -2.342288017272949, "loss": 2.0318, "nll_loss": 1.9851207733154297, "rewards/accuracies": 1.0, "rewards/chosen": -0.18501687049865723, "rewards/margins": 0.04921191930770874, "rewards/rejected": -0.23422878980636597, "step": 97 }, { "epoch": 0.27062478425957887, "grad_norm": 0.39449602365493774, "learning_rate": 4.44954128440367e-06, "log_odds_chosen": 0.4133303165435791, "log_odds_ratio": -0.5122652649879456, "logits/chosen": -0.05717798322439194, "logits/rejected": -0.1295921504497528, "logps/chosen": -1.9054274559020996, "logps/rejected": -2.266583204269409, "loss": 2.0813, "nll_loss": 2.030045509338379, "rewards/accuracies": 1.0, "rewards/chosen": -0.19054275751113892, "rewards/margins": 0.03611557558178902, "rewards/rejected": -0.22665831446647644, "step": 98 }, { "epoch": 0.27338626164998275, "grad_norm": 0.39879634976387024, "learning_rate": 4.4954128440366975e-06, "log_odds_chosen": 0.547784149646759, "log_odds_ratio": -0.46036428213119507, "logits/chosen": -0.09109216928482056, "logits/rejected": -0.3699495494365692, "logps/chosen": -1.9557167291641235, "logps/rejected": -2.4426751136779785, "loss": 2.1161, "nll_loss": 2.070013999938965, "rewards/accuracies": 1.0, "rewards/chosen": -0.1955716609954834, "rewards/margins": 0.04869585484266281, "rewards/rejected": -0.2442675083875656, "step": 99 }, { "epoch": 0.2761477390403866, "grad_norm": 0.3873852491378784, "learning_rate": 4.541284403669725e-06, "log_odds_chosen": 0.3448026478290558, "log_odds_ratio": -0.5393580198287964, "logits/chosen": -0.1656271517276764, "logits/rejected": -0.07856383174657822, "logps/chosen": -1.8394955396652222, "logps/rejected": -2.1358819007873535, "loss": 2.0365, "nll_loss": 1.9825859069824219, "rewards/accuracies": 1.0, "rewards/chosen": -0.1839495450258255, "rewards/margins": 0.029638633131980896, "rewards/rejected": -0.2135881781578064, "step": 100 }, { "epoch": 0.2789092164307905, "grad_norm": 0.3506630063056946, "learning_rate": 4.587155963302753e-06, "log_odds_chosen": 0.33486518263816833, "log_odds_ratio": -0.5475890636444092, "logits/chosen": 0.009370687417685986, "logits/rejected": -0.18966984748840332, "logps/chosen": -1.9532427787780762, "logps/rejected": -2.2507667541503906, "loss": 2.132, "nll_loss": 2.077195405960083, "rewards/accuracies": 0.875, "rewards/chosen": -0.19532427191734314, "rewards/margins": 0.02975238859653473, "rewards/rejected": -0.22507666051387787, "step": 101 }, { "epoch": 0.28167069382119436, "grad_norm": 0.33624136447906494, "learning_rate": 4.63302752293578e-06, "log_odds_chosen": 0.5504237413406372, "log_odds_ratio": -0.4613422155380249, "logits/chosen": -0.06238182261586189, "logits/rejected": -0.22859227657318115, "logps/chosen": -1.791447401046753, "logps/rejected": -2.2658450603485107, "loss": 1.9877, "nll_loss": 1.9415662288665771, "rewards/accuracies": 1.0, "rewards/chosen": -0.1791447550058365, "rewards/margins": 0.04743976891040802, "rewards/rejected": -0.2265845388174057, "step": 102 }, { "epoch": 0.2844321712115982, "grad_norm": 0.3664388656616211, "learning_rate": 4.678899082568808e-06, "log_odds_chosen": 0.3437744081020355, "log_odds_ratio": -0.5439410209655762, "logits/chosen": -0.1386101245880127, "logits/rejected": -0.17609171569347382, "logps/chosen": -1.8379578590393066, "logps/rejected": -2.1358203887939453, "loss": 2.022, "nll_loss": 1.9676285982131958, "rewards/accuracies": 0.875, "rewards/chosen": -0.1837957799434662, "rewards/margins": 0.0297862458974123, "rewards/rejected": -0.21358203887939453, "step": 103 }, { "epoch": 0.2871936486020021, "grad_norm": 0.3293968439102173, "learning_rate": 4.724770642201835e-06, "log_odds_chosen": 0.3870212137699127, "log_odds_ratio": -0.5231497883796692, "logits/chosen": -0.07520203292369843, "logits/rejected": -0.15402644872665405, "logps/chosen": -1.756553053855896, "logps/rejected": -2.086921453475952, "loss": 1.9488, "nll_loss": 1.8965202569961548, "rewards/accuracies": 1.0, "rewards/chosen": -0.1756553202867508, "rewards/margins": 0.033036813139915466, "rewards/rejected": -0.20869213342666626, "step": 104 }, { "epoch": 0.2899551259924059, "grad_norm": 0.33918339014053345, "learning_rate": 4.770642201834863e-06, "log_odds_chosen": 0.4852727949619293, "log_odds_ratio": -0.4833363890647888, "logits/chosen": -0.004435461014509201, "logits/rejected": -0.1534399837255478, "logps/chosen": -1.7944328784942627, "logps/rejected": -2.2146499156951904, "loss": 1.9627, "nll_loss": 1.9144006967544556, "rewards/accuracies": 1.0, "rewards/chosen": -0.17944329977035522, "rewards/margins": 0.042021699249744415, "rewards/rejected": -0.22146499156951904, "step": 105 }, { "epoch": 0.2927166033828098, "grad_norm": 0.3346327841281891, "learning_rate": 4.816513761467891e-06, "log_odds_chosen": 0.5177164673805237, "log_odds_ratio": -0.47983041405677795, "logits/chosen": -0.00399226788431406, "logits/rejected": -0.10182341933250427, "logps/chosen": -1.7794350385665894, "logps/rejected": -2.226297616958618, "loss": 1.9511, "nll_loss": 1.9030946493148804, "rewards/accuracies": 0.875, "rewards/chosen": -0.17794349789619446, "rewards/margins": 0.044686250388622284, "rewards/rejected": -0.22262977063655853, "step": 106 }, { "epoch": 0.2954780807732137, "grad_norm": 0.3454085886478424, "learning_rate": 4.862385321100918e-06, "log_odds_chosen": 0.3980882465839386, "log_odds_ratio": -0.5184234380722046, "logits/chosen": 0.01898978091776371, "logits/rejected": -0.21249954402446747, "logps/chosen": -1.8531229496002197, "logps/rejected": -2.197577953338623, "loss": 2.0242, "nll_loss": 1.9724009037017822, "rewards/accuracies": 0.875, "rewards/chosen": -0.18531231582164764, "rewards/margins": 0.03444547951221466, "rewards/rejected": -0.2197577804327011, "step": 107 }, { "epoch": 0.2982395581636175, "grad_norm": 0.3301767408847809, "learning_rate": 4.908256880733945e-06, "log_odds_chosen": 0.44768860936164856, "log_odds_ratio": -0.5000944137573242, "logits/chosen": 0.06930336356163025, "logits/rejected": -0.24058011174201965, "logps/chosen": -1.8849480152130127, "logps/rejected": -2.278297185897827, "loss": 2.0446, "nll_loss": 1.9945893287658691, "rewards/accuracies": 0.875, "rewards/chosen": -0.18849481642246246, "rewards/margins": 0.039334915578365326, "rewards/rejected": -0.2278297394514084, "step": 108 }, { "epoch": 0.3010010355540214, "grad_norm": 0.3244481086730957, "learning_rate": 4.954128440366973e-06, "log_odds_chosen": 0.6234428286552429, "log_odds_ratio": -0.44249945878982544, "logits/chosen": 0.045590970665216446, "logits/rejected": -0.3307313919067383, "logps/chosen": -1.8896514177322388, "logps/rejected": -2.4449257850646973, "loss": 2.0431, "nll_loss": 1.9988850355148315, "rewards/accuracies": 1.0, "rewards/chosen": -0.18896515667438507, "rewards/margins": 0.0555274523794651, "rewards/rejected": -0.24449260532855988, "step": 109 }, { "epoch": 0.30376251294442524, "grad_norm": 0.31317761540412903, "learning_rate": 5e-06, "log_odds_chosen": 0.2916356325149536, "log_odds_ratio": -0.5743635892868042, "logits/chosen": 0.144297257065773, "logits/rejected": -0.26908552646636963, "logps/chosen": -1.8418867588043213, "logps/rejected": -2.0925679206848145, "loss": 2.0441, "nll_loss": 1.9866299629211426, "rewards/accuracies": 0.75, "rewards/chosen": -0.18418867886066437, "rewards/margins": 0.02506813406944275, "rewards/rejected": -0.20925679802894592, "step": 110 }, { "epoch": 0.30652399033482913, "grad_norm": 0.29466620087623596, "learning_rate": 4.999987154315977e-06, "log_odds_chosen": 0.42353084683418274, "log_odds_ratio": -0.5111951231956482, "logits/chosen": 0.05306434631347656, "logits/rejected": -0.15503177046775818, "logps/chosen": -1.7407968044281006, "logps/rejected": -2.0989816188812256, "loss": 1.924, "nll_loss": 1.87285315990448, "rewards/accuracies": 1.0, "rewards/chosen": -0.17407968640327454, "rewards/margins": 0.03581848740577698, "rewards/rejected": -0.2098981738090515, "step": 111 }, { "epoch": 0.309285467725233, "grad_norm": 0.3188883364200592, "learning_rate": 4.999948617395916e-06, "log_odds_chosen": 0.4043401777744293, "log_odds_ratio": -0.5218645930290222, "logits/chosen": 0.16884373128414154, "logits/rejected": -0.07719510793685913, "logps/chosen": -1.8007270097732544, "logps/rejected": -2.151782274246216, "loss": 1.9777, "nll_loss": 1.9254791736602783, "rewards/accuracies": 1.0, "rewards/chosen": -0.18007270991802216, "rewards/margins": 0.035105518996715546, "rewards/rejected": -0.2151782363653183, "step": 112 }, { "epoch": 0.31204694511563685, "grad_norm": 0.3535216450691223, "learning_rate": 4.999884389635843e-06, "log_odds_chosen": 0.41415858268737793, "log_odds_ratio": -0.5124688744544983, "logits/chosen": 0.0558352991938591, "logits/rejected": -0.09803693741559982, "logps/chosen": -1.8450539112091064, "logps/rejected": -2.2039334774017334, "loss": 2.0336, "nll_loss": 1.9823946952819824, "rewards/accuracies": 1.0, "rewards/chosen": -0.1845053881406784, "rewards/margins": 0.035887960344552994, "rewards/rejected": -0.2203933596611023, "step": 113 }, { "epoch": 0.31480842250604074, "grad_norm": 0.2991940379142761, "learning_rate": 4.9997944716957985e-06, "log_odds_chosen": 0.514029324054718, "log_odds_ratio": -0.47918182611465454, "logits/chosen": 0.16882193088531494, "logits/rejected": -0.24593651294708252, "logps/chosen": -1.8493305444717407, "logps/rejected": -2.298590898513794, "loss": 2.0272, "nll_loss": 1.9792673587799072, "rewards/accuracies": 0.875, "rewards/chosen": -0.18493306636810303, "rewards/margins": 0.044926032423973083, "rewards/rejected": -0.22985908389091492, "step": 114 }, { "epoch": 0.3175698998964446, "grad_norm": 0.28921976685523987, "learning_rate": 4.999678864499828e-06, "log_odds_chosen": 0.38468483090400696, "log_odds_ratio": -0.5222321152687073, "logits/chosen": 0.2794942557811737, "logits/rejected": -0.21004724502563477, "logps/chosen": -1.8734655380249023, "logps/rejected": -2.2063117027282715, "loss": 2.0344, "nll_loss": 1.9822089672088623, "rewards/accuracies": 1.0, "rewards/chosen": -0.18734657764434814, "rewards/margins": 0.03328459709882736, "rewards/rejected": -0.2206311821937561, "step": 115 }, { "epoch": 0.32033137728684846, "grad_norm": 0.30199047923088074, "learning_rate": 4.999537569235975e-06, "log_odds_chosen": 0.35607457160949707, "log_odds_ratio": -0.5390717387199402, "logits/chosen": 0.17685924470424652, "logits/rejected": -0.16482022404670715, "logps/chosen": -1.8530224561691284, "logps/rejected": -2.1583948135375977, "loss": 2.0285, "nll_loss": 1.9746413230895996, "rewards/accuracies": 0.875, "rewards/chosen": -0.1853022426366806, "rewards/margins": 0.030537229031324387, "rewards/rejected": -0.21583949029445648, "step": 116 }, { "epoch": 0.32309285467725235, "grad_norm": 0.305818647146225, "learning_rate": 4.999370587356267e-06, "log_odds_chosen": 0.3153924345970154, "log_odds_ratio": -0.5565903782844543, "logits/chosen": 0.17289672791957855, "logits/rejected": -0.3006405234336853, "logps/chosen": -1.9371519088745117, "logps/rejected": -2.214411497116089, "loss": 2.1096, "nll_loss": 2.0539159774780273, "rewards/accuracies": 0.875, "rewards/chosen": -0.1937151849269867, "rewards/margins": 0.027725949883461, "rewards/rejected": -0.2214411348104477, "step": 117 }, { "epoch": 0.3258543320676562, "grad_norm": 0.324216365814209, "learning_rate": 4.9991779205767e-06, "log_odds_chosen": 0.29256871342658997, "log_odds_ratio": -0.5621715784072876, "logits/chosen": 0.055469777435064316, "logits/rejected": -0.5593094229698181, "logps/chosen": -1.808518886566162, "logps/rejected": -2.060049295425415, "loss": 1.9916, "nll_loss": 1.9353588819503784, "rewards/accuracies": 0.875, "rewards/chosen": -0.18085187673568726, "rewards/margins": 0.02515305206179619, "rewards/rejected": -0.20600494742393494, "step": 118 }, { "epoch": 0.32861580945806007, "grad_norm": 0.316292941570282, "learning_rate": 4.998959570877224e-06, "log_odds_chosen": 0.391570121049881, "log_odds_ratio": -0.519210159778595, "logits/chosen": 0.29701903462409973, "logits/rejected": -0.248467817902565, "logps/chosen": -1.950477123260498, "logps/rejected": -2.2915000915527344, "loss": 2.1349, "nll_loss": 2.0829989910125732, "rewards/accuracies": 1.0, "rewards/chosen": -0.19504770636558533, "rewards/margins": 0.034102290868759155, "rewards/rejected": -0.22914999723434448, "step": 119 }, { "epoch": 0.3313772868484639, "grad_norm": 0.29707539081573486, "learning_rate": 4.99871554050172e-06, "log_odds_chosen": 0.49879857897758484, "log_odds_ratio": -0.4770265221595764, "logits/chosen": 0.1394173949956894, "logits/rejected": -0.11445163935422897, "logps/chosen": -1.7406611442565918, "logps/rejected": -2.167613983154297, "loss": 1.9065, "nll_loss": 1.8588043451309204, "rewards/accuracies": 1.0, "rewards/chosen": -0.17406611144542694, "rewards/margins": 0.042695268988609314, "rewards/rejected": -0.21676141023635864, "step": 120 }, { "epoch": 0.3341387642388678, "grad_norm": 0.3011818528175354, "learning_rate": 4.9984458319579775e-06, "log_odds_chosen": 0.4734145998954773, "log_odds_ratio": -0.48969942331314087, "logits/chosen": 0.10353265702724457, "logits/rejected": -0.22807854413986206, "logps/chosen": -1.7799421548843384, "logps/rejected": -2.1871492862701416, "loss": 1.9363, "nll_loss": 1.8872833251953125, "rewards/accuracies": 1.0, "rewards/chosen": -0.17799422144889832, "rewards/margins": 0.04072071984410286, "rewards/rejected": -0.21871493756771088, "step": 121 }, { "epoch": 0.3369002416292717, "grad_norm": 0.28915688395500183, "learning_rate": 4.99815044801767e-06, "log_odds_chosen": 0.3440546989440918, "log_odds_ratio": -0.5443906784057617, "logits/chosen": 0.22890803217887878, "logits/rejected": -0.20038765668869019, "logps/chosen": -1.8269233703613281, "logps/rejected": -2.1230876445770264, "loss": 2.0144, "nll_loss": 1.959984540939331, "rewards/accuracies": 0.875, "rewards/chosen": -0.18269234895706177, "rewards/margins": 0.02961641550064087, "rewards/rejected": -0.21230876445770264, "step": 122 }, { "epoch": 0.3396617190196755, "grad_norm": 0.31248003244400024, "learning_rate": 4.9978293917163225e-06, "log_odds_chosen": 0.21560871601104736, "log_odds_ratio": -0.5958309769630432, "logits/chosen": 0.2199111431837082, "logits/rejected": -0.15359394252300262, "logps/chosen": -1.9724359512329102, "logps/rejected": -2.162384033203125, "loss": 2.1306, "nll_loss": 2.0710067749023438, "rewards/accuracies": 0.75, "rewards/chosen": -0.1972435861825943, "rewards/margins": 0.01899481564760208, "rewards/rejected": -0.21623840928077698, "step": 123 }, { "epoch": 0.3424231964100794, "grad_norm": 0.3086913824081421, "learning_rate": 4.997482666353287e-06, "log_odds_chosen": 0.36193954944610596, "log_odds_ratio": -0.5371627807617188, "logits/chosen": 0.07905742526054382, "logits/rejected": -0.047993652522563934, "logps/chosen": -1.860518217086792, "logps/rejected": -2.175654649734497, "loss": 2.0362, "nll_loss": 1.982520341873169, "rewards/accuracies": 0.875, "rewards/chosen": -0.18605183064937592, "rewards/margins": 0.03151364251971245, "rewards/rejected": -0.21756546199321747, "step": 124 }, { "epoch": 0.34518467380048323, "grad_norm": 0.2956501543521881, "learning_rate": 4.997110275491702e-06, "log_odds_chosen": 0.5367758274078369, "log_odds_ratio": -0.4794245958328247, "logits/chosen": 0.17313790321350098, "logits/rejected": -0.5377508401870728, "logps/chosen": -1.7679404020309448, "logps/rejected": -2.2323243618011475, "loss": 1.9515, "nll_loss": 1.9035319089889526, "rewards/accuracies": 1.0, "rewards/chosen": -0.17679405212402344, "rewards/margins": 0.04643838852643967, "rewards/rejected": -0.2232324331998825, "step": 125 }, { "epoch": 0.3479461511908871, "grad_norm": 0.2930620014667511, "learning_rate": 4.9967122229584614e-06, "log_odds_chosen": 0.5086734890937805, "log_odds_ratio": -0.47367364168167114, "logits/chosen": 0.20121866464614868, "logits/rejected": -0.3580709397792816, "logps/chosen": -1.772174596786499, "logps/rejected": -2.211575746536255, "loss": 1.9502, "nll_loss": 1.9028148651123047, "rewards/accuracies": 1.0, "rewards/chosen": -0.17721746861934662, "rewards/margins": 0.04394011199474335, "rewards/rejected": -0.22115758061408997, "step": 126 }, { "epoch": 0.350707628581291, "grad_norm": 0.313006728887558, "learning_rate": 4.996288512844169e-06, "log_odds_chosen": 0.26984095573425293, "log_odds_ratio": -0.5770066380500793, "logits/chosen": 0.30613642930984497, "logits/rejected": -0.0801864042878151, "logps/chosen": -1.9204916954040527, "logps/rejected": -2.153423547744751, "loss": 2.1089, "nll_loss": 2.051222324371338, "rewards/accuracies": 0.75, "rewards/chosen": -0.19204919040203094, "rewards/margins": 0.023293154314160347, "rewards/rejected": -0.21534234285354614, "step": 127 }, { "epoch": 0.35346910597169484, "grad_norm": 0.26883664727211, "learning_rate": 4.995839149503103e-06, "log_odds_chosen": 0.4242061972618103, "log_odds_ratio": -0.5166581273078918, "logits/chosen": 0.287309467792511, "logits/rejected": -0.28880101442337036, "logps/chosen": -1.7962003946304321, "logps/rejected": -2.1639606952667236, "loss": 1.9745, "nll_loss": 1.9228155612945557, "rewards/accuracies": 1.0, "rewards/chosen": -0.17962004244327545, "rewards/margins": 0.036776017397642136, "rewards/rejected": -0.2163960486650467, "step": 128 }, { "epoch": 0.35623058336209873, "grad_norm": 0.3000660240650177, "learning_rate": 4.995364137553166e-06, "log_odds_chosen": 0.2731889486312866, "log_odds_ratio": -0.5769234299659729, "logits/chosen": 0.17702779173851013, "logits/rejected": -0.16427305340766907, "logps/chosen": -1.8508391380310059, "logps/rejected": -2.0855119228363037, "loss": 2.0272, "nll_loss": 1.9694753885269165, "rewards/accuracies": 0.875, "rewards/chosen": -0.18508392572402954, "rewards/margins": 0.023467278108000755, "rewards/rejected": -0.20855121314525604, "step": 129 }, { "epoch": 0.35899206075250256, "grad_norm": 0.27979782223701477, "learning_rate": 4.994863481875842e-06, "log_odds_chosen": 0.539387047290802, "log_odds_ratio": -0.4670449495315552, "logits/chosen": 0.19368596374988556, "logits/rejected": -0.4078698754310608, "logps/chosen": -1.7874817848205566, "logps/rejected": -2.2576088905334473, "loss": 1.9686, "nll_loss": 1.9218828678131104, "rewards/accuracies": 1.0, "rewards/chosen": -0.17874819040298462, "rewards/margins": 0.04701270908117294, "rewards/rejected": -0.22576089203357697, "step": 130 }, { "epoch": 0.36175353814290645, "grad_norm": 0.2961781322956085, "learning_rate": 4.99433718761614e-06, "log_odds_chosen": 0.482613205909729, "log_odds_ratio": -0.48607659339904785, "logits/chosen": 0.3117329776287079, "logits/rejected": -0.19324824213981628, "logps/chosen": -1.8348233699798584, "logps/rejected": -2.2560348510742188, "loss": 1.9992, "nll_loss": 1.9505534172058105, "rewards/accuracies": 1.0, "rewards/chosen": -0.1834823340177536, "rewards/margins": 0.042121127247810364, "rewards/rejected": -0.22560347616672516, "step": 131 }, { "epoch": 0.36451501553331034, "grad_norm": 0.27247706055641174, "learning_rate": 4.993785260182552e-06, "log_odds_chosen": 0.5040154457092285, "log_odds_ratio": -0.47964078187942505, "logits/chosen": 0.26086652278900146, "logits/rejected": -0.39971601963043213, "logps/chosen": -1.7693819999694824, "logps/rejected": -2.206263780593872, "loss": 1.9461, "nll_loss": 1.898113489151001, "rewards/accuracies": 1.0, "rewards/chosen": -0.17693820595741272, "rewards/margins": 0.043688178062438965, "rewards/rejected": -0.2206263691186905, "step": 132 }, { "epoch": 0.36727649292371417, "grad_norm": 0.2559458613395691, "learning_rate": 4.993207705246983e-06, "log_odds_chosen": 0.37582507729530334, "log_odds_ratio": -0.529410719871521, "logits/chosen": 0.24356283247470856, "logits/rejected": -0.23510105907917023, "logps/chosen": -1.8250073194503784, "logps/rejected": -2.1527535915374756, "loss": 1.9998, "nll_loss": 1.9468413591384888, "rewards/accuracies": 1.0, "rewards/chosen": -0.18250073492527008, "rewards/margins": 0.03277461230754852, "rewards/rejected": -0.2152753621339798, "step": 133 }, { "epoch": 0.37003797031411806, "grad_norm": 0.2661672532558441, "learning_rate": 4.992604528744705e-06, "log_odds_chosen": 0.4147174656391144, "log_odds_ratio": -0.5111517906188965, "logits/chosen": 0.23473379015922546, "logits/rejected": -0.5313011407852173, "logps/chosen": -1.8088786602020264, "logps/rejected": -2.1657345294952393, "loss": 1.985, "nll_loss": 1.9338966608047485, "rewards/accuracies": 1.0, "rewards/chosen": -0.1808878630399704, "rewards/margins": 0.035685598850250244, "rewards/rejected": -0.21657346189022064, "step": 134 }, { "epoch": 0.37279944770452195, "grad_norm": 0.26244473457336426, "learning_rate": 4.9919757368742895e-06, "log_odds_chosen": 0.4870528280735016, "log_odds_ratio": -0.4892481565475464, "logits/chosen": 0.2686365842819214, "logits/rejected": -0.3376864194869995, "logps/chosen": -1.7298060655593872, "logps/rejected": -2.145728349685669, "loss": 1.9046, "nll_loss": 1.8556550741195679, "rewards/accuracies": 1.0, "rewards/chosen": -0.17298059165477753, "rewards/margins": 0.04159224033355713, "rewards/rejected": -0.21457283198833466, "step": 135 }, { "epoch": 0.3755609250949258, "grad_norm": 0.26679527759552, "learning_rate": 4.991321336097546e-06, "log_odds_chosen": 0.4111718535423279, "log_odds_ratio": -0.512808084487915, "logits/chosen": 0.22426243126392365, "logits/rejected": -0.2763628363609314, "logps/chosen": -1.8473613262176514, "logps/rejected": -2.2056474685668945, "loss": 1.9977, "nll_loss": 1.9464530944824219, "rewards/accuracies": 1.0, "rewards/chosen": -0.18473613262176514, "rewards/margins": 0.03582862764596939, "rewards/rejected": -0.22056476771831512, "step": 136 }, { "epoch": 0.37832240248532967, "grad_norm": 0.25576797127723694, "learning_rate": 4.990641333139455e-06, "log_odds_chosen": 0.435663640499115, "log_odds_ratio": -0.5039057731628418, "logits/chosen": 0.29907894134521484, "logits/rejected": -0.34149447083473206, "logps/chosen": -1.7228469848632812, "logps/rejected": -2.093015670776367, "loss": 1.8956, "nll_loss": 1.8452366590499878, "rewards/accuracies": 1.0, "rewards/chosen": -0.17228470742702484, "rewards/margins": 0.037016861140728, "rewards/rejected": -0.20930157601833344, "step": 137 }, { "epoch": 0.3810838798757335, "grad_norm": 0.2576519250869751, "learning_rate": 4.989935734988098e-06, "log_odds_chosen": 0.32478412985801697, "log_odds_ratio": -0.5477995872497559, "logits/chosen": 0.3516974151134491, "logits/rejected": -0.4254434406757355, "logps/chosen": -1.8142131567001343, "logps/rejected": -2.0922889709472656, "loss": 1.9776, "nll_loss": 1.922855257987976, "rewards/accuracies": 1.0, "rewards/chosen": -0.18142130970954895, "rewards/margins": 0.02780758962035179, "rewards/rejected": -0.20922890305519104, "step": 138 }, { "epoch": 0.3838453572661374, "grad_norm": 0.2815987765789032, "learning_rate": 4.989204548894589e-06, "log_odds_chosen": 0.32539641857147217, "log_odds_ratio": -0.5531569719314575, "logits/chosen": 0.4096101224422455, "logits/rejected": -0.19687263667583466, "logps/chosen": -1.8913159370422363, "logps/rejected": -2.1725118160247803, "loss": 2.0515, "nll_loss": 1.99615478515625, "rewards/accuracies": 0.875, "rewards/chosen": -0.18913161754608154, "rewards/margins": 0.028119584545493126, "rewards/rejected": -0.21725118160247803, "step": 139 }, { "epoch": 0.3866068346565413, "grad_norm": 0.25471100211143494, "learning_rate": 4.988447782372996e-06, "log_odds_chosen": 0.3162972331047058, "log_odds_ratio": -0.553735077381134, "logits/chosen": 0.2287934273481369, "logits/rejected": -0.36916857957839966, "logps/chosen": -1.717573642730713, "logps/rejected": -1.986279010772705, "loss": 1.8883, "nll_loss": 1.8329135179519653, "rewards/accuracies": 0.875, "rewards/chosen": -0.17175735533237457, "rewards/margins": 0.026870528236031532, "rewards/rejected": -0.19862788915634155, "step": 140 }, { "epoch": 0.3893683120469451, "grad_norm": 0.23676469922065735, "learning_rate": 4.9876654432002655e-06, "log_odds_chosen": 0.41759181022644043, "log_odds_ratio": -0.5150130391120911, "logits/chosen": 0.3617481589317322, "logits/rejected": -0.5005238056182861, "logps/chosen": -1.7590631246566772, "logps/rejected": -2.120802640914917, "loss": 1.9118, "nll_loss": 1.8603452444076538, "rewards/accuracies": 0.875, "rewards/chosen": -0.17590634524822235, "rewards/margins": 0.03617396205663681, "rewards/rejected": -0.21208029985427856, "step": 141 }, { "epoch": 0.392129789437349, "grad_norm": 0.23819445073604584, "learning_rate": 4.986857539416144e-06, "log_odds_chosen": 0.514154851436615, "log_odds_ratio": -0.47449907660484314, "logits/chosen": 0.2892986834049225, "logits/rejected": -0.5338828563690186, "logps/chosen": -1.732762336730957, "logps/rejected": -2.1715476512908936, "loss": 1.8923, "nll_loss": 1.8448883295059204, "rewards/accuracies": 0.875, "rewards/chosen": -0.17327623069286346, "rewards/margins": 0.04387851804494858, "rewards/rejected": -0.21715475618839264, "step": 142 }, { "epoch": 0.39489126682775283, "grad_norm": 0.2597619891166687, "learning_rate": 4.986024079323092e-06, "log_odds_chosen": 0.21202370524406433, "log_odds_ratio": -0.5989823341369629, "logits/chosen": 0.31219571828842163, "logits/rejected": -0.2251828908920288, "logps/chosen": -1.7001993656158447, "logps/rejected": -1.8785269260406494, "loss": 1.9001, "nll_loss": 1.840247392654419, "rewards/accuracies": 0.75, "rewards/chosen": -0.17001992464065552, "rewards/margins": 0.017832759767770767, "rewards/rejected": -0.18785269558429718, "step": 143 }, { "epoch": 0.3976527442181567, "grad_norm": 0.24414244294166565, "learning_rate": 4.985165071486201e-06, "log_odds_chosen": 0.42103180289268494, "log_odds_ratio": -0.506747305393219, "logits/chosen": 0.24320174753665924, "logits/rejected": -0.47630739212036133, "logps/chosen": -1.813495397567749, "logps/rejected": -2.177159309387207, "loss": 1.9628, "nll_loss": 1.9121257066726685, "rewards/accuracies": 1.0, "rewards/chosen": -0.18134953081607819, "rewards/margins": 0.03636639565229416, "rewards/rejected": -0.21771591901779175, "step": 144 }, { "epoch": 0.4004142216085606, "grad_norm": 0.25120964646339417, "learning_rate": 4.984280524733107e-06, "log_odds_chosen": 0.4766540825366974, "log_odds_ratio": -0.4867539405822754, "logits/chosen": 0.23771557211875916, "logits/rejected": -0.3588354289531708, "logps/chosen": -1.7543095350265503, "logps/rejected": -2.1605966091156006, "loss": 1.92, "nll_loss": 1.8713701963424683, "rewards/accuracies": 1.0, "rewards/chosen": -0.17543095350265503, "rewards/margins": 0.040628716349601746, "rewards/rejected": -0.21605966985225677, "step": 145 }, { "epoch": 0.40317569899896444, "grad_norm": 0.23762984573841095, "learning_rate": 4.983370448153896e-06, "log_odds_chosen": 0.3954075574874878, "log_odds_ratio": -0.5178768038749695, "logits/chosen": 0.2783900201320648, "logits/rejected": -0.3363473117351532, "logps/chosen": -1.7586162090301514, "logps/rejected": -2.097499370574951, "loss": 1.9169, "nll_loss": 1.8651440143585205, "rewards/accuracies": 1.0, "rewards/chosen": -0.1758616417646408, "rewards/margins": 0.033888280391693115, "rewards/rejected": -0.20974992215633392, "step": 146 }, { "epoch": 0.4059371763893683, "grad_norm": 0.250926673412323, "learning_rate": 4.9824348511010115e-06, "log_odds_chosen": 0.37487757205963135, "log_odds_ratio": -0.5328046083450317, "logits/chosen": 0.3899734616279602, "logits/rejected": -0.3820725679397583, "logps/chosen": -1.8538525104522705, "logps/rejected": -2.1806483268737793, "loss": 2.0058, "nll_loss": 1.9525387287139893, "rewards/accuracies": 1.0, "rewards/chosen": -0.18538527190685272, "rewards/margins": 0.03267957270145416, "rewards/rejected": -0.2180648297071457, "step": 147 }, { "epoch": 0.40869865377977216, "grad_norm": 0.24456721544265747, "learning_rate": 4.981473743189163e-06, "log_odds_chosen": 0.6152381300926208, "log_odds_ratio": -0.4415377974510193, "logits/chosen": 0.15124794840812683, "logits/rejected": -0.6728506684303284, "logps/chosen": -1.630881905555725, "logps/rejected": -2.1548542976379395, "loss": 1.8038, "nll_loss": 1.7596325874328613, "rewards/accuracies": 1.0, "rewards/chosen": -0.16308818757534027, "rewards/margins": 0.05239725112915039, "rewards/rejected": -0.21548543870449066, "step": 148 }, { "epoch": 0.41146013117017605, "grad_norm": 0.2503897547721863, "learning_rate": 4.98048713429522e-06, "log_odds_chosen": 0.4051974415779114, "log_odds_ratio": -0.5133127570152283, "logits/chosen": 0.3494684100151062, "logits/rejected": -0.11723777651786804, "logps/chosen": -1.7437934875488281, "logps/rejected": -2.086435079574585, "loss": 1.9057, "nll_loss": 1.8543728590011597, "rewards/accuracies": 1.0, "rewards/chosen": -0.1743793487548828, "rewards/margins": 0.034264158457517624, "rewards/rejected": -0.20864351093769073, "step": 149 }, { "epoch": 0.41422160856057993, "grad_norm": 0.26401370763778687, "learning_rate": 4.979475034558115e-06, "log_odds_chosen": 0.35506629943847656, "log_odds_ratio": -0.534768283367157, "logits/chosen": 0.4498825669288635, "logits/rejected": -0.32746273279190063, "logps/chosen": -1.8957396745681763, "logps/rejected": -2.205925464630127, "loss": 2.042, "nll_loss": 1.988513469696045, "rewards/accuracies": 1.0, "rewards/chosen": -0.1895739734172821, "rewards/margins": 0.0310185756534338, "rewards/rejected": -0.22059254348278046, "step": 150 }, { "epoch": 0.41698308595098377, "grad_norm": 0.23593436181545258, "learning_rate": 4.978437454378741e-06, "log_odds_chosen": 0.36985495686531067, "log_odds_ratio": -0.5328302979469299, "logits/chosen": 0.4250433146953583, "logits/rejected": -0.38277071714401245, "logps/chosen": -1.7761732339859009, "logps/rejected": -2.0909667015075684, "loss": 1.942, "nll_loss": 1.8887526988983154, "rewards/accuracies": 0.875, "rewards/chosen": -0.17761734127998352, "rewards/margins": 0.03147934749722481, "rewards/rejected": -0.20909668505191803, "step": 151 }, { "epoch": 0.41974456334138766, "grad_norm": 0.26158878207206726, "learning_rate": 4.977374404419838e-06, "log_odds_chosen": 0.39601171016693115, "log_odds_ratio": -0.5228374004364014, "logits/chosen": 0.42059236764907837, "logits/rejected": -0.3891626000404358, "logps/chosen": -1.7959572076797485, "logps/rejected": -2.140069007873535, "loss": 1.9499, "nll_loss": 1.8975740671157837, "rewards/accuracies": 1.0, "rewards/chosen": -0.1795957237482071, "rewards/margins": 0.03441117703914642, "rewards/rejected": -0.21400688588619232, "step": 152 }, { "epoch": 0.4225060407317915, "grad_norm": 0.2561565339565277, "learning_rate": 4.976285895605888e-06, "log_odds_chosen": 0.4899553656578064, "log_odds_ratio": -0.4833296239376068, "logits/chosen": 0.37404921650886536, "logits/rejected": -0.5243082046508789, "logps/chosen": -1.7399574518203735, "logps/rejected": -2.1531152725219727, "loss": 1.8971, "nll_loss": 1.8487193584442139, "rewards/accuracies": 1.0, "rewards/chosen": -0.1739957481622696, "rewards/margins": 0.04131579399108887, "rewards/rejected": -0.21531155705451965, "step": 153 }, { "epoch": 0.4252675181221954, "grad_norm": 0.2550884485244751, "learning_rate": 4.9751719391230055e-06, "log_odds_chosen": 0.25457680225372314, "log_odds_ratio": -0.5827968120574951, "logits/chosen": 0.426510751247406, "logits/rejected": -0.38418132066726685, "logps/chosen": -1.710974931716919, "logps/rejected": -1.9265732765197754, "loss": 1.8985, "nll_loss": 1.840250015258789, "rewards/accuracies": 0.875, "rewards/chosen": -0.17109748721122742, "rewards/margins": 0.02155984938144684, "rewards/rejected": -0.19265732169151306, "step": 154 }, { "epoch": 0.42802899551259926, "grad_norm": 0.25417274236679077, "learning_rate": 4.974032546418816e-06, "log_odds_chosen": 0.47524771094322205, "log_odds_ratio": -0.49181026220321655, "logits/chosen": 0.43296438455581665, "logits/rejected": -0.41207337379455566, "logps/chosen": -1.756546139717102, "logps/rejected": -2.1634457111358643, "loss": 1.9365, "nll_loss": 1.8873004913330078, "rewards/accuracies": 1.0, "rewards/chosen": -0.17565463483333588, "rewards/margins": 0.04068994149565697, "rewards/rejected": -0.21634456515312195, "step": 155 }, { "epoch": 0.4307904729030031, "grad_norm": 0.24467967450618744, "learning_rate": 4.9728677292023405e-06, "log_odds_chosen": 0.19028525054454803, "log_odds_ratio": -0.6092196702957153, "logits/chosen": 0.5236613750457764, "logits/rejected": -0.3672065734863281, "logps/chosen": -1.851803183555603, "logps/rejected": -2.0145559310913086, "loss": 2.0125, "nll_loss": 1.9516232013702393, "rewards/accuracies": 0.875, "rewards/chosen": -0.18518033623695374, "rewards/margins": 0.016275260597467422, "rewards/rejected": -0.20145559310913086, "step": 156 }, { "epoch": 0.433551950293407, "grad_norm": 0.24561214447021484, "learning_rate": 4.971677499443882e-06, "log_odds_chosen": 0.34714600443840027, "log_odds_ratio": -0.538092315196991, "logits/chosen": 0.42429813742637634, "logits/rejected": -0.4320365786552429, "logps/chosen": -1.7550561428070068, "logps/rejected": -2.0503358840942383, "loss": 1.9273, "nll_loss": 1.873533010482788, "rewards/accuracies": 1.0, "rewards/chosen": -0.1755056083202362, "rewards/margins": 0.029527965933084488, "rewards/rejected": -0.2050335705280304, "step": 157 }, { "epoch": 0.4363134276838108, "grad_norm": 0.23480936884880066, "learning_rate": 4.97046186937489e-06, "log_odds_chosen": 0.3259715735912323, "log_odds_ratio": -0.5574356913566589, "logits/chosen": 0.33684611320495605, "logits/rejected": -0.48997414112091064, "logps/chosen": -1.733799934387207, "logps/rejected": -2.011017084121704, "loss": 1.8941, "nll_loss": 1.8383519649505615, "rewards/accuracies": 0.875, "rewards/chosen": -0.17338000237941742, "rewards/margins": 0.027721701189875603, "rewards/rejected": -0.20110172033309937, "step": 158 }, { "epoch": 0.4390749050742147, "grad_norm": 0.23975060880184174, "learning_rate": 4.9692208514878445e-06, "log_odds_chosen": 0.2344578355550766, "log_odds_ratio": -0.5858049392700195, "logits/chosen": 0.4837522804737091, "logits/rejected": -0.2647075951099396, "logps/chosen": -1.8640694618225098, "logps/rejected": -2.064138412475586, "loss": 2.0361, "nll_loss": 1.9775654077529907, "rewards/accuracies": 0.875, "rewards/chosen": -0.1864069551229477, "rewards/margins": 0.02000689134001732, "rewards/rejected": -0.2064138650894165, "step": 159 }, { "epoch": 0.4418363824646186, "grad_norm": 0.25472894310951233, "learning_rate": 4.967954458536126e-06, "log_odds_chosen": 0.34545353055000305, "log_odds_ratio": -0.5416699051856995, "logits/chosen": 0.4849855303764343, "logits/rejected": -0.14890551567077637, "logps/chosen": -1.6954622268676758, "logps/rejected": -1.9883880615234375, "loss": 1.8756, "nll_loss": 1.821388602256775, "rewards/accuracies": 1.0, "rewards/chosen": -0.1695462316274643, "rewards/margins": 0.029292574152350426, "rewards/rejected": -0.19883880019187927, "step": 160 }, { "epoch": 0.4445978598550224, "grad_norm": 0.2551233172416687, "learning_rate": 4.96666270353388e-06, "log_odds_chosen": 0.34612518548965454, "log_odds_ratio": -0.5391160845756531, "logits/chosen": 0.4446476995944977, "logits/rejected": -0.4060593545436859, "logps/chosen": -1.7577486038208008, "logps/rejected": -2.0534539222717285, "loss": 1.9068, "nll_loss": 1.8529114723205566, "rewards/accuracies": 1.0, "rewards/chosen": -0.17577485740184784, "rewards/margins": 0.029570531100034714, "rewards/rejected": -0.20534539222717285, "step": 161 }, { "epoch": 0.4473593372454263, "grad_norm": 0.2522425651550293, "learning_rate": 4.965345599755888e-06, "log_odds_chosen": 0.3961338400840759, "log_odds_ratio": -0.520170271396637, "logits/chosen": 0.38032418489456177, "logits/rejected": -0.48995065689086914, "logps/chosen": -1.815320372581482, "logps/rejected": -2.156792640686035, "loss": 1.9657, "nll_loss": 1.9137252569198608, "rewards/accuracies": 1.0, "rewards/chosen": -0.18153204023838043, "rewards/margins": 0.0341472253203392, "rewards/rejected": -0.21567925810813904, "step": 162 }, { "epoch": 0.45012081463583015, "grad_norm": 0.23575294017791748, "learning_rate": 4.964003160737429e-06, "log_odds_chosen": 0.42496663331985474, "log_odds_ratio": -0.5144410133361816, "logits/chosen": 0.4265897274017334, "logits/rejected": -0.6324371695518494, "logps/chosen": -1.7239199876785278, "logps/rejected": -2.0822784900665283, "loss": 1.8878, "nll_loss": 1.8363168239593506, "rewards/accuracies": 0.875, "rewards/chosen": -0.17239199578762054, "rewards/margins": 0.03583585098385811, "rewards/rejected": -0.20822784304618835, "step": 163 }, { "epoch": 0.45288229202623403, "grad_norm": 0.22830626368522644, "learning_rate": 4.9626354002741424e-06, "log_odds_chosen": 0.39489883184432983, "log_odds_ratio": -0.519772469997406, "logits/chosen": 0.43731188774108887, "logits/rejected": -0.5182772874832153, "logps/chosen": -1.6721280813217163, "logps/rejected": -2.0018677711486816, "loss": 1.8343, "nll_loss": 1.782306432723999, "rewards/accuracies": 1.0, "rewards/chosen": -0.1672128140926361, "rewards/margins": 0.032973967492580414, "rewards/rejected": -0.20018678903579712, "step": 164 }, { "epoch": 0.4556437694166379, "grad_norm": 0.25069254636764526, "learning_rate": 4.9612423324218816e-06, "log_odds_chosen": 0.4574624300003052, "log_odds_ratio": -0.4939710795879364, "logits/chosen": 0.4768182933330536, "logits/rejected": -0.6430546045303345, "logps/chosen": -1.8401563167572021, "logps/rejected": -2.23877215385437, "loss": 1.9978, "nll_loss": 1.9484000205993652, "rewards/accuracies": 1.0, "rewards/chosen": -0.18401561677455902, "rewards/margins": 0.03986157849431038, "rewards/rejected": -0.2238771915435791, "step": 165 }, { "epoch": 0.45840524680704176, "grad_norm": 0.25569233298301697, "learning_rate": 4.959823971496575e-06, "log_odds_chosen": 0.31097978353500366, "log_odds_ratio": -0.5631955862045288, "logits/chosen": 0.39253953099250793, "logits/rejected": -0.3464045226573944, "logps/chosen": -1.7822060585021973, "logps/rejected": -2.0535507202148438, "loss": 1.9515, "nll_loss": 1.8951623439788818, "rewards/accuracies": 0.75, "rewards/chosen": -0.17822058498859406, "rewards/margins": 0.027134478092193604, "rewards/rejected": -0.20535509288311005, "step": 166 }, { "epoch": 0.46116672419744564, "grad_norm": 0.2754010260105133, "learning_rate": 4.958380332074074e-06, "log_odds_chosen": 0.38330915570259094, "log_odds_ratio": -0.5226565003395081, "logits/chosen": 0.5406702756881714, "logits/rejected": -0.2918284833431244, "logps/chosen": -1.744588851928711, "logps/rejected": -2.0728600025177, "loss": 1.9256, "nll_loss": 1.873305320739746, "rewards/accuracies": 1.0, "rewards/chosen": -0.17445889115333557, "rewards/margins": 0.03282713145017624, "rewards/rejected": -0.2072860151529312, "step": 167 }, { "epoch": 0.4639282015878495, "grad_norm": 0.24440248310565948, "learning_rate": 4.95691142899001e-06, "log_odds_chosen": 0.4813528060913086, "log_odds_ratio": -0.49559223651885986, "logits/chosen": 0.42939212918281555, "logits/rejected": -0.5100030303001404, "logps/chosen": -1.7333720922470093, "logps/rejected": -2.148804187774658, "loss": 1.8988, "nll_loss": 1.8492889404296875, "rewards/accuracies": 1.0, "rewards/chosen": -0.17333722114562988, "rewards/margins": 0.04154320806264877, "rewards/rejected": -0.21488040685653687, "step": 168 }, { "epoch": 0.46668967897825336, "grad_norm": 0.24897295236587524, "learning_rate": 4.955417277339633e-06, "log_odds_chosen": 0.3721994459629059, "log_odds_ratio": -0.5288943648338318, "logits/chosen": 0.4496070444583893, "logits/rejected": -0.5069053173065186, "logps/chosen": -1.7618987560272217, "logps/rejected": -2.0793538093566895, "loss": 1.9112, "nll_loss": 1.858320713043213, "rewards/accuracies": 1.0, "rewards/chosen": -0.17618988454341888, "rewards/margins": 0.03174550086259842, "rewards/rejected": -0.2079353779554367, "step": 169 }, { "epoch": 0.46945115636865725, "grad_norm": 0.24506047368049622, "learning_rate": 4.953897892477664e-06, "log_odds_chosen": 0.4158302843570709, "log_odds_ratio": -0.524972677230835, "logits/chosen": 0.39215707778930664, "logits/rejected": -0.6964855790138245, "logps/chosen": -1.6938952207565308, "logps/rejected": -2.0564043521881104, "loss": 1.8573, "nll_loss": 1.8047559261322021, "rewards/accuracies": 0.75, "rewards/chosen": -0.1693895161151886, "rewards/margins": 0.03625092655420303, "rewards/rejected": -0.20564045011997223, "step": 170 }, { "epoch": 0.4722126337590611, "grad_norm": 0.25454598665237427, "learning_rate": 4.952353290018132e-06, "log_odds_chosen": 0.39419156312942505, "log_odds_ratio": -0.5228413939476013, "logits/chosen": 0.4652084410190582, "logits/rejected": -0.09862995892763138, "logps/chosen": -1.7650055885314941, "logps/rejected": -2.102964162826538, "loss": 1.9365, "nll_loss": 1.8842390775680542, "rewards/accuracies": 1.0, "rewards/chosen": -0.17650054395198822, "rewards/margins": 0.033795878291130066, "rewards/rejected": -0.2102964073419571, "step": 171 }, { "epoch": 0.474974111149465, "grad_norm": 0.23166699707508087, "learning_rate": 4.950783485834218e-06, "log_odds_chosen": 0.45087021589279175, "log_odds_ratio": -0.4937146306037903, "logits/chosen": 0.43644362688064575, "logits/rejected": -0.45175108313560486, "logps/chosen": -1.6984238624572754, "logps/rejected": -2.0791146755218506, "loss": 1.8668, "nll_loss": 1.817420482635498, "rewards/accuracies": 1.0, "rewards/chosen": -0.16984236240386963, "rewards/margins": 0.03806909918785095, "rewards/rejected": -0.20791146159172058, "step": 172 }, { "epoch": 0.4777355885398688, "grad_norm": 0.22430026531219482, "learning_rate": 4.949188496058089e-06, "log_odds_chosen": 0.3452419340610504, "log_odds_ratio": -0.5469062328338623, "logits/chosen": 0.42078280448913574, "logits/rejected": -0.4557611346244812, "logps/chosen": -1.6979789733886719, "logps/rejected": -1.9911997318267822, "loss": 1.8648, "nll_loss": 1.8101435899734497, "rewards/accuracies": 0.875, "rewards/chosen": -0.1697978973388672, "rewards/margins": 0.029322080314159393, "rewards/rejected": -0.19911997020244598, "step": 173 }, { "epoch": 0.4804970659302727, "grad_norm": 0.25257107615470886, "learning_rate": 4.947568337080733e-06, "log_odds_chosen": 0.1399567574262619, "log_odds_ratio": -0.6351712942123413, "logits/chosen": 0.3786250352859497, "logits/rejected": -0.433152973651886, "logps/chosen": -1.8261394500732422, "logps/rejected": -1.9481170177459717, "loss": 1.9906, "nll_loss": 1.9270497560501099, "rewards/accuracies": 0.625, "rewards/chosen": -0.18261395394802094, "rewards/margins": 0.012197760865092278, "rewards/rejected": -0.19481170177459717, "step": 174 }, { "epoch": 0.4832585433206766, "grad_norm": 0.25380203127861023, "learning_rate": 4.945923025551789e-06, "log_odds_chosen": 0.37491294741630554, "log_odds_ratio": -0.5253455638885498, "logits/chosen": 0.44007402658462524, "logits/rejected": -0.5158473253250122, "logps/chosen": -1.7649321556091309, "logps/rejected": -2.0859415531158447, "loss": 1.9061, "nll_loss": 1.8535852432250977, "rewards/accuracies": 1.0, "rewards/chosen": -0.17649321258068085, "rewards/margins": 0.032100923359394073, "rewards/rejected": -0.20859414339065552, "step": 175 }, { "epoch": 0.4860200207110804, "grad_norm": 0.23105277121067047, "learning_rate": 4.944252578379379e-06, "log_odds_chosen": 0.2923380434513092, "log_odds_ratio": -0.5638449788093567, "logits/chosen": 0.3620964288711548, "logits/rejected": -0.7222499847412109, "logps/chosen": -1.6879165172576904, "logps/rejected": -1.9339208602905273, "loss": 1.8481, "nll_loss": 1.7917577028274536, "rewards/accuracies": 1.0, "rewards/chosen": -0.16879163682460785, "rewards/margins": 0.024600449949502945, "rewards/rejected": -0.1933920979499817, "step": 176 }, { "epoch": 0.4887814981014843, "grad_norm": 0.2548852264881134, "learning_rate": 4.942557012729933e-06, "log_odds_chosen": 0.37703031301498413, "log_odds_ratio": -0.5374451875686646, "logits/chosen": 0.451770544052124, "logits/rejected": -0.6359795331954956, "logps/chosen": -1.7573479413986206, "logps/rejected": -2.0781285762786865, "loss": 1.9276, "nll_loss": 1.873888373374939, "rewards/accuracies": 0.875, "rewards/chosen": -0.17573478817939758, "rewards/margins": 0.03207805007696152, "rewards/rejected": -0.2078128606081009, "step": 177 }, { "epoch": 0.49154297549188813, "grad_norm": 0.24361710250377655, "learning_rate": 4.940836346028011e-06, "log_odds_chosen": 0.564251184463501, "log_odds_ratio": -0.4567793309688568, "logits/chosen": 0.36331912875175476, "logits/rejected": -0.6997017860412598, "logps/chosen": -1.7021052837371826, "logps/rejected": -2.1804330348968506, "loss": 1.8632, "nll_loss": 1.8174842596054077, "rewards/accuracies": 1.0, "rewards/chosen": -0.17021054029464722, "rewards/margins": 0.047832753509283066, "rewards/rejected": -0.2180432826280594, "step": 178 }, { "epoch": 0.494304452882292, "grad_norm": 0.25140658020973206, "learning_rate": 4.9390905959561254e-06, "log_odds_chosen": 0.42371851205825806, "log_odds_ratio": -0.5053101778030396, "logits/chosen": 0.4960322678089142, "logits/rejected": -0.7143914699554443, "logps/chosen": -1.8060599565505981, "logps/rejected": -2.168437957763672, "loss": 1.9361, "nll_loss": 1.8855350017547607, "rewards/accuracies": 1.0, "rewards/chosen": -0.18060600757598877, "rewards/margins": 0.03623779118061066, "rewards/rejected": -0.21684379875659943, "step": 179 }, { "epoch": 0.4970659302726959, "grad_norm": 0.25267475843429565, "learning_rate": 4.937319780454559e-06, "log_odds_chosen": 0.24488888680934906, "log_odds_ratio": -0.5824177861213684, "logits/chosen": 0.43725982308387756, "logits/rejected": -0.5809499621391296, "logps/chosen": -1.7686806917190552, "logps/rejected": -1.9778132438659668, "loss": 1.9319, "nll_loss": 1.873632550239563, "rewards/accuracies": 0.75, "rewards/chosen": -0.17686808109283447, "rewards/margins": 0.020913248881697655, "rewards/rejected": -0.19778132438659668, "step": 180 }, { "epoch": 0.49982740766309974, "grad_norm": 0.2517687976360321, "learning_rate": 4.935523917721182e-06, "log_odds_chosen": 0.3661497235298157, "log_odds_ratio": -0.5325534343719482, "logits/chosen": 0.4761643409729004, "logits/rejected": -0.4684900641441345, "logps/chosen": -1.7682512998580933, "logps/rejected": -2.079028844833374, "loss": 1.9513, "nll_loss": 1.8980205059051514, "rewards/accuracies": 0.875, "rewards/chosen": -0.1768251359462738, "rewards/margins": 0.031077751889824867, "rewards/rejected": -0.20790287852287292, "step": 181 }, { "epoch": 0.5025888850535036, "grad_norm": 0.23882247507572174, "learning_rate": 4.933703026211262e-06, "log_odds_chosen": 0.40052229166030884, "log_odds_ratio": -0.5199273824691772, "logits/chosen": 0.5116904377937317, "logits/rejected": -0.8737332820892334, "logps/chosen": -1.7753740549087524, "logps/rejected": -2.1211981773376465, "loss": 1.9275, "nll_loss": 1.8754714727401733, "rewards/accuracies": 1.0, "rewards/chosen": -0.17753739655017853, "rewards/margins": 0.03458239883184433, "rewards/rejected": -0.21211980283260345, "step": 182 }, { "epoch": 0.5053503624439075, "grad_norm": 0.23901337385177612, "learning_rate": 4.931857124637276e-06, "log_odds_chosen": 0.3831828832626343, "log_odds_ratio": -0.5294877886772156, "logits/chosen": 0.44559329748153687, "logits/rejected": -0.512153685092926, "logps/chosen": -1.660269021987915, "logps/rejected": -1.9844969511032104, "loss": 1.8228, "nll_loss": 1.7698148488998413, "rewards/accuracies": 0.875, "rewards/chosen": -0.16602689027786255, "rewards/margins": 0.032422810792922974, "rewards/rejected": -0.19844970107078552, "step": 183 }, { "epoch": 0.5081118398343114, "grad_norm": 0.26273974776268005, "learning_rate": 4.92998623196872e-06, "log_odds_chosen": 0.40838003158569336, "log_odds_ratio": -0.5120916366577148, "logits/chosen": 0.36793801188468933, "logits/rejected": -0.883270263671875, "logps/chosen": -1.7183860540390015, "logps/rejected": -2.0646042823791504, "loss": 1.8679, "nll_loss": 1.8166720867156982, "rewards/accuracies": 1.0, "rewards/chosen": -0.17183861136436462, "rewards/margins": 0.034621842205524445, "rewards/rejected": -0.20646044611930847, "step": 184 }, { "epoch": 0.5108733172247152, "grad_norm": 0.24954968690872192, "learning_rate": 4.92809036743191e-06, "log_odds_chosen": 0.44134509563446045, "log_odds_ratio": -0.5009733438491821, "logits/chosen": 0.38025662302970886, "logits/rejected": -0.6959909200668335, "logps/chosen": -1.6237648725509644, "logps/rejected": -1.991347312927246, "loss": 1.7726, "nll_loss": 1.7225314378738403, "rewards/accuracies": 1.0, "rewards/chosen": -0.1623764932155609, "rewards/margins": 0.03675824776291847, "rewards/rejected": -0.19913475215435028, "step": 185 }, { "epoch": 0.5136347946151191, "grad_norm": 0.24785040318965912, "learning_rate": 4.926169550509787e-06, "log_odds_chosen": 0.304913192987442, "log_odds_ratio": -0.5551115274429321, "logits/chosen": 0.49904897809028625, "logits/rejected": -0.5060792565345764, "logps/chosen": -1.752536416053772, "logps/rejected": -2.009927749633789, "loss": 1.8926, "nll_loss": 1.837074875831604, "rewards/accuracies": 0.875, "rewards/chosen": -0.17525365948677063, "rewards/margins": 0.0257391519844532, "rewards/rejected": -0.20099279284477234, "step": 186 }, { "epoch": 0.516396272005523, "grad_norm": 0.25260496139526367, "learning_rate": 4.924223800941718e-06, "log_odds_chosen": 0.18274395167827606, "log_odds_ratio": -0.6112073659896851, "logits/chosen": 0.4272249937057495, "logits/rejected": -0.5964298248291016, "logps/chosen": -1.7245608568191528, "logps/rejected": -1.87729012966156, "loss": 1.8858, "nll_loss": 1.824722170829773, "rewards/accuracies": 0.75, "rewards/chosen": -0.17245608568191528, "rewards/margins": 0.015272947028279305, "rewards/rejected": -0.18772903084754944, "step": 187 }, { "epoch": 0.5191577493959268, "grad_norm": 0.25974321365356445, "learning_rate": 4.9222531387232885e-06, "log_odds_chosen": 0.2772579491138458, "log_odds_ratio": -0.5700020790100098, "logits/chosen": 0.5035889148712158, "logits/rejected": -0.6021788120269775, "logps/chosen": -1.810874581336975, "logps/rejected": -2.048560380935669, "loss": 1.9601, "nll_loss": 1.903147578239441, "rewards/accuracies": 1.0, "rewards/chosen": -0.181087464094162, "rewards/margins": 0.023768583312630653, "rewards/rejected": -0.2048560529947281, "step": 188 }, { "epoch": 0.5219192267863307, "grad_norm": 0.2509472668170929, "learning_rate": 4.920257584106104e-06, "log_odds_chosen": 0.2922664284706116, "log_odds_ratio": -0.5610611438751221, "logits/chosen": 0.4223700761795044, "logits/rejected": -0.8161606192588806, "logps/chosen": -1.7685585021972656, "logps/rejected": -2.017383575439453, "loss": 1.9162, "nll_loss": 1.8600515127182007, "rewards/accuracies": 1.0, "rewards/chosen": -0.17685584723949432, "rewards/margins": 0.02488252893090248, "rewards/rejected": -0.2017383873462677, "step": 189 }, { "epoch": 0.5246807041767345, "grad_norm": 0.26333701610565186, "learning_rate": 4.918237157597574e-06, "log_odds_chosen": 0.367781400680542, "log_odds_ratio": -0.5288804769515991, "logits/chosen": 0.34821969270706177, "logits/rejected": -0.5903292894363403, "logps/chosen": -1.7087079286575317, "logps/rejected": -2.0187838077545166, "loss": 1.8792, "nll_loss": 1.8263163566589355, "rewards/accuracies": 1.0, "rewards/chosen": -0.1708707958459854, "rewards/margins": 0.03100760281085968, "rewards/rejected": -0.2018783837556839, "step": 190 }, { "epoch": 0.5274421815671384, "grad_norm": 0.2659012973308563, "learning_rate": 4.916191879960708e-06, "log_odds_chosen": 0.41825154423713684, "log_odds_ratio": -0.5096725225448608, "logits/chosen": 0.3918278217315674, "logits/rejected": -0.7869745492935181, "logps/chosen": -1.819606900215149, "logps/rejected": -2.178121328353882, "loss": 1.9644, "nll_loss": 1.9134495258331299, "rewards/accuracies": 1.0, "rewards/chosen": -0.18196068704128265, "rewards/margins": 0.03585144132375717, "rewards/rejected": -0.21781213581562042, "step": 191 }, { "epoch": 0.5302036589575423, "grad_norm": 0.2279636263847351, "learning_rate": 4.914121772213898e-06, "log_odds_chosen": 0.3906615972518921, "log_odds_ratio": -0.5185546875, "logits/chosen": 0.4513569176197052, "logits/rejected": -0.5819852948188782, "logps/chosen": -1.6937000751495361, "logps/rejected": -2.0224616527557373, "loss": 1.8302, "nll_loss": 1.778322458267212, "rewards/accuracies": 1.0, "rewards/chosen": -0.16937001049518585, "rewards/margins": 0.03287617862224579, "rewards/rejected": -0.20224617421627045, "step": 192 }, { "epoch": 0.5329651363479462, "grad_norm": 0.25850534439086914, "learning_rate": 4.912026855630703e-06, "log_odds_chosen": 0.4198724031448364, "log_odds_ratio": -0.5083851218223572, "logits/chosen": 0.44823789596557617, "logits/rejected": -0.45316439867019653, "logps/chosen": -1.7062734365463257, "logps/rejected": -2.061528205871582, "loss": 1.8534, "nll_loss": 1.8025155067443848, "rewards/accuracies": 1.0, "rewards/chosen": -0.17062735557556152, "rewards/margins": 0.035525478422641754, "rewards/rejected": -0.20615282654762268, "step": 193 }, { "epoch": 0.5357266137383501, "grad_norm": 0.2361551970243454, "learning_rate": 4.909907151739634e-06, "log_odds_chosen": 0.3865026831626892, "log_odds_ratio": -0.527399480342865, "logits/chosen": 0.3759016692638397, "logits/rejected": -0.6874773502349854, "logps/chosen": -1.6837282180786133, "logps/rejected": -2.00811505317688, "loss": 1.8406, "nll_loss": 1.7878108024597168, "rewards/accuracies": 0.875, "rewards/chosen": -0.16837282478809357, "rewards/margins": 0.03243869170546532, "rewards/rejected": -0.200811505317688, "step": 194 }, { "epoch": 0.5384880911287538, "grad_norm": 0.25059905648231506, "learning_rate": 4.907762682323926e-06, "log_odds_chosen": 0.5617655515670776, "log_odds_ratio": -0.4947778284549713, "logits/chosen": 0.4162059426307678, "logits/rejected": -0.6656520366668701, "logps/chosen": -1.6942963600158691, "logps/rejected": -2.1570441722869873, "loss": 1.8694, "nll_loss": 1.8199554681777954, "rewards/accuracies": 1.0, "rewards/chosen": -0.16942965984344482, "rewards/margins": 0.046274758875370026, "rewards/rejected": -0.21570439636707306, "step": 195 }, { "epoch": 0.5412495685191577, "grad_norm": 0.24844138324260712, "learning_rate": 4.905593469421323e-06, "log_odds_chosen": 0.2864172160625458, "log_odds_ratio": -0.5658568143844604, "logits/chosen": 0.40823209285736084, "logits/rejected": -1.0584533214569092, "logps/chosen": -1.6995760202407837, "logps/rejected": -1.9419208765029907, "loss": 1.8573, "nll_loss": 1.8007633686065674, "rewards/accuracies": 0.875, "rewards/chosen": -0.16995760798454285, "rewards/margins": 0.024234486743807793, "rewards/rejected": -0.1941920816898346, "step": 196 }, { "epoch": 0.5440110459095616, "grad_norm": 0.25183677673339844, "learning_rate": 4.90339953532384e-06, "log_odds_chosen": 0.32565394043922424, "log_odds_ratio": -0.5478559732437134, "logits/chosen": 0.4504649341106415, "logits/rejected": -0.6915105581283569, "logps/chosen": -1.723501443862915, "logps/rejected": -1.9977482557296753, "loss": 1.8793, "nll_loss": 1.8244800567626953, "rewards/accuracies": 0.875, "rewards/chosen": -0.17235015332698822, "rewards/margins": 0.027424685657024384, "rewards/rejected": -0.199774831533432, "step": 197 }, { "epoch": 0.5467725232999655, "grad_norm": 0.24120980501174927, "learning_rate": 4.901180902577549e-06, "log_odds_chosen": 0.36884185671806335, "log_odds_ratio": -0.5313685536384583, "logits/chosen": 0.47393080592155457, "logits/rejected": -0.8454681038856506, "logps/chosen": -1.7091484069824219, "logps/rejected": -2.021761655807495, "loss": 1.8562, "nll_loss": 1.80307936668396, "rewards/accuracies": 1.0, "rewards/chosen": -0.17091482877731323, "rewards/margins": 0.03126133233308792, "rewards/rejected": -0.20217616856098175, "step": 198 }, { "epoch": 0.5495340006903694, "grad_norm": 0.26695704460144043, "learning_rate": 4.8989375939823305e-06, "log_odds_chosen": 0.2716186046600342, "log_odds_ratio": -0.5816208124160767, "logits/chosen": 0.43514788150787354, "logits/rejected": -0.9399389028549194, "logps/chosen": -1.6752939224243164, "logps/rejected": -1.9062312841415405, "loss": 1.829, "nll_loss": 1.770880937576294, "rewards/accuracies": 0.75, "rewards/chosen": -0.1675294041633606, "rewards/margins": 0.023093728348612785, "rewards/rejected": -0.19062313437461853, "step": 199 }, { "epoch": 0.5522954780807732, "grad_norm": 0.248373344540596, "learning_rate": 4.896669632591652e-06, "log_odds_chosen": 0.43290525674819946, "log_odds_ratio": -0.5045643448829651, "logits/chosen": 0.41075634956359863, "logits/rejected": -0.6819908618927002, "logps/chosen": -1.6926066875457764, "logps/rejected": -2.0592057704925537, "loss": 1.8492, "nll_loss": 1.7987439632415771, "rewards/accuracies": 1.0, "rewards/chosen": -0.1692606657743454, "rewards/margins": 0.03665991127490997, "rewards/rejected": -0.20592057704925537, "step": 200 }, { "epoch": 0.5550569554711771, "grad_norm": 0.2426389902830124, "learning_rate": 4.894377041712327e-06, "log_odds_chosen": 0.3046472668647766, "log_odds_ratio": -0.5560302138328552, "logits/chosen": 0.4586237370967865, "logits/rejected": -0.889277458190918, "logps/chosen": -1.7483210563659668, "logps/rejected": -2.0069689750671387, "loss": 1.8995, "nll_loss": 1.8438801765441895, "rewards/accuracies": 0.875, "rewards/chosen": -0.17483210563659668, "rewards/margins": 0.025864800438284874, "rewards/rejected": -0.2006969153881073, "step": 201 }, { "epoch": 0.557818432861581, "grad_norm": 0.2384442389011383, "learning_rate": 4.892059844904273e-06, "log_odds_chosen": 0.35407793521881104, "log_odds_ratio": -0.5373241901397705, "logits/chosen": 0.3391227722167969, "logits/rejected": -0.8238649964332581, "logps/chosen": -1.6727503538131714, "logps/rejected": -1.971817135810852, "loss": 1.8314, "nll_loss": 1.7776691913604736, "rewards/accuracies": 0.875, "rewards/chosen": -0.16727504134178162, "rewards/margins": 0.02990666963160038, "rewards/rejected": -0.19718170166015625, "step": 202 }, { "epoch": 0.5605799102519848, "grad_norm": 0.2237497717142105, "learning_rate": 4.889718065980272e-06, "log_odds_chosen": 0.42560863494873047, "log_odds_ratio": -0.5102251768112183, "logits/chosen": 0.38399800658226013, "logits/rejected": -0.940986156463623, "logps/chosen": -1.7369155883789062, "logps/rejected": -2.0991053581237793, "loss": 1.8724, "nll_loss": 1.821379542350769, "rewards/accuracies": 1.0, "rewards/chosen": -0.1736915558576584, "rewards/margins": 0.03621895611286163, "rewards/rejected": -0.2099105268716812, "step": 203 }, { "epoch": 0.5633413876423887, "grad_norm": 0.23220422863960266, "learning_rate": 4.8873517290057265e-06, "log_odds_chosen": 0.5465662479400635, "log_odds_ratio": -0.45910075306892395, "logits/chosen": 0.33986854553222656, "logits/rejected": -0.9626595973968506, "logps/chosen": -1.7113467454910278, "logps/rejected": -2.1808066368103027, "loss": 1.8594, "nll_loss": 1.8135318756103516, "rewards/accuracies": 1.0, "rewards/chosen": -0.17113468050956726, "rewards/margins": 0.046945974230766296, "rewards/rejected": -0.21808065474033356, "step": 204 }, { "epoch": 0.5661028650327925, "grad_norm": 0.22727084159851074, "learning_rate": 4.88496085829841e-06, "log_odds_chosen": 0.3183567523956299, "log_odds_ratio": -0.5485174059867859, "logits/chosen": 0.38077837228775024, "logits/rejected": -0.3718608617782593, "logps/chosen": -1.682438611984253, "logps/rejected": -1.9494574069976807, "loss": 1.8312, "nll_loss": 1.7763221263885498, "rewards/accuracies": 1.0, "rewards/chosen": -0.168243870139122, "rewards/margins": 0.026701876893639565, "rewards/rejected": -0.19494575262069702, "step": 205 }, { "epoch": 0.5688643424231964, "grad_norm": 0.2435847967863083, "learning_rate": 4.882545478428219e-06, "log_odds_chosen": 0.4757111072540283, "log_odds_ratio": -0.5007758140563965, "logits/chosen": 0.4316224753856659, "logits/rejected": -0.9769002199172974, "logps/chosen": -1.6722346544265747, "logps/rejected": -2.073765277862549, "loss": 1.8362, "nll_loss": 1.7861530780792236, "rewards/accuracies": 0.875, "rewards/chosen": -0.1672234833240509, "rewards/margins": 0.04015304520726204, "rewards/rejected": -0.20737652480602264, "step": 206 }, { "epoch": 0.5716258198136003, "grad_norm": 0.28022557497024536, "learning_rate": 4.880105614216917e-06, "log_odds_chosen": 0.42549797892570496, "log_odds_ratio": -0.507975161075592, "logits/chosen": 0.42804068326950073, "logits/rejected": -0.8541020750999451, "logps/chosen": -1.8415427207946777, "logps/rejected": -2.209498405456543, "loss": 1.9733, "nll_loss": 1.9224536418914795, "rewards/accuracies": 1.0, "rewards/chosen": -0.18415425717830658, "rewards/margins": 0.03679555654525757, "rewards/rejected": -0.22094982862472534, "step": 207 }, { "epoch": 0.5743872972040042, "grad_norm": 0.26431816816329956, "learning_rate": 4.8776412907378845e-06, "log_odds_chosen": 0.3722533583641052, "log_odds_ratio": -0.5265185832977295, "logits/chosen": 0.46087294816970825, "logits/rejected": -0.8973901271820068, "logps/chosen": -1.7529377937316895, "logps/rejected": -2.0698721408843994, "loss": 1.8939, "nll_loss": 1.8412883281707764, "rewards/accuracies": 1.0, "rewards/chosen": -0.17529378831386566, "rewards/margins": 0.03169342502951622, "rewards/rejected": -0.206987202167511, "step": 208 }, { "epoch": 0.577148774594408, "grad_norm": 0.2286507785320282, "learning_rate": 4.875152533315859e-06, "log_odds_chosen": 0.42754417657852173, "log_odds_ratio": -0.5049228668212891, "logits/chosen": 0.3295610547065735, "logits/rejected": -0.821418285369873, "logps/chosen": -1.6234990358352661, "logps/rejected": -1.9790537357330322, "loss": 1.7729, "nll_loss": 1.7223597764968872, "rewards/accuracies": 1.0, "rewards/chosen": -0.1623499095439911, "rewards/margins": 0.035555459558963776, "rewards/rejected": -0.19790537655353546, "step": 209 }, { "epoch": 0.5799102519848118, "grad_norm": 0.23942524194717407, "learning_rate": 4.872639367526672e-06, "log_odds_chosen": 0.4114229083061218, "log_odds_ratio": -0.5152523517608643, "logits/chosen": 0.3920520842075348, "logits/rejected": -0.7173675298690796, "logps/chosen": -1.6646122932434082, "logps/rejected": -2.0126848220825195, "loss": 1.8132, "nll_loss": 1.7616626024246216, "rewards/accuracies": 1.0, "rewards/chosen": -0.16646124422550201, "rewards/margins": 0.0348072350025177, "rewards/rejected": -0.20126846432685852, "step": 210 }, { "epoch": 0.5826717293752157, "grad_norm": 0.259086012840271, "learning_rate": 4.870101819196992e-06, "log_odds_chosen": 0.37038204073905945, "log_odds_ratio": -0.5485732555389404, "logits/chosen": 0.33591514825820923, "logits/rejected": -0.7169915437698364, "logps/chosen": -1.7096450328826904, "logps/rejected": -2.028204917907715, "loss": 1.863, "nll_loss": 1.8081302642822266, "rewards/accuracies": 0.875, "rewards/chosen": -0.17096450924873352, "rewards/margins": 0.031855978071689606, "rewards/rejected": -0.20282049477100372, "step": 211 }, { "epoch": 0.5854332067656196, "grad_norm": 0.23236605525016785, "learning_rate": 4.8675399144040535e-06, "log_odds_chosen": 0.48778051137924194, "log_odds_ratio": -0.4863608777523041, "logits/chosen": 0.3406648337841034, "logits/rejected": -1.1020050048828125, "logps/chosen": -1.6164040565490723, "logps/rejected": -2.0246694087982178, "loss": 1.7551, "nll_loss": 1.7064510583877563, "rewards/accuracies": 1.0, "rewards/chosen": -0.16164040565490723, "rewards/margins": 0.04082653671503067, "rewards/rejected": -0.2024669200181961, "step": 212 }, { "epoch": 0.5881946841560235, "grad_norm": 0.2472049593925476, "learning_rate": 4.864953679475392e-06, "log_odds_chosen": 0.4090927541255951, "log_odds_ratio": -0.5185278654098511, "logits/chosen": 0.3806914687156677, "logits/rejected": -1.1645220518112183, "logps/chosen": -1.6705214977264404, "logps/rejected": -2.010647773742676, "loss": 1.7979, "nll_loss": 1.7460622787475586, "rewards/accuracies": 1.0, "rewards/chosen": -0.16705216467380524, "rewards/margins": 0.03401262313127518, "rewards/rejected": -0.201064795255661, "step": 213 }, { "epoch": 0.5909561615464274, "grad_norm": 0.26932069659233093, "learning_rate": 4.862343140988573e-06, "log_odds_chosen": 0.38164323568344116, "log_odds_ratio": -0.5293680429458618, "logits/chosen": 0.35298576951026917, "logits/rejected": -0.9924853444099426, "logps/chosen": -1.7351062297821045, "logps/rejected": -2.062347173690796, "loss": 1.8652, "nll_loss": 1.8122597932815552, "rewards/accuracies": 0.875, "rewards/chosen": -0.1735106110572815, "rewards/margins": 0.03272408992052078, "rewards/rejected": -0.20623470842838287, "step": 214 }, { "epoch": 0.5937176389368312, "grad_norm": 0.26750069856643677, "learning_rate": 4.859708325770919e-06, "log_odds_chosen": 0.3453901410102844, "log_odds_ratio": -0.5410705804824829, "logits/chosen": 0.402724027633667, "logits/rejected": -0.7651013135910034, "logps/chosen": -1.736122727394104, "logps/rejected": -2.027735948562622, "loss": 1.901, "nll_loss": 1.8469077348709106, "rewards/accuracies": 0.875, "rewards/chosen": -0.17361226677894592, "rewards/margins": 0.029161330312490463, "rewards/rejected": -0.20277361571788788, "step": 215 }, { "epoch": 0.596479116327235, "grad_norm": 0.25545212626457214, "learning_rate": 4.857049260899233e-06, "log_odds_chosen": 0.42634040117263794, "log_odds_ratio": -0.5089380145072937, "logits/chosen": 0.2999016046524048, "logits/rejected": -0.9428575038909912, "logps/chosen": -1.6035995483398438, "logps/rejected": -1.9544857740402222, "loss": 1.7566, "nll_loss": 1.7056996822357178, "rewards/accuracies": 1.0, "rewards/chosen": -0.1603599637746811, "rewards/margins": 0.03508862853050232, "rewards/rejected": -0.19544857740402222, "step": 216 }, { "epoch": 0.5992405937176389, "grad_norm": 0.2341543734073639, "learning_rate": 4.854365973699519e-06, "log_odds_chosen": 0.29267483949661255, "log_odds_ratio": -0.5718191862106323, "logits/chosen": 0.32583779096603394, "logits/rejected": -0.7815302610397339, "logps/chosen": -1.6951828002929688, "logps/rejected": -1.9353394508361816, "loss": 1.8596, "nll_loss": 1.8024464845657349, "rewards/accuracies": 0.75, "rewards/chosen": -0.16951829195022583, "rewards/margins": 0.024015672504901886, "rewards/rejected": -0.19353395700454712, "step": 217 }, { "epoch": 0.6020020711080428, "grad_norm": 0.24366186559200287, "learning_rate": 4.851658491746707e-06, "log_odds_chosen": 0.3909580707550049, "log_odds_ratio": -0.5210633277893066, "logits/chosen": 0.25465232133865356, "logits/rejected": -0.9531072378158569, "logps/chosen": -1.6791120767593384, "logps/rejected": -2.0105059146881104, "loss": 1.8187, "nll_loss": 1.7665891647338867, "rewards/accuracies": 1.0, "rewards/chosen": -0.16791123151779175, "rewards/margins": 0.03313935548067093, "rewards/rejected": -0.20105057954788208, "step": 218 }, { "epoch": 0.6047635484984467, "grad_norm": 0.24001233279705048, "learning_rate": 4.848926842864361e-06, "log_odds_chosen": 0.4899923503398895, "log_odds_ratio": -0.48127448558807373, "logits/chosen": 0.25574028491973877, "logits/rejected": -1.256288766860962, "logps/chosen": -1.6502352952957153, "logps/rejected": -2.064323902130127, "loss": 1.7891, "nll_loss": 1.740965723991394, "rewards/accuracies": 1.0, "rewards/chosen": -0.165023535490036, "rewards/margins": 0.04140886664390564, "rewards/rejected": -0.20643240213394165, "step": 219 }, { "epoch": 0.6075250258888505, "grad_norm": 0.23009978234767914, "learning_rate": 4.846171055124401e-06, "log_odds_chosen": 0.3280143141746521, "log_odds_ratio": -0.55921870470047, "logits/chosen": 0.35827893018722534, "logits/rejected": -0.7289958000183105, "logps/chosen": -1.7372446060180664, "logps/rejected": -2.02004075050354, "loss": 1.8677, "nll_loss": 1.811750888824463, "rewards/accuracies": 1.0, "rewards/chosen": -0.1737244725227356, "rewards/margins": 0.028279609978199005, "rewards/rejected": -0.2020040899515152, "step": 220 }, { "epoch": 0.6102865032792544, "grad_norm": 0.24643385410308838, "learning_rate": 4.843391156846811e-06, "log_odds_chosen": 0.49847206473350525, "log_odds_ratio": -0.4771438539028168, "logits/chosen": 0.38028058409690857, "logits/rejected": -0.8463162779808044, "logps/chosen": -1.6676466464996338, "logps/rejected": -2.0871989727020264, "loss": 1.8045, "nll_loss": 1.756779432296753, "rewards/accuracies": 1.0, "rewards/chosen": -0.16676466166973114, "rewards/margins": 0.04195523262023926, "rewards/rejected": -0.2087198942899704, "step": 221 }, { "epoch": 0.6130479806696583, "grad_norm": 0.23624517023563385, "learning_rate": 4.8405871765993435e-06, "log_odds_chosen": 0.5461086630821228, "log_odds_ratio": -0.4661559462547302, "logits/chosen": 0.2758100926876068, "logits/rejected": -0.9698406457901001, "logps/chosen": -1.716646671295166, "logps/rejected": -2.178037643432617, "loss": 1.8539, "nll_loss": 1.807306170463562, "rewards/accuracies": 1.0, "rewards/chosen": -0.17166468501091003, "rewards/margins": 0.04613909870386124, "rewards/rejected": -0.21780376136302948, "step": 222 }, { "epoch": 0.6158094580600622, "grad_norm": 0.24216988682746887, "learning_rate": 4.837759143197237e-06, "log_odds_chosen": 0.33277636766433716, "log_odds_ratio": -0.543420672416687, "logits/chosen": 0.25846540927886963, "logits/rejected": -1.1584213972091675, "logps/chosen": -1.6766406297683716, "logps/rejected": -1.959399700164795, "loss": 1.818, "nll_loss": 1.7636725902557373, "rewards/accuracies": 1.0, "rewards/chosen": -0.1676640659570694, "rewards/margins": 0.028275907039642334, "rewards/rejected": -0.19593995809555054, "step": 223 }, { "epoch": 0.618570935450466, "grad_norm": 0.22247816622257233, "learning_rate": 4.834907085702909e-06, "log_odds_chosen": 0.42663368582725525, "log_odds_ratio": -0.5054609775543213, "logits/chosen": 0.2946387827396393, "logits/rejected": -0.8483214378356934, "logps/chosen": -1.6203222274780273, "logps/rejected": -1.9752922058105469, "loss": 1.7654, "nll_loss": 1.7148088216781616, "rewards/accuracies": 1.0, "rewards/chosen": -0.16203221678733826, "rewards/margins": 0.03549701347947121, "rewards/rejected": -0.19752921164035797, "step": 224 }, { "epoch": 0.6213324128408698, "grad_norm": 0.23438099026679993, "learning_rate": 4.832031033425663e-06, "log_odds_chosen": 0.4974308907985687, "log_odds_ratio": -0.4770738184452057, "logits/chosen": 0.22860127687454224, "logits/rejected": -1.1651134490966797, "logps/chosen": -1.5770584344863892, "logps/rejected": -1.98891282081604, "loss": 1.7128, "nll_loss": 1.6650840044021606, "rewards/accuracies": 1.0, "rewards/chosen": -0.15770584344863892, "rewards/margins": 0.04118544980883598, "rewards/rejected": -0.198891282081604, "step": 225 }, { "epoch": 0.6240938902312737, "grad_norm": 0.2453928291797638, "learning_rate": 4.829131015921386e-06, "log_odds_chosen": 0.336994469165802, "log_odds_ratio": -0.5416135787963867, "logits/chosen": 0.40966925024986267, "logits/rejected": -0.7484245300292969, "logps/chosen": -1.7475402355194092, "logps/rejected": -2.035691499710083, "loss": 1.8881, "nll_loss": 1.833910346031189, "rewards/accuracies": 1.0, "rewards/chosen": -0.17475402355194092, "rewards/margins": 0.0288151316344738, "rewards/rejected": -0.2035691738128662, "step": 226 }, { "epoch": 0.6268553676216776, "grad_norm": 0.22821033000946045, "learning_rate": 4.826207062992245e-06, "log_odds_chosen": 0.47645366191864014, "log_odds_ratio": -0.4914059638977051, "logits/chosen": 0.2782054543495178, "logits/rejected": -0.9900674223899841, "logps/chosen": -1.8174843788146973, "logps/rejected": -2.2320914268493652, "loss": 1.95, "nll_loss": 1.9008376598358154, "rewards/accuracies": 1.0, "rewards/chosen": -0.1817484349012375, "rewards/margins": 0.041460707783699036, "rewards/rejected": -0.22320915758609772, "step": 227 }, { "epoch": 0.6296168450120815, "grad_norm": 0.2569068968296051, "learning_rate": 4.82325920468638e-06, "log_odds_chosen": 0.2413053661584854, "log_odds_ratio": -0.5858334302902222, "logits/chosen": 0.32741737365722656, "logits/rejected": -0.9221272468566895, "logps/chosen": -1.7791626453399658, "logps/rejected": -1.9837150573730469, "loss": 1.9203, "nll_loss": 1.8617504835128784, "rewards/accuracies": 0.875, "rewards/chosen": -0.1779162585735321, "rewards/margins": 0.020455272868275642, "rewards/rejected": -0.1983715444803238, "step": 228 }, { "epoch": 0.6323783224024854, "grad_norm": 0.2548074424266815, "learning_rate": 4.820287471297598e-06, "log_odds_chosen": 0.4002269208431244, "log_odds_ratio": -0.5239609479904175, "logits/chosen": 0.21428313851356506, "logits/rejected": -0.9460724592208862, "logps/chosen": -1.6473997831344604, "logps/rejected": -1.9846879243850708, "loss": 1.7941, "nll_loss": 1.7416696548461914, "rewards/accuracies": 0.875, "rewards/chosen": -0.1647399663925171, "rewards/margins": 0.03372883051633835, "rewards/rejected": -0.19846880435943604, "step": 229 }, { "epoch": 0.6351397997928891, "grad_norm": 0.22254504263401031, "learning_rate": 4.817291893365055e-06, "log_odds_chosen": 0.46622079610824585, "log_odds_ratio": -0.4967763423919678, "logits/chosen": 0.07556484639644623, "logits/rejected": -1.116629719734192, "logps/chosen": -1.5138487815856934, "logps/rejected": -1.8917471170425415, "loss": 1.675, "nll_loss": 1.625287413597107, "rewards/accuracies": 1.0, "rewards/chosen": -0.15138490498065948, "rewards/margins": 0.03778982535004616, "rewards/rejected": -0.18917471170425415, "step": 230 }, { "epoch": 0.637901277183293, "grad_norm": 0.28197240829467773, "learning_rate": 4.81427250167295e-06, "log_odds_chosen": 0.24183571338653564, "log_odds_ratio": -0.583247721195221, "logits/chosen": 0.15436850488185883, "logits/rejected": -1.1790400743484497, "logps/chosen": -1.6747009754180908, "logps/rejected": -1.8760582208633423, "loss": 1.8184, "nll_loss": 1.7600996494293213, "rewards/accuracies": 0.875, "rewards/chosen": -0.16747009754180908, "rewards/margins": 0.020135723054409027, "rewards/rejected": -0.1876058131456375, "step": 231 }, { "epoch": 0.6406627545736969, "grad_norm": 0.24836094677448273, "learning_rate": 4.811229327250204e-06, "log_odds_chosen": 0.45771628618240356, "log_odds_ratio": -0.5010443329811096, "logits/chosen": 0.25003117322921753, "logits/rejected": -1.1424946784973145, "logps/chosen": -1.714363694190979, "logps/rejected": -2.1092028617858887, "loss": 1.8544, "nll_loss": 1.8042795658111572, "rewards/accuracies": 0.875, "rewards/chosen": -0.1714363694190979, "rewards/margins": 0.03948391228914261, "rewards/rejected": -0.2109202891588211, "step": 232 }, { "epoch": 0.6434242319641008, "grad_norm": 0.23518826067447662, "learning_rate": 4.8081624013701435e-06, "log_odds_chosen": 0.4311872124671936, "log_odds_ratio": -0.5104647278785706, "logits/chosen": 0.1991138905286789, "logits/rejected": -1.3669785261154175, "logps/chosen": -1.6888374090194702, "logps/rejected": -2.053189992904663, "loss": 1.8231, "nll_loss": 1.7720434665679932, "rewards/accuracies": 1.0, "rewards/chosen": -0.16888374090194702, "rewards/margins": 0.03643525391817093, "rewards/rejected": -0.20531900227069855, "step": 233 }, { "epoch": 0.6461857093545047, "grad_norm": 0.23462118208408356, "learning_rate": 4.805071755550177e-06, "log_odds_chosen": 0.394045889377594, "log_odds_ratio": -0.5184462070465088, "logits/chosen": 0.2896556854248047, "logits/rejected": -1.3101093769073486, "logps/chosen": -1.713099479675293, "logps/rejected": -2.048048496246338, "loss": 1.8398, "nll_loss": 1.7879210710525513, "rewards/accuracies": 1.0, "rewards/chosen": -0.1713099479675293, "rewards/margins": 0.03349488973617554, "rewards/rejected": -0.20480485260486603, "step": 234 }, { "epoch": 0.6489471867449085, "grad_norm": 0.23507662117481232, "learning_rate": 4.8019574215514705e-06, "log_odds_chosen": 0.3388752043247223, "log_odds_ratio": -0.5402787327766418, "logits/chosen": 0.20577961206436157, "logits/rejected": -0.8569263219833374, "logps/chosen": -1.6788839101791382, "logps/rejected": -1.9616367816925049, "loss": 1.8351, "nll_loss": 1.7810907363891602, "rewards/accuracies": 1.0, "rewards/chosen": -0.16788838803768158, "rewards/margins": 0.028275297954678535, "rewards/rejected": -0.19616368412971497, "step": 235 }, { "epoch": 0.6517086641353124, "grad_norm": 0.2070939689874649, "learning_rate": 4.7988194313786275e-06, "log_odds_chosen": 0.37065887451171875, "log_odds_ratio": -0.5329977869987488, "logits/chosen": 0.22679734230041504, "logits/rejected": -0.9689663052558899, "logps/chosen": -1.629687786102295, "logps/rejected": -1.9382522106170654, "loss": 1.7528, "nll_loss": 1.6994796991348267, "rewards/accuracies": 1.0, "rewards/chosen": -0.16296879947185516, "rewards/margins": 0.03085644729435444, "rewards/rejected": -0.19382524490356445, "step": 236 }, { "epoch": 0.6544701415257163, "grad_norm": 0.2251017838716507, "learning_rate": 4.795657817279349e-06, "log_odds_chosen": 0.3826752305030823, "log_odds_ratio": -0.5331037044525146, "logits/chosen": 0.143580362200737, "logits/rejected": -1.0959559679031372, "logps/chosen": -1.568124532699585, "logps/rejected": -1.8851323127746582, "loss": 1.7164, "nll_loss": 1.663122296333313, "rewards/accuracies": 0.875, "rewards/chosen": -0.15681245923042297, "rewards/margins": 0.03170077130198479, "rewards/rejected": -0.18851323425769806, "step": 237 }, { "epoch": 0.6572316189161201, "grad_norm": 0.24112077057361603, "learning_rate": 4.7924726117441135e-06, "log_odds_chosen": 0.45494502782821655, "log_odds_ratio": -0.5097072124481201, "logits/chosen": 0.12531203031539917, "logits/rejected": -1.1530197858810425, "logps/chosen": -1.6881966590881348, "logps/rejected": -2.074317216873169, "loss": 1.8279, "nll_loss": 1.7769427299499512, "rewards/accuracies": 0.875, "rewards/chosen": -0.16881968080997467, "rewards/margins": 0.03861205279827118, "rewards/rejected": -0.20743173360824585, "step": 238 }, { "epoch": 0.659993096306524, "grad_norm": 0.23394882678985596, "learning_rate": 4.789263847505835e-06, "log_odds_chosen": 0.48239994049072266, "log_odds_ratio": -0.484576016664505, "logits/chosen": 0.19327585399150848, "logits/rejected": -1.0120890140533447, "logps/chosen": -1.673211693763733, "logps/rejected": -2.078700542449951, "loss": 1.8042, "nll_loss": 1.7557491064071655, "rewards/accuracies": 1.0, "rewards/chosen": -0.16732117533683777, "rewards/margins": 0.04054888337850571, "rewards/rejected": -0.20787005126476288, "step": 239 }, { "epoch": 0.6627545736969278, "grad_norm": 0.24529801309108734, "learning_rate": 4.786031557539532e-06, "log_odds_chosen": 0.6541503667831421, "log_odds_ratio": -0.42781609296798706, "logits/chosen": 0.1258109211921692, "logits/rejected": -1.3018333911895752, "logps/chosen": -1.6192426681518555, "logps/rejected": -2.17771577835083, "loss": 1.7489, "nll_loss": 1.7061513662338257, "rewards/accuracies": 1.0, "rewards/chosen": -0.16192427277565002, "rewards/margins": 0.055847302079200745, "rewards/rejected": -0.21777155995368958, "step": 240 }, { "epoch": 0.6655160510873317, "grad_norm": 0.21988217532634735, "learning_rate": 4.782775775061983e-06, "log_odds_chosen": 0.4216863214969635, "log_odds_ratio": -0.5078084468841553, "logits/chosen": 0.22119548916816711, "logits/rejected": -1.0060391426086426, "logps/chosen": -1.6245155334472656, "logps/rejected": -1.9757689237594604, "loss": 1.7524, "nll_loss": 1.7016619443893433, "rewards/accuracies": 1.0, "rewards/chosen": -0.16245155036449432, "rewards/margins": 0.03512535244226456, "rewards/rejected": -0.19757691025733948, "step": 241 }, { "epoch": 0.6682775284777356, "grad_norm": 0.20372723042964935, "learning_rate": 4.779496533531393e-06, "log_odds_chosen": 0.5198055505752563, "log_odds_ratio": -0.4691554009914398, "logits/chosen": 0.20225608348846436, "logits/rejected": -1.1654834747314453, "logps/chosen": -1.5811524391174316, "logps/rejected": -2.01274037361145, "loss": 1.7073, "nll_loss": 1.6603847742080688, "rewards/accuracies": 1.0, "rewards/chosen": -0.15811526775360107, "rewards/margins": 0.043158773332834244, "rewards/rejected": -0.20127403736114502, "step": 242 }, { "epoch": 0.6710390058681395, "grad_norm": 0.22578665614128113, "learning_rate": 4.7761938666470405e-06, "log_odds_chosen": 0.4481002688407898, "log_odds_ratio": -0.5030589699745178, "logits/chosen": 0.21000558137893677, "logits/rejected": -1.1474783420562744, "logps/chosen": -1.6968859434127808, "logps/rejected": -2.079129695892334, "loss": 1.8331, "nll_loss": 1.7828097343444824, "rewards/accuracies": 1.0, "rewards/chosen": -0.16968859732151031, "rewards/margins": 0.03822438418865204, "rewards/rejected": -0.20791295170783997, "step": 243 }, { "epoch": 0.6738004832585434, "grad_norm": 0.21789832413196564, "learning_rate": 4.7728678083489375e-06, "log_odds_chosen": 0.3310457170009613, "log_odds_ratio": -0.5467555522918701, "logits/chosen": 0.21008087694644928, "logits/rejected": -1.071950912475586, "logps/chosen": -1.6163996458053589, "logps/rejected": -1.8934293985366821, "loss": 1.7531, "nll_loss": 1.6983906030654907, "rewards/accuracies": 0.875, "rewards/chosen": -0.1616399735212326, "rewards/margins": 0.027702966704964638, "rewards/rejected": -0.1893429309129715, "step": 244 }, { "epoch": 0.6765619606489471, "grad_norm": 0.21386297047138214, "learning_rate": 4.7695183928174804e-06, "log_odds_chosen": 0.4576282799243927, "log_odds_ratio": -0.4972766637802124, "logits/chosen": 0.11432070285081863, "logits/rejected": -1.215945839881897, "logps/chosen": -1.6433749198913574, "logps/rejected": -2.0280091762542725, "loss": 1.7829, "nll_loss": 1.7331418991088867, "rewards/accuracies": 1.0, "rewards/chosen": -0.16433750092983246, "rewards/margins": 0.03846340626478195, "rewards/rejected": -0.2028008997440338, "step": 245 }, { "epoch": 0.679323438039351, "grad_norm": 0.23491686582565308, "learning_rate": 4.766145654473096e-06, "log_odds_chosen": 0.37732306122779846, "log_odds_ratio": -0.5303936004638672, "logits/chosen": 0.18962648510932922, "logits/rejected": -1.3029686212539673, "logps/chosen": -1.716407299041748, "logps/rejected": -2.0386903285980225, "loss": 1.8439, "nll_loss": 1.790850043296814, "rewards/accuracies": 0.875, "rewards/chosen": -0.17164072394371033, "rewards/margins": 0.03222829848527908, "rewards/rejected": -0.2038690447807312, "step": 246 }, { "epoch": 0.6820849154297549, "grad_norm": 0.2103213518857956, "learning_rate": 4.762749627975888e-06, "log_odds_chosen": 0.40977245569229126, "log_odds_ratio": -0.5140390396118164, "logits/chosen": 0.0982648953795433, "logits/rejected": -1.1915839910507202, "logps/chosen": -1.557995319366455, "logps/rejected": -1.8929803371429443, "loss": 1.6972, "nll_loss": 1.6458226442337036, "rewards/accuracies": 1.0, "rewards/chosen": -0.1557995229959488, "rewards/margins": 0.033498503267765045, "rewards/rejected": -0.18929803371429443, "step": 247 }, { "epoch": 0.6848463928201588, "grad_norm": 0.2225971668958664, "learning_rate": 4.7593303482252835e-06, "log_odds_chosen": 0.36240053176879883, "log_odds_ratio": -0.5350769758224487, "logits/chosen": 0.16154634952545166, "logits/rejected": -0.829785943031311, "logps/chosen": -1.6611778736114502, "logps/rejected": -1.9659464359283447, "loss": 1.7928, "nll_loss": 1.7392576932907104, "rewards/accuracies": 1.0, "rewards/chosen": -0.16611778736114502, "rewards/margins": 0.03047686442732811, "rewards/rejected": -0.19659464061260223, "step": 248 }, { "epoch": 0.6876078702105627, "grad_norm": 0.19730301201343536, "learning_rate": 4.755887850359673e-06, "log_odds_chosen": 0.598614513874054, "log_odds_ratio": -0.4566551446914673, "logits/chosen": 0.10718496143817902, "logits/rejected": -1.343011498451233, "logps/chosen": -1.5110077857971191, "logps/rejected": -2.002279043197632, "loss": 1.6562, "nll_loss": 1.6105355024337769, "rewards/accuracies": 1.0, "rewards/chosen": -0.1511007696390152, "rewards/margins": 0.04912712052464485, "rewards/rejected": -0.20022790133953094, "step": 249 }, { "epoch": 0.6903693476009665, "grad_norm": 0.2222234308719635, "learning_rate": 4.752422169756048e-06, "log_odds_chosen": 0.4699355959892273, "log_odds_ratio": -0.4920656085014343, "logits/chosen": 0.1497102826833725, "logits/rejected": -1.4104342460632324, "logps/chosen": -1.6844984292984009, "logps/rejected": -2.0840110778808594, "loss": 1.8276, "nll_loss": 1.7784277200698853, "rewards/accuracies": 1.0, "rewards/chosen": -0.16844984889030457, "rewards/margins": 0.03995127975940704, "rewards/rejected": -0.20840111374855042, "step": 250 }, { "epoch": 0.6931308249913704, "grad_norm": 0.2100534588098526, "learning_rate": 4.748933342029639e-06, "log_odds_chosen": 0.5825514793395996, "log_odds_ratio": -0.4510309398174286, "logits/chosen": 0.1121891662478447, "logits/rejected": -1.3764441013336182, "logps/chosen": -1.560391902923584, "logps/rejected": -2.048152446746826, "loss": 1.7094, "nll_loss": 1.6642597913742065, "rewards/accuracies": 1.0, "rewards/chosen": -0.15603917837142944, "rewards/margins": 0.0487760454416275, "rewards/rejected": -0.20481522381305695, "step": 251 }, { "epoch": 0.6958923023817742, "grad_norm": 0.20206160843372345, "learning_rate": 4.745421403033548e-06, "log_odds_chosen": 0.4050910174846649, "log_odds_ratio": -0.5125004649162292, "logits/chosen": 0.10816405713558197, "logits/rejected": -1.1761468648910522, "logps/chosen": -1.6354482173919678, "logps/rejected": -1.9715569019317627, "loss": 1.7615, "nll_loss": 1.7102546691894531, "rewards/accuracies": 1.0, "rewards/chosen": -0.16354484856128693, "rewards/margins": 0.033610858023166656, "rewards/rejected": -0.1971556842327118, "step": 252 }, { "epoch": 0.6986537797721781, "grad_norm": 0.2380354106426239, "learning_rate": 4.741886388858384e-06, "log_odds_chosen": 0.3426484167575836, "log_odds_ratio": -0.543499231338501, "logits/chosen": 0.17392773926258087, "logits/rejected": -1.2038366794586182, "logps/chosen": -1.6468617916107178, "logps/rejected": -1.928429365158081, "loss": 1.79, "nll_loss": 1.7356586456298828, "rewards/accuracies": 0.875, "rewards/chosen": -0.1646861881017685, "rewards/margins": 0.02815674990415573, "rewards/rejected": -0.19284293055534363, "step": 253 }, { "epoch": 0.701415257162582, "grad_norm": 0.2207070291042328, "learning_rate": 4.738328335831883e-06, "log_odds_chosen": 0.3862311542034149, "log_odds_ratio": -0.5232746005058289, "logits/chosen": 0.13721241056919098, "logits/rejected": -1.4115301370620728, "logps/chosen": -1.612336277961731, "logps/rejected": -1.9344983100891113, "loss": 1.7501, "nll_loss": 1.6977391242980957, "rewards/accuracies": 1.0, "rewards/chosen": -0.16123361885547638, "rewards/margins": 0.03221620246767998, "rewards/rejected": -0.19344982504844666, "step": 254 }, { "epoch": 0.7041767345529858, "grad_norm": 0.2061997801065445, "learning_rate": 4.734747280518549e-06, "log_odds_chosen": 0.47361427545547485, "log_odds_ratio": -0.48697221279144287, "logits/chosen": 0.03231241926550865, "logits/rejected": -1.5338258743286133, "logps/chosen": -1.6500543355941772, "logps/rejected": -2.0473241806030273, "loss": 1.7861, "nll_loss": 1.737368106842041, "rewards/accuracies": 1.0, "rewards/chosen": -0.16500544548034668, "rewards/margins": 0.039726972579956055, "rewards/rejected": -0.20473241806030273, "step": 255 }, { "epoch": 0.7069382119433897, "grad_norm": 0.20688936114311218, "learning_rate": 4.7311432597192655e-06, "log_odds_chosen": 0.39435428380966187, "log_odds_ratio": -0.5248243808746338, "logits/chosen": 0.03083261288702488, "logits/rejected": -1.4570684432983398, "logps/chosen": -1.648856520652771, "logps/rejected": -1.9787046909332275, "loss": 1.7765, "nll_loss": 1.7240355014801025, "rewards/accuracies": 0.875, "rewards/chosen": -0.16488566994667053, "rewards/margins": 0.03298482671380043, "rewards/rejected": -0.19787049293518066, "step": 256 }, { "epoch": 0.7096996893337936, "grad_norm": 0.22945424914360046, "learning_rate": 4.72751631047092e-06, "log_odds_chosen": 0.5417366027832031, "log_odds_ratio": -0.46491706371307373, "logits/chosen": -0.006479084491729736, "logits/rejected": -1.1154435873031616, "logps/chosen": -1.6812236309051514, "logps/rejected": -2.1395444869995117, "loss": 1.8262, "nll_loss": 1.7797247171401978, "rewards/accuracies": 1.0, "rewards/chosen": -0.16812236607074738, "rewards/margins": 0.0458320677280426, "rewards/rejected": -0.21395443379878998, "step": 257 }, { "epoch": 0.7124611667241975, "grad_norm": 0.22095176577568054, "learning_rate": 4.72386647004603e-06, "log_odds_chosen": 0.4106917977333069, "log_odds_ratio": -0.5133163332939148, "logits/chosen": 0.1232781782746315, "logits/rejected": -1.1046854257583618, "logps/chosen": -1.6649987697601318, "logps/rejected": -2.0084848403930664, "loss": 1.811, "nll_loss": 1.7596914768218994, "rewards/accuracies": 1.0, "rewards/chosen": -0.16649989783763885, "rewards/margins": 0.03434859216213226, "rewards/rejected": -0.20084848999977112, "step": 258 }, { "epoch": 0.7152226441146013, "grad_norm": 0.22131314873695374, "learning_rate": 4.720193775952352e-06, "log_odds_chosen": 0.2770199477672577, "log_odds_ratio": -0.5675607919692993, "logits/chosen": 0.09717811644077301, "logits/rejected": -1.1877737045288086, "logps/chosen": -1.6597505807876587, "logps/rejected": -1.889330267906189, "loss": 1.7861, "nll_loss": 1.729378581047058, "rewards/accuracies": 1.0, "rewards/chosen": -0.16597506403923035, "rewards/margins": 0.022957956418395042, "rewards/rejected": -0.18893301486968994, "step": 259 }, { "epoch": 0.7179841215050051, "grad_norm": 0.2108003944158554, "learning_rate": 4.716498265932501e-06, "log_odds_chosen": 0.5200653076171875, "log_odds_ratio": -0.47012218832969666, "logits/chosen": -0.01136242039501667, "logits/rejected": -1.1188238859176636, "logps/chosen": -1.4902641773223877, "logps/rejected": -1.915147066116333, "loss": 1.6221, "nll_loss": 1.575091004371643, "rewards/accuracies": 1.0, "rewards/chosen": -0.14902642369270325, "rewards/margins": 0.04248826950788498, "rewards/rejected": -0.19151470065116882, "step": 260 }, { "epoch": 0.720745598895409, "grad_norm": 0.22381585836410522, "learning_rate": 4.712779977963559e-06, "log_odds_chosen": 0.39622795581817627, "log_odds_ratio": -0.5201148390769958, "logits/chosen": 0.10159610211849213, "logits/rejected": -0.859241247177124, "logps/chosen": -1.5900012254714966, "logps/rejected": -1.917615294456482, "loss": 1.7383, "nll_loss": 1.6862510442733765, "rewards/accuracies": 0.875, "rewards/chosen": -0.15900012850761414, "rewards/margins": 0.03276140242815018, "rewards/rejected": -0.1917615383863449, "step": 261 }, { "epoch": 0.7235070762858129, "grad_norm": 0.2046324461698532, "learning_rate": 4.7090389502566884e-06, "log_odds_chosen": 0.5120800733566284, "log_odds_ratio": -0.47404342889785767, "logits/chosen": 0.017112823203206062, "logits/rejected": -1.1166574954986572, "logps/chosen": -1.6049623489379883, "logps/rejected": -2.0321714878082275, "loss": 1.7168, "nll_loss": 1.6693758964538574, "rewards/accuracies": 1.0, "rewards/chosen": -0.16049623489379883, "rewards/margins": 0.042720913887023926, "rewards/rejected": -0.20321716368198395, "step": 262 }, { "epoch": 0.7262685536762168, "grad_norm": 0.20920297503471375, "learning_rate": 4.705275221256738e-06, "log_odds_chosen": 0.42567548155784607, "log_odds_ratio": -0.5072777271270752, "logits/chosen": 0.11223579943180084, "logits/rejected": -1.1794720888137817, "logps/chosen": -1.6436134576797485, "logps/rejected": -2.0019237995147705, "loss": 1.7837, "nll_loss": 1.7329978942871094, "rewards/accuracies": 1.0, "rewards/chosen": -0.16436134278774261, "rewards/margins": 0.035831037908792496, "rewards/rejected": -0.2001923769712448, "step": 263 }, { "epoch": 0.7290300310666207, "grad_norm": 0.215216726064682, "learning_rate": 4.701488829641845e-06, "log_odds_chosen": 0.3972318172454834, "log_odds_ratio": -0.5163053870201111, "logits/chosen": 0.07385722547769547, "logits/rejected": -1.1585350036621094, "logps/chosen": -1.5981651544570923, "logps/rejected": -1.9258975982666016, "loss": 1.7361, "nll_loss": 1.6844836473464966, "rewards/accuracies": 1.0, "rewards/chosen": -0.15981650352478027, "rewards/margins": 0.032773248851299286, "rewards/rejected": -0.19258975982666016, "step": 264 }, { "epoch": 0.7317915084570245, "grad_norm": 0.20215153694152832, "learning_rate": 4.697679814323044e-06, "log_odds_chosen": 0.373761922121048, "log_odds_ratio": -0.5314816832542419, "logits/chosen": 0.0431194081902504, "logits/rejected": -1.3062759637832642, "logps/chosen": -1.6237438917160034, "logps/rejected": -1.9311178922653198, "loss": 1.746, "nll_loss": 1.6928824186325073, "rewards/accuracies": 1.0, "rewards/chosen": -0.1623743772506714, "rewards/margins": 0.030737407505512238, "rewards/rejected": -0.19311177730560303, "step": 265 }, { "epoch": 0.7345529858474283, "grad_norm": 0.2043098360300064, "learning_rate": 4.693848214443858e-06, "log_odds_chosen": 0.3958456516265869, "log_odds_ratio": -0.5203114151954651, "logits/chosen": 0.016560683026909828, "logits/rejected": -1.4892038106918335, "logps/chosen": -1.690280795097351, "logps/rejected": -2.024770736694336, "loss": 1.8103, "nll_loss": 1.7582213878631592, "rewards/accuracies": 0.875, "rewards/chosen": -0.16902808845043182, "rewards/margins": 0.03344898298382759, "rewards/rejected": -0.2024770826101303, "step": 266 }, { "epoch": 0.7373144632378322, "grad_norm": 0.2200057953596115, "learning_rate": 4.689994069379905e-06, "log_odds_chosen": 0.6603919863700867, "log_odds_ratio": -0.4194304049015045, "logits/chosen": 0.0796060711145401, "logits/rejected": -1.6784858703613281, "logps/chosen": -1.5854812860488892, "logps/rejected": -2.141174554824829, "loss": 1.7061, "nll_loss": 1.6641736030578613, "rewards/accuracies": 1.0, "rewards/chosen": -0.15854813158512115, "rewards/margins": 0.05556933209300041, "rewards/rejected": -0.21411746740341187, "step": 267 }, { "epoch": 0.7400759406282361, "grad_norm": 0.207722008228302, "learning_rate": 4.686117418738489e-06, "log_odds_chosen": 0.4980109930038452, "log_odds_ratio": -0.477446049451828, "logits/chosen": 0.0639527440071106, "logits/rejected": -1.2904075384140015, "logps/chosen": -1.632141351699829, "logps/rejected": -2.049818515777588, "loss": 1.7618, "nll_loss": 1.7140535116195679, "rewards/accuracies": 1.0, "rewards/chosen": -0.16321413218975067, "rewards/margins": 0.041767701506614685, "rewards/rejected": -0.20498183369636536, "step": 268 }, { "epoch": 0.74283741801864, "grad_norm": 0.20242716372013092, "learning_rate": 4.6822183023581945e-06, "log_odds_chosen": 0.42001479864120483, "log_odds_ratio": -0.517326831817627, "logits/chosen": 0.07094614952802658, "logits/rejected": -1.5565531253814697, "logps/chosen": -1.6155306100845337, "logps/rejected": -1.9663753509521484, "loss": 1.7551, "nll_loss": 1.7033692598342896, "rewards/accuracies": 0.875, "rewards/chosen": -0.1615530550479889, "rewards/margins": 0.035084471106529236, "rewards/rejected": -0.19663754105567932, "step": 269 }, { "epoch": 0.7455988954090439, "grad_norm": 0.19576282799243927, "learning_rate": 4.678296760308474e-06, "log_odds_chosen": 0.34514501690864563, "log_odds_ratio": -0.5403321385383606, "logits/chosen": -0.05599237233400345, "logits/rejected": -1.4408526420593262, "logps/chosen": -1.5795042514801025, "logps/rejected": -1.8630210161209106, "loss": 1.7087, "nll_loss": 1.6546752452850342, "rewards/accuracies": 1.0, "rewards/chosen": -0.15795043110847473, "rewards/margins": 0.028351658955216408, "rewards/rejected": -0.1863020956516266, "step": 270 }, { "epoch": 0.7483603727994477, "grad_norm": 0.22211046516895294, "learning_rate": 4.674352832889239e-06, "log_odds_chosen": 0.5620113611221313, "log_odds_ratio": -0.4555080533027649, "logits/chosen": -0.005663935095071793, "logits/rejected": -1.2683230638504028, "logps/chosen": -1.6520458459854126, "logps/rejected": -2.1268177032470703, "loss": 1.7996, "nll_loss": 1.7540650367736816, "rewards/accuracies": 1.0, "rewards/chosen": -0.16520458459854126, "rewards/margins": 0.04747716709971428, "rewards/rejected": -0.21268175542354584, "step": 271 }, { "epoch": 0.7511218501898516, "grad_norm": 0.1922963708639145, "learning_rate": 4.670386560630446e-06, "log_odds_chosen": 0.444943368434906, "log_odds_ratio": -0.5015792846679688, "logits/chosen": -0.06714704632759094, "logits/rejected": -1.379449725151062, "logps/chosen": -1.5659842491149902, "logps/rejected": -1.9318914413452148, "loss": 1.6989, "nll_loss": 1.6487019062042236, "rewards/accuracies": 0.875, "rewards/chosen": -0.15659843385219574, "rewards/margins": 0.036590706557035446, "rewards/rejected": -0.19318914413452148, "step": 272 }, { "epoch": 0.7538833275802554, "grad_norm": 0.19772395491600037, "learning_rate": 4.66639798429168e-06, "log_odds_chosen": 0.5909014940261841, "log_odds_ratio": -0.4440915584564209, "logits/chosen": -0.010252359323203564, "logits/rejected": -1.6117687225341797, "logps/chosen": -1.6227033138275146, "logps/rejected": -2.1234261989593506, "loss": 1.7368, "nll_loss": 1.69236421585083, "rewards/accuracies": 1.0, "rewards/chosen": -0.16227032244205475, "rewards/margins": 0.05007229745388031, "rewards/rejected": -0.21234261989593506, "step": 273 }, { "epoch": 0.7566448049706593, "grad_norm": 0.21296410262584686, "learning_rate": 4.6623871448617345e-06, "log_odds_chosen": 0.37625253200531006, "log_odds_ratio": -0.5279226303100586, "logits/chosen": -0.11291900277137756, "logits/rejected": -1.474963665008545, "logps/chosen": -1.5688221454620361, "logps/rejected": -1.8770910501480103, "loss": 1.6973, "nll_loss": 1.6444581747055054, "rewards/accuracies": 1.0, "rewards/chosen": -0.15688221156597137, "rewards/margins": 0.03082689456641674, "rewards/rejected": -0.18770912289619446, "step": 274 }, { "epoch": 0.7594062823610632, "grad_norm": 0.21504846215248108, "learning_rate": 4.6583540835581885e-06, "log_odds_chosen": 0.446832537651062, "log_odds_ratio": -0.5026010870933533, "logits/chosen": -0.0652085542678833, "logits/rejected": -1.2765593528747559, "logps/chosen": -1.5856618881225586, "logps/rejected": -1.9579052925109863, "loss": 1.7239, "nll_loss": 1.6736685037612915, "rewards/accuracies": 1.0, "rewards/chosen": -0.1585661768913269, "rewards/margins": 0.037224359810352325, "rewards/rejected": -0.19579055905342102, "step": 275 }, { "epoch": 0.762167759751467, "grad_norm": 0.1951994001865387, "learning_rate": 4.654298841826988e-06, "log_odds_chosen": 0.38752269744873047, "log_odds_ratio": -0.5222585797309875, "logits/chosen": -0.09070023894309998, "logits/rejected": -1.2031896114349365, "logps/chosen": -1.5128428936004639, "logps/rejected": -1.8278968334197998, "loss": 1.6382, "nll_loss": 1.586016058921814, "rewards/accuracies": 1.0, "rewards/chosen": -0.15128430724143982, "rewards/margins": 0.031505391001701355, "rewards/rejected": -0.18278968334197998, "step": 276 }, { "epoch": 0.7649292371418709, "grad_norm": 0.20356132090091705, "learning_rate": 4.6502214613420164e-06, "log_odds_chosen": 0.661637008190155, "log_odds_ratio": -0.4188510775566101, "logits/chosen": -0.07356397807598114, "logits/rejected": -1.270960807800293, "logps/chosen": -1.4853070974349976, "logps/rejected": -2.030834674835205, "loss": 1.6179, "nll_loss": 1.5760544538497925, "rewards/accuracies": 1.0, "rewards/chosen": -0.14853070676326752, "rewards/margins": 0.054552774876356125, "rewards/rejected": -0.20308347046375275, "step": 277 }, { "epoch": 0.7676907145322748, "grad_norm": 0.21808800101280212, "learning_rate": 4.646121984004666e-06, "log_odds_chosen": 0.535017192363739, "log_odds_ratio": -0.4694536030292511, "logits/chosen": -0.058985427021980286, "logits/rejected": -1.204667329788208, "logps/chosen": -1.6129412651062012, "logps/rejected": -2.0625391006469727, "loss": 1.7558, "nll_loss": 1.7088611125946045, "rewards/accuracies": 1.0, "rewards/chosen": -0.1612941473722458, "rewards/margins": 0.04495978727936745, "rewards/rejected": -0.20625391602516174, "step": 278 }, { "epoch": 0.7704521919226787, "grad_norm": 0.21462294459342957, "learning_rate": 4.642000451943409e-06, "log_odds_chosen": 0.4302963316440582, "log_odds_ratio": -0.5060604214668274, "logits/chosen": 0.040374599397182465, "logits/rejected": -1.187546968460083, "logps/chosen": -1.6709057092666626, "logps/rejected": -2.035818576812744, "loss": 1.7989, "nll_loss": 1.7482545375823975, "rewards/accuracies": 1.0, "rewards/chosen": -0.16709057986736298, "rewards/margins": 0.03649128973484039, "rewards/rejected": -0.20358186960220337, "step": 279 }, { "epoch": 0.7732136693130826, "grad_norm": 0.19471004605293274, "learning_rate": 4.637856907513366e-06, "log_odds_chosen": 0.5729100704193115, "log_odds_ratio": -0.45996299386024475, "logits/chosen": -0.0875316932797432, "logits/rejected": -1.2949634790420532, "logps/chosen": -1.6255837678909302, "logps/rejected": -2.107527256011963, "loss": 1.7409, "nll_loss": 1.6949416399002075, "rewards/accuracies": 1.0, "rewards/chosen": -0.1625583916902542, "rewards/margins": 0.04819435626268387, "rewards/rejected": -0.21075274050235748, "step": 280 }, { "epoch": 0.7759751467034863, "grad_norm": 0.20401322841644287, "learning_rate": 4.633691393295865e-06, "log_odds_chosen": 0.3522525429725647, "log_odds_ratio": -0.5362752676010132, "logits/chosen": -0.07532086223363876, "logits/rejected": -1.3928956985473633, "logps/chosen": -1.6393187046051025, "logps/rejected": -1.9348175525665283, "loss": 1.7628, "nll_loss": 1.7092012166976929, "rewards/accuracies": 1.0, "rewards/chosen": -0.16393187642097473, "rewards/margins": 0.02954990416765213, "rewards/rejected": -0.19348177313804626, "step": 281 }, { "epoch": 0.7787366240938902, "grad_norm": 0.19418881833553314, "learning_rate": 4.629503952098011e-06, "log_odds_chosen": 0.6238572597503662, "log_odds_ratio": -0.43670332431793213, "logits/chosen": -0.08066678047180176, "logits/rejected": -1.6776320934295654, "logps/chosen": -1.5727202892303467, "logps/rejected": -2.097163200378418, "loss": 1.6952, "nll_loss": 1.6514896154403687, "rewards/accuracies": 1.0, "rewards/chosen": -0.15727202594280243, "rewards/margins": 0.05244428664445877, "rewards/rejected": -0.2097163200378418, "step": 282 }, { "epoch": 0.7814981014842941, "grad_norm": 0.19461016356945038, "learning_rate": 4.6252946269522406e-06, "log_odds_chosen": 0.41456982493400574, "log_odds_ratio": -0.5249388813972473, "logits/chosen": -0.09783484041690826, "logits/rejected": -1.5897575616836548, "logps/chosen": -1.5939539670944214, "logps/rejected": -1.9444385766983032, "loss": 1.7211, "nll_loss": 1.6686402559280396, "rewards/accuracies": 0.75, "rewards/chosen": -0.15939539670944214, "rewards/margins": 0.0350484773516655, "rewards/rejected": -0.19444386661052704, "step": 283 }, { "epoch": 0.784259578874698, "grad_norm": 0.2026386708021164, "learning_rate": 4.621063461115882e-06, "log_odds_chosen": 0.42722252011299133, "log_odds_ratio": -0.5157784223556519, "logits/chosen": -0.04370000213384628, "logits/rejected": -1.5634028911590576, "logps/chosen": -1.6642380952835083, "logps/rejected": -2.0263044834136963, "loss": 1.7849, "nll_loss": 1.733304738998413, "rewards/accuracies": 0.875, "rewards/chosen": -0.16642381250858307, "rewards/margins": 0.03620663285255432, "rewards/rejected": -0.20263046026229858, "step": 284 }, { "epoch": 0.7870210562651019, "grad_norm": 0.20439012348651886, "learning_rate": 4.6168104980707105e-06, "log_odds_chosen": 0.4680078625679016, "log_odds_ratio": -0.4906473755836487, "logits/chosen": -0.09853056073188782, "logits/rejected": -1.5130950212478638, "logps/chosen": -1.5937169790267944, "logps/rejected": -1.9836535453796387, "loss": 1.7178, "nll_loss": 1.668696641921997, "rewards/accuracies": 1.0, "rewards/chosen": -0.15937167406082153, "rewards/margins": 0.038993678987026215, "rewards/rejected": -0.19836536049842834, "step": 285 }, { "epoch": 0.7897825336555057, "grad_norm": 0.1877906173467636, "learning_rate": 4.612535781522504e-06, "log_odds_chosen": 0.37254124879837036, "log_odds_ratio": -0.525785505771637, "logits/chosen": -0.08526084572076797, "logits/rejected": -1.4526267051696777, "logps/chosen": -1.5574032068252563, "logps/rejected": -1.863482117652893, "loss": 1.6777, "nll_loss": 1.6251548528671265, "rewards/accuracies": 1.0, "rewards/chosen": -0.15574032068252563, "rewards/margins": 0.03060789778828621, "rewards/rejected": -0.18634822964668274, "step": 286 }, { "epoch": 0.7925440110459095, "grad_norm": 0.19580195844173431, "learning_rate": 4.6082393554005855e-06, "log_odds_chosen": 0.5562997460365295, "log_odds_ratio": -0.4619132876396179, "logits/chosen": 0.01294594258069992, "logits/rejected": -1.4304238557815552, "logps/chosen": -1.5266389846801758, "logps/rejected": -1.9919108152389526, "loss": 1.6616, "nll_loss": 1.6153795719146729, "rewards/accuracies": 0.875, "rewards/chosen": -0.152663916349411, "rewards/margins": 0.046527184545993805, "rewards/rejected": -0.19919107854366302, "step": 287 }, { "epoch": 0.7953054884363134, "grad_norm": 0.1980113834142685, "learning_rate": 4.6039212638573835e-06, "log_odds_chosen": 0.3346819281578064, "log_odds_ratio": -0.5443198680877686, "logits/chosen": -0.15275517106056213, "logits/rejected": -1.6041910648345947, "logps/chosen": -1.6962547302246094, "logps/rejected": -1.9770543575286865, "loss": 1.8067, "nll_loss": 1.752286672592163, "rewards/accuracies": 1.0, "rewards/chosen": -0.16962547600269318, "rewards/margins": 0.028079960495233536, "rewards/rejected": -0.1977054327726364, "step": 288 }, { "epoch": 0.7980669658267173, "grad_norm": 0.2273865044116974, "learning_rate": 4.599581551267969e-06, "log_odds_chosen": 0.5466289520263672, "log_odds_ratio": -0.46321243047714233, "logits/chosen": 0.03993244469165802, "logits/rejected": -1.286005973815918, "logps/chosen": -1.6738882064819336, "logps/rejected": -2.1377532482147217, "loss": 1.8, "nll_loss": 1.7537211179733276, "rewards/accuracies": 1.0, "rewards/chosen": -0.16738884150981903, "rewards/margins": 0.046386465430259705, "rewards/rejected": -0.21377530694007874, "step": 289 }, { "epoch": 0.8008284432171212, "grad_norm": 0.20244112610816956, "learning_rate": 4.5952202622296015e-06, "log_odds_chosen": 0.3539222180843353, "log_odds_ratio": -0.53514164686203, "logits/chosen": -0.11812002211809158, "logits/rejected": -1.4588350057601929, "logps/chosen": -1.5985196828842163, "logps/rejected": -1.8924399614334106, "loss": 1.7271, "nll_loss": 1.6736091375350952, "rewards/accuracies": 1.0, "rewards/chosen": -0.15985198318958282, "rewards/margins": 0.029392031952738762, "rewards/rejected": -0.18924400210380554, "step": 290 }, { "epoch": 0.803589920607525, "grad_norm": 0.20306488871574402, "learning_rate": 4.590837441561277e-06, "log_odds_chosen": 0.4768354296684265, "log_odds_ratio": -0.49164149165153503, "logits/chosen": -0.09056994318962097, "logits/rejected": -1.4742063283920288, "logps/chosen": -1.6228840351104736, "logps/rejected": -2.022940158843994, "loss": 1.7341, "nll_loss": 1.6849125623703003, "rewards/accuracies": 1.0, "rewards/chosen": -0.1622883826494217, "rewards/margins": 0.040005628019571304, "rewards/rejected": -0.2022940218448639, "step": 291 }, { "epoch": 0.8063513979979289, "grad_norm": 0.19205592572689056, "learning_rate": 4.586433134303257e-06, "log_odds_chosen": 0.5734292268753052, "log_odds_ratio": -0.44972163438796997, "logits/chosen": -0.07964983582496643, "logits/rejected": -1.3166429996490479, "logps/chosen": -1.4941394329071045, "logps/rejected": -1.9636625051498413, "loss": 1.6364, "nll_loss": 1.5913902521133423, "rewards/accuracies": 1.0, "rewards/chosen": -0.14941394329071045, "rewards/margins": 0.04695230349898338, "rewards/rejected": -0.19636625051498413, "step": 292 }, { "epoch": 0.8091128753883328, "grad_norm": 0.20325274765491486, "learning_rate": 4.582007385716614e-06, "log_odds_chosen": 0.4024369418621063, "log_odds_ratio": -0.5175961852073669, "logits/chosen": -0.07684268802404404, "logits/rejected": -1.502617597579956, "logps/chosen": -1.6142741441726685, "logps/rejected": -1.9492229223251343, "loss": 1.7574, "nll_loss": 1.7056207656860352, "rewards/accuracies": 1.0, "rewards/chosen": -0.16142742335796356, "rewards/margins": 0.033494893461465836, "rewards/rejected": -0.1949223130941391, "step": 293 }, { "epoch": 0.8118743527787367, "grad_norm": 0.20083336532115936, "learning_rate": 4.57756024128276e-06, "log_odds_chosen": 0.5593162178993225, "log_odds_ratio": -0.45524081587791443, "logits/chosen": -0.17213015258312225, "logits/rejected": -1.3977055549621582, "logps/chosen": -1.5258231163024902, "logps/rejected": -1.9875348806381226, "loss": 1.6559, "nll_loss": 1.610384225845337, "rewards/accuracies": 1.0, "rewards/chosen": -0.1525823026895523, "rewards/margins": 0.04617120325565338, "rewards/rejected": -0.1987534910440445, "step": 294 }, { "epoch": 0.8146358301691405, "grad_norm": 0.19001390039920807, "learning_rate": 4.573091746702988e-06, "log_odds_chosen": 0.5878125429153442, "log_odds_ratio": -0.44817692041397095, "logits/chosen": -0.16976626217365265, "logits/rejected": -1.3345102071762085, "logps/chosen": -1.5175349712371826, "logps/rejected": -2.003750801086426, "loss": 1.6573, "nll_loss": 1.612461805343628, "rewards/accuracies": 1.0, "rewards/chosen": -0.1517535001039505, "rewards/margins": 0.048621561378240585, "rewards/rejected": -0.20037508010864258, "step": 295 }, { "epoch": 0.8173973075595443, "grad_norm": 0.18019473552703857, "learning_rate": 4.5686019478979915e-06, "log_odds_chosen": 0.6261818408966064, "log_odds_ratio": -0.43093276023864746, "logits/chosen": -0.045413050800561905, "logits/rejected": -1.8382006883621216, "logps/chosen": -1.5509614944458008, "logps/rejected": -2.0732412338256836, "loss": 1.6609, "nll_loss": 1.617802619934082, "rewards/accuracies": 1.0, "rewards/chosen": -0.1550961434841156, "rewards/margins": 0.05222797393798828, "rewards/rejected": -0.20732411742210388, "step": 296 }, { "epoch": 0.8201587849499482, "grad_norm": 0.20908498764038086, "learning_rate": 4.564090891007401e-06, "log_odds_chosen": 0.5261347889900208, "log_odds_ratio": -0.4701959192752838, "logits/chosen": -0.1871597170829773, "logits/rejected": -1.5989326238632202, "logps/chosen": -1.648465633392334, "logps/rejected": -2.0937981605529785, "loss": 1.7682, "nll_loss": 1.7211995124816895, "rewards/accuracies": 1.0, "rewards/chosen": -0.16484656929969788, "rewards/margins": 0.04453325271606445, "rewards/rejected": -0.20937982201576233, "step": 297 }, { "epoch": 0.8229202623403521, "grad_norm": 0.188289076089859, "learning_rate": 4.559558622389304e-06, "log_odds_chosen": 0.5777133703231812, "log_odds_ratio": -0.4563387632369995, "logits/chosen": -0.13260145485401154, "logits/rejected": -1.7002828121185303, "logps/chosen": -1.5394691228866577, "logps/rejected": -2.0138742923736572, "loss": 1.6561, "nll_loss": 1.6105040311813354, "rewards/accuracies": 1.0, "rewards/chosen": -0.1539469212293625, "rewards/margins": 0.047440510243177414, "rewards/rejected": -0.201387420296669, "step": 298 }, { "epoch": 0.825681739730756, "grad_norm": 0.19627498090267181, "learning_rate": 4.555005188619776e-06, "log_odds_chosen": 0.5525295734405518, "log_odds_ratio": -0.4643310308456421, "logits/chosen": -0.20222769677639008, "logits/rejected": -1.6397647857666016, "logps/chosen": -1.5733509063720703, "logps/rejected": -2.0376038551330566, "loss": 1.6892, "nll_loss": 1.642791748046875, "rewards/accuracies": 1.0, "rewards/chosen": -0.1573350876569748, "rewards/margins": 0.04642530530691147, "rewards/rejected": -0.20376040041446686, "step": 299 }, { "epoch": 0.8284432171211599, "grad_norm": 0.2084610015153885, "learning_rate": 4.55043063649239e-06, "log_odds_chosen": 0.6468226313591003, "log_odds_ratio": -0.4257048964500427, "logits/chosen": -0.23111680150032043, "logits/rejected": -1.959672212600708, "logps/chosen": -1.5862557888031006, "logps/rejected": -2.1299490928649902, "loss": 1.7007, "nll_loss": 1.6581330299377441, "rewards/accuracies": 1.0, "rewards/chosen": -0.15862558782100677, "rewards/margins": 0.054369326680898666, "rewards/rejected": -0.21299490332603455, "step": 300 }, { "epoch": 0.8312046945115636, "grad_norm": 0.18870840966701508, "learning_rate": 4.54583501301775e-06, "log_odds_chosen": 0.5869650840759277, "log_odds_ratio": -0.45076417922973633, "logits/chosen": -0.1821564882993698, "logits/rejected": -1.9625955820083618, "logps/chosen": -1.6092771291732788, "logps/rejected": -2.104759693145752, "loss": 1.7134, "nll_loss": 1.668313980102539, "rewards/accuracies": 1.0, "rewards/chosen": -0.16092771291732788, "rewards/margins": 0.04954826086759567, "rewards/rejected": -0.21047596633434296, "step": 301 }, { "epoch": 0.8339661719019675, "grad_norm": 0.1871059238910675, "learning_rate": 4.541218365422997e-06, "log_odds_chosen": 0.6819782257080078, "log_odds_ratio": -0.4187195301055908, "logits/chosen": -0.2113940417766571, "logits/rejected": -1.5733145475387573, "logps/chosen": -1.4704827070236206, "logps/rejected": -2.02266001701355, "loss": 1.5975, "nll_loss": 1.5556209087371826, "rewards/accuracies": 1.0, "rewards/chosen": -0.14704826474189758, "rewards/margins": 0.05521773174405098, "rewards/rejected": -0.20226599276065826, "step": 302 }, { "epoch": 0.8367276492923714, "grad_norm": 0.1986740678548813, "learning_rate": 4.536580741151328e-06, "log_odds_chosen": 0.427354633808136, "log_odds_ratio": -0.5044746398925781, "logits/chosen": -0.17407816648483276, "logits/rejected": -1.2958568334579468, "logps/chosen": -1.6126006841659546, "logps/rejected": -1.9662362337112427, "loss": 1.7285, "nll_loss": 1.678093433380127, "rewards/accuracies": 1.0, "rewards/chosen": -0.16126006841659546, "rewards/margins": 0.03536355867981911, "rewards/rejected": -0.19662362337112427, "step": 303 }, { "epoch": 0.8394891266827753, "grad_norm": 0.20910044014453888, "learning_rate": 4.531922187861507e-06, "log_odds_chosen": 0.6855639219284058, "log_odds_ratio": -0.41133368015289307, "logits/chosen": -0.17175668478012085, "logits/rejected": -1.4931915998458862, "logps/chosen": -1.491321086883545, "logps/rejected": -2.059537649154663, "loss": 1.631, "nll_loss": 1.589834451675415, "rewards/accuracies": 1.0, "rewards/chosen": -0.1491321176290512, "rewards/margins": 0.056821659207344055, "rewards/rejected": -0.20595377683639526, "step": 304 }, { "epoch": 0.8422506040731792, "grad_norm": 0.2135782539844513, "learning_rate": 4.527242753427378e-06, "log_odds_chosen": 0.4499048590660095, "log_odds_ratio": -0.4998108744621277, "logits/chosen": -0.23703832924365997, "logits/rejected": -1.2717434167861938, "logps/chosen": -1.6932170391082764, "logps/rejected": -2.074625015258789, "loss": 1.8238, "nll_loss": 1.7738076448440552, "rewards/accuracies": 1.0, "rewards/chosen": -0.1693217009305954, "rewards/margins": 0.038140811026096344, "rewards/rejected": -0.20746250450611115, "step": 305 }, { "epoch": 0.845012081463583, "grad_norm": 0.1993100643157959, "learning_rate": 4.522542485937369e-06, "log_odds_chosen": 0.6878387331962585, "log_odds_ratio": -0.4140303134918213, "logits/chosen": -0.17333604395389557, "logits/rejected": -1.6610162258148193, "logps/chosen": -1.5485466718673706, "logps/rejected": -2.122166395187378, "loss": 1.6836, "nll_loss": 1.6422399282455444, "rewards/accuracies": 1.0, "rewards/chosen": -0.1548546552658081, "rewards/margins": 0.057361967861652374, "rewards/rejected": -0.21221664547920227, "step": 306 }, { "epoch": 0.8477735588539869, "grad_norm": 0.20089736580848694, "learning_rate": 4.5178214336940015e-06, "log_odds_chosen": 0.5427862405776978, "log_odds_ratio": -0.4643491506576538, "logits/chosen": -0.1863488256931305, "logits/rejected": -1.4939963817596436, "logps/chosen": -1.556074857711792, "logps/rejected": -2.007810592651367, "loss": 1.6867, "nll_loss": 1.640239953994751, "rewards/accuracies": 1.0, "rewards/chosen": -0.15560749173164368, "rewards/margins": 0.04517355561256409, "rewards/rejected": -0.20078104734420776, "step": 307 }, { "epoch": 0.8505350362443908, "grad_norm": 0.19465354084968567, "learning_rate": 4.513079645213391e-06, "log_odds_chosen": 0.6006003618240356, "log_odds_ratio": -0.4448007047176361, "logits/chosen": -0.21229855716228485, "logits/rejected": -1.460700273513794, "logps/chosen": -1.4619970321655273, "logps/rejected": -1.951730728149414, "loss": 1.5808, "nll_loss": 1.5363428592681885, "rewards/accuracies": 1.0, "rewards/chosen": -0.14619971811771393, "rewards/margins": 0.04897337406873703, "rewards/rejected": -0.19517306983470917, "step": 308 }, { "epoch": 0.8532965136347946, "grad_norm": 0.22921526432037354, "learning_rate": 4.508317169224752e-06, "log_odds_chosen": 0.28590530157089233, "log_odds_ratio": -0.5672011375427246, "logits/chosen": -0.3028235137462616, "logits/rejected": -1.3352372646331787, "logps/chosen": -1.5896085500717163, "logps/rejected": -1.8217551708221436, "loss": 1.7142, "nll_loss": 1.6574809551239014, "rewards/accuracies": 0.875, "rewards/chosen": -0.15896086394786835, "rewards/margins": 0.0232146717607975, "rewards/rejected": -0.18217553198337555, "step": 309 }, { "epoch": 0.8560579910251985, "grad_norm": 0.20834492146968842, "learning_rate": 4.5035340546698915e-06, "log_odds_chosen": 0.5302640795707703, "log_odds_ratio": -0.47237080335617065, "logits/chosen": -0.13226553797721863, "logits/rejected": -1.6689988374710083, "logps/chosen": -1.5815542936325073, "logps/rejected": -2.017597198486328, "loss": 1.7019, "nll_loss": 1.6546366214752197, "rewards/accuracies": 0.875, "rewards/chosen": -0.1581554412841797, "rewards/margins": 0.0436042957007885, "rewards/rejected": -0.2017597258090973, "step": 310 }, { "epoch": 0.8588194684156023, "grad_norm": 0.2186000496149063, "learning_rate": 4.4987303507027155e-06, "log_odds_chosen": 0.3287478983402252, "log_odds_ratio": -0.549705982208252, "logits/chosen": -0.11287423223257065, "logits/rejected": -1.349088430404663, "logps/chosen": -1.6460450887680054, "logps/rejected": -1.9183804988861084, "loss": 1.7738, "nll_loss": 1.718807339668274, "rewards/accuracies": 0.75, "rewards/chosen": -0.16460449993610382, "rewards/margins": 0.027233552187681198, "rewards/rejected": -0.19183805584907532, "step": 311 }, { "epoch": 0.8615809458060062, "grad_norm": 0.19687776267528534, "learning_rate": 4.493906106688712e-06, "log_odds_chosen": 0.6679433584213257, "log_odds_ratio": -0.41612738370895386, "logits/chosen": -0.1544291228055954, "logits/rejected": -1.6344720125198364, "logps/chosen": -1.5363831520080566, "logps/rejected": -2.0934810638427734, "loss": 1.6699, "nll_loss": 1.6282765865325928, "rewards/accuracies": 1.0, "rewards/chosen": -0.1536383181810379, "rewards/margins": 0.05570977181196213, "rewards/rejected": -0.20934809744358063, "step": 312 }, { "epoch": 0.8643424231964101, "grad_norm": 0.18424390256404877, "learning_rate": 4.4890613722044526e-06, "log_odds_chosen": 0.5501468181610107, "log_odds_ratio": -0.4591679573059082, "logits/chosen": -0.21150004863739014, "logits/rejected": -1.5727708339691162, "logps/chosen": -1.529168963432312, "logps/rejected": -1.9859933853149414, "loss": 1.6331, "nll_loss": 1.587223768234253, "rewards/accuracies": 1.0, "rewards/chosen": -0.15291690826416016, "rewards/margins": 0.04568243771791458, "rewards/rejected": -0.19859933853149414, "step": 313 }, { "epoch": 0.867103900586814, "grad_norm": 0.20299668610095978, "learning_rate": 4.484196197037082e-06, "log_odds_chosen": 0.5865733027458191, "log_odds_ratio": -0.4465975761413574, "logits/chosen": -0.13622766733169556, "logits/rejected": -1.5913082361221313, "logps/chosen": -1.5737249851226807, "logps/rejected": -2.0672624111175537, "loss": 1.691, "nll_loss": 1.6463013887405396, "rewards/accuracies": 1.0, "rewards/chosen": -0.15737250447273254, "rewards/margins": 0.04935373738408089, "rewards/rejected": -0.20672622323036194, "step": 314 }, { "epoch": 0.8698653779772179, "grad_norm": 0.19080907106399536, "learning_rate": 4.4793106311838e-06, "log_odds_chosen": 0.556647002696991, "log_odds_ratio": -0.46323782205581665, "logits/chosen": -0.2553403377532959, "logits/rejected": -1.4489879608154297, "logps/chosen": -1.562751054763794, "logps/rejected": -2.0281355381011963, "loss": 1.6685, "nll_loss": 1.6221669912338257, "rewards/accuracies": 1.0, "rewards/chosen": -0.15627512335777283, "rewards/margins": 0.04653845354914665, "rewards/rejected": -0.20281356573104858, "step": 315 }, { "epoch": 0.8726268553676216, "grad_norm": 0.21013295650482178, "learning_rate": 4.474404724851356e-06, "log_odds_chosen": 0.5066735148429871, "log_odds_ratio": -0.48284393548965454, "logits/chosen": -0.18822398781776428, "logits/rejected": -1.6378931999206543, "logps/chosen": -1.6176685094833374, "logps/rejected": -2.041299819946289, "loss": 1.7356, "nll_loss": 1.687273621559143, "rewards/accuracies": 1.0, "rewards/chosen": -0.16176684200763702, "rewards/margins": 0.04236314073204994, "rewards/rejected": -0.20412999391555786, "step": 316 }, { "epoch": 0.8753883327580255, "grad_norm": 0.2111610472202301, "learning_rate": 4.469478528455529e-06, "log_odds_chosen": 0.4905741810798645, "log_odds_ratio": -0.4830451011657715, "logits/chosen": -0.3353807330131531, "logits/rejected": -1.3652547597885132, "logps/chosen": -1.6042860746383667, "logps/rejected": -2.011214017868042, "loss": 1.7255, "nll_loss": 1.677234411239624, "rewards/accuracies": 1.0, "rewards/chosen": -0.16042861342430115, "rewards/margins": 0.04069279134273529, "rewards/rejected": -0.20112140476703644, "step": 317 }, { "epoch": 0.8781498101484294, "grad_norm": 0.20293498039245605, "learning_rate": 4.464532092620607e-06, "log_odds_chosen": 0.523048996925354, "log_odds_ratio": -0.47193020582199097, "logits/chosen": -0.18486103415489197, "logits/rejected": -1.4264463186264038, "logps/chosen": -1.638154149055481, "logps/rejected": -2.0816562175750732, "loss": 1.7478, "nll_loss": 1.7006094455718994, "rewards/accuracies": 1.0, "rewards/chosen": -0.1638154238462448, "rewards/margins": 0.04435021057724953, "rewards/rejected": -0.20816563069820404, "step": 318 }, { "epoch": 0.8809112875388333, "grad_norm": 0.17995508015155792, "learning_rate": 4.4595654681788715e-06, "log_odds_chosen": 0.647110641002655, "log_odds_ratio": -0.428227037191391, "logits/chosen": -0.23671314120292664, "logits/rejected": -1.8144798278808594, "logps/chosen": -1.5134212970733643, "logps/rejected": -2.050638437271118, "loss": 1.6212, "nll_loss": 1.5783425569534302, "rewards/accuracies": 1.0, "rewards/chosen": -0.15134215354919434, "rewards/margins": 0.053721703588962555, "rewards/rejected": -0.2050638496875763, "step": 319 }, { "epoch": 0.8836727649292372, "grad_norm": 0.20032061636447906, "learning_rate": 4.454578706170075e-06, "log_odds_chosen": 0.35773491859436035, "log_odds_ratio": -0.5383328795433044, "logits/chosen": -0.1582796573638916, "logits/rejected": -1.6286273002624512, "logps/chosen": -1.6196863651275635, "logps/rejected": -1.9159085750579834, "loss": 1.7371, "nll_loss": 1.6832914352416992, "rewards/accuracies": 1.0, "rewards/chosen": -0.1619686335325241, "rewards/margins": 0.02962222881615162, "rewards/rejected": -0.19159086048603058, "step": 320 }, { "epoch": 0.886434242319641, "grad_norm": 0.2055967003107071, "learning_rate": 4.449571857840911e-06, "log_odds_chosen": 0.586254358291626, "log_odds_ratio": -0.45622390508651733, "logits/chosen": -0.09812657535076141, "logits/rejected": -1.682092547416687, "logps/chosen": -1.5706590414047241, "logps/rejected": -2.0691263675689697, "loss": 1.6908, "nll_loss": 1.6451488733291626, "rewards/accuracies": 0.875, "rewards/chosen": -0.15706591308116913, "rewards/margins": 0.04984673112630844, "rewards/rejected": -0.20691262185573578, "step": 321 }, { "epoch": 0.8891957197100449, "grad_norm": 0.2170560508966446, "learning_rate": 4.444544974644493e-06, "log_odds_chosen": 0.42108941078186035, "log_odds_ratio": -0.5122984647750854, "logits/chosen": -0.20832902193069458, "logits/rejected": -1.468292236328125, "logps/chosen": -1.5347344875335693, "logps/rejected": -1.8808050155639648, "loss": 1.6619, "nll_loss": 1.61066472530365, "rewards/accuracies": 0.875, "rewards/chosen": -0.15347345173358917, "rewards/margins": 0.03460706025362015, "rewards/rejected": -0.18808050453662872, "step": 322 }, { "epoch": 0.8919571971004487, "grad_norm": 0.20523445308208466, "learning_rate": 4.4394981082398254e-06, "log_odds_chosen": 0.4536086320877075, "log_odds_ratio": -0.5041500926017761, "logits/chosen": -0.26630857586860657, "logits/rejected": -1.5968291759490967, "logps/chosen": -1.5988963842391968, "logps/rejected": -1.9788540601730347, "loss": 1.7272, "nll_loss": 1.676782250404358, "rewards/accuracies": 0.875, "rewards/chosen": -0.15988965332508087, "rewards/margins": 0.037995755672454834, "rewards/rejected": -0.19788537919521332, "step": 323 }, { "epoch": 0.8947186744908526, "grad_norm": 0.21546316146850586, "learning_rate": 4.434431310491267e-06, "log_odds_chosen": 0.6247555017471313, "log_odds_ratio": -0.43717044591903687, "logits/chosen": -0.27398669719696045, "logits/rejected": -1.5846881866455078, "logps/chosen": -1.660336971282959, "logps/rejected": -2.190629243850708, "loss": 1.7647, "nll_loss": 1.7209898233413696, "rewards/accuracies": 1.0, "rewards/chosen": -0.16603372991085052, "rewards/margins": 0.05302921682596207, "rewards/rejected": -0.2190629243850708, "step": 324 }, { "epoch": 0.8974801518812565, "grad_norm": 0.21981805562973022, "learning_rate": 4.429344633468005e-06, "log_odds_chosen": 0.5096076130867004, "log_odds_ratio": -0.4749549627304077, "logits/chosen": -0.22183284163475037, "logits/rejected": -1.6317569017410278, "logps/chosen": -1.528499722480774, "logps/rejected": -1.9499012231826782, "loss": 1.6449, "nll_loss": 1.597440242767334, "rewards/accuracies": 1.0, "rewards/chosen": -0.1528499722480774, "rewards/margins": 0.0421401672065258, "rewards/rejected": -0.1949901133775711, "step": 325 }, { "epoch": 0.9002416292716603, "grad_norm": 0.23145584762096405, "learning_rate": 4.424238129443515e-06, "log_odds_chosen": 0.4823288321495056, "log_odds_ratio": -0.48343366384506226, "logits/chosen": -0.1781751811504364, "logits/rejected": -1.2557498216629028, "logps/chosen": -1.6358767747879028, "logps/rejected": -2.0398380756378174, "loss": 1.7597, "nll_loss": 1.7113655805587769, "rewards/accuracies": 1.0, "rewards/chosen": -0.16358768939971924, "rewards/margins": 0.04039612039923668, "rewards/rejected": -0.20398379862308502, "step": 326 }, { "epoch": 0.9030031066620642, "grad_norm": 0.21986520290374756, "learning_rate": 4.4191118508950286e-06, "log_odds_chosen": 0.5763283371925354, "log_odds_ratio": -0.4508843421936035, "logits/chosen": -0.2733724117279053, "logits/rejected": -1.4346749782562256, "logps/chosen": -1.5722405910491943, "logps/rejected": -2.0537078380584717, "loss": 1.7093, "nll_loss": 1.6641618013381958, "rewards/accuracies": 1.0, "rewards/chosen": -0.15722407400608063, "rewards/margins": 0.048146720975637436, "rewards/rejected": -0.20537079870700836, "step": 327 }, { "epoch": 0.9057645840524681, "grad_norm": 0.19916215538978577, "learning_rate": 4.413965850502987e-06, "log_odds_chosen": 0.6543524265289307, "log_odds_ratio": -0.42543232440948486, "logits/chosen": -0.28132179379463196, "logits/rejected": -1.6163063049316406, "logps/chosen": -1.4477849006652832, "logps/rejected": -1.9837119579315186, "loss": 1.5427, "nll_loss": 1.5002020597457886, "rewards/accuracies": 1.0, "rewards/chosen": -0.14477849006652832, "rewards/margins": 0.05359271913766861, "rewards/rejected": -0.19837118685245514, "step": 328 }, { "epoch": 0.908526061442872, "grad_norm": 0.20786328613758087, "learning_rate": 4.408800181150509e-06, "log_odds_chosen": 0.7103330492973328, "log_odds_ratio": -0.405758798122406, "logits/chosen": -0.27468955516815186, "logits/rejected": -1.7239093780517578, "logps/chosen": -1.5232479572296143, "logps/rejected": -2.113443374633789, "loss": 1.6308, "nll_loss": 1.5901868343353271, "rewards/accuracies": 1.0, "rewards/chosen": -0.15232481062412262, "rewards/margins": 0.0590195432305336, "rewards/rejected": -0.21134433150291443, "step": 329 }, { "epoch": 0.9112875388332758, "grad_norm": 0.21173468232154846, "learning_rate": 4.4036148959228365e-06, "log_odds_chosen": 0.5421361327171326, "log_odds_ratio": -0.45979243516921997, "logits/chosen": -0.19346265494823456, "logits/rejected": -1.3866770267486572, "logps/chosen": -1.5939050912857056, "logps/rejected": -2.0476269721984863, "loss": 1.7068, "nll_loss": 1.660808801651001, "rewards/accuracies": 1.0, "rewards/chosen": -0.15939049422740936, "rewards/margins": 0.045372217893600464, "rewards/rejected": -0.20476271212100983, "step": 330 }, { "epoch": 0.9140490162236796, "grad_norm": 0.20240680873394012, "learning_rate": 4.3984100481068e-06, "log_odds_chosen": 0.5311475992202759, "log_odds_ratio": -0.47231799364089966, "logits/chosen": -0.10619790852069855, "logits/rejected": -1.8089945316314697, "logps/chosen": -1.528999924659729, "logps/rejected": -1.9681410789489746, "loss": 1.6383, "nll_loss": 1.5910669565200806, "rewards/accuracies": 1.0, "rewards/chosen": -0.15289999544620514, "rewards/margins": 0.04391412436962128, "rewards/rejected": -0.19681411981582642, "step": 331 }, { "epoch": 0.9168104936140835, "grad_norm": 0.18672843277454376, "learning_rate": 4.3931856911902635e-06, "log_odds_chosen": 0.6374708414077759, "log_odds_ratio": -0.433654248714447, "logits/chosen": -0.31257641315460205, "logits/rejected": -1.766423225402832, "logps/chosen": -1.5535167455673218, "logps/rejected": -2.087543487548828, "loss": 1.6746, "nll_loss": 1.631203293800354, "rewards/accuracies": 1.0, "rewards/chosen": -0.1553516834974289, "rewards/margins": 0.05340268462896347, "rewards/rejected": -0.20875434577465057, "step": 332 }, { "epoch": 0.9195719710044874, "grad_norm": 0.20278650522232056, "learning_rate": 4.387941878861578e-06, "log_odds_chosen": 0.442794531583786, "log_odds_ratio": -0.5059284567832947, "logits/chosen": -0.2818780243396759, "logits/rejected": -1.41943359375, "logps/chosen": -1.475003957748413, "logps/rejected": -1.838566541671753, "loss": 1.5936, "nll_loss": 1.5429768562316895, "rewards/accuracies": 0.875, "rewards/chosen": -0.1475003957748413, "rewards/margins": 0.03635626286268234, "rewards/rejected": -0.18385665118694305, "step": 333 }, { "epoch": 0.9223334483948913, "grad_norm": 0.21072961390018463, "learning_rate": 4.382678665009028e-06, "log_odds_chosen": 0.5503413677215576, "log_odds_ratio": -0.4611153304576874, "logits/chosen": -0.24195455014705658, "logits/rejected": -1.77567720413208, "logps/chosen": -1.6017379760742188, "logps/rejected": -2.0620176792144775, "loss": 1.7086, "nll_loss": 1.6624857187271118, "rewards/accuracies": 1.0, "rewards/chosen": -0.16017380356788635, "rewards/margins": 0.04602799564599991, "rewards/rejected": -0.20620179176330566, "step": 334 }, { "epoch": 0.9250949257852952, "grad_norm": 0.21240665018558502, "learning_rate": 4.3773961037202784e-06, "log_odds_chosen": 0.5787122249603271, "log_odds_ratio": -0.4496590793132782, "logits/chosen": -0.24867500364780426, "logits/rejected": -1.7745471000671387, "logps/chosen": -1.6565507650375366, "logps/rejected": -2.1457858085632324, "loss": 1.7518, "nll_loss": 1.706835150718689, "rewards/accuracies": 1.0, "rewards/chosen": -0.16565507650375366, "rewards/margins": 0.04892349988222122, "rewards/rejected": -0.21457859873771667, "step": 335 }, { "epoch": 0.927856403175699, "grad_norm": 0.20056602358818054, "learning_rate": 4.37209424928182e-06, "log_odds_chosen": 0.5505763292312622, "log_odds_ratio": -0.4596712589263916, "logits/chosen": -0.18031375110149384, "logits/rejected": -1.4376002550125122, "logps/chosen": -1.557979941368103, "logps/rejected": -2.017378091812134, "loss": 1.6716, "nll_loss": 1.6256715059280396, "rewards/accuracies": 1.0, "rewards/chosen": -0.1557980179786682, "rewards/margins": 0.04593981057405472, "rewards/rejected": -0.20173780620098114, "step": 336 }, { "epoch": 0.9306178805661028, "grad_norm": 0.18808571994304657, "learning_rate": 4.366773156178413e-06, "log_odds_chosen": 0.42912667989730835, "log_odds_ratio": -0.5055819153785706, "logits/chosen": -0.3135528564453125, "logits/rejected": -1.5518022775650024, "logps/chosen": -1.481281042098999, "logps/rejected": -1.826270341873169, "loss": 1.5991, "nll_loss": 1.5485769510269165, "rewards/accuracies": 1.0, "rewards/chosen": -0.14812810719013214, "rewards/margins": 0.0344989076256752, "rewards/rejected": -0.18262703716754913, "step": 337 }, { "epoch": 0.9333793579565067, "grad_norm": 0.21728971600532532, "learning_rate": 4.361432879092518e-06, "log_odds_chosen": 0.5635970234870911, "log_odds_ratio": -0.4642696976661682, "logits/chosen": -0.32608091831207275, "logits/rejected": -1.478355050086975, "logps/chosen": -1.5051325559616089, "logps/rejected": -1.974266529083252, "loss": 1.6273, "nll_loss": 1.5808420181274414, "rewards/accuracies": 1.0, "rewards/chosen": -0.15051327645778656, "rewards/margins": 0.04691339656710625, "rewards/rejected": -0.1974266618490219, "step": 338 }, { "epoch": 0.9361408353469106, "grad_norm": 0.20923133194446564, "learning_rate": 4.356073472903747e-06, "log_odds_chosen": 0.5899335145950317, "log_odds_ratio": -0.44590240716934204, "logits/chosen": -0.17045272886753082, "logits/rejected": -1.5179771184921265, "logps/chosen": -1.4507163763046265, "logps/rejected": -1.9340299367904663, "loss": 1.5672, "nll_loss": 1.5226441621780396, "rewards/accuracies": 1.0, "rewards/chosen": -0.14507164061069489, "rewards/margins": 0.048331368714571, "rewards/rejected": -0.1934029906988144, "step": 339 }, { "epoch": 0.9389023127373145, "grad_norm": 0.20438268780708313, "learning_rate": 4.350694992688289e-06, "log_odds_chosen": 0.6362269520759583, "log_odds_ratio": -0.43733319640159607, "logits/chosen": -0.14821594953536987, "logits/rejected": -1.5649621486663818, "logps/chosen": -1.4848688840866089, "logps/rejected": -2.009694814682007, "loss": 1.6031, "nll_loss": 1.5594022274017334, "rewards/accuracies": 1.0, "rewards/chosen": -0.1484868973493576, "rewards/margins": 0.05248260498046875, "rewards/rejected": -0.20096951723098755, "step": 340 }, { "epoch": 0.9416637901277183, "grad_norm": 0.20621763169765472, "learning_rate": 4.345297493718352e-06, "log_odds_chosen": 0.547203540802002, "log_odds_ratio": -0.462637722492218, "logits/chosen": -0.31976550817489624, "logits/rejected": -1.318708062171936, "logps/chosen": -1.4836325645446777, "logps/rejected": -1.930004358291626, "loss": 1.6093, "nll_loss": 1.563034176826477, "rewards/accuracies": 1.0, "rewards/chosen": -0.14836326241493225, "rewards/margins": 0.044637180864810944, "rewards/rejected": -0.1930004358291626, "step": 341 }, { "epoch": 0.9444252675181222, "grad_norm": 0.18609599769115448, "learning_rate": 4.339881031461588e-06, "log_odds_chosen": 0.47142109274864197, "log_odds_ratio": -0.48913687467575073, "logits/chosen": -0.26566094160079956, "logits/rejected": -1.550631046295166, "logps/chosen": -1.4443541765213013, "logps/rejected": -1.8148910999298096, "loss": 1.578, "nll_loss": 1.5290637016296387, "rewards/accuracies": 1.0, "rewards/chosen": -0.14443542063236237, "rewards/margins": 0.03705369308590889, "rewards/rejected": -0.18148910999298096, "step": 342 }, { "epoch": 0.9471867449085261, "grad_norm": 0.1996508240699768, "learning_rate": 4.334445661580527e-06, "log_odds_chosen": 0.5693928599357605, "log_odds_ratio": -0.4520787000656128, "logits/chosen": -0.33670923113822937, "logits/rejected": -1.7492713928222656, "logps/chosen": -1.5564992427825928, "logps/rejected": -2.030904531478882, "loss": 1.6725, "nll_loss": 1.6272705793380737, "rewards/accuracies": 1.0, "rewards/chosen": -0.15564994513988495, "rewards/margins": 0.04744052141904831, "rewards/rejected": -0.20309044420719147, "step": 343 }, { "epoch": 0.94994822229893, "grad_norm": 0.19215835630893707, "learning_rate": 4.328991439932003e-06, "log_odds_chosen": 0.6314505934715271, "log_odds_ratio": -0.4288046658039093, "logits/chosen": -0.29215654730796814, "logits/rejected": -1.5209310054779053, "logps/chosen": -1.486976981163025, "logps/rejected": -2.0075106620788574, "loss": 1.6116, "nll_loss": 1.568747878074646, "rewards/accuracies": 1.0, "rewards/chosen": -0.14869770407676697, "rewards/margins": 0.05205334722995758, "rewards/rejected": -0.20075105130672455, "step": 344 }, { "epoch": 0.9527096996893338, "grad_norm": 0.20573773980140686, "learning_rate": 4.323518422566586e-06, "log_odds_chosen": 0.7072029113769531, "log_odds_ratio": -0.40426695346832275, "logits/chosen": -0.23872965574264526, "logits/rejected": -1.5439509153366089, "logps/chosen": -1.5577975511550903, "logps/rejected": -2.149085760116577, "loss": 1.6903, "nll_loss": 1.649876594543457, "rewards/accuracies": 1.0, "rewards/chosen": -0.15577976405620575, "rewards/margins": 0.059128809720277786, "rewards/rejected": -0.21490855515003204, "step": 345 }, { "epoch": 0.9554711770797376, "grad_norm": 0.1889713704586029, "learning_rate": 4.318026665727993e-06, "log_odds_chosen": 0.6957321166992188, "log_odds_ratio": -0.411748468875885, "logits/chosen": -0.3132311701774597, "logits/rejected": -1.7383476495742798, "logps/chosen": -1.4288955926895142, "logps/rejected": -1.9945143461227417, "loss": 1.5422, "nll_loss": 1.501028299331665, "rewards/accuracies": 1.0, "rewards/chosen": -0.14288955926895142, "rewards/margins": 0.056561872363090515, "rewards/rejected": -0.19945143163204193, "step": 346 }, { "epoch": 0.9582326544701415, "grad_norm": 0.21370814740657806, "learning_rate": 4.3125162258525265e-06, "log_odds_chosen": 0.37931889295578003, "log_odds_ratio": -0.5239338874816895, "logits/chosen": -0.34714582562446594, "logits/rejected": -1.3666698932647705, "logps/chosen": -1.6313103437423706, "logps/rejected": -1.9475239515304565, "loss": 1.7528, "nll_loss": 1.7004497051239014, "rewards/accuracies": 1.0, "rewards/chosen": -0.16313102841377258, "rewards/margins": 0.031621355563402176, "rewards/rejected": -0.19475241005420685, "step": 347 }, { "epoch": 0.9609941318605454, "grad_norm": 0.20202378928661346, "learning_rate": 4.3069871595684795e-06, "log_odds_chosen": 0.5231636762619019, "log_odds_ratio": -0.4718437194824219, "logits/chosen": -0.3397434949874878, "logits/rejected": -1.5151662826538086, "logps/chosen": -1.4959080219268799, "logps/rejected": -1.9243054389953613, "loss": 1.6233, "nll_loss": 1.5760971307754517, "rewards/accuracies": 1.0, "rewards/chosen": -0.14959080517292023, "rewards/margins": 0.04283975064754486, "rewards/rejected": -0.1924305558204651, "step": 348 }, { "epoch": 0.9637556092509493, "grad_norm": 0.1996474266052246, "learning_rate": 4.3014395236955635e-06, "log_odds_chosen": 0.6398702263832092, "log_odds_ratio": -0.4294753670692444, "logits/chosen": -0.2723667621612549, "logits/rejected": -1.6896021366119385, "logps/chosen": -1.4915456771850586, "logps/rejected": -2.0213863849639893, "loss": 1.6075, "nll_loss": 1.5645424127578735, "rewards/accuracies": 1.0, "rewards/chosen": -0.14915457367897034, "rewards/margins": 0.0529840886592865, "rewards/rejected": -0.20213866233825684, "step": 349 }, { "epoch": 0.9665170866413532, "grad_norm": 0.18923406302928925, "learning_rate": 4.295873375244319e-06, "log_odds_chosen": 0.5657058358192444, "log_odds_ratio": -0.4569021463394165, "logits/chosen": -0.2960559129714966, "logits/rejected": -1.5297155380249023, "logps/chosen": -1.446962833404541, "logps/rejected": -1.9055999517440796, "loss": 1.5727, "nll_loss": 1.527005672454834, "rewards/accuracies": 1.0, "rewards/chosen": -0.14469628036022186, "rewards/margins": 0.045863717794418335, "rewards/rejected": -0.1905599981546402, "step": 350 }, { "epoch": 0.9692785640317569, "grad_norm": 0.2020733803510666, "learning_rate": 4.290288771415536e-06, "log_odds_chosen": 0.47521698474884033, "log_odds_ratio": -0.48997536301612854, "logits/chosen": -0.2961908280849457, "logits/rejected": -1.738245964050293, "logps/chosen": -1.6070820093154907, "logps/rejected": -2.003466844558716, "loss": 1.7209, "nll_loss": 1.6719499826431274, "rewards/accuracies": 0.875, "rewards/chosen": -0.16070818901062012, "rewards/margins": 0.039638496935367584, "rewards/rejected": -0.2003466784954071, "step": 351 }, { "epoch": 0.9720400414221608, "grad_norm": 0.19740743935108185, "learning_rate": 4.284685769599658e-06, "log_odds_chosen": 0.5527662038803101, "log_odds_ratio": -0.4617147445678711, "logits/chosen": -0.3190363645553589, "logits/rejected": -1.6859633922576904, "logps/chosen": -1.5147120952606201, "logps/rejected": -1.9768071174621582, "loss": 1.6243, "nll_loss": 1.5781550407409668, "rewards/accuracies": 1.0, "rewards/chosen": -0.15147122740745544, "rewards/margins": 0.04620949178934097, "rewards/rejected": -0.19768072664737701, "step": 352 }, { "epoch": 0.9748015188125647, "grad_norm": 0.19644393026828766, "learning_rate": 4.279064427376199e-06, "log_odds_chosen": 0.5512232780456543, "log_odds_ratio": -0.45938748121261597, "logits/chosen": -0.2991534173488617, "logits/rejected": -1.5584678649902344, "logps/chosen": -1.5441019535064697, "logps/rejected": -2.002403736114502, "loss": 1.6453, "nll_loss": 1.5993587970733643, "rewards/accuracies": 1.0, "rewards/chosen": -0.1544102132320404, "rewards/margins": 0.045830175280570984, "rewards/rejected": -0.2002403736114502, "step": 353 }, { "epoch": 0.9775629962029686, "grad_norm": 0.19680863618850708, "learning_rate": 4.273424802513145e-06, "log_odds_chosen": 0.5857518911361694, "log_odds_ratio": -0.4452309012413025, "logits/chosen": -0.2941315174102783, "logits/rejected": -1.6820390224456787, "logps/chosen": -1.5470666885375977, "logps/rejected": -2.031536102294922, "loss": 1.6621, "nll_loss": 1.6175867319107056, "rewards/accuracies": 1.0, "rewards/chosen": -0.1547066569328308, "rewards/margins": 0.04844695329666138, "rewards/rejected": -0.2031536102294922, "step": 354 }, { "epoch": 0.9803244735933725, "grad_norm": 0.18272706866264343, "learning_rate": 4.267766952966369e-06, "log_odds_chosen": 0.7239515781402588, "log_odds_ratio": -0.4105169177055359, "logits/chosen": -0.23595450818538666, "logits/rejected": -1.5807809829711914, "logps/chosen": -1.3758444786071777, "logps/rejected": -1.9615696668624878, "loss": 1.4949, "nll_loss": 1.4538719654083252, "rewards/accuracies": 1.0, "rewards/chosen": -0.13758444786071777, "rewards/margins": 0.058572523295879364, "rewards/rejected": -0.19615697860717773, "step": 355 }, { "epoch": 0.9830859509837763, "grad_norm": 0.19880905747413635, "learning_rate": 4.26209093687903e-06, "log_odds_chosen": 0.4604690670967102, "log_odds_ratio": -0.4963679313659668, "logits/chosen": -0.30030739307403564, "logits/rejected": -1.8233280181884766, "logps/chosen": -1.5396702289581299, "logps/rejected": -1.922170639038086, "loss": 1.6588, "nll_loss": 1.6091995239257812, "rewards/accuracies": 1.0, "rewards/chosen": -0.153967022895813, "rewards/margins": 0.038250040262937546, "rewards/rejected": -0.19221706688404083, "step": 356 }, { "epoch": 0.9858474283741802, "grad_norm": 0.19747234880924225, "learning_rate": 4.2563968125809734e-06, "log_odds_chosen": 0.5938950777053833, "log_odds_ratio": -0.44322288036346436, "logits/chosen": -0.17701445519924164, "logits/rejected": -1.6618235111236572, "logps/chosen": -1.60313880443573, "logps/rejected": -2.101062774658203, "loss": 1.7054, "nll_loss": 1.6611095666885376, "rewards/accuracies": 1.0, "rewards/chosen": -0.16031388938426971, "rewards/margins": 0.04979238659143448, "rewards/rejected": -0.2101062536239624, "step": 357 }, { "epoch": 0.988608905764584, "grad_norm": 0.19918540120124817, "learning_rate": 4.2506846385881375e-06, "log_odds_chosen": 0.6773942708969116, "log_odds_ratio": -0.4168775975704193, "logits/chosen": -0.4877479672431946, "logits/rejected": -1.6255730390548706, "logps/chosen": -1.409053087234497, "logps/rejected": -1.9528459310531616, "loss": 1.5337, "nll_loss": 1.491982340812683, "rewards/accuracies": 1.0, "rewards/chosen": -0.14090532064437866, "rewards/margins": 0.05437929555773735, "rewards/rejected": -0.19528460502624512, "step": 358 }, { "epoch": 0.9913703831549879, "grad_norm": 0.1901620328426361, "learning_rate": 4.2449544736019486e-06, "log_odds_chosen": 0.5646210312843323, "log_odds_ratio": -0.45523136854171753, "logits/chosen": -0.22285765409469604, "logits/rejected": -1.4501441717147827, "logps/chosen": -1.479009985923767, "logps/rejected": -1.9385974407196045, "loss": 1.5958, "nll_loss": 1.5503125190734863, "rewards/accuracies": 1.0, "rewards/chosen": -0.1479010134935379, "rewards/margins": 0.045958735048770905, "rewards/rejected": -0.1938597410917282, "step": 359 }, { "epoch": 0.9941318605453918, "grad_norm": 0.18198014795780182, "learning_rate": 4.239206376508716e-06, "log_odds_chosen": 0.7485941648483276, "log_odds_ratio": -0.39402005076408386, "logits/chosen": -0.25095412135124207, "logits/rejected": -1.9625945091247559, "logps/chosen": -1.4998773336410522, "logps/rejected": -2.127000570297241, "loss": 1.6257, "nll_loss": 1.5863466262817383, "rewards/accuracies": 1.0, "rewards/chosen": -0.14998774230480194, "rewards/margins": 0.06271231919527054, "rewards/rejected": -0.21270006895065308, "step": 360 }, { "epoch": 0.9968933379357956, "grad_norm": 0.18113547563552856, "learning_rate": 4.233440406379032e-06, "log_odds_chosen": 0.560468852519989, "log_odds_ratio": -0.45691755414009094, "logits/chosen": -0.35995978116989136, "logits/rejected": -1.630347490310669, "logps/chosen": -1.4223926067352295, "logps/rejected": -1.871058702468872, "loss": 1.5472, "nll_loss": 1.501501202583313, "rewards/accuracies": 1.0, "rewards/chosen": -0.1422392576932907, "rewards/margins": 0.04486660659313202, "rewards/rejected": -0.18710586428642273, "step": 361 }, { "epoch": 0.9996548153261995, "grad_norm": 0.1977638155221939, "learning_rate": 4.227656622467162e-06, "log_odds_chosen": 0.6151151657104492, "log_odds_ratio": -0.4389611482620239, "logits/chosen": -0.34485432505607605, "logits/rejected": -1.5607213973999023, "logps/chosen": -1.528998851776123, "logps/rejected": -2.0421009063720703, "loss": 1.6442, "nll_loss": 1.6003334522247314, "rewards/accuracies": 1.0, "rewards/chosen": -0.15289989113807678, "rewards/margins": 0.05131019651889801, "rewards/rejected": -0.2042100876569748, "step": 362 }, { "epoch": 1.0, "grad_norm": 0.4796704351902008, "learning_rate": 4.221855084210433e-06, "log_odds_chosen": 0.42105579376220703, "log_odds_ratio": -0.5046184659004211, "logits/chosen": -0.522539496421814, "logits/rejected": -2.0535831451416016, "logps/chosen": -1.5670783519744873, "logps/rejected": -1.9137303829193115, "loss": 1.6859, "nll_loss": 1.635398030281067, "rewards/accuracies": 1.0, "rewards/chosen": -0.15670783817768097, "rewards/margins": 0.034665197134017944, "rewards/rejected": -0.1913730353116989, "step": 363 }, { "epoch": 1.0027614773904039, "grad_norm": 0.2030143141746521, "learning_rate": 4.2160358512286266e-06, "log_odds_chosen": 0.5737169981002808, "log_odds_ratio": -0.45416390895843506, "logits/chosen": -0.2930692732334137, "logits/rejected": -1.6667262315750122, "logps/chosen": -1.5078061819076538, "logps/rejected": -1.9778721332550049, "loss": 1.6304, "nll_loss": 1.5849525928497314, "rewards/accuracies": 1.0, "rewards/chosen": -0.1507806032896042, "rewards/margins": 0.04700660705566406, "rewards/rejected": -0.19778722524642944, "step": 364 }, { "epoch": 1.0055229547808078, "grad_norm": 0.18416902422904968, "learning_rate": 4.210198983323366e-06, "log_odds_chosen": 0.6264990568161011, "log_odds_ratio": -0.4338659346103668, "logits/chosen": -0.2520604133605957, "logits/rejected": -1.8486303091049194, "logps/chosen": -1.5509365797042847, "logps/rejected": -2.071831703186035, "loss": 1.6494, "nll_loss": 1.606053113937378, "rewards/accuracies": 1.0, "rewards/chosen": -0.15509365499019623, "rewards/margins": 0.052089497447013855, "rewards/rejected": -0.20718316733837128, "step": 365 }, { "epoch": 1.0082844321712117, "grad_norm": 0.19729849696159363, "learning_rate": 4.204344540477499e-06, "log_odds_chosen": 0.6402697563171387, "log_odds_ratio": -0.429515540599823, "logits/chosen": -0.288376122713089, "logits/rejected": -1.6332948207855225, "logps/chosen": -1.5338780879974365, "logps/rejected": -2.068819046020508, "loss": 1.6531, "nll_loss": 1.6101782321929932, "rewards/accuracies": 1.0, "rewards/chosen": -0.15338779985904694, "rewards/margins": 0.053494103252887726, "rewards/rejected": -0.20688191056251526, "step": 366 }, { "epoch": 1.0110459095616156, "grad_norm": 0.18270985782146454, "learning_rate": 4.1984725828544855e-06, "log_odds_chosen": 0.6716368198394775, "log_odds_ratio": -0.41607195138931274, "logits/chosen": -0.27237242460250854, "logits/rejected": -1.8720418214797974, "logps/chosen": -1.4672292470932007, "logps/rejected": -2.0190887451171875, "loss": 1.5786, "nll_loss": 1.5369707345962524, "rewards/accuracies": 1.0, "rewards/chosen": -0.1467229127883911, "rewards/margins": 0.0551859587430954, "rewards/rejected": -0.2019088715314865, "step": 367 }, { "epoch": 1.0138073869520194, "grad_norm": 0.20039811730384827, "learning_rate": 4.192583170797775e-06, "log_odds_chosen": 0.5032901167869568, "log_odds_ratio": -0.4751865863800049, "logits/chosen": -0.3330274224281311, "logits/rejected": -1.4479092359542847, "logps/chosen": -1.5037407875061035, "logps/rejected": -1.9160493612289429, "loss": 1.6307, "nll_loss": 1.5831364393234253, "rewards/accuracies": 1.0, "rewards/chosen": -0.15037408471107483, "rewards/margins": 0.041230857372283936, "rewards/rejected": -0.19160494208335876, "step": 368 }, { "epoch": 1.016568864342423, "grad_norm": 0.1855594515800476, "learning_rate": 4.186676364830187e-06, "log_odds_chosen": 0.5586062669754028, "log_odds_ratio": -0.45506125688552856, "logits/chosen": -0.23738795518875122, "logits/rejected": -1.6253269910812378, "logps/chosen": -1.566367745399475, "logps/rejected": -2.030472755432129, "loss": 1.6675, "nll_loss": 1.6219737529754639, "rewards/accuracies": 1.0, "rewards/chosen": -0.1566367745399475, "rewards/margins": 0.04641049727797508, "rewards/rejected": -0.20304730534553528, "step": 369 }, { "epoch": 1.019330341732827, "grad_norm": 0.2150687575340271, "learning_rate": 4.1807522256532925e-06, "log_odds_chosen": 0.6354942321777344, "log_odds_ratio": -0.43560659885406494, "logits/chosen": -0.29759910702705383, "logits/rejected": -1.7936424016952515, "logps/chosen": -1.621231198310852, "logps/rejected": -2.1618242263793945, "loss": 1.7339, "nll_loss": 1.6903626918792725, "rewards/accuracies": 1.0, "rewards/chosen": -0.16212311387062073, "rewards/margins": 0.054059334099292755, "rewards/rejected": -0.21618244051933289, "step": 370 }, { "epoch": 1.0220918191232309, "grad_norm": 0.18705667555332184, "learning_rate": 4.174810814146789e-06, "log_odds_chosen": 0.6377235651016235, "log_odds_ratio": -0.4334976375102997, "logits/chosen": -0.17882226407527924, "logits/rejected": -1.4699535369873047, "logps/chosen": -1.5574746131896973, "logps/rejected": -2.092029094696045, "loss": 1.6697, "nll_loss": 1.626399040222168, "rewards/accuracies": 1.0, "rewards/chosen": -0.1557474583387375, "rewards/margins": 0.05345546454191208, "rewards/rejected": -0.20920291543006897, "step": 371 }, { "epoch": 1.0248532965136348, "grad_norm": 0.18511676788330078, "learning_rate": 4.1688521913678706e-06, "log_odds_chosen": 0.7460950613021851, "log_odds_ratio": -0.39828044176101685, "logits/chosen": -0.24553045630455017, "logits/rejected": -1.5528843402862549, "logps/chosen": -1.506624460220337, "logps/rejected": -2.1312265396118164, "loss": 1.6125, "nll_loss": 1.5727205276489258, "rewards/accuracies": 1.0, "rewards/chosen": -0.15066243708133698, "rewards/margins": 0.06246021389961243, "rewards/rejected": -0.2131226509809494, "step": 372 }, { "epoch": 1.0276147739040387, "grad_norm": 0.18531759083271027, "learning_rate": 4.162876418550606e-06, "log_odds_chosen": 0.5151762962341309, "log_odds_ratio": -0.47239360213279724, "logits/chosen": -0.3635602593421936, "logits/rejected": -1.655716061592102, "logps/chosen": -1.509564995765686, "logps/rejected": -1.9321736097335815, "loss": 1.6228, "nll_loss": 1.5756025314331055, "rewards/accuracies": 1.0, "rewards/chosen": -0.15095649659633636, "rewards/margins": 0.042260847985744476, "rewards/rejected": -0.19321735203266144, "step": 373 }, { "epoch": 1.0303762512944425, "grad_norm": 0.19160981476306915, "learning_rate": 4.156883557105308e-06, "log_odds_chosen": 0.537889301776886, "log_odds_ratio": -0.46930158138275146, "logits/chosen": -0.3415728509426117, "logits/rejected": -1.7609002590179443, "logps/chosen": -1.4833260774612427, "logps/rejected": -1.925763726234436, "loss": 1.5902, "nll_loss": 1.5432225465774536, "rewards/accuracies": 1.0, "rewards/chosen": -0.1483326107263565, "rewards/margins": 0.044243764132261276, "rewards/rejected": -0.1925763636827469, "step": 374 }, { "epoch": 1.0331377286848464, "grad_norm": 0.2014252096414566, "learning_rate": 4.150873668617899e-06, "log_odds_chosen": 0.5346254110336304, "log_odds_ratio": -0.4631119966506958, "logits/chosen": -0.4020942449569702, "logits/rejected": -1.6286017894744873, "logps/chosen": -1.554813265800476, "logps/rejected": -1.9967365264892578, "loss": 1.6587, "nll_loss": 1.6124264001846313, "rewards/accuracies": 1.0, "rewards/chosen": -0.15548132359981537, "rewards/margins": 0.0441923588514328, "rewards/rejected": -0.19967366755008698, "step": 375 }, { "epoch": 1.0358992060752503, "grad_norm": 0.1909962296485901, "learning_rate": 4.144846814849282e-06, "log_odds_chosen": 0.4685186743736267, "log_odds_ratio": -0.4881165623664856, "logits/chosen": -0.19894683361053467, "logits/rejected": -1.4885890483856201, "logps/chosen": -1.5590081214904785, "logps/rejected": -1.9445595741271973, "loss": 1.6834, "nll_loss": 1.634606957435608, "rewards/accuracies": 1.0, "rewards/chosen": -0.15590080618858337, "rewards/margins": 0.03855516016483307, "rewards/rejected": -0.19445598125457764, "step": 376 }, { "epoch": 1.0386606834656542, "grad_norm": 0.1877821981906891, "learning_rate": 4.138803057734705e-06, "log_odds_chosen": 0.7200895547866821, "log_odds_ratio": -0.4008093476295471, "logits/chosen": -0.25993677973747253, "logits/rejected": -1.574484944343567, "logps/chosen": -1.456033706665039, "logps/rejected": -2.0489182472229004, "loss": 1.5752, "nll_loss": 1.5350782871246338, "rewards/accuracies": 1.0, "rewards/chosen": -0.14560337364673615, "rewards/margins": 0.05928843468427658, "rewards/rejected": -0.20489181578159332, "step": 377 }, { "epoch": 1.041422160856058, "grad_norm": 0.17346148192882538, "learning_rate": 4.132742459383122e-06, "log_odds_chosen": 0.7830832004547119, "log_odds_ratio": -0.3833864629268646, "logits/chosen": -0.27740198373794556, "logits/rejected": -1.7669141292572021, "logps/chosen": -1.380853533744812, "logps/rejected": -2.0105137825012207, "loss": 1.4985, "nll_loss": 1.4602102041244507, "rewards/accuracies": 1.0, "rewards/chosen": -0.13808535039424896, "rewards/margins": 0.06296603381633759, "rewards/rejected": -0.20105136930942535, "step": 378 }, { "epoch": 1.0441836382464618, "grad_norm": 0.183350071310997, "learning_rate": 4.126665082076559e-06, "log_odds_chosen": 0.5126218795776367, "log_odds_ratio": -0.47373971343040466, "logits/chosen": -0.3141680955886841, "logits/rejected": -1.3418911695480347, "logps/chosen": -1.516774296760559, "logps/rejected": -1.933377742767334, "loss": 1.6442, "nll_loss": 1.5968701839447021, "rewards/accuracies": 1.0, "rewards/chosen": -0.1516774296760559, "rewards/margins": 0.04166034609079361, "rewards/rejected": -0.19333778321743011, "step": 379 }, { "epoch": 1.0469451156368657, "grad_norm": 0.18558627367019653, "learning_rate": 4.120570988269472e-06, "log_odds_chosen": 0.651531994342804, "log_odds_ratio": -0.423582524061203, "logits/chosen": -0.38820552825927734, "logits/rejected": -1.826667070388794, "logps/chosen": -1.5054341554641724, "logps/rejected": -2.0457534790039062, "loss": 1.5926, "nll_loss": 1.5502351522445679, "rewards/accuracies": 1.0, "rewards/chosen": -0.1505434215068817, "rewards/margins": 0.05403192341327667, "rewards/rejected": -0.20457535982131958, "step": 380 }, { "epoch": 1.0497065930272695, "grad_norm": 0.19876988232135773, "learning_rate": 4.114460240588101e-06, "log_odds_chosen": 0.7609922885894775, "log_odds_ratio": -0.3903619050979614, "logits/chosen": -0.28415167331695557, "logits/rejected": -1.509800910949707, "logps/chosen": -1.5427088737487793, "logps/rejected": -2.184783458709717, "loss": 1.6545, "nll_loss": 1.6154515743255615, "rewards/accuracies": 1.0, "rewards/chosen": -0.15427090227603912, "rewards/margins": 0.06420743465423584, "rewards/rejected": -0.21847833693027496, "step": 381 }, { "epoch": 1.0524680704176734, "grad_norm": 0.17985652387142181, "learning_rate": 4.1083329018298356e-06, "log_odds_chosen": 0.7214679718017578, "log_odds_ratio": -0.401907354593277, "logits/chosen": -0.2644188404083252, "logits/rejected": -1.569467544555664, "logps/chosen": -1.4487037658691406, "logps/rejected": -2.043267250061035, "loss": 1.5525, "nll_loss": 1.5123515129089355, "rewards/accuracies": 1.0, "rewards/chosen": -0.14487037062644958, "rewards/margins": 0.05945635586977005, "rewards/rejected": -0.20432673394680023, "step": 382 }, { "epoch": 1.0552295478080773, "grad_norm": 0.19042105972766876, "learning_rate": 4.102189034962561e-06, "log_odds_chosen": 0.6154542565345764, "log_odds_ratio": -0.4352980852127075, "logits/chosen": -0.21143808960914612, "logits/rejected": -1.4588818550109863, "logps/chosen": -1.583728551864624, "logps/rejected": -2.1008310317993164, "loss": 1.6812, "nll_loss": 1.6376571655273438, "rewards/accuracies": 1.0, "rewards/chosen": -0.15837284922599792, "rewards/margins": 0.051710255444049835, "rewards/rejected": -0.21008309721946716, "step": 383 }, { "epoch": 1.0579910251984812, "grad_norm": 0.202115997672081, "learning_rate": 4.096028703124014e-06, "log_odds_chosen": 0.6282423138618469, "log_odds_ratio": -0.43798086047172546, "logits/chosen": -0.3399620056152344, "logits/rejected": -1.302710771560669, "logps/chosen": -1.5835597515106201, "logps/rejected": -2.1131136417388916, "loss": 1.67, "nll_loss": 1.6261749267578125, "rewards/accuracies": 1.0, "rewards/chosen": -0.1583559811115265, "rewards/margins": 0.05295538902282715, "rewards/rejected": -0.21131137013435364, "step": 384 }, { "epoch": 1.060752502588885, "grad_norm": 0.18484216928482056, "learning_rate": 4.089851969621138e-06, "log_odds_chosen": 0.7135899662971497, "log_odds_ratio": -0.41620948910713196, "logits/chosen": -0.28658849000930786, "logits/rejected": -1.5913586616516113, "logps/chosen": -1.425489902496338, "logps/rejected": -2.015927791595459, "loss": 1.5551, "nll_loss": 1.5135202407836914, "rewards/accuracies": 1.0, "rewards/chosen": -0.14254900813102722, "rewards/margins": 0.059043798595666885, "rewards/rejected": -0.20159278810024261, "step": 385 }, { "epoch": 1.063513979979289, "grad_norm": 0.20954498648643494, "learning_rate": 4.083658897929425e-06, "log_odds_chosen": 0.6113450527191162, "log_odds_ratio": -0.4348878562450409, "logits/chosen": -0.3510580360889435, "logits/rejected": -1.5097743272781372, "logps/chosen": -1.5490520000457764, "logps/rejected": -2.056283950805664, "loss": 1.6599, "nll_loss": 1.616385817527771, "rewards/accuracies": 1.0, "rewards/chosen": -0.15490520000457764, "rewards/margins": 0.050723206251859665, "rewards/rejected": -0.2056283950805664, "step": 386 }, { "epoch": 1.0662754573696929, "grad_norm": 0.19627171754837036, "learning_rate": 4.077449551692268e-06, "log_odds_chosen": 0.5390780568122864, "log_odds_ratio": -0.46470367908477783, "logits/chosen": -0.23739197850227356, "logits/rejected": -1.621540904045105, "logps/chosen": -1.555280089378357, "logps/rejected": -2.001728057861328, "loss": 1.6682, "nll_loss": 1.6217542886734009, "rewards/accuracies": 1.0, "rewards/chosen": -0.1555280089378357, "rewards/margins": 0.044644795358181, "rewards/rejected": -0.20017282664775848, "step": 387 }, { "epoch": 1.0690369347600965, "grad_norm": 0.1885172724723816, "learning_rate": 4.071223994720309e-06, "log_odds_chosen": 0.7198303937911987, "log_odds_ratio": -0.40047964453697205, "logits/chosen": -0.38902541995048523, "logits/rejected": -1.71836519241333, "logps/chosen": -1.4929300546646118, "logps/rejected": -2.0929007530212402, "loss": 1.6048, "nll_loss": 1.5647118091583252, "rewards/accuracies": 1.0, "rewards/chosen": -0.14929300546646118, "rewards/margins": 0.0599970780313015, "rewards/rejected": -0.20929010212421417, "step": 388 }, { "epoch": 1.0717984121505004, "grad_norm": 0.20288851857185364, "learning_rate": 4.064982290990777e-06, "log_odds_chosen": 0.5762845277786255, "log_odds_ratio": -0.45192721486091614, "logits/chosen": -0.3292975425720215, "logits/rejected": -1.4449775218963623, "logps/chosen": -1.4430099725723267, "logps/rejected": -1.9106184244155884, "loss": 1.5566, "nll_loss": 1.511430263519287, "rewards/accuracies": 1.0, "rewards/chosen": -0.14430099725723267, "rewards/margins": 0.04676084965467453, "rewards/rejected": -0.1910618394613266, "step": 389 }, { "epoch": 1.0745598895409043, "grad_norm": 0.18857373297214508, "learning_rate": 4.058724504646834e-06, "log_odds_chosen": 0.698142945766449, "log_odds_ratio": -0.41126978397369385, "logits/chosen": -0.3490840494632721, "logits/rejected": -1.5612983703613281, "logps/chosen": -1.4951398372650146, "logps/rejected": -2.07562255859375, "loss": 1.6046, "nll_loss": 1.563432216644287, "rewards/accuracies": 1.0, "rewards/chosen": -0.14951398968696594, "rewards/margins": 0.058048274368047714, "rewards/rejected": -0.20756226778030396, "step": 390 }, { "epoch": 1.0773213669313082, "grad_norm": 0.18817338347434998, "learning_rate": 4.0524506999969185e-06, "log_odds_chosen": 0.5495921969413757, "log_odds_ratio": -0.46190890669822693, "logits/chosen": -0.32165098190307617, "logits/rejected": -1.7347975969314575, "logps/chosen": -1.5509448051452637, "logps/rejected": -2.0019354820251465, "loss": 1.6579, "nll_loss": 1.6116652488708496, "rewards/accuracies": 1.0, "rewards/chosen": -0.15509448945522308, "rewards/margins": 0.045099057257175446, "rewards/rejected": -0.20019352436065674, "step": 391 }, { "epoch": 1.080082844321712, "grad_norm": 0.18304969370365143, "learning_rate": 4.046160941514079e-06, "log_odds_chosen": 0.7243356704711914, "log_odds_ratio": -0.4019787311553955, "logits/chosen": -0.20833127200603485, "logits/rejected": -1.3766155242919922, "logps/chosen": -1.4449529647827148, "logps/rejected": -2.0372161865234375, "loss": 1.5421, "nll_loss": 1.5019174814224243, "rewards/accuracies": 1.0, "rewards/chosen": -0.14449530839920044, "rewards/margins": 0.05922630429267883, "rewards/rejected": -0.20372159779071808, "step": 392 }, { "epoch": 1.082844321712116, "grad_norm": 0.23034730553627014, "learning_rate": 4.039855293835316e-06, "log_odds_chosen": 0.6121989488601685, "log_odds_ratio": -0.4352457523345947, "logits/chosen": -0.37496262788772583, "logits/rejected": -1.6959614753723145, "logps/chosen": -1.4758626222610474, "logps/rejected": -1.9751558303833008, "loss": 1.5737, "nll_loss": 1.530150294303894, "rewards/accuracies": 1.0, "rewards/chosen": -0.14758625626564026, "rewards/margins": 0.04992932081222534, "rewards/rejected": -0.1975155770778656, "step": 393 }, { "epoch": 1.0856057991025199, "grad_norm": 0.17257992923259735, "learning_rate": 4.033533821760917e-06, "log_odds_chosen": 0.7732821702957153, "log_odds_ratio": -0.3887802064418793, "logits/chosen": -0.3277055025100708, "logits/rejected": -1.868857741355896, "logps/chosen": -1.4420204162597656, "logps/rejected": -2.0812883377075195, "loss": 1.5466, "nll_loss": 1.5076910257339478, "rewards/accuracies": 1.0, "rewards/chosen": -0.14420203864574432, "rewards/margins": 0.06392678618431091, "rewards/rejected": -0.20812883973121643, "step": 394 }, { "epoch": 1.0883672764929238, "grad_norm": 0.1858333796262741, "learning_rate": 4.027196590253786e-06, "log_odds_chosen": 0.4944222569465637, "log_odds_ratio": -0.4831688404083252, "logits/chosen": -0.28738462924957275, "logits/rejected": -1.3927885293960571, "logps/chosen": -1.4958670139312744, "logps/rejected": -1.8963196277618408, "loss": 1.6172, "nll_loss": 1.5688387155532837, "rewards/accuracies": 1.0, "rewards/chosen": -0.14958669245243073, "rewards/margins": 0.04004526883363724, "rewards/rejected": -0.18963195383548737, "step": 395 }, { "epoch": 1.0911287538833276, "grad_norm": 0.1886051446199417, "learning_rate": 4.020843664438783e-06, "log_odds_chosen": 0.6189723610877991, "log_odds_ratio": -0.4430191218852997, "logits/chosen": -0.30028706789016724, "logits/rejected": -1.7108986377716064, "logps/chosen": -1.446290135383606, "logps/rejected": -1.9520014524459839, "loss": 1.5704, "nll_loss": 1.5261311531066895, "rewards/accuracies": 0.875, "rewards/chosen": -0.14462901651859283, "rewards/margins": 0.05057113245129585, "rewards/rejected": -0.1952001452445984, "step": 396 }, { "epoch": 1.0938902312737315, "grad_norm": 0.189390167593956, "learning_rate": 4.01447510960205e-06, "log_odds_chosen": 0.7106426358222961, "log_odds_ratio": -0.40562015771865845, "logits/chosen": -0.4158485531806946, "logits/rejected": -1.7321726083755493, "logps/chosen": -1.5334806442260742, "logps/rejected": -2.1250576972961426, "loss": 1.6332, "nll_loss": 1.592668890953064, "rewards/accuracies": 1.0, "rewards/chosen": -0.15334807336330414, "rewards/margins": 0.059157684445381165, "rewards/rejected": -0.2125057429075241, "step": 397 }, { "epoch": 1.0966517086641354, "grad_norm": 0.19324593245983124, "learning_rate": 4.008090991190341e-06, "log_odds_chosen": 0.49737095832824707, "log_odds_ratio": -0.47871023416519165, "logits/chosen": -0.20496344566345215, "logits/rejected": -1.5982781648635864, "logps/chosen": -1.570819616317749, "logps/rejected": -1.9830747842788696, "loss": 1.6702, "nll_loss": 1.622377634048462, "rewards/accuracies": 1.0, "rewards/chosen": -0.1570819765329361, "rewards/margins": 0.04122550040483475, "rewards/rejected": -0.19830746948719025, "step": 398 }, { "epoch": 1.099413186054539, "grad_norm": 0.17873111367225647, "learning_rate": 4.001691374810352e-06, "log_odds_chosen": 0.7614402770996094, "log_odds_ratio": -0.39445751905441284, "logits/chosen": -0.27544835209846497, "logits/rejected": -1.992354154586792, "logps/chosen": -1.5202884674072266, "logps/rejected": -2.1587905883789062, "loss": 1.6232, "nll_loss": 1.5837651491165161, "rewards/accuracies": 1.0, "rewards/chosen": -0.15202882885932922, "rewards/margins": 0.06385020911693573, "rewards/rejected": -0.21587903797626495, "step": 399 }, { "epoch": 1.102174663444943, "grad_norm": 0.17042513191699982, "learning_rate": 3.99527632622804e-06, "log_odds_chosen": 0.6560631990432739, "log_odds_ratio": -0.4252350628376007, "logits/chosen": -0.3314496576786041, "logits/rejected": -1.559665322303772, "logps/chosen": -1.4409160614013672, "logps/rejected": -1.9774514436721802, "loss": 1.5645, "nll_loss": 1.5220184326171875, "rewards/accuracies": 1.0, "rewards/chosen": -0.14409160614013672, "rewards/margins": 0.05365355312824249, "rewards/rejected": -0.19774514436721802, "step": 400 }, { "epoch": 1.1049361408353469, "grad_norm": 0.1929718255996704, "learning_rate": 3.988845911367957e-06, "log_odds_chosen": 0.6236564517021179, "log_odds_ratio": -0.4364378750324249, "logits/chosen": -0.3177638649940491, "logits/rejected": -1.5883958339691162, "logps/chosen": -1.598113775253296, "logps/rejected": -2.1222128868103027, "loss": 1.6877, "nll_loss": 1.644063949584961, "rewards/accuracies": 1.0, "rewards/chosen": -0.1598113626241684, "rewards/margins": 0.052409932017326355, "rewards/rejected": -0.21222129464149475, "step": 401 }, { "epoch": 1.1076976182257507, "grad_norm": 0.17265060544013977, "learning_rate": 3.982400196312565e-06, "log_odds_chosen": 0.6908576488494873, "log_odds_ratio": -0.4076445698738098, "logits/chosen": -0.28831660747528076, "logits/rejected": -1.4703201055526733, "logps/chosen": -1.4102579355239868, "logps/rejected": -1.9662861824035645, "loss": 1.5263, "nll_loss": 1.4855272769927979, "rewards/accuracies": 1.0, "rewards/chosen": -0.14102579653263092, "rewards/margins": 0.05560281127691269, "rewards/rejected": -0.1966286301612854, "step": 402 }, { "epoch": 1.1104590956161546, "grad_norm": 0.18258056044578552, "learning_rate": 3.975939247301558e-06, "log_odds_chosen": 0.6171894669532776, "log_odds_ratio": -0.43477940559387207, "logits/chosen": -0.34336021542549133, "logits/rejected": -1.718552589416504, "logps/chosen": -1.5480486154556274, "logps/rejected": -2.061230421066284, "loss": 1.6639, "nll_loss": 1.620387077331543, "rewards/accuracies": 1.0, "rewards/chosen": -0.15480485558509827, "rewards/margins": 0.051318198442459106, "rewards/rejected": -0.20612303912639618, "step": 403 }, { "epoch": 1.1132205730065585, "grad_norm": 0.17894716560840607, "learning_rate": 3.969463130731183e-06, "log_odds_chosen": 0.601898729801178, "log_odds_ratio": -0.44408684968948364, "logits/chosen": -0.2595762610435486, "logits/rejected": -1.671970248222351, "logps/chosen": -1.480761170387268, "logps/rejected": -1.973817229270935, "loss": 1.5901, "nll_loss": 1.5457121133804321, "rewards/accuracies": 1.0, "rewards/chosen": -0.148076131939888, "rewards/margins": 0.049305595457553864, "rewards/rejected": -0.19738171994686127, "step": 404 }, { "epoch": 1.1159820503969624, "grad_norm": 0.182702898979187, "learning_rate": 3.9629719131535595e-06, "log_odds_chosen": 0.5538443922996521, "log_odds_ratio": -0.463712602853775, "logits/chosen": -0.3739258646965027, "logits/rejected": -1.5419493913650513, "logps/chosen": -1.4951484203338623, "logps/rejected": -1.9511198997497559, "loss": 1.6103, "nll_loss": 1.563896656036377, "rewards/accuracies": 1.0, "rewards/chosen": -0.14951485395431519, "rewards/margins": 0.04559716209769249, "rewards/rejected": -0.19511200487613678, "step": 405 }, { "epoch": 1.1187435277873663, "grad_norm": 0.1713375300168991, "learning_rate": 3.9564656612759904e-06, "log_odds_chosen": 0.6431328058242798, "log_odds_ratio": -0.42764732241630554, "logits/chosen": -0.29896292090415955, "logits/rejected": -1.8772965669631958, "logps/chosen": -1.4999200105667114, "logps/rejected": -2.031973361968994, "loss": 1.6098, "nll_loss": 1.5670844316482544, "rewards/accuracies": 1.0, "rewards/chosen": -0.14999200403690338, "rewards/margins": 0.05320533737540245, "rewards/rejected": -0.20319733023643494, "step": 406 }, { "epoch": 1.1215050051777702, "grad_norm": 0.1880570352077484, "learning_rate": 3.94994444196028e-06, "log_odds_chosen": 0.6853294968605042, "log_odds_ratio": -0.41755619645118713, "logits/chosen": -0.2672858238220215, "logits/rejected": -1.5635608434677124, "logps/chosen": -1.5350513458251953, "logps/rejected": -2.1063671112060547, "loss": 1.6214, "nll_loss": 1.579624056816101, "rewards/accuracies": 1.0, "rewards/chosen": -0.1535051316022873, "rewards/margins": 0.0571315735578537, "rewards/rejected": -0.21063672006130219, "step": 407 }, { "epoch": 1.124266482568174, "grad_norm": 0.17640192806720734, "learning_rate": 3.943408322222049e-06, "log_odds_chosen": 0.6473301649093628, "log_odds_ratio": -0.430767297744751, "logits/chosen": -0.25322699546813965, "logits/rejected": -1.3362030982971191, "logps/chosen": -1.4611663818359375, "logps/rejected": -1.9962221384048462, "loss": 1.5796, "nll_loss": 1.5365506410598755, "rewards/accuracies": 1.0, "rewards/chosen": -0.14611662924289703, "rewards/margins": 0.053505584597587585, "rewards/rejected": -0.19962221384048462, "step": 408 }, { "epoch": 1.127027959958578, "grad_norm": 0.1975967288017273, "learning_rate": 3.936857369230037e-06, "log_odds_chosen": 0.5013847351074219, "log_odds_ratio": -0.48179811239242554, "logits/chosen": -0.24628515541553497, "logits/rejected": -1.5175528526306152, "logps/chosen": -1.5307085514068604, "logps/rejected": -1.9439282417297363, "loss": 1.6331, "nll_loss": 1.584876537322998, "rewards/accuracies": 1.0, "rewards/chosen": -0.153070867061615, "rewards/margins": 0.04132195562124252, "rewards/rejected": -0.1943928301334381, "step": 409 }, { "epoch": 1.1297894373489816, "grad_norm": 0.19114039838314056, "learning_rate": 3.930291650305424e-06, "log_odds_chosen": 0.6988283395767212, "log_odds_ratio": -0.40597397089004517, "logits/chosen": -0.25875231623649597, "logits/rejected": -1.3210163116455078, "logps/chosen": -1.4922490119934082, "logps/rejected": -2.0712697505950928, "loss": 1.5831, "nll_loss": 1.5425041913986206, "rewards/accuracies": 1.0, "rewards/chosen": -0.1492249071598053, "rewards/margins": 0.05790204927325249, "rewards/rejected": -0.20712696015834808, "step": 410 }, { "epoch": 1.1325509147393855, "grad_norm": 0.1878093183040619, "learning_rate": 3.92371123292113e-06, "log_odds_chosen": 0.6247770190238953, "log_odds_ratio": -0.43160274624824524, "logits/chosen": -0.2506449520587921, "logits/rejected": -1.691251516342163, "logps/chosen": -1.5277273654937744, "logps/rejected": -2.0446317195892334, "loss": 1.6273, "nll_loss": 1.5841726064682007, "rewards/accuracies": 1.0, "rewards/chosen": -0.15277275443077087, "rewards/margins": 0.051690444350242615, "rewards/rejected": -0.2044631838798523, "step": 411 }, { "epoch": 1.1353123921297894, "grad_norm": 0.1842024326324463, "learning_rate": 3.917116184701125e-06, "log_odds_chosen": 0.7279251217842102, "log_odds_ratio": -0.4019698202610016, "logits/chosen": -0.2969970107078552, "logits/rejected": -1.9143730401992798, "logps/chosen": -1.4923046827316284, "logps/rejected": -2.099824905395508, "loss": 1.5904, "nll_loss": 1.5501734018325806, "rewards/accuracies": 1.0, "rewards/chosen": -0.1492304652929306, "rewards/margins": 0.0607520155608654, "rewards/rejected": -0.2099824696779251, "step": 412 }, { "epoch": 1.1380738695201933, "grad_norm": 0.2017175853252411, "learning_rate": 3.910506573419734e-06, "log_odds_chosen": 0.8388389348983765, "log_odds_ratio": -0.3656638562679291, "logits/chosen": -0.27687397599220276, "logits/rejected": -1.5076192617416382, "logps/chosen": -1.4898481369018555, "logps/rejected": -2.190901279449463, "loss": 1.5889, "nll_loss": 1.5523051023483276, "rewards/accuracies": 1.0, "rewards/chosen": -0.14898481965065002, "rewards/margins": 0.07010531425476074, "rewards/rejected": -0.21909013390541077, "step": 413 }, { "epoch": 1.1408353469105972, "grad_norm": 0.18677456676959991, "learning_rate": 3.903882467000938e-06, "log_odds_chosen": 0.789838433265686, "log_odds_ratio": -0.37885501980781555, "logits/chosen": -0.2613790035247803, "logits/rejected": -1.6607636213302612, "logps/chosen": -1.5320932865142822, "logps/rejected": -2.194924831390381, "loss": 1.6278, "nll_loss": 1.5898661613464355, "rewards/accuracies": 1.0, "rewards/chosen": -0.15320934355258942, "rewards/margins": 0.06628316640853882, "rewards/rejected": -0.21949250996112823, "step": 414 }, { "epoch": 1.143596824301001, "grad_norm": 0.18295292556285858, "learning_rate": 3.897243933517679e-06, "log_odds_chosen": 0.697784423828125, "log_odds_ratio": -0.40981873869895935, "logits/chosen": -0.36293089389801025, "logits/rejected": -1.7186784744262695, "logps/chosen": -1.4446918964385986, "logps/rejected": -2.0167624950408936, "loss": 1.5485, "nll_loss": 1.50748872756958, "rewards/accuracies": 1.0, "rewards/chosen": -0.14446918666362762, "rewards/margins": 0.05720707029104233, "rewards/rejected": -0.20167624950408936, "step": 415 }, { "epoch": 1.146358301691405, "grad_norm": 0.18878361582756042, "learning_rate": 3.890591041191162e-06, "log_odds_chosen": 0.7591959834098816, "log_odds_ratio": -0.39290934801101685, "logits/chosen": -0.2790437340736389, "logits/rejected": -1.4805063009262085, "logps/chosen": -1.4139214754104614, "logps/rejected": -2.03341007232666, "loss": 1.5176, "nll_loss": 1.478288173675537, "rewards/accuracies": 1.0, "rewards/chosen": -0.14139214158058167, "rewards/margins": 0.06194887310266495, "rewards/rejected": -0.20334100723266602, "step": 416 }, { "epoch": 1.1491197790818088, "grad_norm": 0.2020062804222107, "learning_rate": 3.883923858390149e-06, "log_odds_chosen": 0.75853431224823, "log_odds_ratio": -0.38675639033317566, "logits/chosen": -0.3220999538898468, "logits/rejected": -1.7704015970230103, "logps/chosen": -1.5146489143371582, "logps/rejected": -2.149966239929199, "loss": 1.6306, "nll_loss": 1.5919427871704102, "rewards/accuracies": 1.0, "rewards/chosen": -0.15146489441394806, "rewards/margins": 0.06353174149990082, "rewards/rejected": -0.21499663591384888, "step": 417 }, { "epoch": 1.1518812564722127, "grad_norm": 0.17892806231975555, "learning_rate": 3.8772424536302565e-06, "log_odds_chosen": 0.5580451488494873, "log_odds_ratio": -0.4568861126899719, "logits/chosen": -0.3027711510658264, "logits/rejected": -1.707384467124939, "logps/chosen": -1.5017491579055786, "logps/rejected": -1.9595609903335571, "loss": 1.5943, "nll_loss": 1.5486559867858887, "rewards/accuracies": 1.0, "rewards/chosen": -0.15017491579055786, "rewards/margins": 0.04578119143843651, "rewards/rejected": -0.19595609605312347, "step": 418 }, { "epoch": 1.1546427338626164, "grad_norm": 0.19901612401008606, "learning_rate": 3.870546895573258e-06, "log_odds_chosen": 0.6981452703475952, "log_odds_ratio": -0.4115220010280609, "logits/chosen": -0.3455246686935425, "logits/rejected": -1.3123235702514648, "logps/chosen": -1.479371428489685, "logps/rejected": -2.0550196170806885, "loss": 1.5787, "nll_loss": 1.5375878810882568, "rewards/accuracies": 1.0, "rewards/chosen": -0.14793714880943298, "rewards/margins": 0.05756482481956482, "rewards/rejected": -0.2055019587278366, "step": 419 }, { "epoch": 1.1574042112530203, "grad_norm": 0.1906924694776535, "learning_rate": 3.863837253026372e-06, "log_odds_chosen": 0.7406031489372253, "log_odds_ratio": -0.40153640508651733, "logits/chosen": -0.37284234166145325, "logits/rejected": -1.6314030885696411, "logps/chosen": -1.4350590705871582, "logps/rejected": -2.0481374263763428, "loss": 1.5474, "nll_loss": 1.507211446762085, "rewards/accuracies": 1.0, "rewards/chosen": -0.14350590109825134, "rewards/margins": 0.061307840049266815, "rewards/rejected": -0.20481374859809875, "step": 420 }, { "epoch": 1.1601656886434242, "grad_norm": 0.19268250465393066, "learning_rate": 3.857113594941556e-06, "log_odds_chosen": 0.5946828126907349, "log_odds_ratio": -0.44611191749572754, "logits/chosen": -0.3333417475223541, "logits/rejected": -1.820539116859436, "logps/chosen": -1.5357310771942139, "logps/rejected": -2.030311346054077, "loss": 1.6374, "nll_loss": 1.5927873849868774, "rewards/accuracies": 1.0, "rewards/chosen": -0.15357311069965363, "rewards/margins": 0.04945802688598633, "rewards/rejected": -0.20303113758563995, "step": 421 }, { "epoch": 1.162927166033828, "grad_norm": 0.1852002888917923, "learning_rate": 3.8503759904148005e-06, "log_odds_chosen": 0.753965437412262, "log_odds_ratio": -0.39210668206214905, "logits/chosen": -0.2807456851005554, "logits/rejected": -1.7219178676605225, "logps/chosen": -1.4659725427627563, "logps/rejected": -2.0921542644500732, "loss": 1.5699, "nll_loss": 1.5306528806686401, "rewards/accuracies": 1.0, "rewards/chosen": -0.1465972512960434, "rewards/margins": 0.0626181811094284, "rewards/rejected": -0.209215447306633, "step": 422 }, { "epoch": 1.165688643424232, "grad_norm": 0.18899178504943848, "learning_rate": 3.843624508685416e-06, "log_odds_chosen": 0.6049898266792297, "log_odds_ratio": -0.43925538659095764, "logits/chosen": -0.3452371060848236, "logits/rejected": -1.7807867527008057, "logps/chosen": -1.5395519733428955, "logps/rejected": -2.040437698364258, "loss": 1.6476, "nll_loss": 1.603638768196106, "rewards/accuracies": 1.0, "rewards/chosen": -0.15395519137382507, "rewards/margins": 0.05008859187364578, "rewards/rejected": -0.20404377579689026, "step": 423 }, { "epoch": 1.1684501208146358, "grad_norm": 0.1812516748905182, "learning_rate": 3.8368592191353246e-06, "log_odds_chosen": 0.6994820833206177, "log_odds_ratio": -0.412604957818985, "logits/chosen": -0.32768282294273376, "logits/rejected": -1.7750557661056519, "logps/chosen": -1.4714299440383911, "logps/rejected": -2.0467658042907715, "loss": 1.5632, "nll_loss": 1.5219401121139526, "rewards/accuracies": 1.0, "rewards/chosen": -0.14714302122592926, "rewards/margins": 0.05753358453512192, "rewards/rejected": -0.20467659831047058, "step": 424 }, { "epoch": 1.1712115982050397, "grad_norm": 0.18136382102966309, "learning_rate": 3.830080191288342e-06, "log_odds_chosen": 0.6253668665885925, "log_odds_ratio": -0.42983123660087585, "logits/chosen": -0.2867172360420227, "logits/rejected": -1.5802618265151978, "logps/chosen": -1.5029292106628418, "logps/rejected": -2.018980026245117, "loss": 1.6134, "nll_loss": 1.5704439878463745, "rewards/accuracies": 1.0, "rewards/chosen": -0.15029294788837433, "rewards/margins": 0.05160506069660187, "rewards/rejected": -0.2018979787826538, "step": 425 }, { "epoch": 1.1739730755954436, "grad_norm": 0.19747108221054077, "learning_rate": 3.823287494809469e-06, "log_odds_chosen": 0.7677461504936218, "log_odds_ratio": -0.38692528009414673, "logits/chosen": -0.38328537344932556, "logits/rejected": -1.6817901134490967, "logps/chosen": -1.4731775522232056, "logps/rejected": -2.1067628860473633, "loss": 1.5885, "nll_loss": 1.5498508214950562, "rewards/accuracies": 1.0, "rewards/chosen": -0.14731775224208832, "rewards/margins": 0.06335853040218353, "rewards/rejected": -0.21067628264427185, "step": 426 }, { "epoch": 1.1767345529858475, "grad_norm": 0.185506671667099, "learning_rate": 3.816481199504171e-06, "log_odds_chosen": 0.6367388963699341, "log_odds_ratio": -0.4264675974845886, "logits/chosen": -0.3647797703742981, "logits/rejected": -1.5863122940063477, "logps/chosen": -1.5056264400482178, "logps/rejected": -2.031801462173462, "loss": 1.6257, "nll_loss": 1.5830763578414917, "rewards/accuracies": 1.0, "rewards/chosen": -0.15056264400482178, "rewards/margins": 0.052617497742176056, "rewards/rejected": -0.20318013429641724, "step": 427 }, { "epoch": 1.1794960303762512, "grad_norm": 0.1755794882774353, "learning_rate": 3.8096613753176635e-06, "log_odds_chosen": 0.584825873374939, "log_odds_ratio": -0.4493084251880646, "logits/chosen": -0.27612411975860596, "logits/rejected": -1.4378138780593872, "logps/chosen": -1.3924418687820435, "logps/rejected": -1.8575388193130493, "loss": 1.5104, "nll_loss": 1.4654783010482788, "rewards/accuracies": 1.0, "rewards/chosen": -0.1392441838979721, "rewards/margins": 0.046509698033332825, "rewards/rejected": -0.18575388193130493, "step": 428 }, { "epoch": 1.1822575077666553, "grad_norm": 0.19249552488327026, "learning_rate": 3.8028280923341927e-06, "log_odds_chosen": 0.6473675966262817, "log_odds_ratio": -0.4286215901374817, "logits/chosen": -0.384939968585968, "logits/rejected": -1.5598773956298828, "logps/chosen": -1.4334121942520142, "logps/rejected": -1.9639126062393188, "loss": 1.5432, "nll_loss": 1.500383973121643, "rewards/accuracies": 1.0, "rewards/chosen": -0.14334121346473694, "rewards/margins": 0.05305003747344017, "rewards/rejected": -0.1963912546634674, "step": 429 }, { "epoch": 1.185018985157059, "grad_norm": 0.19077420234680176, "learning_rate": 3.7959814207763134e-06, "log_odds_chosen": 0.7534008026123047, "log_odds_ratio": -0.3935543894767761, "logits/chosen": -0.33575281500816345, "logits/rejected": -1.7560871839523315, "logps/chosen": -1.5460617542266846, "logps/rejected": -2.17948579788208, "loss": 1.6361, "nll_loss": 1.5967589616775513, "rewards/accuracies": 1.0, "rewards/chosen": -0.1546061784029007, "rewards/margins": 0.0633423924446106, "rewards/rejected": -0.2179485559463501, "step": 430 }, { "epoch": 1.1877804625474628, "grad_norm": 0.18546007573604584, "learning_rate": 3.789121431004168e-06, "log_odds_chosen": 0.6419503092765808, "log_odds_ratio": -0.42676353454589844, "logits/chosen": -0.36785295605659485, "logits/rejected": -1.6887683868408203, "logps/chosen": -1.5084519386291504, "logps/rejected": -2.036573886871338, "loss": 1.5997, "nll_loss": 1.5570096969604492, "rewards/accuracies": 1.0, "rewards/chosen": -0.15084518492221832, "rewards/margins": 0.05281219631433487, "rewards/rejected": -0.2036573737859726, "step": 431 }, { "epoch": 1.1905419399378667, "grad_norm": 0.194391131401062, "learning_rate": 3.782248193514766e-06, "log_odds_chosen": 0.6094563007354736, "log_odds_ratio": -0.43929269909858704, "logits/chosen": -0.29947635531425476, "logits/rejected": -1.4955629110336304, "logps/chosen": -1.4138904809951782, "logps/rejected": -1.906821846961975, "loss": 1.5403, "nll_loss": 1.4963343143463135, "rewards/accuracies": 1.0, "rewards/chosen": -0.14138904213905334, "rewards/margins": 0.049293152987957, "rewards/rejected": -0.19068220257759094, "step": 432 }, { "epoch": 1.1933034173282706, "grad_norm": 0.19166447222232819, "learning_rate": 3.775361778941257e-06, "log_odds_chosen": 0.7113713026046753, "log_odds_ratio": -0.403501957654953, "logits/chosen": -0.308038592338562, "logits/rejected": -1.6283587217330933, "logps/chosen": -1.501065731048584, "logps/rejected": -2.0941624641418457, "loss": 1.6079, "nll_loss": 1.567560076713562, "rewards/accuracies": 1.0, "rewards/chosen": -0.15010657906532288, "rewards/margins": 0.05930966138839722, "rewards/rejected": -0.2094162404537201, "step": 433 }, { "epoch": 1.1960648947186745, "grad_norm": 0.18622690439224243, "learning_rate": 3.7684622580522057e-06, "log_odds_chosen": 0.42840951681137085, "log_odds_ratio": -0.5051460862159729, "logits/chosen": -0.2970636785030365, "logits/rejected": -1.6902334690093994, "logps/chosen": -1.4522030353546143, "logps/rejected": -1.7967474460601807, "loss": 1.5557, "nll_loss": 1.5051684379577637, "rewards/accuracies": 1.0, "rewards/chosen": -0.1452203243970871, "rewards/margins": 0.03445442020893097, "rewards/rejected": -0.17967472970485687, "step": 434 }, { "epoch": 1.1988263721090784, "grad_norm": 0.17388908565044403, "learning_rate": 3.761549701750865e-06, "log_odds_chosen": 0.7077789902687073, "log_odds_ratio": -0.41333314776420593, "logits/chosen": -0.3090924024581909, "logits/rejected": -1.843423843383789, "logps/chosen": -1.4652737379074097, "logps/rejected": -2.052661418914795, "loss": 1.5629, "nll_loss": 1.5216063261032104, "rewards/accuracies": 1.0, "rewards/chosen": -0.14652739465236664, "rewards/margins": 0.058738768100738525, "rewards/rejected": -0.20526614785194397, "step": 435 }, { "epoch": 1.2015878494994823, "grad_norm": 0.17293590307235718, "learning_rate": 3.7546241810744444e-06, "log_odds_chosen": 0.5752092003822327, "log_odds_ratio": -0.4525831639766693, "logits/chosen": -0.32182741165161133, "logits/rejected": -1.3567001819610596, "logps/chosen": -1.4578838348388672, "logps/rejected": -1.9286342859268188, "loss": 1.5493, "nll_loss": 1.50407075881958, "rewards/accuracies": 1.0, "rewards/chosen": -0.14578840136528015, "rewards/margins": 0.047075025737285614, "rewards/rejected": -0.19286341965198517, "step": 436 }, { "epoch": 1.2043493268898862, "grad_norm": 0.19718696177005768, "learning_rate": 3.747685767193385e-06, "log_odds_chosen": 0.5642775297164917, "log_odds_ratio": -0.4569299817085266, "logits/chosen": -0.3862457871437073, "logits/rejected": -1.4825284481048584, "logps/chosen": -1.5323418378829956, "logps/rejected": -1.9992895126342773, "loss": 1.6529, "nll_loss": 1.6072373390197754, "rewards/accuracies": 1.0, "rewards/chosen": -0.15323419868946075, "rewards/margins": 0.046694785356521606, "rewards/rejected": -0.19992896914482117, "step": 437 }, { "epoch": 1.20711080428029, "grad_norm": 0.17915678024291992, "learning_rate": 3.740734531410626e-06, "log_odds_chosen": 0.7052585482597351, "log_odds_ratio": -0.4100540578365326, "logits/chosen": -0.3775138258934021, "logits/rejected": -1.859032154083252, "logps/chosen": -1.448523998260498, "logps/rejected": -2.021770477294922, "loss": 1.5652, "nll_loss": 1.5241587162017822, "rewards/accuracies": 1.0, "rewards/chosen": -0.1448523849248886, "rewards/margins": 0.057324644178152084, "rewards/rejected": -0.202177032828331, "step": 438 }, { "epoch": 1.2098722816706937, "grad_norm": 0.19827017188072205, "learning_rate": 3.7337705451608676e-06, "log_odds_chosen": 0.859776496887207, "log_odds_ratio": -0.3666639029979706, "logits/chosen": -0.4492262601852417, "logits/rejected": -1.819311261177063, "logps/chosen": -1.3853495121002197, "logps/rejected": -2.082892894744873, "loss": 1.4954, "nll_loss": 1.4586937427520752, "rewards/accuracies": 1.0, "rewards/chosen": -0.13853494822978973, "rewards/margins": 0.06975432485342026, "rewards/rejected": -0.2082892656326294, "step": 439 }, { "epoch": 1.2126337590610976, "grad_norm": 0.19302192330360413, "learning_rate": 3.7267938800098454e-06, "log_odds_chosen": 0.8500153422355652, "log_odds_ratio": -0.36537328362464905, "logits/chosen": -0.5428920984268188, "logits/rejected": -1.821568489074707, "logps/chosen": -1.3568768501281738, "logps/rejected": -2.052211046218872, "loss": 1.4693, "nll_loss": 1.4327150583267212, "rewards/accuracies": 1.0, "rewards/chosen": -0.1356876939535141, "rewards/margins": 0.06953340768814087, "rewards/rejected": -0.20522108674049377, "step": 440 }, { "epoch": 1.2153952364515015, "grad_norm": 0.19974324107170105, "learning_rate": 3.7198046076535865e-06, "log_odds_chosen": 0.7102779746055603, "log_odds_ratio": -0.40437954664230347, "logits/chosen": -0.3492871820926666, "logits/rejected": -1.5553520917892456, "logps/chosen": -1.4951152801513672, "logps/rejected": -2.0831298828125, "loss": 1.6058, "nll_loss": 1.565347671508789, "rewards/accuracies": 1.0, "rewards/chosen": -0.14951153099536896, "rewards/margins": 0.05880144238471985, "rewards/rejected": -0.20831298828125, "step": 441 }, { "epoch": 1.2181567138419054, "grad_norm": 0.18230760097503662, "learning_rate": 3.71280279991768e-06, "log_odds_chosen": 0.7506513595581055, "log_odds_ratio": -0.3923317492008209, "logits/chosen": -0.4152137339115143, "logits/rejected": -1.5510238409042358, "logps/chosen": -1.4440295696258545, "logps/rejected": -2.0645270347595215, "loss": 1.5654, "nll_loss": 1.5261952877044678, "rewards/accuracies": 1.0, "rewards/chosen": -0.14440295100212097, "rewards/margins": 0.06204976886510849, "rewards/rejected": -0.20645272731781006, "step": 442 }, { "epoch": 1.2209181912323093, "grad_norm": 0.17220567166805267, "learning_rate": 3.705788528756533e-06, "log_odds_chosen": 0.7942907810211182, "log_odds_ratio": -0.37784573435783386, "logits/chosen": -0.43315792083740234, "logits/rejected": -1.8588192462921143, "logps/chosen": -1.4370851516723633, "logps/rejected": -2.0935044288635254, "loss": 1.5386, "nll_loss": 1.500786304473877, "rewards/accuracies": 1.0, "rewards/chosen": -0.14370852708816528, "rewards/margins": 0.06564193964004517, "rewards/rejected": -0.20935045182704926, "step": 443 }, { "epoch": 1.2236796686227132, "grad_norm": 0.34615159034729004, "learning_rate": 3.698761866252635e-06, "log_odds_chosen": 0.6159027218818665, "log_odds_ratio": -0.44577932357788086, "logits/chosen": -0.4024880826473236, "logits/rejected": -1.7161859273910522, "logps/chosen": -1.533597707748413, "logps/rejected": -2.050471067428589, "loss": 1.6393, "nll_loss": 1.5947318077087402, "rewards/accuracies": 1.0, "rewards/chosen": -0.1533597856760025, "rewards/margins": 0.0516873374581337, "rewards/rejected": -0.2050471305847168, "step": 444 }, { "epoch": 1.226441146013117, "grad_norm": 0.19309046864509583, "learning_rate": 3.691722884615814e-06, "log_odds_chosen": 0.6141983270645142, "log_odds_ratio": -0.43802952766418457, "logits/chosen": -0.2951904237270355, "logits/rejected": -1.1809048652648926, "logps/chosen": -1.4468181133270264, "logps/rejected": -1.9419705867767334, "loss": 1.5744, "nll_loss": 1.5305941104888916, "rewards/accuracies": 1.0, "rewards/chosen": -0.14468181133270264, "rewards/margins": 0.04951523244380951, "rewards/rejected": -0.19419705867767334, "step": 445 }, { "epoch": 1.229202623403521, "grad_norm": 0.19239898025989532, "learning_rate": 3.684671656182497e-06, "log_odds_chosen": 0.7434544563293457, "log_odds_ratio": -0.39474281668663025, "logits/chosen": -0.4174625277519226, "logits/rejected": -1.7456690073013306, "logps/chosen": -1.4840799570083618, "logps/rejected": -2.0977742671966553, "loss": 1.5985, "nll_loss": 1.5590325593948364, "rewards/accuracies": 1.0, "rewards/chosen": -0.14840799570083618, "rewards/margins": 0.06136942654848099, "rewards/rejected": -0.20977741479873657, "step": 446 }, { "epoch": 1.2319641007939248, "grad_norm": 0.19208678603172302, "learning_rate": 3.6776082534149664e-06, "log_odds_chosen": 0.7092225551605225, "log_odds_ratio": -0.40430110692977905, "logits/chosen": -0.4059605598449707, "logits/rejected": -1.8414146900177002, "logps/chosen": -1.5050946474075317, "logps/rejected": -2.0934860706329346, "loss": 1.5881, "nll_loss": 1.5477027893066406, "rewards/accuracies": 1.0, "rewards/chosen": -0.15050947666168213, "rewards/margins": 0.05883914604783058, "rewards/rejected": -0.2093486189842224, "step": 447 }, { "epoch": 1.2347255781843287, "grad_norm": 0.18131853640079498, "learning_rate": 3.670532748900615e-06, "log_odds_chosen": 0.7535479664802551, "log_odds_ratio": -0.3983326554298401, "logits/chosen": -0.3855303227901459, "logits/rejected": -1.6423949003219604, "logps/chosen": -1.4195005893707275, "logps/rejected": -2.0328564643859863, "loss": 1.5252, "nll_loss": 1.4853616952896118, "rewards/accuracies": 1.0, "rewards/chosen": -0.14195004105567932, "rewards/margins": 0.06133558601140976, "rewards/rejected": -0.20328564941883087, "step": 448 }, { "epoch": 1.2374870555747326, "grad_norm": 0.1773749440908432, "learning_rate": 3.663445215351198e-06, "log_odds_chosen": 0.794275164604187, "log_odds_ratio": -0.38321515917778015, "logits/chosen": -0.4386705160140991, "logits/rejected": -1.5580754280090332, "logps/chosen": -1.4094889163970947, "logps/rejected": -2.067706823348999, "loss": 1.5152, "nll_loss": 1.476863145828247, "rewards/accuracies": 1.0, "rewards/chosen": -0.14094889163970947, "rewards/margins": 0.06582178175449371, "rewards/rejected": -0.20677067339420319, "step": 449 }, { "epoch": 1.2402485329651363, "grad_norm": 0.18508228659629822, "learning_rate": 3.656345725602089e-06, "log_odds_chosen": 0.8697155714035034, "log_odds_ratio": -0.35649657249450684, "logits/chosen": -0.36322081089019775, "logits/rejected": -1.8790079355239868, "logps/chosen": -1.5029995441436768, "logps/rejected": -2.235288143157959, "loss": 1.5982, "nll_loss": 1.5625407695770264, "rewards/accuracies": 1.0, "rewards/chosen": -0.15029993653297424, "rewards/margins": 0.0732288807630539, "rewards/rejected": -0.22352883219718933, "step": 450 }, { "epoch": 1.2430100103555402, "grad_norm": 0.19414876401424408, "learning_rate": 3.6492343526115292e-06, "log_odds_chosen": 0.6894505023956299, "log_odds_ratio": -0.4081804156303406, "logits/chosen": -0.26924291253089905, "logits/rejected": -1.8292944431304932, "logps/chosen": -1.5266273021697998, "logps/rejected": -2.100613594055176, "loss": 1.612, "nll_loss": 1.5711458921432495, "rewards/accuracies": 1.0, "rewards/chosen": -0.1526627391576767, "rewards/margins": 0.05739862471818924, "rewards/rejected": -0.21006137132644653, "step": 451 }, { "epoch": 1.245771487745944, "grad_norm": 0.18495413661003113, "learning_rate": 3.642111169459879e-06, "log_odds_chosen": 0.5452620983123779, "log_odds_ratio": -0.4608571529388428, "logits/chosen": -0.3355942666530609, "logits/rejected": -1.535355806350708, "logps/chosen": -1.489123821258545, "logps/rejected": -1.9351115226745605, "loss": 1.5796, "nll_loss": 1.533546805381775, "rewards/accuracies": 1.0, "rewards/chosen": -0.14891238510608673, "rewards/margins": 0.044598765671253204, "rewards/rejected": -0.19351115822792053, "step": 452 }, { "epoch": 1.248532965136348, "grad_norm": 0.18209105730056763, "learning_rate": 3.634976249348867e-06, "log_odds_chosen": 0.6647239923477173, "log_odds_ratio": -0.42399975657463074, "logits/chosen": -0.3849475681781769, "logits/rejected": -1.8075506687164307, "logps/chosen": -1.4811009168624878, "logps/rejected": -2.028481960296631, "loss": 1.5873, "nll_loss": 1.5449340343475342, "rewards/accuracies": 1.0, "rewards/chosen": -0.14811009168624878, "rewards/margins": 0.05473810061812401, "rewards/rejected": -0.2028481811285019, "step": 453 }, { "epoch": 1.2512944425267518, "grad_norm": 0.20314469933509827, "learning_rate": 3.6278296656008366e-06, "log_odds_chosen": 0.8694459199905396, "log_odds_ratio": -0.371795654296875, "logits/chosen": -0.4006415903568268, "logits/rejected": -1.7445772886276245, "logps/chosen": -1.3579434156417847, "logps/rejected": -2.065506935119629, "loss": 1.4782, "nll_loss": 1.4409888982772827, "rewards/accuracies": 1.0, "rewards/chosen": -0.13579432666301727, "rewards/margins": 0.07075636833906174, "rewards/rejected": -0.2065506875514984, "step": 454 }, { "epoch": 1.2540559199171557, "grad_norm": 0.1716334968805313, "learning_rate": 3.6206714916579925e-06, "log_odds_chosen": 0.7703570127487183, "log_odds_ratio": -0.3888989984989166, "logits/chosen": -0.38477808237075806, "logits/rejected": -1.7767083644866943, "logps/chosen": -1.398410439491272, "logps/rejected": -2.027221441268921, "loss": 1.4987, "nll_loss": 1.4598515033721924, "rewards/accuracies": 1.0, "rewards/chosen": -0.13984103500843048, "rewards/margins": 0.06288108229637146, "rewards/rejected": -0.20272211730480194, "step": 455 }, { "epoch": 1.2568173973075596, "grad_norm": 0.18591812252998352, "learning_rate": 3.613501801081648e-06, "log_odds_chosen": 0.617323637008667, "log_odds_ratio": -0.4372296929359436, "logits/chosen": -0.36923906207084656, "logits/rejected": -1.4284781217575073, "logps/chosen": -1.4494431018829346, "logps/rejected": -1.9530479907989502, "loss": 1.5618, "nll_loss": 1.5181188583374023, "rewards/accuracies": 1.0, "rewards/chosen": -0.14494431018829346, "rewards/margins": 0.05036048963665962, "rewards/rejected": -0.19530481100082397, "step": 456 }, { "epoch": 1.2595788746979635, "grad_norm": 0.1984872668981552, "learning_rate": 3.606320667551466e-06, "log_odds_chosen": 0.7281745672225952, "log_odds_ratio": -0.4030551612377167, "logits/chosen": -0.36235833168029785, "logits/rejected": -1.7550745010375977, "logps/chosen": -1.49772310256958, "logps/rejected": -2.1043448448181152, "loss": 1.6083, "nll_loss": 1.5679997205734253, "rewards/accuracies": 1.0, "rewards/chosen": -0.1497723013162613, "rewards/margins": 0.06066218018531799, "rewards/rejected": -0.21043448150157928, "step": 457 }, { "epoch": 1.2623403520883674, "grad_norm": 0.1827681064605713, "learning_rate": 3.599128164864706e-06, "log_odds_chosen": 0.8096928596496582, "log_odds_ratio": -0.37837105989456177, "logits/chosen": -0.45445504784584045, "logits/rejected": -1.770817756652832, "logps/chosen": -1.4516648054122925, "logps/rejected": -2.1250061988830566, "loss": 1.5451, "nll_loss": 1.5072699785232544, "rewards/accuracies": 1.0, "rewards/chosen": -0.14516648650169373, "rewards/margins": 0.0673341453075409, "rewards/rejected": -0.21250061690807343, "step": 458 }, { "epoch": 1.265101829478771, "grad_norm": 0.19308075308799744, "learning_rate": 3.5919243669354585e-06, "log_odds_chosen": 0.6114473342895508, "log_odds_ratio": -0.4423988461494446, "logits/chosen": -0.405925452709198, "logits/rejected": -1.2670363187789917, "logps/chosen": -1.5176535844802856, "logps/rejected": -2.0253612995147705, "loss": 1.623, "nll_loss": 1.5787138938903809, "rewards/accuracies": 1.0, "rewards/chosen": -0.1517653614282608, "rewards/margins": 0.05077076703310013, "rewards/rejected": -0.20253612101078033, "step": 459 }, { "epoch": 1.2678633068691751, "grad_norm": 0.2073579728603363, "learning_rate": 3.5847093477938955e-06, "log_odds_chosen": 0.6397342681884766, "log_odds_ratio": -0.4318113625049591, "logits/chosen": -0.4677776098251343, "logits/rejected": -1.5898178815841675, "logps/chosen": -1.5188252925872803, "logps/rejected": -2.048469066619873, "loss": 1.6166, "nll_loss": 1.5734236240386963, "rewards/accuracies": 1.0, "rewards/chosen": -0.15188252925872803, "rewards/margins": 0.05296438932418823, "rewards/rejected": -0.20484691858291626, "step": 460 }, { "epoch": 1.2706247842595788, "grad_norm": 0.18931885063648224, "learning_rate": 3.5774831815855017e-06, "log_odds_chosen": 0.8001710772514343, "log_odds_ratio": -0.3749019205570221, "logits/chosen": -0.4591723084449768, "logits/rejected": -1.6894080638885498, "logps/chosen": -1.4038267135620117, "logps/rejected": -2.0569498538970947, "loss": 1.5196, "nll_loss": 1.4821062088012695, "rewards/accuracies": 1.0, "rewards/chosen": -0.14038267731666565, "rewards/margins": 0.06531231105327606, "rewards/rejected": -0.2056949883699417, "step": 461 }, { "epoch": 1.2733862616499827, "grad_norm": 0.17583203315734863, "learning_rate": 3.5702459425703146e-06, "log_odds_chosen": 0.8868230581283569, "log_odds_ratio": -0.3547811210155487, "logits/chosen": -0.43302029371261597, "logits/rejected": -1.9412630796432495, "logps/chosen": -1.508098840713501, "logps/rejected": -2.258622884750366, "loss": 1.6074, "nll_loss": 1.5718796253204346, "rewards/accuracies": 1.0, "rewards/chosen": -0.1508098840713501, "rewards/margins": 0.0750524029135704, "rewards/rejected": -0.2258622944355011, "step": 462 }, { "epoch": 1.2761477390403866, "grad_norm": 0.18731163442134857, "learning_rate": 3.562997705122162e-06, "log_odds_chosen": 0.7847945094108582, "log_odds_ratio": -0.37928110361099243, "logits/chosen": -0.38437145948410034, "logits/rejected": -1.6156420707702637, "logps/chosen": -1.4260213375091553, "logps/rejected": -2.070751428604126, "loss": 1.5279, "nll_loss": 1.489925503730774, "rewards/accuracies": 1.0, "rewards/chosen": -0.14260214567184448, "rewards/margins": 0.0644729733467102, "rewards/rejected": -0.20707513391971588, "step": 463 }, { "epoch": 1.2789092164307905, "grad_norm": 0.17475299537181854, "learning_rate": 3.5557385437279e-06, "log_odds_chosen": 0.838699221611023, "log_odds_ratio": -0.36731863021850586, "logits/chosen": -0.3592332601547241, "logits/rejected": -1.4209747314453125, "logps/chosen": -1.4949580430984497, "logps/rejected": -2.196079730987549, "loss": 1.5793, "nll_loss": 1.5425434112548828, "rewards/accuracies": 1.0, "rewards/chosen": -0.14949579536914825, "rewards/margins": 0.07011217623949051, "rewards/rejected": -0.21960797905921936, "step": 464 }, { "epoch": 1.2816706938211944, "grad_norm": 0.19598983228206635, "learning_rate": 3.5484685329866424e-06, "log_odds_chosen": 0.5413783192634583, "log_odds_ratio": -0.4634351134300232, "logits/chosen": -0.3708513081073761, "logits/rejected": -1.4379222393035889, "logps/chosen": -1.5153756141662598, "logps/rejected": -1.9605016708374023, "loss": 1.6065, "nll_loss": 1.5601321458816528, "rewards/accuracies": 1.0, "rewards/chosen": -0.15153755247592926, "rewards/margins": 0.044512614607810974, "rewards/rejected": -0.19605018198490143, "step": 465 }, { "epoch": 1.2844321712115983, "grad_norm": 0.18149885535240173, "learning_rate": 3.541187747608998e-06, "log_odds_chosen": 0.9340339303016663, "log_odds_ratio": -0.34465107321739197, "logits/chosen": -0.48461854457855225, "logits/rejected": -1.7797619104385376, "logps/chosen": -1.5739936828613281, "logps/rejected": -2.3727049827575684, "loss": 1.6472, "nll_loss": 1.6127065420150757, "rewards/accuracies": 1.0, "rewards/chosen": -0.15739935636520386, "rewards/margins": 0.07987111061811447, "rewards/rejected": -0.23727048933506012, "step": 466 }, { "epoch": 1.2871936486020021, "grad_norm": 0.19978304207324982, "learning_rate": 3.533896262416302e-06, "log_odds_chosen": 0.7202765941619873, "log_odds_ratio": -0.4014014005661011, "logits/chosen": -0.4147559404373169, "logits/rejected": -1.5541059970855713, "logps/chosen": -1.4875589609146118, "logps/rejected": -2.0849876403808594, "loss": 1.5897, "nll_loss": 1.5495574474334717, "rewards/accuracies": 1.0, "rewards/chosen": -0.14875589311122894, "rewards/margins": 0.05974285677075386, "rewards/rejected": -0.2084987461566925, "step": 467 }, { "epoch": 1.2899551259924058, "grad_norm": 0.21298463642597198, "learning_rate": 3.5265941523398455e-06, "log_odds_chosen": 0.8375830054283142, "log_odds_ratio": -0.36393651366233826, "logits/chosen": -0.42123696208000183, "logits/rejected": -1.5165983438491821, "logps/chosen": -1.5110690593719482, "logps/rejected": -2.2138826847076416, "loss": 1.6014, "nll_loss": 1.5650498867034912, "rewards/accuracies": 1.0, "rewards/chosen": -0.15110690891742706, "rewards/margins": 0.07028134167194366, "rewards/rejected": -0.22138825058937073, "step": 468 }, { "epoch": 1.29271660338281, "grad_norm": 0.1818659007549286, "learning_rate": 3.519281492420108e-06, "log_odds_chosen": 0.5199939012527466, "log_odds_ratio": -0.47489961981773376, "logits/chosen": -0.4219110906124115, "logits/rejected": -1.4790453910827637, "logps/chosen": -1.5349076986312866, "logps/rejected": -1.9625968933105469, "loss": 1.6236, "nll_loss": 1.5760908126831055, "rewards/accuracies": 0.875, "rewards/chosen": -0.15349076688289642, "rewards/margins": 0.042768917977809906, "rewards/rejected": -0.19625969231128693, "step": 469 }, { "epoch": 1.2954780807732136, "grad_norm": 0.19590230286121368, "learning_rate": 3.5119583578059845e-06, "log_odds_chosen": 0.6841270327568054, "log_odds_ratio": -0.43147027492523193, "logits/chosen": -0.4008702337741852, "logits/rejected": -1.6819429397583008, "logps/chosen": -1.6086608171463013, "logps/rejected": -2.1926984786987305, "loss": 1.6814, "nll_loss": 1.6382827758789062, "rewards/accuracies": 1.0, "rewards/chosen": -0.16086608171463013, "rewards/margins": 0.05840376764535904, "rewards/rejected": -0.21926987171173096, "step": 470 }, { "epoch": 1.2982395581636175, "grad_norm": 0.17711324989795685, "learning_rate": 3.504624823754014e-06, "log_odds_chosen": 0.8761448860168457, "log_odds_ratio": -0.3517749607563019, "logits/chosen": -0.41212159395217896, "logits/rejected": -1.957320213317871, "logps/chosen": -1.4138109683990479, "logps/rejected": -2.131588935852051, "loss": 1.508, "nll_loss": 1.4727998971939087, "rewards/accuracies": 1.0, "rewards/chosen": -0.14138111472129822, "rewards/margins": 0.07177779823541641, "rewards/rejected": -0.21315890550613403, "step": 471 }, { "epoch": 1.3010010355540214, "grad_norm": 0.19494958221912384, "learning_rate": 3.4972809656276047e-06, "log_odds_chosen": 0.8046627640724182, "log_odds_ratio": -0.3767206370830536, "logits/chosen": -0.36885958909988403, "logits/rejected": -1.8168095350265503, "logps/chosen": -1.5026159286499023, "logps/rejected": -2.176860809326172, "loss": 1.6015, "nll_loss": 1.5638599395751953, "rewards/accuracies": 1.0, "rewards/chosen": -0.15026158094406128, "rewards/margins": 0.06742450594902039, "rewards/rejected": -0.21768608689308167, "step": 472 }, { "epoch": 1.3037625129444252, "grad_norm": 0.1766563355922699, "learning_rate": 3.4899268588962613e-06, "log_odds_chosen": 0.8576854467391968, "log_odds_ratio": -0.3584885597229004, "logits/chosen": -0.436603844165802, "logits/rejected": -1.6729352474212646, "logps/chosen": -1.3645590543746948, "logps/rejected": -2.0580813884735107, "loss": 1.4773, "nll_loss": 1.4414961338043213, "rewards/accuracies": 1.0, "rewards/chosen": -0.13645590841770172, "rewards/margins": 0.06935223937034607, "rewards/rejected": -0.20580816268920898, "step": 473 }, { "epoch": 1.3065239903348291, "grad_norm": 0.18817074596881866, "learning_rate": 3.4825625791348093e-06, "log_odds_chosen": 0.8350617289543152, "log_odds_ratio": -0.3708168864250183, "logits/chosen": -0.3747791051864624, "logits/rejected": -1.553459882736206, "logps/chosen": -1.4124037027359009, "logps/rejected": -2.1011507511138916, "loss": 1.5145, "nll_loss": 1.477430820465088, "rewards/accuracies": 1.0, "rewards/chosen": -0.14124037325382233, "rewards/margins": 0.06887470930814743, "rewards/rejected": -0.21011507511138916, "step": 474 }, { "epoch": 1.309285467725233, "grad_norm": 0.1850864142179489, "learning_rate": 3.4751882020226174e-06, "log_odds_chosen": 0.7951568365097046, "log_odds_ratio": -0.38151904940605164, "logits/chosen": -0.41285592317581177, "logits/rejected": -1.676164984703064, "logps/chosen": -1.519218921661377, "logps/rejected": -2.190464973449707, "loss": 1.6106, "nll_loss": 1.5724141597747803, "rewards/accuracies": 1.0, "rewards/chosen": -0.15192189812660217, "rewards/margins": 0.06712460517883301, "rewards/rejected": -0.21904650330543518, "step": 475 }, { "epoch": 1.312046945115637, "grad_norm": 0.19581526517868042, "learning_rate": 3.467803803342821e-06, "log_odds_chosen": 0.7770808339118958, "log_odds_ratio": -0.38217130303382874, "logits/chosen": -0.3372090756893158, "logits/rejected": -1.6060659885406494, "logps/chosen": -1.4900453090667725, "logps/rejected": -2.1375834941864014, "loss": 1.5763, "nll_loss": 1.5380480289459229, "rewards/accuracies": 1.0, "rewards/chosen": -0.14900454878807068, "rewards/margins": 0.06475377827882767, "rewards/rejected": -0.21375833451747894, "step": 476 }, { "epoch": 1.3148084225060408, "grad_norm": 0.19793154299259186, "learning_rate": 3.4604094589815402e-06, "log_odds_chosen": 0.6940678358078003, "log_odds_ratio": -0.40892940759658813, "logits/chosen": -0.42878496646881104, "logits/rejected": -1.5839377641677856, "logps/chosen": -1.4118523597717285, "logps/rejected": -1.9752473831176758, "loss": 1.5164, "nll_loss": 1.4755195379257202, "rewards/accuracies": 1.0, "rewards/chosen": -0.1411852389574051, "rewards/margins": 0.056339483708143234, "rewards/rejected": -0.19752474129199982, "step": 477 }, { "epoch": 1.3175698998964447, "grad_norm": 0.18727529048919678, "learning_rate": 3.4530052449271044e-06, "log_odds_chosen": 0.8616414666175842, "log_odds_ratio": -0.36362677812576294, "logits/chosen": -0.4634791910648346, "logits/rejected": -1.5600810050964355, "logps/chosen": -1.4240953922271729, "logps/rejected": -2.1343352794647217, "loss": 1.521, "nll_loss": 1.4846872091293335, "rewards/accuracies": 1.0, "rewards/chosen": -0.142409548163414, "rewards/margins": 0.07102398574352264, "rewards/rejected": -0.21343351900577545, "step": 478 }, { "epoch": 1.3203313772868484, "grad_norm": 0.19879932701587677, "learning_rate": 3.4455912372692696e-06, "log_odds_chosen": 0.5470461845397949, "log_odds_ratio": -0.46493563055992126, "logits/chosen": -0.3689558207988739, "logits/rejected": -1.5988726615905762, "logps/chosen": -1.4872140884399414, "logps/rejected": -1.9357192516326904, "loss": 1.5782, "nll_loss": 1.5316959619522095, "rewards/accuracies": 1.0, "rewards/chosen": -0.14872139692306519, "rewards/margins": 0.04485052451491356, "rewards/rejected": -0.19357194006443024, "step": 479 }, { "epoch": 1.3230928546772525, "grad_norm": 0.19459912180900574, "learning_rate": 3.438167512198436e-06, "log_odds_chosen": 0.5693470239639282, "log_odds_ratio": -0.4518805146217346, "logits/chosen": -0.41612759232521057, "logits/rejected": -1.5004498958587646, "logps/chosen": -1.5251268148422241, "logps/rejected": -1.994539499282837, "loss": 1.6246, "nll_loss": 1.5794237852096558, "rewards/accuracies": 1.0, "rewards/chosen": -0.15251268446445465, "rewards/margins": 0.046941258013248444, "rewards/rejected": -0.1994539499282837, "step": 480 }, { "epoch": 1.3258543320676561, "grad_norm": 0.1880672574043274, "learning_rate": 3.4307341460048633e-06, "log_odds_chosen": 0.7143114805221558, "log_odds_ratio": -0.40364253520965576, "logits/chosen": -0.4024220108985901, "logits/rejected": -1.5502820014953613, "logps/chosen": -1.5507985353469849, "logps/rejected": -2.1468398571014404, "loss": 1.6399, "nll_loss": 1.5995802879333496, "rewards/accuracies": 1.0, "rewards/chosen": -0.15507985651493073, "rewards/margins": 0.059604134410619736, "rewards/rejected": -0.21468399465084076, "step": 481 }, { "epoch": 1.32861580945806, "grad_norm": 0.19784514605998993, "learning_rate": 3.4232912150778914e-06, "log_odds_chosen": 0.7461116909980774, "log_odds_ratio": -0.3941587507724762, "logits/chosen": -0.43998983502388, "logits/rejected": -1.7154864072799683, "logps/chosen": -1.4538161754608154, "logps/rejected": -2.0666849613189697, "loss": 1.5731, "nll_loss": 1.5336874723434448, "rewards/accuracies": 1.0, "rewards/chosen": -0.1453816145658493, "rewards/margins": 0.06128688156604767, "rewards/rejected": -0.20666849613189697, "step": 482 }, { "epoch": 1.331377286848464, "grad_norm": 0.18405503034591675, "learning_rate": 3.415838795905151e-06, "log_odds_chosen": 0.9300730228424072, "log_odds_ratio": -0.34047600626945496, "logits/chosen": -0.5350120067596436, "logits/rejected": -1.9728281497955322, "logps/chosen": -1.4228546619415283, "logps/rejected": -2.1867175102233887, "loss": 1.5168, "nll_loss": 1.4828009605407715, "rewards/accuracies": 1.0, "rewards/chosen": -0.14228546619415283, "rewards/margins": 0.07638627290725708, "rewards/rejected": -0.2186717540025711, "step": 483 }, { "epoch": 1.3341387642388678, "grad_norm": 0.19047723710536957, "learning_rate": 3.408376965071779e-06, "log_odds_chosen": 0.6742444634437561, "log_odds_ratio": -0.41640961170196533, "logits/chosen": -0.37510067224502563, "logits/rejected": -1.5923813581466675, "logps/chosen": -1.5178049802780151, "logps/rejected": -2.0749661922454834, "loss": 1.6055, "nll_loss": 1.5638338327407837, "rewards/accuracies": 1.0, "rewards/chosen": -0.15178050100803375, "rewards/margins": 0.05571611970663071, "rewards/rejected": -0.20749662816524506, "step": 484 }, { "epoch": 1.3369002416292717, "grad_norm": 0.1780618578195572, "learning_rate": 3.400905799259634e-06, "log_odds_chosen": 0.653221607208252, "log_odds_ratio": -0.4260196387767792, "logits/chosen": -0.42139092087745667, "logits/rejected": -1.648254632949829, "logps/chosen": -1.4374747276306152, "logps/rejected": -1.9686298370361328, "loss": 1.5329, "nll_loss": 1.490256667137146, "rewards/accuracies": 1.0, "rewards/chosen": -0.143747478723526, "rewards/margins": 0.05311552435159683, "rewards/rejected": -0.19686299562454224, "step": 485 }, { "epoch": 1.3396617190196756, "grad_norm": 0.1804792881011963, "learning_rate": 3.393425375246503e-06, "log_odds_chosen": 0.6369755268096924, "log_odds_ratio": -0.4262813925743103, "logits/chosen": -0.4290909469127655, "logits/rejected": -1.7290550470352173, "logps/chosen": -1.4642930030822754, "logps/rejected": -1.98381507396698, "loss": 1.5519, "nll_loss": 1.509236454963684, "rewards/accuracies": 1.0, "rewards/chosen": -0.14642930030822754, "rewards/margins": 0.05195220559835434, "rewards/rejected": -0.1983814835548401, "step": 486 }, { "epoch": 1.3424231964100795, "grad_norm": 0.19493341445922852, "learning_rate": 3.3859357699053165e-06, "log_odds_chosen": 0.7284951210021973, "log_odds_ratio": -0.40332838892936707, "logits/chosen": -0.40109413862228394, "logits/rejected": -1.4352595806121826, "logps/chosen": -1.402358055114746, "logps/rejected": -1.9987090826034546, "loss": 1.4967, "nll_loss": 1.456404447555542, "rewards/accuracies": 1.0, "rewards/chosen": -0.1402358114719391, "rewards/margins": 0.059635113924741745, "rewards/rejected": -0.19987091422080994, "step": 487 }, { "epoch": 1.3451846738004831, "grad_norm": 0.19050319492816925, "learning_rate": 3.3784370602033572e-06, "log_odds_chosen": 0.8729457259178162, "log_odds_ratio": -0.35851162672042847, "logits/chosen": -0.4902225732803345, "logits/rejected": -1.7542881965637207, "logps/chosen": -1.4271174669265747, "logps/rejected": -2.151813507080078, "loss": 1.5277, "nll_loss": 1.4917997121810913, "rewards/accuracies": 1.0, "rewards/chosen": -0.14271175861358643, "rewards/margins": 0.07246959954500198, "rewards/rejected": -0.2151813805103302, "step": 488 }, { "epoch": 1.3479461511908872, "grad_norm": 0.18820421397686005, "learning_rate": 3.3709293232014705e-06, "log_odds_chosen": 0.73007732629776, "log_odds_ratio": -0.40136751532554626, "logits/chosen": -0.44850000739097595, "logits/rejected": -2.0052490234375, "logps/chosen": -1.4974483251571655, "logps/rejected": -2.1092348098754883, "loss": 1.5884, "nll_loss": 1.5482368469238281, "rewards/accuracies": 1.0, "rewards/chosen": -0.14974482357501984, "rewards/margins": 0.06117865815758705, "rewards/rejected": -0.21092349290847778, "step": 489 }, { "epoch": 1.350707628581291, "grad_norm": 0.18799753487110138, "learning_rate": 3.3634126360532694e-06, "log_odds_chosen": 0.8482156991958618, "log_odds_ratio": -0.36712414026260376, "logits/chosen": -0.32226884365081787, "logits/rejected": -1.778417944908142, "logps/chosen": -1.577460765838623, "logps/rejected": -2.302645206451416, "loss": 1.6644, "nll_loss": 1.6277295351028442, "rewards/accuracies": 1.0, "rewards/chosen": -0.1577460616827011, "rewards/margins": 0.07251846790313721, "rewards/rejected": -0.23026452958583832, "step": 490 }, { "epoch": 1.3534691059716948, "grad_norm": 0.20365531742572784, "learning_rate": 3.355887076004345e-06, "log_odds_chosen": 0.7709956765174866, "log_odds_ratio": -0.383152574300766, "logits/chosen": -0.5297287702560425, "logits/rejected": -1.6290040016174316, "logps/chosen": -1.4873484373092651, "logps/rejected": -2.12845778465271, "loss": 1.5899, "nll_loss": 1.551561713218689, "rewards/accuracies": 1.0, "rewards/chosen": -0.14873485267162323, "rewards/margins": 0.06411094218492508, "rewards/rejected": -0.21284577250480652, "step": 491 }, { "epoch": 1.3562305833620987, "grad_norm": 0.18781210482120514, "learning_rate": 3.3483527203914694e-06, "log_odds_chosen": 0.8746954798698425, "log_odds_ratio": -0.3559949994087219, "logits/chosen": -0.3871724307537079, "logits/rejected": -1.6837961673736572, "logps/chosen": -1.5181317329406738, "logps/rejected": -2.254382610321045, "loss": 1.613, "nll_loss": 1.5773670673370361, "rewards/accuracies": 1.0, "rewards/chosen": -0.15181316435337067, "rewards/margins": 0.0736251100897789, "rewards/rejected": -0.22543828189373016, "step": 492 }, { "epoch": 1.3589920607525026, "grad_norm": 0.18413974344730377, "learning_rate": 3.340809646641805e-06, "log_odds_chosen": 0.6966791749000549, "log_odds_ratio": -0.4073706269264221, "logits/chosen": -0.32745176553726196, "logits/rejected": -1.734323501586914, "logps/chosen": -1.5864430665969849, "logps/rejected": -2.1757755279541016, "loss": 1.6634, "nll_loss": 1.6226191520690918, "rewards/accuracies": 1.0, "rewards/chosen": -0.15864431858062744, "rewards/margins": 0.05893322825431824, "rewards/rejected": -0.21757756173610687, "step": 493 }, { "epoch": 1.3617535381429065, "grad_norm": 0.1795225441455841, "learning_rate": 3.333257932272105e-06, "log_odds_chosen": 0.7916382551193237, "log_odds_ratio": -0.3854186236858368, "logits/chosen": -0.3654117286205292, "logits/rejected": -1.731317400932312, "logps/chosen": -1.4387598037719727, "logps/rejected": -2.0977869033813477, "loss": 1.5401, "nll_loss": 1.5015188455581665, "rewards/accuracies": 1.0, "rewards/chosen": -0.14387598633766174, "rewards/margins": 0.0659027174115181, "rewards/rejected": -0.20977871119976044, "step": 494 }, { "epoch": 1.3645150155333103, "grad_norm": 0.19810736179351807, "learning_rate": 3.3256976548879183e-06, "log_odds_chosen": 0.7499178647994995, "log_odds_ratio": -0.391695499420166, "logits/chosen": -0.4586635231971741, "logits/rejected": -1.5185637474060059, "logps/chosen": -1.494145393371582, "logps/rejected": -2.1175270080566406, "loss": 1.591, "nll_loss": 1.551873803138733, "rewards/accuracies": 1.0, "rewards/chosen": -0.1494145393371582, "rewards/margins": 0.06233816593885422, "rewards/rejected": -0.21175269782543182, "step": 495 }, { "epoch": 1.3672764929237142, "grad_norm": 0.17777174711227417, "learning_rate": 3.3181288921827925e-06, "log_odds_chosen": 0.8323229551315308, "log_odds_ratio": -0.37165793776512146, "logits/chosen": -0.3486481308937073, "logits/rejected": -1.4729896783828735, "logps/chosen": -1.3969337940216064, "logps/rejected": -2.0784289836883545, "loss": 1.5068, "nll_loss": 1.4696803092956543, "rewards/accuracies": 1.0, "rewards/chosen": -0.13969337940216064, "rewards/margins": 0.06814949959516525, "rewards/rejected": -0.2078428864479065, "step": 496 }, { "epoch": 1.3700379703141181, "grad_norm": 0.18033576011657715, "learning_rate": 3.310551721937475e-06, "log_odds_chosen": 0.7983404994010925, "log_odds_ratio": -0.3745984435081482, "logits/chosen": -0.3823201656341553, "logits/rejected": -1.5644809007644653, "logps/chosen": -1.4277198314666748, "logps/rejected": -2.0840868949890137, "loss": 1.5007, "nll_loss": 1.4631915092468262, "rewards/accuracies": 1.0, "rewards/chosen": -0.14277197420597076, "rewards/margins": 0.06563669443130493, "rewards/rejected": -0.2084086835384369, "step": 497 }, { "epoch": 1.372799447704522, "grad_norm": 0.19242247939109802, "learning_rate": 3.3029662220191146e-06, "log_odds_chosen": 0.8101353645324707, "log_odds_ratio": -0.37228965759277344, "logits/chosen": -0.4509270191192627, "logits/rejected": -1.8056501150131226, "logps/chosen": -1.3381811380386353, "logps/rejected": -1.9927046298980713, "loss": 1.4395, "nll_loss": 1.4023054838180542, "rewards/accuracies": 1.0, "rewards/chosen": -0.133818119764328, "rewards/margins": 0.06545236706733704, "rewards/rejected": -0.19927047193050385, "step": 498 }, { "epoch": 1.3755609250949257, "grad_norm": 0.1753472536802292, "learning_rate": 3.2953724703804572e-06, "log_odds_chosen": 0.892001211643219, "log_odds_ratio": -0.35052934288978577, "logits/chosen": -0.4044141173362732, "logits/rejected": -1.8937467336654663, "logps/chosen": -1.407699704170227, "logps/rejected": -2.1476714611053467, "loss": 1.4972, "nll_loss": 1.4621860980987549, "rewards/accuracies": 1.0, "rewards/chosen": -0.14076997339725494, "rewards/margins": 0.07399718463420868, "rewards/rejected": -0.21476714313030243, "step": 499 }, { "epoch": 1.3783224024853298, "grad_norm": 0.1958237588405609, "learning_rate": 3.2877705450590525e-06, "log_odds_chosen": 0.7319251298904419, "log_odds_ratio": -0.396656334400177, "logits/chosen": -0.3846544325351715, "logits/rejected": -1.6542454957962036, "logps/chosen": -1.4686310291290283, "logps/rejected": -2.0715951919555664, "loss": 1.5948, "nll_loss": 1.5551836490631104, "rewards/accuracies": 1.0, "rewards/chosen": -0.14686310291290283, "rewards/margins": 0.06029640883207321, "rewards/rejected": -0.20715951919555664, "step": 500 } ], "logging_steps": 1, "max_steps": 1089, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }