{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 1270, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0015760441292356187, "grad_norm": 0.711133599281311, "learning_rate": 0.0, "log_odds_chosen": 0.38395223021507263, "log_odds_ratio": -0.5362357497215271, "logits/chosen": -0.6395432353019714, "logits/rejected": -0.1508907973766327, "logps/chosen": -1.973930835723877, "logps/rejected": -2.3123726844787598, "loss": 2.2406, "nll_loss": 2.1870110034942627, "rewards/accuracies": 0.875, "rewards/chosen": -0.19739308953285217, "rewards/margins": 0.03384416550397873, "rewards/rejected": -0.2312372624874115, "step": 1 }, { "epoch": 0.0031520882584712374, "grad_norm": 0.6591355204582214, "learning_rate": 3.1496062992125985e-08, "log_odds_chosen": 0.39076143503189087, "log_odds_ratio": -0.5237792730331421, "logits/chosen": -0.6357220411300659, "logits/rejected": -0.09711451828479767, "logps/chosen": -1.8780229091644287, "logps/rejected": -2.2161149978637695, "loss": 2.1349, "nll_loss": 2.082494020462036, "rewards/accuracies": 1.0, "rewards/chosen": -0.1878022849559784, "rewards/margins": 0.03380918130278587, "rewards/rejected": -0.22161146998405457, "step": 2 }, { "epoch": 0.004728132387706856, "grad_norm": 0.74098140001297, "learning_rate": 6.299212598425197e-08, "log_odds_chosen": 0.5195883512496948, "log_odds_ratio": -0.4742986857891083, "logits/chosen": -0.7715582251548767, "logits/rejected": -0.26645177602767944, "logps/chosen": -1.9943277835845947, "logps/rejected": -2.4578633308410645, "loss": 2.2471, "nll_loss": 2.199704647064209, "rewards/accuracies": 1.0, "rewards/chosen": -0.19943277537822723, "rewards/margins": 0.046353571116924286, "rewards/rejected": -0.24578633904457092, "step": 3 }, { "epoch": 0.006304176516942475, "grad_norm": 0.6662443280220032, "learning_rate": 9.448818897637795e-08, "log_odds_chosen": 0.55958491563797, "log_odds_ratio": -0.46615538001060486, "logits/chosen": -0.5838385820388794, "logits/rejected": -0.1573001742362976, "logps/chosen": -2.007845163345337, "logps/rejected": -2.5115368366241455, "loss": 2.2568, "nll_loss": 2.210216760635376, "rewards/accuracies": 0.875, "rewards/chosen": -0.20078451931476593, "rewards/margins": 0.05036917328834534, "rewards/rejected": -0.2511536777019501, "step": 4 }, { "epoch": 0.007880220646178092, "grad_norm": 0.7787235379219055, "learning_rate": 1.2598425196850394e-07, "log_odds_chosen": 0.705410361289978, "log_odds_ratio": -0.42067474126815796, "logits/chosen": -0.7229734063148499, "logits/rejected": -0.27978262305259705, "logps/chosen": -1.9303616285324097, "logps/rejected": -2.555299758911133, "loss": 2.1734, "nll_loss": 2.1313021183013916, "rewards/accuracies": 1.0, "rewards/chosen": -0.19303615391254425, "rewards/margins": 0.06249381601810455, "rewards/rejected": -0.2555299699306488, "step": 5 }, { "epoch": 0.009456264775413711, "grad_norm": 0.6482278108596802, "learning_rate": 1.5748031496062992e-07, "log_odds_chosen": 0.40409255027770996, "log_odds_ratio": -0.5151315331459045, "logits/chosen": -0.5276838541030884, "logits/rejected": -0.05200649052858353, "logps/chosen": -1.8561073541641235, "logps/rejected": -2.208005905151367, "loss": 2.1449, "nll_loss": 2.0933837890625, "rewards/accuracies": 1.0, "rewards/chosen": -0.18561072647571564, "rewards/margins": 0.03518984466791153, "rewards/rejected": -0.22080056369304657, "step": 6 }, { "epoch": 0.01103230890464933, "grad_norm": 0.6663646697998047, "learning_rate": 1.889763779527559e-07, "log_odds_chosen": 0.546273410320282, "log_odds_ratio": -0.46661460399627686, "logits/chosen": -0.5808312296867371, "logits/rejected": -0.19844059646129608, "logps/chosen": -1.9260658025741577, "logps/rejected": -2.4119839668273926, "loss": 2.1774, "nll_loss": 2.1307828426361084, "rewards/accuracies": 1.0, "rewards/chosen": -0.1926065981388092, "rewards/margins": 0.04859180375933647, "rewards/rejected": -0.24119840562343597, "step": 7 }, { "epoch": 0.01260835303388495, "grad_norm": 0.782015323638916, "learning_rate": 2.2047244094488187e-07, "log_odds_chosen": 0.8110038638114929, "log_odds_ratio": -0.3770079016685486, "logits/chosen": -0.6198790669441223, "logits/rejected": -0.24129487574100494, "logps/chosen": -2.014923572540283, "logps/rejected": -2.744842290878296, "loss": 2.2715, "nll_loss": 2.2338366508483887, "rewards/accuracies": 1.0, "rewards/chosen": -0.20149235427379608, "rewards/margins": 0.07299190014600754, "rewards/rejected": -0.274484246969223, "step": 8 }, { "epoch": 0.014184397163120567, "grad_norm": 0.6285175085067749, "learning_rate": 2.519685039370079e-07, "log_odds_chosen": 0.4325932562351227, "log_odds_ratio": -0.5066515207290649, "logits/chosen": -0.5963254570960999, "logits/rejected": -0.07179142534732819, "logps/chosen": -1.9378471374511719, "logps/rejected": -2.31904673576355, "loss": 2.1726, "nll_loss": 2.1219170093536377, "rewards/accuracies": 1.0, "rewards/chosen": -0.1937847137451172, "rewards/margins": 0.038119956851005554, "rewards/rejected": -0.23190467059612274, "step": 9 }, { "epoch": 0.015760441292356184, "grad_norm": 0.7205284833908081, "learning_rate": 2.8346456692913386e-07, "log_odds_chosen": 0.4166225790977478, "log_odds_ratio": -0.5110099911689758, "logits/chosen": -0.6733875274658203, "logits/rejected": -0.2730729877948761, "logps/chosen": -1.96082603931427, "logps/rejected": -2.3293237686157227, "loss": 2.21, "nll_loss": 2.158905267715454, "rewards/accuracies": 1.0, "rewards/chosen": -0.19608259201049805, "rewards/margins": 0.03684981167316437, "rewards/rejected": -0.2329324334859848, "step": 10 }, { "epoch": 0.017336485421591805, "grad_norm": 0.7679106593132019, "learning_rate": 3.1496062992125984e-07, "log_odds_chosen": 0.48609045147895813, "log_odds_ratio": -0.48330000042915344, "logits/chosen": -0.5869827270507812, "logits/rejected": -0.24110490083694458, "logps/chosen": -2.035675048828125, "logps/rejected": -2.4683971405029297, "loss": 2.2835, "nll_loss": 2.2351536750793457, "rewards/accuracies": 1.0, "rewards/chosen": -0.2035675048828125, "rewards/margins": 0.043272241950035095, "rewards/rejected": -0.2468397617340088, "step": 11 }, { "epoch": 0.018912529550827423, "grad_norm": 0.8531121015548706, "learning_rate": 3.464566929133858e-07, "log_odds_chosen": 0.5449747443199158, "log_odds_ratio": -0.461398184299469, "logits/chosen": -0.7053269147872925, "logits/rejected": -0.12370388209819794, "logps/chosen": -2.076099157333374, "logps/rejected": -2.565109968185425, "loss": 2.3324, "nll_loss": 2.286276340484619, "rewards/accuracies": 1.0, "rewards/chosen": -0.20760990679264069, "rewards/margins": 0.04890113323926926, "rewards/rejected": -0.25651103258132935, "step": 12 }, { "epoch": 0.02048857368006304, "grad_norm": 0.8578523397445679, "learning_rate": 3.779527559055118e-07, "log_odds_chosen": 0.5090824365615845, "log_odds_ratio": -0.47858256101608276, "logits/chosen": -0.7868019342422485, "logits/rejected": -0.07148027420043945, "logps/chosen": -1.9688328504562378, "logps/rejected": -2.4192914962768555, "loss": 2.2258, "nll_loss": 2.1779398918151855, "rewards/accuracies": 1.0, "rewards/chosen": -0.19688329100608826, "rewards/margins": 0.045045845210552216, "rewards/rejected": -0.24192912876605988, "step": 13 }, { "epoch": 0.02206461780929866, "grad_norm": 0.675309956073761, "learning_rate": 4.0944881889763777e-07, "log_odds_chosen": 0.3657957911491394, "log_odds_ratio": -0.5386834144592285, "logits/chosen": -0.6320536136627197, "logits/rejected": -0.3780551254749298, "logps/chosen": -1.9759397506713867, "logps/rejected": -2.300447463989258, "loss": 2.2257, "nll_loss": 2.171861171722412, "rewards/accuracies": 1.0, "rewards/chosen": -0.19759398698806763, "rewards/margins": 0.03245077282190323, "rewards/rejected": -0.23004476726055145, "step": 14 }, { "epoch": 0.02364066193853428, "grad_norm": 0.7733155488967896, "learning_rate": 4.4094488188976375e-07, "log_odds_chosen": 0.41122347116470337, "log_odds_ratio": -0.5111778378486633, "logits/chosen": -0.7448755502700806, "logits/rejected": -0.08966228365898132, "logps/chosen": -1.9629881381988525, "logps/rejected": -2.32719087600708, "loss": 2.2273, "nll_loss": 2.176145076751709, "rewards/accuracies": 1.0, "rewards/chosen": -0.19629880785942078, "rewards/margins": 0.03642028942704201, "rewards/rejected": -0.23271909356117249, "step": 15 }, { "epoch": 0.0252167060677699, "grad_norm": 0.7707588076591492, "learning_rate": 4.7244094488188973e-07, "log_odds_chosen": 0.4248766303062439, "log_odds_ratio": -0.5078074336051941, "logits/chosen": -0.6721003651618958, "logits/rejected": -0.19612114131450653, "logps/chosen": -1.9606484174728394, "logps/rejected": -2.336439609527588, "loss": 2.22, "nll_loss": 2.1692113876342773, "rewards/accuracies": 1.0, "rewards/chosen": -0.19606485962867737, "rewards/margins": 0.03757911175489426, "rewards/rejected": -0.23364394903182983, "step": 16 }, { "epoch": 0.026792750197005517, "grad_norm": 0.6388130784034729, "learning_rate": 5.039370078740158e-07, "log_odds_chosen": 0.6333746314048767, "log_odds_ratio": -0.43810510635375977, "logits/chosen": -0.5735284686088562, "logits/rejected": -0.31118011474609375, "logps/chosen": -1.9192208051681519, "logps/rejected": -2.4831156730651855, "loss": 2.1769, "nll_loss": 2.133085012435913, "rewards/accuracies": 1.0, "rewards/chosen": -0.19192209839820862, "rewards/margins": 0.056389469653367996, "rewards/rejected": -0.24831153452396393, "step": 17 }, { "epoch": 0.028368794326241134, "grad_norm": 0.7463440299034119, "learning_rate": 5.354330708661418e-07, "log_odds_chosen": 0.3546241819858551, "log_odds_ratio": -0.544340193271637, "logits/chosen": -0.6276163458824158, "logits/rejected": 0.05960750952363014, "logps/chosen": -1.9554523229599, "logps/rejected": -2.2666845321655273, "loss": 2.2226, "nll_loss": 2.168125629425049, "rewards/accuracies": 0.875, "rewards/chosen": -0.1955452263355255, "rewards/margins": 0.031123224645853043, "rewards/rejected": -0.22666846215724945, "step": 18 }, { "epoch": 0.029944838455476755, "grad_norm": 0.6933729648590088, "learning_rate": 5.669291338582677e-07, "log_odds_chosen": 0.6329823732376099, "log_odds_ratio": -0.4341467022895813, "logits/chosen": -0.753471851348877, "logits/rejected": -0.27794983983039856, "logps/chosen": -1.8416026830673218, "logps/rejected": -2.3966176509857178, "loss": 2.115, "nll_loss": 2.07161283493042, "rewards/accuracies": 1.0, "rewards/chosen": -0.1841602772474289, "rewards/margins": 0.05550149455666542, "rewards/rejected": -0.23966176807880402, "step": 19 }, { "epoch": 0.03152088258471237, "grad_norm": 0.7374937534332275, "learning_rate": 5.984251968503937e-07, "log_odds_chosen": 0.5668491125106812, "log_odds_ratio": -0.46694958209991455, "logits/chosen": -0.562913179397583, "logits/rejected": -0.25034084916114807, "logps/chosen": -1.995664119720459, "logps/rejected": -2.5026793479919434, "loss": 2.262, "nll_loss": 2.215284824371338, "rewards/accuracies": 0.875, "rewards/chosen": -0.19956642389297485, "rewards/margins": 0.05070152133703232, "rewards/rejected": -0.25026795268058777, "step": 20 }, { "epoch": 0.03309692671394799, "grad_norm": 0.75541090965271, "learning_rate": 6.299212598425197e-07, "log_odds_chosen": 0.35937565565109253, "log_odds_ratio": -0.5362535715103149, "logits/chosen": -0.65813809633255, "logits/rejected": -0.1279284954071045, "logps/chosen": -2.04636812210083, "logps/rejected": -2.3680672645568848, "loss": 2.3058, "nll_loss": 2.2522225379943848, "rewards/accuracies": 0.75, "rewards/chosen": -0.20463679730892181, "rewards/margins": 0.032169945538043976, "rewards/rejected": -0.2368067502975464, "step": 21 }, { "epoch": 0.03467297084318361, "grad_norm": 0.7607430219650269, "learning_rate": 6.614173228346457e-07, "log_odds_chosen": 0.36671358346939087, "log_odds_ratio": -0.5375123620033264, "logits/chosen": -0.7311565279960632, "logits/rejected": -0.08119023591279984, "logps/chosen": -1.9978885650634766, "logps/rejected": -2.3232674598693848, "loss": 2.2612, "nll_loss": 2.207477331161499, "rewards/accuracies": 0.875, "rewards/chosen": -0.1997888684272766, "rewards/margins": 0.03253789618611336, "rewards/rejected": -0.23232676088809967, "step": 22 }, { "epoch": 0.036249014972419225, "grad_norm": 0.7234435081481934, "learning_rate": 6.929133858267716e-07, "log_odds_chosen": 0.39767685532569885, "log_odds_ratio": -0.5170127153396606, "logits/chosen": -0.7005606293678284, "logits/rejected": -0.19814857840538025, "logps/chosen": -2.0349388122558594, "logps/rejected": -2.389338254928589, "loss": 2.2728, "nll_loss": 2.221050977706909, "rewards/accuracies": 1.0, "rewards/chosen": -0.2034938931465149, "rewards/margins": 0.03543993830680847, "rewards/rejected": -0.23893383145332336, "step": 23 }, { "epoch": 0.037825059101654845, "grad_norm": 0.7151917815208435, "learning_rate": 7.244094488188977e-07, "log_odds_chosen": 0.4232009947299957, "log_odds_ratio": -0.5118768215179443, "logits/chosen": -0.42520439624786377, "logits/rejected": -0.13911336660385132, "logps/chosen": -1.974929690361023, "logps/rejected": -2.349339723587036, "loss": 2.2336, "nll_loss": 2.1824424266815186, "rewards/accuracies": 1.0, "rewards/chosen": -0.19749295711517334, "rewards/margins": 0.03744099289178848, "rewards/rejected": -0.23493395745754242, "step": 24 }, { "epoch": 0.039401103230890466, "grad_norm": 0.7771602272987366, "learning_rate": 7.559055118110236e-07, "log_odds_chosen": 0.3602335751056671, "log_odds_ratio": -0.5373751521110535, "logits/chosen": -0.7194356918334961, "logits/rejected": -0.2223992645740509, "logps/chosen": -2.008389711380005, "logps/rejected": -2.332648277282715, "loss": 2.2619, "nll_loss": 2.208117961883545, "rewards/accuracies": 0.875, "rewards/chosen": -0.20083898305892944, "rewards/margins": 0.03242585435509682, "rewards/rejected": -0.23326483368873596, "step": 25 }, { "epoch": 0.04097714736012608, "grad_norm": 0.7429983615875244, "learning_rate": 7.874015748031496e-07, "log_odds_chosen": 0.46550512313842773, "log_odds_ratio": -0.4939187467098236, "logits/chosen": -0.8255457282066345, "logits/rejected": -0.3358853757381439, "logps/chosen": -1.954911708831787, "logps/rejected": -2.3684439659118652, "loss": 2.2146, "nll_loss": 2.1652259826660156, "rewards/accuracies": 1.0, "rewards/chosen": -0.19549117982387543, "rewards/margins": 0.041353195905685425, "rewards/rejected": -0.23684436082839966, "step": 26 }, { "epoch": 0.0425531914893617, "grad_norm": 0.7892085313796997, "learning_rate": 8.188976377952755e-07, "log_odds_chosen": 0.7098885774612427, "log_odds_ratio": -0.41376522183418274, "logits/chosen": -0.6670718789100647, "logits/rejected": -0.18330247700214386, "logps/chosen": -2.0041563510894775, "logps/rejected": -2.642397165298462, "loss": 2.2622, "nll_loss": 2.2208266258239746, "rewards/accuracies": 1.0, "rewards/chosen": -0.20041564106941223, "rewards/margins": 0.06382407993078232, "rewards/rejected": -0.26423972845077515, "step": 27 }, { "epoch": 0.04412923561859732, "grad_norm": 0.7302277684211731, "learning_rate": 8.503937007874016e-07, "log_odds_chosen": 0.4039226770401001, "log_odds_ratio": -0.5200110673904419, "logits/chosen": -0.6893348097801208, "logits/rejected": -0.09423862397670746, "logps/chosen": -1.9387571811676025, "logps/rejected": -2.295835494995117, "loss": 2.2167, "nll_loss": 2.1646535396575928, "rewards/accuracies": 0.875, "rewards/chosen": -0.19387571513652802, "rewards/margins": 0.03570783883333206, "rewards/rejected": -0.22958354651927948, "step": 28 }, { "epoch": 0.045705279747832936, "grad_norm": 0.6707211136817932, "learning_rate": 8.818897637795275e-07, "log_odds_chosen": 0.35679179430007935, "log_odds_ratio": -0.5352319478988647, "logits/chosen": -0.5461763739585876, "logits/rejected": -0.14010290801525116, "logps/chosen": -2.050537109375, "logps/rejected": -2.372122287750244, "loss": 2.3054, "nll_loss": 2.2518763542175293, "rewards/accuracies": 1.0, "rewards/chosen": -0.20505373179912567, "rewards/margins": 0.032158493995666504, "rewards/rejected": -0.23721222579479218, "step": 29 }, { "epoch": 0.04728132387706856, "grad_norm": 0.8279486894607544, "learning_rate": 9.133858267716535e-07, "log_odds_chosen": 0.5706252455711365, "log_odds_ratio": -0.45796477794647217, "logits/chosen": -0.6770057678222656, "logits/rejected": -0.2806178331375122, "logps/chosen": -2.0110249519348145, "logps/rejected": -2.521954298019409, "loss": 2.3011, "nll_loss": 2.2553153038024902, "rewards/accuracies": 0.875, "rewards/chosen": -0.20110251009464264, "rewards/margins": 0.05109292268753052, "rewards/rejected": -0.25219541788101196, "step": 30 }, { "epoch": 0.04885736800630418, "grad_norm": 0.7405450344085693, "learning_rate": 9.448818897637795e-07, "log_odds_chosen": 0.6274739503860474, "log_odds_ratio": -0.4411022663116455, "logits/chosen": -0.6367984414100647, "logits/rejected": 0.004103410989046097, "logps/chosen": -2.0546810626983643, "logps/rejected": -2.6223583221435547, "loss": 2.3002, "nll_loss": 2.2560579776763916, "rewards/accuracies": 1.0, "rewards/chosen": -0.20546811819076538, "rewards/margins": 0.056767746806144714, "rewards/rejected": -0.2622358202934265, "step": 31 }, { "epoch": 0.0504334121355398, "grad_norm": 0.638395369052887, "learning_rate": 9.763779527559055e-07, "log_odds_chosen": 0.6788234114646912, "log_odds_ratio": -0.42549028992652893, "logits/chosen": -0.570993185043335, "logits/rejected": -0.3356379568576813, "logps/chosen": -1.9266096353530884, "logps/rejected": -2.533726692199707, "loss": 2.1793, "nll_loss": 2.136709690093994, "rewards/accuracies": 1.0, "rewards/chosen": -0.19266097247600555, "rewards/margins": 0.060711681842803955, "rewards/rejected": -0.2533726394176483, "step": 32 }, { "epoch": 0.05200945626477541, "grad_norm": 0.6255950331687927, "learning_rate": 1.0078740157480315e-06, "log_odds_chosen": 0.5599108338356018, "log_odds_ratio": -0.4542999565601349, "logits/chosen": -0.5288444757461548, "logits/rejected": -0.19333161413669586, "logps/chosen": -1.9585925340652466, "logps/rejected": -2.4559030532836914, "loss": 2.2007, "nll_loss": 2.155308961868286, "rewards/accuracies": 1.0, "rewards/chosen": -0.19585925340652466, "rewards/margins": 0.04973103851079941, "rewards/rejected": -0.24559029936790466, "step": 33 }, { "epoch": 0.05358550039401103, "grad_norm": 0.6921458840370178, "learning_rate": 1.0393700787401573e-06, "log_odds_chosen": 0.3519511818885803, "log_odds_ratio": -0.5428643226623535, "logits/chosen": -0.536325216293335, "logits/rejected": -0.2403833270072937, "logps/chosen": -1.9013547897338867, "logps/rejected": -2.205166816711426, "loss": 2.1703, "nll_loss": 2.116063117980957, "rewards/accuracies": 1.0, "rewards/chosen": -0.19013547897338867, "rewards/margins": 0.030381204560399055, "rewards/rejected": -0.22051668167114258, "step": 34 }, { "epoch": 0.055161544523246654, "grad_norm": 0.8423165082931519, "learning_rate": 1.0708661417322836e-06, "log_odds_chosen": 0.42774710059165955, "log_odds_ratio": -0.5038249492645264, "logits/chosen": -0.7048341631889343, "logits/rejected": 0.0643918514251709, "logps/chosen": -2.080242872238159, "logps/rejected": -2.46270489692688, "loss": 2.3413, "nll_loss": 2.2909011840820312, "rewards/accuracies": 1.0, "rewards/chosen": -0.2080242931842804, "rewards/margins": 0.03824619948863983, "rewards/rejected": -0.24627049267292023, "step": 35 }, { "epoch": 0.05673758865248227, "grad_norm": 0.7374855875968933, "learning_rate": 1.1023622047244094e-06, "log_odds_chosen": 0.5830976963043213, "log_odds_ratio": -0.44582459330558777, "logits/chosen": -0.8039961457252502, "logits/rejected": -0.06987367570400238, "logps/chosen": -1.92990243434906, "logps/rejected": -2.4467267990112305, "loss": 2.1837, "nll_loss": 2.139101266860962, "rewards/accuracies": 1.0, "rewards/chosen": -0.192990243434906, "rewards/margins": 0.05168245732784271, "rewards/rejected": -0.2446727156639099, "step": 36 }, { "epoch": 0.05831363278171789, "grad_norm": 0.6995700001716614, "learning_rate": 1.1338582677165354e-06, "log_odds_chosen": 0.4410613477230072, "log_odds_ratio": -0.5076866149902344, "logits/chosen": -0.6389314532279968, "logits/rejected": -0.06876173615455627, "logps/chosen": -1.9238637685775757, "logps/rejected": -2.3091113567352295, "loss": 2.1937, "nll_loss": 2.1429271697998047, "rewards/accuracies": 0.875, "rewards/chosen": -0.19238635897636414, "rewards/margins": 0.03852475434541702, "rewards/rejected": -0.23091113567352295, "step": 37 }, { "epoch": 0.05988967691095351, "grad_norm": 0.7543565034866333, "learning_rate": 1.1653543307086612e-06, "log_odds_chosen": 0.3667663633823395, "log_odds_ratio": -0.5343418121337891, "logits/chosen": -0.7570109367370605, "logits/rejected": -0.23115745186805725, "logps/chosen": -1.921524167060852, "logps/rejected": -2.239858865737915, "loss": 2.1977, "nll_loss": 2.1442925930023193, "rewards/accuracies": 0.875, "rewards/chosen": -0.19215241074562073, "rewards/margins": 0.03183349221944809, "rewards/rejected": -0.22398591041564941, "step": 38 }, { "epoch": 0.061465721040189124, "grad_norm": 0.7876814007759094, "learning_rate": 1.1968503937007875e-06, "log_odds_chosen": 0.2030404657125473, "log_odds_ratio": -0.6065970659255981, "logits/chosen": -0.6633272171020508, "logits/rejected": -0.040516383945941925, "logps/chosen": -2.038562774658203, "logps/rejected": -2.220552444458008, "loss": 2.3176, "nll_loss": 2.256978988647461, "rewards/accuracies": 0.75, "rewards/chosen": -0.20385627448558807, "rewards/margins": 0.01819896697998047, "rewards/rejected": -0.22205524146556854, "step": 39 }, { "epoch": 0.06304176516942474, "grad_norm": 0.7621078491210938, "learning_rate": 1.2283464566929133e-06, "log_odds_chosen": 0.37610867619514465, "log_odds_ratio": -0.5284633040428162, "logits/chosen": -0.7413150668144226, "logits/rejected": -0.18302536010742188, "logps/chosen": -1.9204038381576538, "logps/rejected": -2.2485194206237793, "loss": 2.1889, "nll_loss": 2.1360361576080322, "rewards/accuracies": 1.0, "rewards/chosen": -0.19204038381576538, "rewards/margins": 0.03281155228614807, "rewards/rejected": -0.22485193610191345, "step": 40 }, { "epoch": 0.06461780929866036, "grad_norm": 0.6990500688552856, "learning_rate": 1.2598425196850393e-06, "log_odds_chosen": 0.27543094754219055, "log_odds_ratio": -0.5724075436592102, "logits/chosen": -0.6147856712341309, "logits/rejected": -0.09826792776584625, "logps/chosen": -1.9605408906936646, "logps/rejected": -2.205174446105957, "loss": 2.2093, "nll_loss": 2.152068614959717, "rewards/accuracies": 0.75, "rewards/chosen": -0.1960541009902954, "rewards/margins": 0.024463361129164696, "rewards/rejected": -0.22051745653152466, "step": 41 }, { "epoch": 0.06619385342789598, "grad_norm": 0.6998929381370544, "learning_rate": 1.2913385826771652e-06, "log_odds_chosen": 0.3089278042316437, "log_odds_ratio": -0.5570518374443054, "logits/chosen": -0.6978001594543457, "logits/rejected": -0.15154145658016205, "logps/chosen": -1.960097074508667, "logps/rejected": -2.2320406436920166, "loss": 2.2221, "nll_loss": 2.166372537612915, "rewards/accuracies": 0.875, "rewards/chosen": -0.19600971043109894, "rewards/margins": 0.02719433605670929, "rewards/rejected": -0.22320404648780823, "step": 42 }, { "epoch": 0.0677698975571316, "grad_norm": 0.6787785291671753, "learning_rate": 1.3228346456692914e-06, "log_odds_chosen": 0.16384947299957275, "log_odds_ratio": -0.6235592365264893, "logits/chosen": -0.5829145312309265, "logits/rejected": -0.14386498928070068, "logps/chosen": -1.978913426399231, "logps/rejected": -2.123264789581299, "loss": 2.2412, "nll_loss": 2.1788277626037598, "rewards/accuracies": 0.625, "rewards/chosen": -0.19789133965969086, "rewards/margins": 0.014435119926929474, "rewards/rejected": -0.21232648193836212, "step": 43 }, { "epoch": 0.06934594168636722, "grad_norm": 0.7161246538162231, "learning_rate": 1.3543307086614172e-06, "log_odds_chosen": 0.5076345801353455, "log_odds_ratio": -0.4807063937187195, "logits/chosen": -0.7080238461494446, "logits/rejected": 0.010522328317165375, "logps/chosen": -1.9329512119293213, "logps/rejected": -2.384503126144409, "loss": 2.1908, "nll_loss": 2.142735004425049, "rewards/accuracies": 1.0, "rewards/chosen": -0.19329513609409332, "rewards/margins": 0.045155204832553864, "rewards/rejected": -0.2384503185749054, "step": 44 }, { "epoch": 0.07092198581560284, "grad_norm": 0.6544040441513062, "learning_rate": 1.3858267716535433e-06, "log_odds_chosen": 0.5975862145423889, "log_odds_ratio": -0.4529675245285034, "logits/chosen": -0.5176110863685608, "logits/rejected": -0.06739248335361481, "logps/chosen": -1.9081940650939941, "logps/rejected": -2.438753843307495, "loss": 2.1874, "nll_loss": 2.1420602798461914, "rewards/accuracies": 0.875, "rewards/chosen": -0.1908193975687027, "rewards/margins": 0.05305597186088562, "rewards/rejected": -0.24387536942958832, "step": 45 }, { "epoch": 0.07249802994483845, "grad_norm": 0.7946521043777466, "learning_rate": 1.417322834645669e-06, "log_odds_chosen": 0.5836816430091858, "log_odds_ratio": -0.458423912525177, "logits/chosen": -0.7441750764846802, "logits/rejected": -0.32784658670425415, "logps/chosen": -1.9207260608673096, "logps/rejected": -2.439450740814209, "loss": 2.1926, "nll_loss": 2.14674973487854, "rewards/accuracies": 1.0, "rewards/chosen": -0.19207260012626648, "rewards/margins": 0.05187246948480606, "rewards/rejected": -0.24394509196281433, "step": 46 }, { "epoch": 0.07407407407407407, "grad_norm": 0.6708806753158569, "learning_rate": 1.4488188976377953e-06, "log_odds_chosen": 0.2689046561717987, "log_odds_ratio": -0.578476071357727, "logits/chosen": -0.7736371159553528, "logits/rejected": -0.19334951043128967, "logps/chosen": -1.9722037315368652, "logps/rejected": -2.2119808197021484, "loss": 2.2323, "nll_loss": 2.1744863986968994, "rewards/accuracies": 0.875, "rewards/chosen": -0.19722038507461548, "rewards/margins": 0.023977704346179962, "rewards/rejected": -0.22119809687137604, "step": 47 }, { "epoch": 0.07565011820330969, "grad_norm": 0.7817642688751221, "learning_rate": 1.4803149606299211e-06, "log_odds_chosen": 0.413425475358963, "log_odds_ratio": -0.5176951289176941, "logits/chosen": -0.5976810455322266, "logits/rejected": -0.1483435034751892, "logps/chosen": -2.023184061050415, "logps/rejected": -2.3940744400024414, "loss": 2.2969, "nll_loss": 2.2451674938201904, "rewards/accuracies": 1.0, "rewards/chosen": -0.20231840014457703, "rewards/margins": 0.03708904981613159, "rewards/rejected": -0.23940744996070862, "step": 48 }, { "epoch": 0.07722616233254531, "grad_norm": 0.631397008895874, "learning_rate": 1.5118110236220472e-06, "log_odds_chosen": 0.4730543792247772, "log_odds_ratio": -0.49180155992507935, "logits/chosen": -0.622490406036377, "logits/rejected": -0.015296130441129208, "logps/chosen": -1.9089866876602173, "logps/rejected": -2.3237972259521484, "loss": 2.1401, "nll_loss": 2.0909266471862793, "rewards/accuracies": 1.0, "rewards/chosen": -0.19089870154857635, "rewards/margins": 0.04148102179169655, "rewards/rejected": -0.2323797196149826, "step": 49 }, { "epoch": 0.07880220646178093, "grad_norm": 0.6548082232475281, "learning_rate": 1.543307086614173e-06, "log_odds_chosen": 0.2945685088634491, "log_odds_ratio": -0.5625656843185425, "logits/chosen": -0.638104259967804, "logits/rejected": -0.22048690915107727, "logps/chosen": -1.9748042821884155, "logps/rejected": -2.235903024673462, "loss": 2.2419, "nll_loss": 2.185655355453491, "rewards/accuracies": 1.0, "rewards/chosen": -0.19748042523860931, "rewards/margins": 0.026109864935278893, "rewards/rejected": -0.22359029948711395, "step": 50 }, { "epoch": 0.08037825059101655, "grad_norm": 0.6355348825454712, "learning_rate": 1.5748031496062992e-06, "log_odds_chosen": 0.6154743432998657, "log_odds_ratio": -0.4387228488922119, "logits/chosen": -0.5254096984863281, "logits/rejected": -0.08727583289146423, "logps/chosen": -1.8662408590316772, "logps/rejected": -2.4071710109710693, "loss": 2.1184, "nll_loss": 2.0745527744293213, "rewards/accuracies": 1.0, "rewards/chosen": -0.18662410974502563, "rewards/margins": 0.054093025624752045, "rewards/rejected": -0.24071712791919708, "step": 51 }, { "epoch": 0.08195429472025216, "grad_norm": 0.6476663947105408, "learning_rate": 1.6062992125984253e-06, "log_odds_chosen": 0.5350648760795593, "log_odds_ratio": -0.46677446365356445, "logits/chosen": -0.4835931360721588, "logits/rejected": -0.016967706382274628, "logps/chosen": -2.0277915000915527, "logps/rejected": -2.507091522216797, "loss": 2.2718, "nll_loss": 2.2251675128936768, "rewards/accuracies": 1.0, "rewards/chosen": -0.202779158949852, "rewards/margins": 0.04792997986078262, "rewards/rejected": -0.2507091462612152, "step": 52 }, { "epoch": 0.08353033884948778, "grad_norm": 0.718711793422699, "learning_rate": 1.637795275590551e-06, "log_odds_chosen": 0.3033553957939148, "log_odds_ratio": -0.5565884709358215, "logits/chosen": -0.5977045297622681, "logits/rejected": -0.13718965649604797, "logps/chosen": -1.9643840789794922, "logps/rejected": -2.230160713195801, "loss": 2.2233, "nll_loss": 2.1675939559936523, "rewards/accuracies": 1.0, "rewards/chosen": -0.19643841683864594, "rewards/margins": 0.026577647775411606, "rewards/rejected": -0.22301605343818665, "step": 53 }, { "epoch": 0.0851063829787234, "grad_norm": 0.6878601908683777, "learning_rate": 1.6692913385826771e-06, "log_odds_chosen": 0.30879032611846924, "log_odds_ratio": -0.5532358288764954, "logits/chosen": -0.6751337051391602, "logits/rejected": -0.14412644505500793, "logps/chosen": -1.903916835784912, "logps/rejected": -2.1728036403656006, "loss": 2.1706, "nll_loss": 2.1152570247650146, "rewards/accuracies": 1.0, "rewards/chosen": -0.1903916895389557, "rewards/margins": 0.026888679713010788, "rewards/rejected": -0.2172803431749344, "step": 54 }, { "epoch": 0.08668242710795902, "grad_norm": 0.7093151211738586, "learning_rate": 1.7007874015748031e-06, "log_odds_chosen": 0.42612695693969727, "log_odds_ratio": -0.5059034824371338, "logits/chosen": -0.6053857803344727, "logits/rejected": -0.21064752340316772, "logps/chosen": -1.9867243766784668, "logps/rejected": -2.36510968208313, "loss": 2.2557, "nll_loss": 2.205127716064453, "rewards/accuracies": 1.0, "rewards/chosen": -0.19867242872714996, "rewards/margins": 0.037838518619537354, "rewards/rejected": -0.23651094734668732, "step": 55 }, { "epoch": 0.08825847123719464, "grad_norm": 0.8093518614768982, "learning_rate": 1.7322834645669292e-06, "log_odds_chosen": 0.352740615606308, "log_odds_ratio": -0.5422862768173218, "logits/chosen": -0.5116597414016724, "logits/rejected": -0.0041604433208703995, "logps/chosen": -2.0882434844970703, "logps/rejected": -2.4050841331481934, "loss": 2.3456, "nll_loss": 2.291337728500366, "rewards/accuracies": 0.875, "rewards/chosen": -0.20882436633110046, "rewards/margins": 0.03168405592441559, "rewards/rejected": -0.24050842225551605, "step": 56 }, { "epoch": 0.08983451536643026, "grad_norm": 0.6063317656517029, "learning_rate": 1.763779527559055e-06, "log_odds_chosen": 0.37979212403297424, "log_odds_ratio": -0.5244497656822205, "logits/chosen": -0.5742859840393066, "logits/rejected": -0.12732906639575958, "logps/chosen": -1.9634265899658203, "logps/rejected": -2.2987239360809326, "loss": 2.2111, "nll_loss": 2.158637285232544, "rewards/accuracies": 1.0, "rewards/chosen": -0.19634266197681427, "rewards/margins": 0.03352972865104675, "rewards/rejected": -0.22987240552902222, "step": 57 }, { "epoch": 0.09141055949566587, "grad_norm": 0.7049276828765869, "learning_rate": 1.795275590551181e-06, "log_odds_chosen": 0.38489818572998047, "log_odds_ratio": -0.5272811651229858, "logits/chosen": -0.603106677532196, "logits/rejected": -0.32046759128570557, "logps/chosen": -1.931801199913025, "logps/rejected": -2.274812936782837, "loss": 2.1909, "nll_loss": 2.138176918029785, "rewards/accuracies": 1.0, "rewards/chosen": -0.193180114030838, "rewards/margins": 0.034301191568374634, "rewards/rejected": -0.22748132050037384, "step": 58 }, { "epoch": 0.09298660362490149, "grad_norm": 0.7724094390869141, "learning_rate": 1.826771653543307e-06, "log_odds_chosen": 0.6565301418304443, "log_odds_ratio": -0.4238013029098511, "logits/chosen": -0.5493816137313843, "logits/rejected": -0.04481405392289162, "logps/chosen": -2.03456711769104, "logps/rejected": -2.6273677349090576, "loss": 2.2666, "nll_loss": 2.2242276668548584, "rewards/accuracies": 1.0, "rewards/chosen": -0.20345671474933624, "rewards/margins": 0.05928007513284683, "rewards/rejected": -0.2627367675304413, "step": 59 }, { "epoch": 0.09456264775413711, "grad_norm": 0.7019063830375671, "learning_rate": 1.858267716535433e-06, "log_odds_chosen": 0.48238804936408997, "log_odds_ratio": -0.4877777695655823, "logits/chosen": -0.6007660627365112, "logits/rejected": -0.25237271189689636, "logps/chosen": -1.9639641046524048, "logps/rejected": -2.3943371772766113, "loss": 2.223, "nll_loss": 2.174217462539673, "rewards/accuracies": 1.0, "rewards/chosen": -0.19639641046524048, "rewards/margins": 0.04303732514381409, "rewards/rejected": -0.23943373560905457, "step": 60 }, { "epoch": 0.09613869188337273, "grad_norm": 0.7680268883705139, "learning_rate": 1.889763779527559e-06, "log_odds_chosen": 0.25518718361854553, "log_odds_ratio": -0.5833750367164612, "logits/chosen": -0.5432047247886658, "logits/rejected": -0.21941693127155304, "logps/chosen": -2.100229501724243, "logps/rejected": -2.330411195755005, "loss": 2.3455, "nll_loss": 2.2871248722076416, "rewards/accuracies": 0.75, "rewards/chosen": -0.2100229412317276, "rewards/margins": 0.023018185049295425, "rewards/rejected": -0.23304113745689392, "step": 61 }, { "epoch": 0.09771473601260836, "grad_norm": 0.8415902853012085, "learning_rate": 1.9212598425196847e-06, "log_odds_chosen": 0.3082142472267151, "log_odds_ratio": -0.5612497329711914, "logits/chosen": -0.6062051653862, "logits/rejected": -0.11909964680671692, "logps/chosen": -2.0706098079681396, "logps/rejected": -2.3465754985809326, "loss": 2.3287, "nll_loss": 2.272575616836548, "rewards/accuracies": 0.75, "rewards/chosen": -0.20706097781658173, "rewards/margins": 0.02759658358991146, "rewards/rejected": -0.23465755581855774, "step": 62 }, { "epoch": 0.09929078014184398, "grad_norm": 0.6543618440628052, "learning_rate": 1.952755905511811e-06, "log_odds_chosen": 0.5749139785766602, "log_odds_ratio": -0.45239800214767456, "logits/chosen": -0.7079123258590698, "logits/rejected": -0.2599683403968811, "logps/chosen": -1.8045680522918701, "logps/rejected": -2.300361394882202, "loss": 2.0753, "nll_loss": 2.030048370361328, "rewards/accuracies": 1.0, "rewards/chosen": -0.18045681715011597, "rewards/margins": 0.04957934468984604, "rewards/rejected": -0.23003613948822021, "step": 63 }, { "epoch": 0.1008668242710796, "grad_norm": 0.6648881435394287, "learning_rate": 1.9842519685039368e-06, "log_odds_chosen": 0.6830120086669922, "log_odds_ratio": -0.4217277765274048, "logits/chosen": -0.6593011617660522, "logits/rejected": -0.08247893303632736, "logps/chosen": -1.8293054103851318, "logps/rejected": -2.4285809993743896, "loss": 2.0953, "nll_loss": 2.0531651973724365, "rewards/accuracies": 1.0, "rewards/chosen": -0.18293055891990662, "rewards/margins": 0.059927552938461304, "rewards/rejected": -0.24285811185836792, "step": 64 }, { "epoch": 0.1024428684003152, "grad_norm": 0.6505405902862549, "learning_rate": 2.015748031496063e-06, "log_odds_chosen": 0.6114057898521423, "log_odds_ratio": -0.4410432279109955, "logits/chosen": -0.6462138295173645, "logits/rejected": -0.3533778786659241, "logps/chosen": -1.9271063804626465, "logps/rejected": -2.47088360786438, "loss": 2.1845, "nll_loss": 2.1404013633728027, "rewards/accuracies": 1.0, "rewards/chosen": -0.19271063804626465, "rewards/margins": 0.0543777197599411, "rewards/rejected": -0.24708837270736694, "step": 65 }, { "epoch": 0.10401891252955082, "grad_norm": 0.6485214829444885, "learning_rate": 2.047244094488189e-06, "log_odds_chosen": 0.452458918094635, "log_odds_ratio": -0.49970927834510803, "logits/chosen": -0.6230807900428772, "logits/rejected": -0.13434045016765594, "logps/chosen": -2.0179097652435303, "logps/rejected": -2.419290781021118, "loss": 2.2838, "nll_loss": 2.233837842941284, "rewards/accuracies": 0.875, "rewards/chosen": -0.20179098844528198, "rewards/margins": 0.04013810679316521, "rewards/rejected": -0.2419290840625763, "step": 66 }, { "epoch": 0.10559495665878645, "grad_norm": 0.6492588520050049, "learning_rate": 2.0787401574803147e-06, "log_odds_chosen": 0.5289919972419739, "log_odds_ratio": -0.4675235152244568, "logits/chosen": -0.534376859664917, "logits/rejected": -0.03799459710717201, "logps/chosen": -1.955706238746643, "logps/rejected": -2.4241867065429688, "loss": 2.1946, "nll_loss": 2.1478805541992188, "rewards/accuracies": 1.0, "rewards/chosen": -0.19557063281536102, "rewards/margins": 0.04684804379940033, "rewards/rejected": -0.24241869151592255, "step": 67 }, { "epoch": 0.10717100078802207, "grad_norm": 0.774642288684845, "learning_rate": 2.1102362204724405e-06, "log_odds_chosen": 0.41134878993034363, "log_odds_ratio": -0.5170325040817261, "logits/chosen": -0.6746619939804077, "logits/rejected": -0.185140922665596, "logps/chosen": -1.947751760482788, "logps/rejected": -2.311985969543457, "loss": 2.2474, "nll_loss": 2.1957201957702637, "rewards/accuracies": 0.875, "rewards/chosen": -0.19477517902851105, "rewards/margins": 0.036423418670892715, "rewards/rejected": -0.23119859397411346, "step": 68 }, { "epoch": 0.10874704491725769, "grad_norm": 0.8252844214439392, "learning_rate": 2.141732283464567e-06, "log_odds_chosen": 0.22346967458724976, "log_odds_ratio": -0.5949736833572388, "logits/chosen": -0.41877222061157227, "logits/rejected": -0.11722514033317566, "logps/chosen": -2.0319464206695557, "logps/rejected": -2.2292232513427734, "loss": 2.3097, "nll_loss": 2.250192403793335, "rewards/accuracies": 0.875, "rewards/chosen": -0.20319463312625885, "rewards/margins": 0.019727692008018494, "rewards/rejected": -0.22292232513427734, "step": 69 }, { "epoch": 0.11032308904649331, "grad_norm": 0.6770060658454895, "learning_rate": 2.173228346456693e-06, "log_odds_chosen": 0.4207773804664612, "log_odds_ratio": -0.5108780264854431, "logits/chosen": -0.5316891670227051, "logits/rejected": -0.10831936448812485, "logps/chosen": -1.9700641632080078, "logps/rejected": -2.342879056930542, "loss": 2.2395, "nll_loss": 2.188405752182007, "rewards/accuracies": 1.0, "rewards/chosen": -0.19700641930103302, "rewards/margins": 0.03728148713707924, "rewards/rejected": -0.23428791761398315, "step": 70 }, { "epoch": 0.11189913317572892, "grad_norm": 0.8300355672836304, "learning_rate": 2.204724409448819e-06, "log_odds_chosen": 0.5063481330871582, "log_odds_ratio": -0.48265108466148376, "logits/chosen": -0.7789384126663208, "logits/rejected": -0.24630165100097656, "logps/chosen": -1.9212646484375, "logps/rejected": -2.370161771774292, "loss": 2.1808, "nll_loss": 2.1325840950012207, "rewards/accuracies": 1.0, "rewards/chosen": -0.19212648272514343, "rewards/margins": 0.04488971084356308, "rewards/rejected": -0.23701618611812592, "step": 71 }, { "epoch": 0.11347517730496454, "grad_norm": 0.6843920946121216, "learning_rate": 2.2362204724409446e-06, "log_odds_chosen": 0.32779812812805176, "log_odds_ratio": -0.5560404062271118, "logits/chosen": -0.5591788291931152, "logits/rejected": -0.06292789429426193, "logps/chosen": -1.936835527420044, "logps/rejected": -2.2265937328338623, "loss": 2.1968, "nll_loss": 2.1412172317504883, "rewards/accuracies": 0.875, "rewards/chosen": -0.19368356466293335, "rewards/margins": 0.02897578477859497, "rewards/rejected": -0.22265934944152832, "step": 72 }, { "epoch": 0.11505122143420016, "grad_norm": 0.6691780090332031, "learning_rate": 2.267716535433071e-06, "log_odds_chosen": 0.3424806594848633, "log_odds_ratio": -0.5391549468040466, "logits/chosen": -0.5084943175315857, "logits/rejected": -0.18799816071987152, "logps/chosen": -1.982399821281433, "logps/rejected": -2.2857072353363037, "loss": 2.2447, "nll_loss": 2.190774917602539, "rewards/accuracies": 1.0, "rewards/chosen": -0.1982399970293045, "rewards/margins": 0.0303307194262743, "rewards/rejected": -0.22857069969177246, "step": 73 }, { "epoch": 0.11662726556343578, "grad_norm": 0.7013347744941711, "learning_rate": 2.2992125984251967e-06, "log_odds_chosen": 0.42650213837623596, "log_odds_ratio": -0.5057096481323242, "logits/chosen": -0.6094076037406921, "logits/rejected": -0.11771736294031143, "logps/chosen": -1.8428946733474731, "logps/rejected": -2.2101123332977295, "loss": 2.1276, "nll_loss": 2.0770435333251953, "rewards/accuracies": 1.0, "rewards/chosen": -0.18428948521614075, "rewards/margins": 0.03672178089618683, "rewards/rejected": -0.22101125121116638, "step": 74 }, { "epoch": 0.1182033096926714, "grad_norm": 0.6637840867042542, "learning_rate": 2.3307086614173225e-06, "log_odds_chosen": 0.5096178650856018, "log_odds_ratio": -0.476134717464447, "logits/chosen": -0.5619401335716248, "logits/rejected": -0.2677369713783264, "logps/chosen": -1.8805265426635742, "logps/rejected": -2.32961106300354, "loss": 2.1385, "nll_loss": 2.0909037590026855, "rewards/accuracies": 1.0, "rewards/chosen": -0.18805265426635742, "rewards/margins": 0.044908471405506134, "rewards/rejected": -0.23296113312244415, "step": 75 }, { "epoch": 0.11977935382190702, "grad_norm": 0.6581621170043945, "learning_rate": 2.3622047244094483e-06, "log_odds_chosen": 0.5206456184387207, "log_odds_ratio": -0.47490978240966797, "logits/chosen": -0.5504530072212219, "logits/rejected": -0.03281405568122864, "logps/chosen": -1.9286243915557861, "logps/rejected": -2.3910675048828125, "loss": 2.1717, "nll_loss": 2.124224901199341, "rewards/accuracies": 1.0, "rewards/chosen": -0.19286245107650757, "rewards/margins": 0.04624428227543831, "rewards/rejected": -0.23910671472549438, "step": 76 }, { "epoch": 0.12135539795114263, "grad_norm": 0.7063953876495361, "learning_rate": 2.393700787401575e-06, "log_odds_chosen": 0.4808962941169739, "log_odds_ratio": -0.49262717366218567, "logits/chosen": -0.5677655339241028, "logits/rejected": -0.21073025465011597, "logps/chosen": -1.9511951208114624, "logps/rejected": -2.377624988555908, "loss": 2.2192, "nll_loss": 2.169985771179199, "rewards/accuracies": 1.0, "rewards/chosen": -0.19511950016021729, "rewards/margins": 0.042642995715141296, "rewards/rejected": -0.23776251077651978, "step": 77 }, { "epoch": 0.12293144208037825, "grad_norm": 0.5751843452453613, "learning_rate": 2.425196850393701e-06, "log_odds_chosen": 0.3792150020599365, "log_odds_ratio": -0.5299723744392395, "logits/chosen": -0.37227052450180054, "logits/rejected": -0.4450679123401642, "logps/chosen": -2.0182228088378906, "logps/rejected": -2.355861186981201, "loss": 2.2537, "nll_loss": 2.200679302215576, "rewards/accuracies": 0.875, "rewards/chosen": -0.20182228088378906, "rewards/margins": 0.03376384079456329, "rewards/rejected": -0.23558615148067474, "step": 78 }, { "epoch": 0.12450748620961387, "grad_norm": 0.6594578623771667, "learning_rate": 2.4566929133858266e-06, "log_odds_chosen": 0.518637478351593, "log_odds_ratio": -0.47430098056793213, "logits/chosen": -0.4779280126094818, "logits/rejected": -0.2910279631614685, "logps/chosen": -1.9429047107696533, "logps/rejected": -2.402376174926758, "loss": 2.1903, "nll_loss": 2.1428279876708984, "rewards/accuracies": 1.0, "rewards/chosen": -0.19429044425487518, "rewards/margins": 0.04594714939594269, "rewards/rejected": -0.24023759365081787, "step": 79 }, { "epoch": 0.12608353033884948, "grad_norm": 0.6245352625846863, "learning_rate": 2.488188976377953e-06, "log_odds_chosen": 0.5585002303123474, "log_odds_ratio": -0.45803701877593994, "logits/chosen": -0.5769734978675842, "logits/rejected": -0.25027596950531006, "logps/chosen": -1.8669335842132568, "logps/rejected": -2.356663227081299, "loss": 2.1157, "nll_loss": 2.06986403465271, "rewards/accuracies": 1.0, "rewards/chosen": -0.18669337034225464, "rewards/margins": 0.048972949385643005, "rewards/rejected": -0.23566631972789764, "step": 80 }, { "epoch": 0.1276595744680851, "grad_norm": 0.5566908717155457, "learning_rate": 2.5196850393700787e-06, "log_odds_chosen": 0.3883778750896454, "log_odds_ratio": -0.5254943370819092, "logits/chosen": -0.42693546414375305, "logits/rejected": -0.2633028030395508, "logps/chosen": -1.93135666847229, "logps/rejected": -2.2755541801452637, "loss": 2.17, "nll_loss": 2.1174182891845703, "rewards/accuracies": 1.0, "rewards/chosen": -0.19313567876815796, "rewards/margins": 0.034419745206832886, "rewards/rejected": -0.22755542397499084, "step": 81 }, { "epoch": 0.12923561859732072, "grad_norm": 0.5781261324882507, "learning_rate": 2.5511811023622045e-06, "log_odds_chosen": 0.524163544178009, "log_odds_ratio": -0.4806976020336151, "logits/chosen": -0.4674437940120697, "logits/rejected": -0.23945724964141846, "logps/chosen": -1.8828051090240479, "logps/rejected": -2.3516387939453125, "loss": 2.1336, "nll_loss": 2.085569381713867, "rewards/accuracies": 0.875, "rewards/chosen": -0.18828049302101135, "rewards/margins": 0.046883389353752136, "rewards/rejected": -0.23516389727592468, "step": 82 }, { "epoch": 0.13081166272655634, "grad_norm": 0.6088637709617615, "learning_rate": 2.5826771653543303e-06, "log_odds_chosen": 0.36324411630630493, "log_odds_ratio": -0.5395435690879822, "logits/chosen": -0.39306196570396423, "logits/rejected": -0.1700359582901001, "logps/chosen": -1.9157882928848267, "logps/rejected": -2.237780809402466, "loss": 2.1604, "nll_loss": 2.106419563293457, "rewards/accuracies": 1.0, "rewards/chosen": -0.19157883524894714, "rewards/margins": 0.032199256122112274, "rewards/rejected": -0.22377808392047882, "step": 83 }, { "epoch": 0.13238770685579196, "grad_norm": 0.6803274750709534, "learning_rate": 2.6141732283464566e-06, "log_odds_chosen": 0.5524423122406006, "log_odds_ratio": -0.46847474575042725, "logits/chosen": -0.5545064210891724, "logits/rejected": -0.24267421662807465, "logps/chosen": -1.8639458417892456, "logps/rejected": -2.3503522872924805, "loss": 2.1458, "nll_loss": 2.0989749431610107, "rewards/accuracies": 1.0, "rewards/chosen": -0.1863945871591568, "rewards/margins": 0.048640646040439606, "rewards/rejected": -0.235035240650177, "step": 84 }, { "epoch": 0.13396375098502758, "grad_norm": 0.5811371207237244, "learning_rate": 2.645669291338583e-06, "log_odds_chosen": 0.5210408568382263, "log_odds_ratio": -0.46715638041496277, "logits/chosen": -0.3822883367538452, "logits/rejected": -0.1786729395389557, "logps/chosen": -1.9316679239273071, "logps/rejected": -2.3919014930725098, "loss": 2.1698, "nll_loss": 2.1230902671813965, "rewards/accuracies": 1.0, "rewards/chosen": -0.1931667923927307, "rewards/margins": 0.04602333903312683, "rewards/rejected": -0.23919013142585754, "step": 85 }, { "epoch": 0.1355397951142632, "grad_norm": 0.6602110266685486, "learning_rate": 2.6771653543307086e-06, "log_odds_chosen": 0.48973286151885986, "log_odds_ratio": -0.48310309648513794, "logits/chosen": -0.5846769213676453, "logits/rejected": -0.20886988937854767, "logps/chosen": -1.9184874296188354, "logps/rejected": -2.349517822265625, "loss": 2.1703, "nll_loss": 2.1220102310180664, "rewards/accuracies": 1.0, "rewards/chosen": -0.1918487399816513, "rewards/margins": 0.043103061616420746, "rewards/rejected": -0.23495177924633026, "step": 86 }, { "epoch": 0.13711583924349882, "grad_norm": 0.5744991302490234, "learning_rate": 2.7086614173228344e-06, "log_odds_chosen": 0.3423335552215576, "log_odds_ratio": -0.5400257110595703, "logits/chosen": -0.37106236815452576, "logits/rejected": -0.360477089881897, "logps/chosen": -1.922955870628357, "logps/rejected": -2.2213680744171143, "loss": 2.1639, "nll_loss": 2.109863042831421, "rewards/accuracies": 1.0, "rewards/chosen": -0.19229556620121002, "rewards/margins": 0.02984124794602394, "rewards/rejected": -0.22213682532310486, "step": 87 }, { "epoch": 0.13869188337273444, "grad_norm": 0.6963973045349121, "learning_rate": 2.7401574803149607e-06, "log_odds_chosen": 0.5418673753738403, "log_odds_ratio": -0.475824773311615, "logits/chosen": -0.6239266991615295, "logits/rejected": -0.2058069109916687, "logps/chosen": -1.848305583000183, "logps/rejected": -2.326037645339966, "loss": 2.1293, "nll_loss": 2.0817408561706543, "rewards/accuracies": 0.875, "rewards/chosen": -0.18483057618141174, "rewards/margins": 0.04777318611741066, "rewards/rejected": -0.2326037585735321, "step": 88 }, { "epoch": 0.14026792750197006, "grad_norm": 0.6552391648292542, "learning_rate": 2.7716535433070865e-06, "log_odds_chosen": 0.500076174736023, "log_odds_ratio": -0.4754161834716797, "logits/chosen": -0.44256362318992615, "logits/rejected": -0.20719635486602783, "logps/chosen": -1.9562758207321167, "logps/rejected": -2.3987882137298584, "loss": 2.2135, "nll_loss": 2.165970802307129, "rewards/accuracies": 1.0, "rewards/chosen": -0.1956275850534439, "rewards/margins": 0.04425125569105148, "rewards/rejected": -0.2398788183927536, "step": 89 }, { "epoch": 0.14184397163120568, "grad_norm": 0.6141228079795837, "learning_rate": 2.8031496062992123e-06, "log_odds_chosen": 0.428195595741272, "log_odds_ratio": -0.5067068934440613, "logits/chosen": -0.3649379312992096, "logits/rejected": -0.1602931022644043, "logps/chosen": -2.002263307571411, "logps/rejected": -2.3845441341400146, "loss": 2.2267, "nll_loss": 2.1760218143463135, "rewards/accuracies": 1.0, "rewards/chosen": -0.20022635161876678, "rewards/margins": 0.038228072226047516, "rewards/rejected": -0.2384544163942337, "step": 90 }, { "epoch": 0.1434200157604413, "grad_norm": 0.5524324774742126, "learning_rate": 2.834645669291338e-06, "log_odds_chosen": 0.4524117112159729, "log_odds_ratio": -0.49862101674079895, "logits/chosen": -0.36407744884490967, "logits/rejected": -0.2811731696128845, "logps/chosen": -1.8061617612838745, "logps/rejected": -2.198903799057007, "loss": 2.0471, "nll_loss": 1.997222900390625, "rewards/accuracies": 1.0, "rewards/chosen": -0.18061619997024536, "rewards/margins": 0.0392741933465004, "rewards/rejected": -0.21989038586616516, "step": 91 }, { "epoch": 0.1449960598896769, "grad_norm": 0.5921797752380371, "learning_rate": 2.8661417322834644e-06, "log_odds_chosen": 0.5024532675743103, "log_odds_ratio": -0.47816595435142517, "logits/chosen": -0.44370609521865845, "logits/rejected": -0.18724730610847473, "logps/chosen": -1.829555869102478, "logps/rejected": -2.2681233882904053, "loss": 2.0879, "nll_loss": 2.040083169937134, "rewards/accuracies": 1.0, "rewards/chosen": -0.18295560777187347, "rewards/margins": 0.04385674372315407, "rewards/rejected": -0.22681234776973724, "step": 92 }, { "epoch": 0.14657210401891252, "grad_norm": 0.628095805644989, "learning_rate": 2.8976377952755906e-06, "log_odds_chosen": 0.43953385949134827, "log_odds_ratio": -0.5222434401512146, "logits/chosen": -0.4026286005973816, "logits/rejected": -0.23625504970550537, "logps/chosen": -1.9340780973434448, "logps/rejected": -2.3274731636047363, "loss": 2.18, "nll_loss": 2.127760410308838, "rewards/accuracies": 0.75, "rewards/chosen": -0.19340780377388, "rewards/margins": 0.03933952748775482, "rewards/rejected": -0.23274733126163483, "step": 93 }, { "epoch": 0.14814814814814814, "grad_norm": 0.5658537149429321, "learning_rate": 2.9291338582677165e-06, "log_odds_chosen": 0.45617368817329407, "log_odds_ratio": -0.4992842674255371, "logits/chosen": -0.36675694584846497, "logits/rejected": -0.3160392642021179, "logps/chosen": -1.9161105155944824, "logps/rejected": -2.315382480621338, "loss": 2.1513, "nll_loss": 2.1013565063476562, "rewards/accuracies": 0.875, "rewards/chosen": -0.19161105155944824, "rewards/margins": 0.03992719575762749, "rewards/rejected": -0.23153826594352722, "step": 94 }, { "epoch": 0.14972419227738376, "grad_norm": 0.639642596244812, "learning_rate": 2.9606299212598423e-06, "log_odds_chosen": 0.5549046993255615, "log_odds_ratio": -0.463879257440567, "logits/chosen": -0.43291109800338745, "logits/rejected": -0.15716485679149628, "logps/chosen": -1.9124903678894043, "logps/rejected": -2.4041366577148438, "loss": 2.1648, "nll_loss": 2.1183886528015137, "rewards/accuracies": 1.0, "rewards/chosen": -0.1912490427494049, "rewards/margins": 0.049164604395627975, "rewards/rejected": -0.240413635969162, "step": 95 }, { "epoch": 0.15130023640661938, "grad_norm": 0.569778323173523, "learning_rate": 2.9921259842519685e-06, "log_odds_chosen": 0.5220546722412109, "log_odds_ratio": -0.46969637274742126, "logits/chosen": -0.42379647493362427, "logits/rejected": -0.3440685570240021, "logps/chosen": -1.9186266660690308, "logps/rejected": -2.3788628578186035, "loss": 2.141, "nll_loss": 2.0940771102905273, "rewards/accuracies": 1.0, "rewards/chosen": -0.19186267256736755, "rewards/margins": 0.04602360725402832, "rewards/rejected": -0.23788626492023468, "step": 96 }, { "epoch": 0.152876280535855, "grad_norm": 0.56700599193573, "learning_rate": 3.0236220472440943e-06, "log_odds_chosen": 0.350454717874527, "log_odds_ratio": -0.5345020294189453, "logits/chosen": -0.47989675402641296, "logits/rejected": -0.17767037451267242, "logps/chosen": -1.8971889019012451, "logps/rejected": -2.200305461883545, "loss": 2.1227, "nll_loss": 2.069289445877075, "rewards/accuracies": 1.0, "rewards/chosen": -0.18971890211105347, "rewards/margins": 0.03031165711581707, "rewards/rejected": -0.22003056108951569, "step": 97 }, { "epoch": 0.15445232466509062, "grad_norm": 0.5734896063804626, "learning_rate": 3.05511811023622e-06, "log_odds_chosen": 0.4143810570240021, "log_odds_ratio": -0.512945294380188, "logits/chosen": -0.31117746233940125, "logits/rejected": -0.24713104963302612, "logps/chosen": -2.0033698081970215, "logps/rejected": -2.3717055320739746, "loss": 2.2459, "nll_loss": 2.1946518421173096, "rewards/accuracies": 1.0, "rewards/chosen": -0.2003369927406311, "rewards/margins": 0.03683357313275337, "rewards/rejected": -0.23717054724693298, "step": 98 }, { "epoch": 0.15602836879432624, "grad_norm": 0.6178714036941528, "learning_rate": 3.086614173228346e-06, "log_odds_chosen": 0.24748294055461884, "log_odds_ratio": -0.5794985890388489, "logits/chosen": -0.5359123945236206, "logits/rejected": -0.3172164559364319, "logps/chosen": -1.911259651184082, "logps/rejected": -2.12546443939209, "loss": 2.1682, "nll_loss": 2.1102778911590576, "rewards/accuracies": 1.0, "rewards/chosen": -0.19112597405910492, "rewards/margins": 0.021420463919639587, "rewards/rejected": -0.2125464230775833, "step": 99 }, { "epoch": 0.15760441292356187, "grad_norm": 0.590636670589447, "learning_rate": 3.1181102362204722e-06, "log_odds_chosen": 0.36373111605644226, "log_odds_ratio": -0.533814549446106, "logits/chosen": -0.4497278034687042, "logits/rejected": -0.2689790427684784, "logps/chosen": -1.8333114385604858, "logps/rejected": -2.1512179374694824, "loss": 2.0894, "nll_loss": 2.036029815673828, "rewards/accuracies": 1.0, "rewards/chosen": -0.18333116173744202, "rewards/margins": 0.031790636479854584, "rewards/rejected": -0.2151218056678772, "step": 100 }, { "epoch": 0.15918045705279749, "grad_norm": 0.5497896075248718, "learning_rate": 3.1496062992125985e-06, "log_odds_chosen": 0.42619574069976807, "log_odds_ratio": -0.5083091855049133, "logits/chosen": -0.3605027198791504, "logits/rejected": -0.24103917181491852, "logps/chosen": -1.8285539150238037, "logps/rejected": -2.200834035873413, "loss": 2.0895, "nll_loss": 2.038670301437378, "rewards/accuracies": 1.0, "rewards/chosen": -0.18285538256168365, "rewards/margins": 0.03722800686955452, "rewards/rejected": -0.22008340060710907, "step": 101 }, { "epoch": 0.1607565011820331, "grad_norm": 0.5178012251853943, "learning_rate": 3.1811023622047243e-06, "log_odds_chosen": 0.43138545751571655, "log_odds_ratio": -0.5122407078742981, "logits/chosen": -0.3726266324520111, "logits/rejected": -0.37165510654449463, "logps/chosen": -1.7784022092819214, "logps/rejected": -2.1532044410705566, "loss": 2.0323, "nll_loss": 1.9810512065887451, "rewards/accuracies": 1.0, "rewards/chosen": -0.1778402179479599, "rewards/margins": 0.03748023882508278, "rewards/rejected": -0.21532045304775238, "step": 102 }, { "epoch": 0.16233254531126873, "grad_norm": 0.5599470734596252, "learning_rate": 3.2125984251968505e-06, "log_odds_chosen": 0.4818297326564789, "log_odds_ratio": -0.49377134442329407, "logits/chosen": -0.2876685857772827, "logits/rejected": -0.038474664092063904, "logps/chosen": -1.7715296745300293, "logps/rejected": -2.1862025260925293, "loss": 2.0313, "nll_loss": 1.9819062948226929, "rewards/accuracies": 0.875, "rewards/chosen": -0.17715296149253845, "rewards/margins": 0.04146728664636612, "rewards/rejected": -0.21862025558948517, "step": 103 }, { "epoch": 0.16390858944050432, "grad_norm": 0.5964917540550232, "learning_rate": 3.2440944881889763e-06, "log_odds_chosen": 0.4325031638145447, "log_odds_ratio": -0.5093069672584534, "logits/chosen": -0.23736125230789185, "logits/rejected": -0.06159596145153046, "logps/chosen": -1.9128376245498657, "logps/rejected": -2.294654369354248, "loss": 2.1641, "nll_loss": 2.113180637359619, "rewards/accuracies": 1.0, "rewards/chosen": -0.19128376245498657, "rewards/margins": 0.03818168863654137, "rewards/rejected": -0.22946545481681824, "step": 104 }, { "epoch": 0.16548463356973994, "grad_norm": 0.4927206039428711, "learning_rate": 3.275590551181102e-06, "log_odds_chosen": 0.5299558043479919, "log_odds_ratio": -0.4695979058742523, "logits/chosen": -0.377352774143219, "logits/rejected": -0.17640215158462524, "logps/chosen": -1.7906301021575928, "logps/rejected": -2.250798225402832, "loss": 2.0519, "nll_loss": 2.0049400329589844, "rewards/accuracies": 1.0, "rewards/chosen": -0.17906302213668823, "rewards/margins": 0.04601679742336273, "rewards/rejected": -0.22507980465888977, "step": 105 }, { "epoch": 0.16706067769897556, "grad_norm": 0.5032868385314941, "learning_rate": 3.307086614173228e-06, "log_odds_chosen": 0.4509121775627136, "log_odds_ratio": -0.5058165788650513, "logits/chosen": -0.35679227113723755, "logits/rejected": -0.1107356995344162, "logps/chosen": -1.8084826469421387, "logps/rejected": -2.190898895263672, "loss": 2.0546, "nll_loss": 2.003988742828369, "rewards/accuracies": 0.875, "rewards/chosen": -0.18084825575351715, "rewards/margins": 0.03824164718389511, "rewards/rejected": -0.21908989548683167, "step": 106 }, { "epoch": 0.16863672182821118, "grad_norm": 0.5300337672233582, "learning_rate": 3.3385826771653542e-06, "log_odds_chosen": 0.43685051798820496, "log_odds_ratio": -0.5093865394592285, "logits/chosen": -0.2651398479938507, "logits/rejected": -0.28625091910362244, "logps/chosen": -1.8772473335266113, "logps/rejected": -2.2612218856811523, "loss": 2.118, "nll_loss": 2.0670440196990967, "rewards/accuracies": 0.875, "rewards/chosen": -0.1877247393131256, "rewards/margins": 0.03839743137359619, "rewards/rejected": -0.226122185587883, "step": 107 }, { "epoch": 0.1702127659574468, "grad_norm": 0.49163365364074707, "learning_rate": 3.37007874015748e-06, "log_odds_chosen": 0.3183455765247345, "log_odds_ratio": -0.5563209056854248, "logits/chosen": -0.23882922530174255, "logits/rejected": -0.3078491985797882, "logps/chosen": -1.85860013961792, "logps/rejected": -2.1333258152008057, "loss": 2.102, "nll_loss": 2.0463998317718506, "rewards/accuracies": 0.875, "rewards/chosen": -0.1858600229024887, "rewards/margins": 0.02747257985174656, "rewards/rejected": -0.21333259344100952, "step": 108 }, { "epoch": 0.17178881008668243, "grad_norm": 0.5118012428283691, "learning_rate": 3.4015748031496063e-06, "log_odds_chosen": 0.4563853442668915, "log_odds_ratio": -0.49264228343963623, "logits/chosen": -0.16680516302585602, "logits/rejected": -0.2830784320831299, "logps/chosen": -1.9145252704620361, "logps/rejected": -2.3134219646453857, "loss": 2.1528, "nll_loss": 2.1035213470458984, "rewards/accuracies": 1.0, "rewards/chosen": -0.19145254790782928, "rewards/margins": 0.03988967090845108, "rewards/rejected": -0.23134221136569977, "step": 109 }, { "epoch": 0.17336485421591805, "grad_norm": 0.505682110786438, "learning_rate": 3.433070866141732e-06, "log_odds_chosen": 0.49830734729766846, "log_odds_ratio": -0.48064208030700684, "logits/chosen": -0.27441835403442383, "logits/rejected": -0.24226327240467072, "logps/chosen": -1.8005558252334595, "logps/rejected": -2.2267792224884033, "loss": 2.0182, "nll_loss": 1.9701097011566162, "rewards/accuracies": 1.0, "rewards/chosen": -0.18005558848381042, "rewards/margins": 0.04262235015630722, "rewards/rejected": -0.22267794609069824, "step": 110 }, { "epoch": 0.17494089834515367, "grad_norm": 0.5464332103729248, "learning_rate": 3.4645669291338583e-06, "log_odds_chosen": 0.5071738362312317, "log_odds_ratio": -0.4801686406135559, "logits/chosen": -0.23766781389713287, "logits/rejected": -0.30691930651664734, "logps/chosen": -1.902522087097168, "logps/rejected": -2.3450286388397217, "loss": 2.1483, "nll_loss": 2.10026478767395, "rewards/accuracies": 1.0, "rewards/chosen": -0.19025221467018127, "rewards/margins": 0.044250644743442535, "rewards/rejected": -0.2345028668642044, "step": 111 }, { "epoch": 0.1765169424743893, "grad_norm": 0.5223987102508545, "learning_rate": 3.496062992125984e-06, "log_odds_chosen": 0.3665584325790405, "log_odds_ratio": -0.5315839052200317, "logits/chosen": -0.20885254442691803, "logits/rejected": -0.12856575846672058, "logps/chosen": -1.9509872198104858, "logps/rejected": -2.271317481994629, "loss": 2.1848, "nll_loss": 2.131622314453125, "rewards/accuracies": 1.0, "rewards/chosen": -0.19509869813919067, "rewards/margins": 0.0320330373942852, "rewards/rejected": -0.22713173925876617, "step": 112 }, { "epoch": 0.1780929866036249, "grad_norm": 0.5140707492828369, "learning_rate": 3.52755905511811e-06, "log_odds_chosen": 0.38427114486694336, "log_odds_ratio": -0.526176929473877, "logits/chosen": -0.3687783181667328, "logits/rejected": -0.2154863178730011, "logps/chosen": -1.7950341701507568, "logps/rejected": -2.127782106399536, "loss": 2.0463, "nll_loss": 1.9936522245407104, "rewards/accuracies": 1.0, "rewards/chosen": -0.1795034110546112, "rewards/margins": 0.03327479213476181, "rewards/rejected": -0.21277819573879242, "step": 113 }, { "epoch": 0.17966903073286053, "grad_norm": 0.48041167855262756, "learning_rate": 3.559055118110236e-06, "log_odds_chosen": 0.2929834723472595, "log_odds_ratio": -0.5727202892303467, "logits/chosen": -0.11503149569034576, "logits/rejected": -0.22482499480247498, "logps/chosen": -1.921325922012329, "logps/rejected": -2.181408405303955, "loss": 2.167, "nll_loss": 2.1096832752227783, "rewards/accuracies": 0.875, "rewards/chosen": -0.1921325922012329, "rewards/margins": 0.026008253917098045, "rewards/rejected": -0.2181408405303955, "step": 114 }, { "epoch": 0.18124507486209615, "grad_norm": 0.483804315328598, "learning_rate": 3.590551181102362e-06, "log_odds_chosen": 0.46048107743263245, "log_odds_ratio": -0.4994431436061859, "logits/chosen": -0.21135865151882172, "logits/rejected": -0.4032437205314636, "logps/chosen": -1.8342138528823853, "logps/rejected": -2.2355825901031494, "loss": 2.0671, "nll_loss": 2.017176389694214, "rewards/accuracies": 0.875, "rewards/chosen": -0.18342137336730957, "rewards/margins": 0.040136873722076416, "rewards/rejected": -0.2235582321882248, "step": 115 }, { "epoch": 0.18282111899133174, "grad_norm": 0.4813881814479828, "learning_rate": 3.622047244094488e-06, "log_odds_chosen": 0.3711977005004883, "log_odds_ratio": -0.5412157773971558, "logits/chosen": -0.22255532443523407, "logits/rejected": -0.2175833135843277, "logps/chosen": -1.8901299238204956, "logps/rejected": -2.2157669067382812, "loss": 2.1143, "nll_loss": 2.060180187225342, "rewards/accuracies": 0.875, "rewards/chosen": -0.18901298940181732, "rewards/margins": 0.032563693821430206, "rewards/rejected": -0.22157667577266693, "step": 116 }, { "epoch": 0.18439716312056736, "grad_norm": 0.4870263934135437, "learning_rate": 3.653543307086614e-06, "log_odds_chosen": 0.526931881904602, "log_odds_ratio": -0.47742602229118347, "logits/chosen": -0.2706168293952942, "logits/rejected": -0.5030975341796875, "logps/chosen": -1.8207095861434937, "logps/rejected": -2.2792084217071533, "loss": 2.0652, "nll_loss": 2.0174200534820557, "rewards/accuracies": 0.875, "rewards/chosen": -0.18207095563411713, "rewards/margins": 0.045849889516830444, "rewards/rejected": -0.22792083024978638, "step": 117 }, { "epoch": 0.18597320724980299, "grad_norm": 0.47839877009391785, "learning_rate": 3.68503937007874e-06, "log_odds_chosen": 0.30926501750946045, "log_odds_ratio": -0.5605666637420654, "logits/chosen": -0.08142746239900589, "logits/rejected": -0.2830328047275543, "logps/chosen": -1.83827805519104, "logps/rejected": -2.1060383319854736, "loss": 2.0833, "nll_loss": 2.0272810459136963, "rewards/accuracies": 0.875, "rewards/chosen": -0.18382780253887177, "rewards/margins": 0.026776034384965897, "rewards/rejected": -0.21060383319854736, "step": 118 }, { "epoch": 0.1875492513790386, "grad_norm": 0.4611856937408447, "learning_rate": 3.716535433070866e-06, "log_odds_chosen": 0.30054840445518494, "log_odds_ratio": -0.5679630041122437, "logits/chosen": -0.13648174703121185, "logits/rejected": -0.2771826982498169, "logps/chosen": -1.8114385604858398, "logps/rejected": -2.073773145675659, "loss": 2.0738, "nll_loss": 2.016970157623291, "rewards/accuracies": 0.875, "rewards/chosen": -0.1811438798904419, "rewards/margins": 0.02623344026505947, "rewards/rejected": -0.20737731456756592, "step": 119 }, { "epoch": 0.18912529550827423, "grad_norm": 0.4521031677722931, "learning_rate": 3.748031496062992e-06, "log_odds_chosen": 0.6006217002868652, "log_odds_ratio": -0.44487234950065613, "logits/chosen": -0.18391111493110657, "logits/rejected": -0.23368988931179047, "logps/chosen": -1.713165283203125, "logps/rejected": -2.2281906604766846, "loss": 1.9444, "nll_loss": 1.8999552726745605, "rewards/accuracies": 1.0, "rewards/chosen": -0.17131653428077698, "rewards/margins": 0.05150254815816879, "rewards/rejected": -0.22281907498836517, "step": 120 }, { "epoch": 0.19070133963750985, "grad_norm": 0.4375765323638916, "learning_rate": 3.779527559055118e-06, "log_odds_chosen": 0.607134222984314, "log_odds_ratio": -0.4383259415626526, "logits/chosen": -0.2432423233985901, "logits/rejected": -0.42568087577819824, "logps/chosen": -1.8271077871322632, "logps/rejected": -2.3578097820281982, "loss": 2.0471, "nll_loss": 2.0032753944396973, "rewards/accuracies": 1.0, "rewards/chosen": -0.18271078169345856, "rewards/margins": 0.05307020992040634, "rewards/rejected": -0.2357809841632843, "step": 121 }, { "epoch": 0.19227738376674547, "grad_norm": 0.4764109253883362, "learning_rate": 3.8110236220472436e-06, "log_odds_chosen": 0.5355339050292969, "log_odds_ratio": -0.47745591402053833, "logits/chosen": -0.30387943983078003, "logits/rejected": -0.23297454416751862, "logps/chosen": -1.7462990283966064, "logps/rejected": -2.2124016284942627, "loss": 1.9695, "nll_loss": 1.9217469692230225, "rewards/accuracies": 0.875, "rewards/chosen": -0.17462992668151855, "rewards/margins": 0.04661024361848831, "rewards/rejected": -0.22124016284942627, "step": 122 }, { "epoch": 0.1938534278959811, "grad_norm": 0.4778152108192444, "learning_rate": 3.8425196850393695e-06, "log_odds_chosen": 0.39439237117767334, "log_odds_ratio": -0.5168735384941101, "logits/chosen": -0.06380043923854828, "logits/rejected": -0.3037663400173187, "logps/chosen": -1.8502298593521118, "logps/rejected": -2.1919362545013428, "loss": 2.0886, "nll_loss": 2.036864757537842, "rewards/accuracies": 1.0, "rewards/chosen": -0.1850229799747467, "rewards/margins": 0.03417064994573593, "rewards/rejected": -0.21919363737106323, "step": 123 }, { "epoch": 0.1954294720252167, "grad_norm": 0.42974886298179626, "learning_rate": 3.874015748031496e-06, "log_odds_chosen": 0.4634344279766083, "log_odds_ratio": -0.5001177787780762, "logits/chosen": -0.25694918632507324, "logits/rejected": -0.22004857659339905, "logps/chosen": -1.7307007312774658, "logps/rejected": -2.1188673973083496, "loss": 1.9529, "nll_loss": 1.9028429985046387, "rewards/accuracies": 1.0, "rewards/chosen": -0.17307007312774658, "rewards/margins": 0.03881664574146271, "rewards/rejected": -0.2118867039680481, "step": 124 }, { "epoch": 0.19700551615445233, "grad_norm": 0.4418664276599884, "learning_rate": 3.905511811023622e-06, "log_odds_chosen": 0.4357145130634308, "log_odds_ratio": -0.505855143070221, "logits/chosen": -0.15186086297035217, "logits/rejected": -0.4162241220474243, "logps/chosen": -1.7670007944107056, "logps/rejected": -2.140122890472412, "loss": 2.0107, "nll_loss": 1.9601045846939087, "rewards/accuracies": 1.0, "rewards/chosen": -0.17670010030269623, "rewards/margins": 0.03731219470500946, "rewards/rejected": -0.2140122801065445, "step": 125 }, { "epoch": 0.19858156028368795, "grad_norm": 0.47172582149505615, "learning_rate": 3.937007874015748e-06, "log_odds_chosen": 0.6300610899925232, "log_odds_ratio": -0.4334939122200012, "logits/chosen": -0.1414909064769745, "logits/rejected": -0.23909084498882294, "logps/chosen": -1.773337483406067, "logps/rejected": -2.3242154121398926, "loss": 1.998, "nll_loss": 1.9546327590942383, "rewards/accuracies": 1.0, "rewards/chosen": -0.1773337423801422, "rewards/margins": 0.05508778989315033, "rewards/rejected": -0.23242153227329254, "step": 126 }, { "epoch": 0.20015760441292357, "grad_norm": 0.4991217255592346, "learning_rate": 3.9685039370078736e-06, "log_odds_chosen": 0.4967314600944519, "log_odds_ratio": -0.4841119050979614, "logits/chosen": -0.11699728667736053, "logits/rejected": -0.20840948820114136, "logps/chosen": -1.8117221593856812, "logps/rejected": -2.245494842529297, "loss": 2.0711, "nll_loss": 2.0227043628692627, "rewards/accuracies": 1.0, "rewards/chosen": -0.1811722218990326, "rewards/margins": 0.04337725043296814, "rewards/rejected": -0.22454948723316193, "step": 127 }, { "epoch": 0.2017336485421592, "grad_norm": 0.45142993330955505, "learning_rate": 4e-06, "log_odds_chosen": 0.3186233639717102, "log_odds_ratio": -0.5520759224891663, "logits/chosen": -0.007777207065373659, "logits/rejected": -0.0923052504658699, "logps/chosen": -1.8868976831436157, "logps/rejected": -2.1636693477630615, "loss": 2.1041, "nll_loss": 2.0488626956939697, "rewards/accuracies": 0.75, "rewards/chosen": -0.18868978321552277, "rewards/margins": 0.02767716720700264, "rewards/rejected": -0.2163669317960739, "step": 128 }, { "epoch": 0.2033096926713948, "grad_norm": 0.4361562430858612, "learning_rate": 3.999992445477635e-06, "log_odds_chosen": 0.45227304100990295, "log_odds_ratio": -0.5005910396575928, "logits/chosen": -0.04890897497534752, "logits/rejected": -0.3455308675765991, "logps/chosen": -1.8800231218338013, "logps/rejected": -2.2768943309783936, "loss": 2.0833, "nll_loss": 2.0331974029541016, "rewards/accuracies": 0.875, "rewards/chosen": -0.1880023181438446, "rewards/margins": 0.03968711942434311, "rewards/rejected": -0.2276894450187683, "step": 129 }, { "epoch": 0.2048857368006304, "grad_norm": 0.43126383423805237, "learning_rate": 3.999969781967615e-06, "log_odds_chosen": 0.27507030963897705, "log_odds_ratio": -0.5684081315994263, "logits/chosen": -0.03611285984516144, "logits/rejected": -0.18150243163108826, "logps/chosen": -1.980247139930725, "logps/rejected": -2.222841501235962, "loss": 2.1813, "nll_loss": 2.1244537830352783, "rewards/accuracies": 1.0, "rewards/chosen": -0.198024719953537, "rewards/margins": 0.024259435012936592, "rewards/rejected": -0.22228413820266724, "step": 130 }, { "epoch": 0.20646178092986603, "grad_norm": 0.39808744192123413, "learning_rate": 3.99993200964115e-06, "log_odds_chosen": 0.37286853790283203, "log_odds_ratio": -0.5256955623626709, "logits/chosen": -0.13878293335437775, "logits/rejected": -0.38629403710365295, "logps/chosen": -1.8050813674926758, "logps/rejected": -2.1250405311584473, "loss": 2.0229, "nll_loss": 1.9703779220581055, "rewards/accuracies": 1.0, "rewards/chosen": -0.18050813674926758, "rewards/margins": 0.031995922327041626, "rewards/rejected": -0.2125040590763092, "step": 131 }, { "epoch": 0.20803782505910165, "grad_norm": 0.49582305550575256, "learning_rate": 3.99987912878359e-06, "log_odds_chosen": 0.1634901463985443, "log_odds_ratio": -0.6200548410415649, "logits/chosen": -0.09574344009160995, "logits/rejected": -0.01687694527208805, "logps/chosen": -1.7705621719360352, "logps/rejected": -1.9035704135894775, "loss": 2.0395, "nll_loss": 1.977489709854126, "rewards/accuracies": 0.75, "rewards/chosen": -0.1770562380552292, "rewards/margins": 0.013300813734531403, "rewards/rejected": -0.19035704433918, "step": 132 }, { "epoch": 0.20961386918833727, "grad_norm": 0.42744600772857666, "learning_rate": 3.999811139794429e-06, "log_odds_chosen": 0.3788083493709564, "log_odds_ratio": -0.5247887372970581, "logits/chosen": -0.06136152893304825, "logits/rejected": -0.26565802097320557, "logps/chosen": -1.8101221323013306, "logps/rejected": -2.135667324066162, "loss": 2.0189, "nll_loss": 1.9664689302444458, "rewards/accuracies": 1.0, "rewards/chosen": -0.18101221323013306, "rewards/margins": 0.032554514706134796, "rewards/rejected": -0.21356670558452606, "step": 133 }, { "epoch": 0.2111899133175729, "grad_norm": 0.4283032715320587, "learning_rate": 3.999728043187288e-06, "log_odds_chosen": 0.35587507486343384, "log_odds_ratio": -0.5397139191627502, "logits/chosen": -0.04053102061152458, "logits/rejected": -0.2791404724121094, "logps/chosen": -1.8541502952575684, "logps/rejected": -2.164419412612915, "loss": 2.0887, "nll_loss": 2.0347094535827637, "rewards/accuracies": 0.875, "rewards/chosen": -0.18541501462459564, "rewards/margins": 0.03102692775428295, "rewards/rejected": -0.21644194424152374, "step": 134 }, { "epoch": 0.2127659574468085, "grad_norm": 0.42146554589271545, "learning_rate": 3.999629839589922e-06, "log_odds_chosen": 0.22164756059646606, "log_odds_ratio": -0.5917123556137085, "logits/chosen": 0.012096976861357689, "logits/rejected": -0.4592039883136749, "logps/chosen": -1.9410529136657715, "logps/rejected": -2.1342885494232178, "loss": 2.1714, "nll_loss": 2.1121792793273926, "rewards/accuracies": 0.875, "rewards/chosen": -0.19410529732704163, "rewards/margins": 0.01932355761528015, "rewards/rejected": -0.21342885494232178, "step": 135 }, { "epoch": 0.21434200157604413, "grad_norm": 0.4338121712207794, "learning_rate": 3.999516529744215e-06, "log_odds_chosen": 0.3292469382286072, "log_odds_ratio": -0.5459690690040588, "logits/chosen": 0.07608616352081299, "logits/rejected": -0.40737682580947876, "logps/chosen": -1.8194847106933594, "logps/rejected": -2.10274600982666, "loss": 2.0403, "nll_loss": 1.9856888055801392, "rewards/accuracies": 1.0, "rewards/chosen": -0.1819484829902649, "rewards/margins": 0.02832612209022045, "rewards/rejected": -0.2102746069431305, "step": 136 }, { "epoch": 0.21591804570527975, "grad_norm": 0.3987630605697632, "learning_rate": 3.999388114506166e-06, "log_odds_chosen": 0.4010235667228699, "log_odds_ratio": -0.518801212310791, "logits/chosen": -0.046873223036527634, "logits/rejected": -0.4238424599170685, "logps/chosen": -1.7170600891113281, "logps/rejected": -2.0600481033325195, "loss": 1.931, "nll_loss": 1.8791390657424927, "rewards/accuracies": 1.0, "rewards/chosen": -0.17170599102973938, "rewards/margins": 0.034298814833164215, "rewards/rejected": -0.206004798412323, "step": 137 }, { "epoch": 0.21749408983451538, "grad_norm": 0.4006027579307556, "learning_rate": 3.999244594845892e-06, "log_odds_chosen": 0.25786924362182617, "log_odds_ratio": -0.5790534019470215, "logits/chosen": -0.023555610328912735, "logits/rejected": -0.35695680975914, "logps/chosen": -1.7636680603027344, "logps/rejected": -1.985347032546997, "loss": 1.9912, "nll_loss": 1.9332479238510132, "rewards/accuracies": 0.875, "rewards/chosen": -0.17636680603027344, "rewards/margins": 0.02216789685189724, "rewards/rejected": -0.19853469729423523, "step": 138 }, { "epoch": 0.219070133963751, "grad_norm": 0.4049820303916931, "learning_rate": 3.999085971847616e-06, "log_odds_chosen": 0.24562738835811615, "log_odds_ratio": -0.58580482006073, "logits/chosen": -0.012129198759794235, "logits/rejected": -0.512136697769165, "logps/chosen": -1.8218495845794678, "logps/rejected": -2.0348801612854004, "loss": 2.0389, "nll_loss": 1.980314016342163, "rewards/accuracies": 0.875, "rewards/chosen": -0.18218494951725006, "rewards/margins": 0.02130305953323841, "rewards/rejected": -0.20348800718784332, "step": 139 }, { "epoch": 0.22064617809298662, "grad_norm": 0.3915075361728668, "learning_rate": 3.998912246709658e-06, "log_odds_chosen": 0.3664124310016632, "log_odds_ratio": -0.5369927287101746, "logits/chosen": -0.08141148090362549, "logits/rejected": -0.4445778429508209, "logps/chosen": -1.7210732698440552, "logps/rejected": -2.033111572265625, "loss": 1.9395, "nll_loss": 1.885791301727295, "rewards/accuracies": 0.875, "rewards/chosen": -0.17210730910301208, "rewards/margins": 0.0312038566917181, "rewards/rejected": -0.20331117510795593, "step": 140 }, { "epoch": 0.2222222222222222, "grad_norm": 0.42792966961860657, "learning_rate": 3.9987234207444295e-06, "log_odds_chosen": 0.3271371126174927, "log_odds_ratio": -0.5472476482391357, "logits/chosen": -0.05070888251066208, "logits/rejected": -0.30135494470596313, "logps/chosen": -1.817905068397522, "logps/rejected": -2.099316358566284, "loss": 2.0393, "nll_loss": 1.984588384628296, "rewards/accuracies": 1.0, "rewards/chosen": -0.1817905157804489, "rewards/margins": 0.028141150251030922, "rewards/rejected": -0.2099316567182541, "step": 141 }, { "epoch": 0.22379826635145783, "grad_norm": 0.40370792150497437, "learning_rate": 3.998519495378419e-06, "log_odds_chosen": 0.34052586555480957, "log_odds_ratio": -0.5426381826400757, "logits/chosen": 0.0529387891292572, "logits/rejected": -0.22841407358646393, "logps/chosen": -1.896314024925232, "logps/rejected": -2.194129705429077, "loss": 2.0889, "nll_loss": 2.0345916748046875, "rewards/accuracies": 1.0, "rewards/chosen": -0.189631387591362, "rewards/margins": 0.029781583696603775, "rewards/rejected": -0.21941299736499786, "step": 142 }, { "epoch": 0.22537431048069345, "grad_norm": 0.36527448892593384, "learning_rate": 3.998300472152187e-06, "log_odds_chosen": 0.3244752287864685, "log_odds_ratio": -0.5529875755310059, "logits/chosen": -0.07975448668003082, "logits/rejected": -0.5476751923561096, "logps/chosen": -1.6814563274383545, "logps/rejected": -1.9556654691696167, "loss": 1.9047, "nll_loss": 1.8494292497634888, "rewards/accuracies": 0.875, "rewards/chosen": -0.16814564168453217, "rewards/margins": 0.02742091566324234, "rewards/rejected": -0.1955665647983551, "step": 143 }, { "epoch": 0.22695035460992907, "grad_norm": 0.39946654438972473, "learning_rate": 3.998066352720347e-06, "log_odds_chosen": 0.4114699959754944, "log_odds_ratio": -0.5125808715820312, "logits/chosen": 0.10884220898151398, "logits/rejected": -0.2769298553466797, "logps/chosen": -1.807681679725647, "logps/rejected": -2.163328170776367, "loss": 2.0156, "nll_loss": 1.9643856287002563, "rewards/accuracies": 1.0, "rewards/chosen": -0.1807681769132614, "rewards/margins": 0.035564638674259186, "rewards/rejected": -0.2163328230381012, "step": 144 }, { "epoch": 0.2285263987391647, "grad_norm": 0.3883441388607025, "learning_rate": 3.997817138851562e-06, "log_odds_chosen": 0.3042789101600647, "log_odds_ratio": -0.5641068816184998, "logits/chosen": 0.047148481011390686, "logits/rejected": -0.2664666771888733, "logps/chosen": -1.894091248512268, "logps/rejected": -2.1613922119140625, "loss": 2.1037, "nll_loss": 2.0473220348358154, "rewards/accuracies": 0.875, "rewards/chosen": -0.18940910696983337, "rewards/margins": 0.0267301294952631, "rewards/rejected": -0.21613925695419312, "step": 145 }, { "epoch": 0.23010244286840031, "grad_norm": 0.3664163053035736, "learning_rate": 3.997552832428522e-06, "log_odds_chosen": 0.40218284726142883, "log_odds_ratio": -0.5150843262672424, "logits/chosen": -0.06997128576040268, "logits/rejected": -0.30114221572875977, "logps/chosen": -1.683500051498413, "logps/rejected": -2.0218000411987305, "loss": 1.9221, "nll_loss": 1.8706339597702026, "rewards/accuracies": 1.0, "rewards/chosen": -0.1683500111103058, "rewards/margins": 0.03382997214794159, "rewards/rejected": -0.20217998325824738, "step": 146 }, { "epoch": 0.23167848699763594, "grad_norm": 0.35738253593444824, "learning_rate": 3.9972734354479366e-06, "log_odds_chosen": 0.33341336250305176, "log_odds_ratio": -0.5498776435852051, "logits/chosen": 0.05102141201496124, "logits/rejected": -0.588348925113678, "logps/chosen": -1.6781635284423828, "logps/rejected": -1.957244634628296, "loss": 1.9026, "nll_loss": 1.84757661819458, "rewards/accuracies": 0.875, "rewards/chosen": -0.1678163707256317, "rewards/margins": 0.027908099815249443, "rewards/rejected": -0.1957244724035263, "step": 147 }, { "epoch": 0.23325453112687156, "grad_norm": 0.40279892086982727, "learning_rate": 3.996978950020517e-06, "log_odds_chosen": 0.21276284754276276, "log_odds_ratio": -0.6048003435134888, "logits/chosen": 0.1419999599456787, "logits/rejected": -0.28115570545196533, "logps/chosen": -1.8639137744903564, "logps/rejected": -2.0437545776367188, "loss": 2.0862, "nll_loss": 2.0256824493408203, "rewards/accuracies": 0.75, "rewards/chosen": -0.18639138340950012, "rewards/margins": 0.01798408292233944, "rewards/rejected": -0.2043754756450653, "step": 148 }, { "epoch": 0.23483057525610718, "grad_norm": 0.35266202688217163, "learning_rate": 3.996669378370959e-06, "log_odds_chosen": 0.5316495299339294, "log_odds_ratio": -0.46872708201408386, "logits/chosen": 0.010169305838644505, "logits/rejected": -0.3486666679382324, "logps/chosen": -1.6488699913024902, "logps/rejected": -2.1011159420013428, "loss": 1.8663, "nll_loss": 1.8194066286087036, "rewards/accuracies": 1.0, "rewards/chosen": -0.16488701105117798, "rewards/margins": 0.045224592089653015, "rewards/rejected": -0.2101115882396698, "step": 149 }, { "epoch": 0.2364066193853428, "grad_norm": 0.39699894189834595, "learning_rate": 3.996344722837929e-06, "log_odds_chosen": 0.49894896149635315, "log_odds_ratio": -0.4788591265678406, "logits/chosen": -0.0008517892565578222, "logits/rejected": -0.31570884585380554, "logps/chosen": -1.8067049980163574, "logps/rejected": -2.239896535873413, "loss": 2.0051, "nll_loss": 1.95723295211792, "rewards/accuracies": 1.0, "rewards/chosen": -0.18067049980163574, "rewards/margins": 0.043319158256053925, "rewards/rejected": -0.22398965060710907, "step": 150 }, { "epoch": 0.23798266351457842, "grad_norm": 0.33054688572883606, "learning_rate": 3.996004985874043e-06, "log_odds_chosen": 0.2428944855928421, "log_odds_ratio": -0.5818551778793335, "logits/chosen": 0.11997652053833008, "logits/rejected": -0.2498077005147934, "logps/chosen": -1.698814868927002, "logps/rejected": -1.9012809991836548, "loss": 1.9266, "nll_loss": 1.8684155941009521, "rewards/accuracies": 1.0, "rewards/chosen": -0.16988149285316467, "rewards/margins": 0.020246637985110283, "rewards/rejected": -0.19012810289859772, "step": 151 }, { "epoch": 0.23955870764381404, "grad_norm": 0.3768727481365204, "learning_rate": 3.995650170045855e-06, "log_odds_chosen": 0.28013092279434204, "log_odds_ratio": -0.5691770315170288, "logits/chosen": 0.045284271240234375, "logits/rejected": -0.3556942641735077, "logps/chosen": -1.7669368982315063, "logps/rejected": -2.0047860145568848, "loss": 1.9804, "nll_loss": 1.9234654903411865, "rewards/accuracies": 0.875, "rewards/chosen": -0.17669367790222168, "rewards/margins": 0.023784920573234558, "rewards/rejected": -0.20047861337661743, "step": 152 }, { "epoch": 0.24113475177304963, "grad_norm": 0.3691398501396179, "learning_rate": 3.995280278033825e-06, "log_odds_chosen": 0.20656853914260864, "log_odds_ratio": -0.6042188405990601, "logits/chosen": 0.11217048764228821, "logits/rejected": -0.24175406992435455, "logps/chosen": -1.8632197380065918, "logps/rejected": -2.039140462875366, "loss": 2.0785, "nll_loss": 2.0180578231811523, "rewards/accuracies": 0.75, "rewards/chosen": -0.18632197380065918, "rewards/margins": 0.017592042684555054, "rewards/rejected": -0.20391403138637543, "step": 153 }, { "epoch": 0.24271079590228525, "grad_norm": 0.35842734575271606, "learning_rate": 3.994895312632314e-06, "log_odds_chosen": 0.4981134533882141, "log_odds_ratio": -0.4802461564540863, "logits/chosen": 0.16929033398628235, "logits/rejected": -0.7549564838409424, "logps/chosen": -1.7397829294204712, "logps/rejected": -2.1666393280029297, "loss": 1.9514, "nll_loss": 1.903334140777588, "rewards/accuracies": 1.0, "rewards/chosen": -0.1739783138036728, "rewards/margins": 0.04268564283847809, "rewards/rejected": -0.21666395664215088, "step": 154 }, { "epoch": 0.24428684003152087, "grad_norm": 0.33668240904808044, "learning_rate": 3.994495276749549e-06, "log_odds_chosen": 0.47896263003349304, "log_odds_ratio": -0.48850810527801514, "logits/chosen": 0.07264027744531631, "logits/rejected": -0.33893775939941406, "logps/chosen": -1.7592413425445557, "logps/rejected": -2.172550916671753, "loss": 1.9568, "nll_loss": 1.9079933166503906, "rewards/accuracies": 1.0, "rewards/chosen": -0.175924152135849, "rewards/margins": 0.04133095592260361, "rewards/rejected": -0.217255100607872, "step": 155 }, { "epoch": 0.2458628841607565, "grad_norm": 0.34656822681427, "learning_rate": 3.994080173407612e-06, "log_odds_chosen": 0.23366691172122955, "log_odds_ratio": -0.5879085063934326, "logits/chosen": 0.1407233029603958, "logits/rejected": -0.3865904211997986, "logps/chosen": -1.7929620742797852, "logps/rejected": -1.9902442693710327, "loss": 1.9994, "nll_loss": 1.9406037330627441, "rewards/accuracies": 0.75, "rewards/chosen": -0.17929621040821075, "rewards/margins": 0.01972820609807968, "rewards/rejected": -0.19902442395687103, "step": 156 }, { "epoch": 0.24743892828999212, "grad_norm": 0.34390729665756226, "learning_rate": 3.993650005742409e-06, "log_odds_chosen": 0.3310392498970032, "log_odds_ratio": -0.546916127204895, "logits/chosen": 0.06716110557317734, "logits/rejected": -0.23844899237155914, "logps/chosen": -1.7643970251083374, "logps/rejected": -2.046977996826172, "loss": 1.9724, "nll_loss": 1.9176855087280273, "rewards/accuracies": 1.0, "rewards/chosen": -0.17643971741199493, "rewards/margins": 0.028258096426725388, "rewards/rejected": -0.20469780266284943, "step": 157 }, { "epoch": 0.24901497241922774, "grad_norm": 0.3150049149990082, "learning_rate": 3.993204777003652e-06, "log_odds_chosen": 0.337340384721756, "log_odds_ratio": -0.5445095896720886, "logits/chosen": 0.07445216178894043, "logits/rejected": -0.441582053899765, "logps/chosen": -1.6941965818405151, "logps/rejected": -1.980376124382019, "loss": 1.898, "nll_loss": 1.843545913696289, "rewards/accuracies": 1.0, "rewards/chosen": -0.16941964626312256, "rewards/margins": 0.02861795574426651, "rewards/rejected": -0.19803762435913086, "step": 158 }, { "epoch": 0.25059101654846333, "grad_norm": 0.3337211012840271, "learning_rate": 3.992744490554832e-06, "log_odds_chosen": 0.33180904388427734, "log_odds_ratio": -0.5461182594299316, "logits/chosen": 0.21238459646701813, "logits/rejected": -0.269389808177948, "logps/chosen": -1.7364294528961182, "logps/rejected": -2.0181472301483154, "loss": 1.9565, "nll_loss": 1.9018971920013428, "rewards/accuracies": 1.0, "rewards/chosen": -0.17364296317100525, "rewards/margins": 0.028171781450510025, "rewards/rejected": -0.20181472599506378, "step": 159 }, { "epoch": 0.25216706067769895, "grad_norm": 0.31171178817749023, "learning_rate": 3.992269149873192e-06, "log_odds_chosen": 0.32716354727745056, "log_odds_ratio": -0.547857403755188, "logits/chosen": 0.028445789590477943, "logits/rejected": -0.35958513617515564, "logps/chosen": -1.7089133262634277, "logps/rejected": -1.9816714525222778, "loss": 1.9143, "nll_loss": 1.8594882488250732, "rewards/accuracies": 1.0, "rewards/chosen": -0.17089134454727173, "rewards/margins": 0.02727578952908516, "rewards/rejected": -0.19816714525222778, "step": 160 }, { "epoch": 0.25374310480693457, "grad_norm": 0.30663299560546875, "learning_rate": 3.991778758549705e-06, "log_odds_chosen": 0.195327490568161, "log_odds_ratio": -0.6073517799377441, "logits/chosen": 0.2354024052619934, "logits/rejected": -0.21214549243450165, "logps/chosen": -1.8341491222381592, "logps/rejected": -2.0042359828948975, "loss": 2.023, "nll_loss": 1.9623081684112549, "rewards/accuracies": 0.75, "rewards/chosen": -0.18341490626335144, "rewards/margins": 0.017008693888783455, "rewards/rejected": -0.20042361319065094, "step": 161 }, { "epoch": 0.2553191489361702, "grad_norm": 0.31510376930236816, "learning_rate": 3.9912733202890415e-06, "log_odds_chosen": 0.32201409339904785, "log_odds_ratio": -0.548933744430542, "logits/chosen": 0.10407942533493042, "logits/rejected": -0.4005528688430786, "logps/chosen": -1.7570445537567139, "logps/rejected": -2.0296471118927, "loss": 1.96, "nll_loss": 1.905151128768921, "rewards/accuracies": 1.0, "rewards/chosen": -0.1757044792175293, "rewards/margins": 0.027260230854153633, "rewards/rejected": -0.20296470820903778, "step": 162 }, { "epoch": 0.2568951930654058, "grad_norm": 0.3028022050857544, "learning_rate": 3.990752838909548e-06, "log_odds_chosen": 0.1706855297088623, "log_odds_ratio": -0.6158415079116821, "logits/chosen": 0.1903667449951172, "logits/rejected": -0.2845366299152374, "logps/chosen": -1.8539068698883057, "logps/rejected": -1.9989856481552124, "loss": 2.0475, "nll_loss": 1.9859018325805664, "rewards/accuracies": 0.75, "rewards/chosen": -0.18539069592952728, "rewards/margins": 0.014507867395877838, "rewards/rejected": -0.19989855587482452, "step": 163 }, { "epoch": 0.25847123719464143, "grad_norm": 0.3319096267223358, "learning_rate": 3.990217318343213e-06, "log_odds_chosen": 0.5035750269889832, "log_odds_ratio": -0.4748569130897522, "logits/chosen": 0.15394604206085205, "logits/rejected": -0.5184996128082275, "logps/chosen": -1.921760082244873, "logps/rejected": -2.365640163421631, "loss": 2.1041, "nll_loss": 2.0565714836120605, "rewards/accuracies": 1.0, "rewards/chosen": -0.19217601418495178, "rewards/margins": 0.044387996196746826, "rewards/rejected": -0.2365640103816986, "step": 164 }, { "epoch": 0.26004728132387706, "grad_norm": 0.3104284405708313, "learning_rate": 3.989666762635637e-06, "log_odds_chosen": 0.2950212061405182, "log_odds_ratio": -0.5599942803382874, "logits/chosen": 0.1402396708726883, "logits/rejected": -0.27953556180000305, "logps/chosen": -1.767073631286621, "logps/rejected": -2.017169713973999, "loss": 1.9826, "nll_loss": 1.926632285118103, "rewards/accuracies": 1.0, "rewards/chosen": -0.17670737206935883, "rewards/margins": 0.025009607896208763, "rewards/rejected": -0.20171695947647095, "step": 165 }, { "epoch": 0.2616233254531127, "grad_norm": 0.30276185274124146, "learning_rate": 3.9891011759460056e-06, "log_odds_chosen": 0.3634149432182312, "log_odds_ratio": -0.5344185829162598, "logits/chosen": 0.1359172910451889, "logits/rejected": -0.3355481028556824, "logps/chosen": -1.75217866897583, "logps/rejected": -2.055960178375244, "loss": 1.958, "nll_loss": 1.9045321941375732, "rewards/accuracies": 1.0, "rewards/chosen": -0.1752178817987442, "rewards/margins": 0.03037814423441887, "rewards/rejected": -0.20559601485729218, "step": 166 }, { "epoch": 0.2631993695823483, "grad_norm": 0.2769591212272644, "learning_rate": 3.988520562547057e-06, "log_odds_chosen": 0.3933558762073517, "log_odds_ratio": -0.5260743498802185, "logits/chosen": 0.15155696868896484, "logits/rejected": -0.42779073119163513, "logps/chosen": -1.7146203517913818, "logps/rejected": -2.04492449760437, "loss": 1.9032, "nll_loss": 1.8506273031234741, "rewards/accuracies": 1.0, "rewards/chosen": -0.1714620441198349, "rewards/margins": 0.033030424267053604, "rewards/rejected": -0.2044924646615982, "step": 167 }, { "epoch": 0.2647754137115839, "grad_norm": 0.29901036620140076, "learning_rate": 3.987924926825047e-06, "log_odds_chosen": 0.40397408604621887, "log_odds_ratio": -0.5212303996086121, "logits/chosen": 0.12876684963703156, "logits/rejected": -0.32701608538627625, "logps/chosen": -1.790124773979187, "logps/rejected": -2.1377172470092773, "loss": 2.0015, "nll_loss": 1.9493852853775024, "rewards/accuracies": 0.875, "rewards/chosen": -0.1790124773979187, "rewards/margins": 0.03475925698876381, "rewards/rejected": -0.2137717306613922, "step": 168 }, { "epoch": 0.26635145784081954, "grad_norm": 0.2879612147808075, "learning_rate": 3.98731427327972e-06, "log_odds_chosen": 0.38767939805984497, "log_odds_ratio": -0.5189070105552673, "logits/chosen": 0.09615111351013184, "logits/rejected": -0.552105724811554, "logps/chosen": -1.7201054096221924, "logps/rejected": -2.047496795654297, "loss": 1.9089, "nll_loss": 1.8569788932800293, "rewards/accuracies": 1.0, "rewards/chosen": -0.17201054096221924, "rewards/margins": 0.03273913264274597, "rewards/rejected": -0.2047496885061264, "step": 169 }, { "epoch": 0.26792750197005516, "grad_norm": 0.2887536585330963, "learning_rate": 3.986688606524273e-06, "log_odds_chosen": 0.32007887959480286, "log_odds_ratio": -0.5568875670433044, "logits/chosen": 0.20907297730445862, "logits/rejected": -0.717089831829071, "logps/chosen": -1.7479276657104492, "logps/rejected": -2.0232884883880615, "loss": 1.9428, "nll_loss": 1.8871328830718994, "rewards/accuracies": 0.75, "rewards/chosen": -0.17479278147220612, "rewards/margins": 0.02753606252372265, "rewards/rejected": -0.20232883095741272, "step": 170 }, { "epoch": 0.2695035460992908, "grad_norm": 0.2829509675502777, "learning_rate": 3.986047931285315e-06, "log_odds_chosen": 0.493367999792099, "log_odds_ratio": -0.4808065891265869, "logits/chosen": 0.08703712373971939, "logits/rejected": -0.6400361657142639, "logps/chosen": -1.6581294536590576, "logps/rejected": -2.072545051574707, "loss": 1.848, "nll_loss": 1.7999612092971802, "rewards/accuracies": 1.0, "rewards/chosen": -0.16581295430660248, "rewards/margins": 0.041441574692726135, "rewards/rejected": -0.20725451409816742, "step": 171 }, { "epoch": 0.2710795902285264, "grad_norm": 0.27343836426734924, "learning_rate": 3.985392252402847e-06, "log_odds_chosen": 0.23880890011787415, "log_odds_ratio": -0.5835365653038025, "logits/chosen": 0.08178934454917908, "logits/rejected": -0.431425541639328, "logps/chosen": -1.655697226524353, "logps/rejected": -1.8529317378997803, "loss": 1.8557, "nll_loss": 1.7973511219024658, "rewards/accuracies": 1.0, "rewards/chosen": -0.1655697375535965, "rewards/margins": 0.0197234395891428, "rewards/rejected": -0.18529316782951355, "step": 172 }, { "epoch": 0.272655634357762, "grad_norm": 0.2862318754196167, "learning_rate": 3.984721574830206e-06, "log_odds_chosen": 0.27102503180503845, "log_odds_ratio": -0.5708762407302856, "logits/chosen": 0.14478133618831635, "logits/rejected": -0.510442316532135, "logps/chosen": -1.7600399255752563, "logps/rejected": -1.9898476600646973, "loss": 1.952, "nll_loss": 1.894890308380127, "rewards/accuracies": 1.0, "rewards/chosen": -0.17600399255752563, "rewards/margins": 0.02298077940940857, "rewards/rejected": -0.1989847719669342, "step": 173 }, { "epoch": 0.27423167848699764, "grad_norm": 0.26644009351730347, "learning_rate": 3.984035903634041e-06, "log_odds_chosen": 0.2790209650993347, "log_odds_ratio": -0.5701457262039185, "logits/chosen": 0.1971094161272049, "logits/rejected": -0.5762545466423035, "logps/chosen": -1.7363693714141846, "logps/rejected": -1.9710581302642822, "loss": 1.914, "nll_loss": 1.8569939136505127, "rewards/accuracies": 0.75, "rewards/chosen": -0.17363695800304413, "rewards/margins": 0.023468857631087303, "rewards/rejected": -0.19710581004619598, "step": 174 }, { "epoch": 0.27580772261623326, "grad_norm": 0.2771873474121094, "learning_rate": 3.983335243994273e-06, "log_odds_chosen": 0.37392759323120117, "log_odds_ratio": -0.5302526354789734, "logits/chosen": 0.21129730343818665, "logits/rejected": -0.5397756099700928, "logps/chosen": -1.726618766784668, "logps/rejected": -2.042837381362915, "loss": 1.9182, "nll_loss": 1.8651602268218994, "rewards/accuracies": 0.875, "rewards/chosen": -0.17266185581684113, "rewards/margins": 0.03162187710404396, "rewards/rejected": -0.20428375899791718, "step": 175 }, { "epoch": 0.2773837667454689, "grad_norm": 0.2980581223964691, "learning_rate": 3.982619601204049e-06, "log_odds_chosen": 0.2845771908760071, "log_odds_ratio": -0.565645158290863, "logits/chosen": 0.1371021270751953, "logits/rejected": -0.544967770576477, "logps/chosen": -1.784250259399414, "logps/rejected": -2.0266098976135254, "loss": 1.9752, "nll_loss": 1.9185881614685059, "rewards/accuracies": 1.0, "rewards/chosen": -0.17842502892017365, "rewards/margins": 0.02423596940934658, "rewards/rejected": -0.20266100764274597, "step": 176 }, { "epoch": 0.2789598108747045, "grad_norm": 0.28183117508888245, "learning_rate": 3.9818889806697085e-06, "log_odds_chosen": 0.37239906191825867, "log_odds_ratio": -0.5329021215438843, "logits/chosen": 0.24540948867797852, "logits/rejected": -0.537138044834137, "logps/chosen": -1.824477195739746, "logps/rejected": -2.1465721130371094, "loss": 1.9904, "nll_loss": 1.9371103048324585, "rewards/accuracies": 0.875, "rewards/chosen": -0.18244771659374237, "rewards/margins": 0.032209500670433044, "rewards/rejected": -0.21465720236301422, "step": 177 }, { "epoch": 0.2805358550039401, "grad_norm": 0.2755196988582611, "learning_rate": 3.98114338791074e-06, "log_odds_chosen": 0.4614989757537842, "log_odds_ratio": -0.5006829500198364, "logits/chosen": 0.11655398458242416, "logits/rejected": -0.4944975674152374, "logps/chosen": -1.6953446865081787, "logps/rejected": -2.088918685913086, "loss": 1.89, "nll_loss": 1.8398841619491577, "rewards/accuracies": 1.0, "rewards/chosen": -0.16953447461128235, "rewards/margins": 0.03935740143060684, "rewards/rejected": -0.2088918834924698, "step": 178 }, { "epoch": 0.28211189913317575, "grad_norm": 0.262363463640213, "learning_rate": 3.980382828559742e-06, "log_odds_chosen": 0.37132197618484497, "log_odds_ratio": -0.53047114610672, "logits/chosen": 0.1178860068321228, "logits/rejected": -0.5356312990188599, "logps/chosen": -1.7367223501205444, "logps/rejected": -2.051452159881592, "loss": 1.9086, "nll_loss": 1.8555084466934204, "rewards/accuracies": 0.875, "rewards/chosen": -0.17367224395275116, "rewards/margins": 0.03147297352552414, "rewards/rejected": -0.2051452100276947, "step": 179 }, { "epoch": 0.28368794326241137, "grad_norm": 0.2845723032951355, "learning_rate": 3.9796073083623774e-06, "log_odds_chosen": 0.33220580220222473, "log_odds_ratio": -0.5529859066009521, "logits/chosen": 0.13935938477516174, "logits/rejected": -0.5945897698402405, "logps/chosen": -1.805295705795288, "logps/rejected": -2.08857798576355, "loss": 1.9753, "nll_loss": 1.9200148582458496, "rewards/accuracies": 0.875, "rewards/chosen": -0.18052956461906433, "rewards/margins": 0.028328238055109978, "rewards/rejected": -0.20885780453681946, "step": 180 }, { "epoch": 0.285263987391647, "grad_norm": 0.2820357382297516, "learning_rate": 3.978816833177329e-06, "log_odds_chosen": 0.3633921444416046, "log_odds_ratio": -0.5330725312232971, "logits/chosen": 0.2247752994298935, "logits/rejected": -0.42544880509376526, "logps/chosen": -1.7320586442947388, "logps/rejected": -2.0406675338745117, "loss": 1.91, "nll_loss": 1.8566887378692627, "rewards/accuracies": 1.0, "rewards/chosen": -0.17320585250854492, "rewards/margins": 0.03086087293922901, "rewards/rejected": -0.20406673848628998, "step": 181 }, { "epoch": 0.2868400315208826, "grad_norm": 0.2795039713382721, "learning_rate": 3.978011408976261e-06, "log_odds_chosen": 0.2320139855146408, "log_odds_ratio": -0.5891825556755066, "logits/chosen": 0.19194500148296356, "logits/rejected": -0.5741883516311646, "logps/chosen": -1.750451683998108, "logps/rejected": -1.9439057111740112, "loss": 1.9425, "nll_loss": 1.8835428953170776, "rewards/accuracies": 0.75, "rewards/chosen": -0.1750451624393463, "rewards/margins": 0.019345413893461227, "rewards/rejected": -0.19439058005809784, "step": 182 }, { "epoch": 0.28841607565011823, "grad_norm": 0.2846316993236542, "learning_rate": 3.9771910418437674e-06, "log_odds_chosen": 0.3140004575252533, "log_odds_ratio": -0.54939204454422, "logits/chosen": 0.1681434065103531, "logits/rejected": -0.3001997172832489, "logps/chosen": -1.7808198928833008, "logps/rejected": -2.049476385116577, "loss": 1.956, "nll_loss": 1.9010967016220093, "rewards/accuracies": 1.0, "rewards/chosen": -0.17808198928833008, "rewards/margins": 0.026865659281611443, "rewards/rejected": -0.20494765043258667, "step": 183 }, { "epoch": 0.2899921197793538, "grad_norm": 0.2878468334674835, "learning_rate": 3.976355737977332e-06, "log_odds_chosen": 0.3773011863231659, "log_odds_ratio": -0.5303803086280823, "logits/chosen": 0.18860210478305817, "logits/rejected": -0.5023236274719238, "logps/chosen": -1.7276164293289185, "logps/rejected": -2.046980381011963, "loss": 1.9104, "nll_loss": 1.8573479652404785, "rewards/accuracies": 0.875, "rewards/chosen": -0.17276166379451752, "rewards/margins": 0.03193638473749161, "rewards/rejected": -0.20469802618026733, "step": 184 }, { "epoch": 0.2915681639085894, "grad_norm": 0.2533093988895416, "learning_rate": 3.975505503687274e-06, "log_odds_chosen": 0.3036259412765503, "log_odds_ratio": -0.5556970238685608, "logits/chosen": 0.23977245390415192, "logits/rejected": -0.28354203701019287, "logps/chosen": -1.7183681726455688, "logps/rejected": -1.9746772050857544, "loss": 1.8965, "nll_loss": 1.8409373760223389, "rewards/accuracies": 1.0, "rewards/chosen": -0.17183682322502136, "rewards/margins": 0.025630896911025047, "rewards/rejected": -0.19746771454811096, "step": 185 }, { "epoch": 0.29314420803782504, "grad_norm": 0.2554759383201599, "learning_rate": 3.974640345396708e-06, "log_odds_chosen": 0.458074152469635, "log_odds_ratio": -0.4967803359031677, "logits/chosen": 0.09675043821334839, "logits/rejected": -0.8243634104728699, "logps/chosen": -1.6910256147384644, "logps/rejected": -2.077892780303955, "loss": 1.8667, "nll_loss": 1.8170145750045776, "rewards/accuracies": 1.0, "rewards/chosen": -0.16910257935523987, "rewards/margins": 0.03868672996759415, "rewards/rejected": -0.20778930187225342, "step": 186 }, { "epoch": 0.29472025216706066, "grad_norm": 0.2935222387313843, "learning_rate": 3.9737602696414925e-06, "log_odds_chosen": 0.159349724650383, "log_odds_ratio": -0.6268662214279175, "logits/chosen": 0.21252146363258362, "logits/rejected": -0.6300286650657654, "logps/chosen": -1.78038489818573, "logps/rejected": -1.914283037185669, "loss": 1.9361, "nll_loss": 1.8734400272369385, "rewards/accuracies": 0.625, "rewards/chosen": -0.17803849279880524, "rewards/margins": 0.013389825820922852, "rewards/rejected": -0.1914283186197281, "step": 187 }, { "epoch": 0.2962962962962963, "grad_norm": 0.2445816546678543, "learning_rate": 3.972865283070179e-06, "log_odds_chosen": 0.38453128933906555, "log_odds_ratio": -0.5239704847335815, "logits/chosen": 0.16407378017902374, "logits/rejected": -0.7210597991943359, "logps/chosen": -1.6779310703277588, "logps/rejected": -2.003727436065674, "loss": 1.8617, "nll_loss": 1.8092904090881348, "rewards/accuracies": 1.0, "rewards/chosen": -0.16779311001300812, "rewards/margins": 0.032579630613327026, "rewards/rejected": -0.20037274062633514, "step": 188 }, { "epoch": 0.2978723404255319, "grad_norm": 0.2602602243423462, "learning_rate": 3.971955392443965e-06, "log_odds_chosen": 0.3589805066585541, "log_odds_ratio": -0.535193920135498, "logits/chosen": 0.19359144568443298, "logits/rejected": -0.5294786691665649, "logps/chosen": -1.7104607820510864, "logps/rejected": -2.017254114151001, "loss": 1.8819, "nll_loss": 1.8284274339675903, "rewards/accuracies": 1.0, "rewards/chosen": -0.17104607820510864, "rewards/margins": 0.03067934513092041, "rewards/rejected": -0.20172543823719025, "step": 189 }, { "epoch": 0.2994483845547675, "grad_norm": 0.2570762634277344, "learning_rate": 3.971030604636637e-06, "log_odds_chosen": 0.20797011256217957, "log_odds_ratio": -0.5995408892631531, "logits/chosen": 0.24219730496406555, "logits/rejected": -0.49577596783638, "logps/chosen": -1.726780891418457, "logps/rejected": -1.9002296924591064, "loss": 1.8985, "nll_loss": 1.8385004997253418, "rewards/accuracies": 0.875, "rewards/chosen": -0.17267809808254242, "rewards/margins": 0.017344871535897255, "rewards/rejected": -0.19002296030521393, "step": 190 }, { "epoch": 0.30102442868400314, "grad_norm": 0.27055835723876953, "learning_rate": 3.970090926634526e-06, "log_odds_chosen": 0.29205840826034546, "log_odds_ratio": -0.5609118938446045, "logits/chosen": 0.3021741211414337, "logits/rejected": -0.7257508635520935, "logps/chosen": -1.7830076217651367, "logps/rejected": -2.0307276248931885, "loss": 1.9615, "nll_loss": 1.9053698778152466, "rewards/accuracies": 1.0, "rewards/chosen": -0.17830076813697815, "rewards/margins": 0.024771984666585922, "rewards/rejected": -0.20307274162769318, "step": 191 }, { "epoch": 0.30260047281323876, "grad_norm": 0.27686426043510437, "learning_rate": 3.9691363655364526e-06, "log_odds_chosen": 0.3535913825035095, "log_odds_ratio": -0.5396946668624878, "logits/chosen": 0.19939753413200378, "logits/rejected": -0.527854859828949, "logps/chosen": -1.7547531127929688, "logps/rejected": -2.056049108505249, "loss": 1.9315, "nll_loss": 1.8775546550750732, "rewards/accuracies": 0.875, "rewards/chosen": -0.1754753142595291, "rewards/margins": 0.030129600316286087, "rewards/rejected": -0.2056049108505249, "step": 192 }, { "epoch": 0.3041765169424744, "grad_norm": 0.24814869463443756, "learning_rate": 3.968166928553666e-06, "log_odds_chosen": 0.29018789529800415, "log_odds_ratio": -0.5631506443023682, "logits/chosen": 0.23991963267326355, "logits/rejected": -0.6883436441421509, "logps/chosen": -1.708446741104126, "logps/rejected": -1.9508640766143799, "loss": 1.9016, "nll_loss": 1.8453017473220825, "rewards/accuracies": 0.875, "rewards/chosen": -0.1708446592092514, "rewards/margins": 0.02424173429608345, "rewards/rejected": -0.19508640468120575, "step": 193 }, { "epoch": 0.30575256107171, "grad_norm": 0.2546519637107849, "learning_rate": 3.967182623009804e-06, "log_odds_chosen": 0.42340725660324097, "log_odds_ratio": -0.5090901851654053, "logits/chosen": 0.18889212608337402, "logits/rejected": -0.5412212014198303, "logps/chosen": -1.7333446741104126, "logps/rejected": -2.0966084003448486, "loss": 1.8946, "nll_loss": 1.8436520099639893, "rewards/accuracies": 1.0, "rewards/chosen": -0.17333447933197021, "rewards/margins": 0.03632635623216629, "rewards/rejected": -0.2096608430147171, "step": 194 }, { "epoch": 0.3073286052009456, "grad_norm": 0.27097785472869873, "learning_rate": 3.966183456340821e-06, "log_odds_chosen": 0.19589565694332123, "log_odds_ratio": -0.6020736694335938, "logits/chosen": 0.23710918426513672, "logits/rejected": -0.5323060154914856, "logps/chosen": -1.7581830024719238, "logps/rejected": -1.9214140176773071, "loss": 1.9344, "nll_loss": 1.8741968870162964, "rewards/accuracies": 1.0, "rewards/chosen": -0.1758182942867279, "rewards/margins": 0.01632309891283512, "rewards/rejected": -0.19214141368865967, "step": 195 }, { "epoch": 0.30890464933018125, "grad_norm": 0.2723829448223114, "learning_rate": 3.965169436094947e-06, "log_odds_chosen": 0.35698461532592773, "log_odds_ratio": -0.5402282476425171, "logits/chosen": 0.22953951358795166, "logits/rejected": -0.668543815612793, "logps/chosen": -1.6529057025909424, "logps/rejected": -1.9540297985076904, "loss": 1.8363, "nll_loss": 1.7822985649108887, "rewards/accuracies": 0.875, "rewards/chosen": -0.16529057919979095, "rewards/margins": 0.030112413689494133, "rewards/rejected": -0.19540299475193024, "step": 196 }, { "epoch": 0.31048069345941687, "grad_norm": 0.26106390357017517, "learning_rate": 3.964140569932618e-06, "log_odds_chosen": 0.29018843173980713, "log_odds_ratio": -0.5803290009498596, "logits/chosen": 0.1857983022928238, "logits/rejected": -0.8773932456970215, "logps/chosen": -1.7773064374923706, "logps/rejected": -2.021932601928711, "loss": 1.9428, "nll_loss": 1.884739637374878, "rewards/accuracies": 0.75, "rewards/chosen": -0.17773064970970154, "rewards/margins": 0.02446262538433075, "rewards/rejected": -0.2021932750940323, "step": 197 }, { "epoch": 0.3120567375886525, "grad_norm": 0.25397989153862, "learning_rate": 3.9630968656264285e-06, "log_odds_chosen": 0.42598769068717957, "log_odds_ratio": -0.509061872959137, "logits/chosen": 0.21715356409549713, "logits/rejected": -0.4817379415035248, "logps/chosen": -1.7002034187316895, "logps/rejected": -2.0615005493164062, "loss": 1.8609, "nll_loss": 1.810002326965332, "rewards/accuracies": 1.0, "rewards/chosen": -0.17002034187316895, "rewards/margins": 0.036129724234342575, "rewards/rejected": -0.20615006983280182, "step": 198 }, { "epoch": 0.3136327817178881, "grad_norm": 0.26781895756721497, "learning_rate": 3.962038331061065e-06, "log_odds_chosen": 0.1461435854434967, "log_odds_ratio": -0.6345757246017456, "logits/chosen": 0.28389307856559753, "logits/rejected": -0.6472858786582947, "logps/chosen": -1.7552485466003418, "logps/rejected": -1.883392333984375, "loss": 1.9384, "nll_loss": 1.8749713897705078, "rewards/accuracies": 0.625, "rewards/chosen": -0.17552484571933746, "rewards/margins": 0.012814389541745186, "rewards/rejected": -0.1883392333984375, "step": 199 }, { "epoch": 0.31520882584712373, "grad_norm": 0.26174989342689514, "learning_rate": 3.96096497423325e-06, "log_odds_chosen": 0.26020583510398865, "log_odds_ratio": -0.5759395360946655, "logits/chosen": 0.3217354714870453, "logits/rejected": -0.2811959683895111, "logps/chosen": -1.6606853008270264, "logps/rejected": -1.8771640062332153, "loss": 1.8637, "nll_loss": 1.806121826171875, "rewards/accuracies": 1.0, "rewards/chosen": -0.16606852412223816, "rewards/margins": 0.021647876128554344, "rewards/rejected": -0.18771640956401825, "step": 200 }, { "epoch": 0.31678486997635935, "grad_norm": 0.24145232141017914, "learning_rate": 3.959876803251684e-06, "log_odds_chosen": 0.3890625238418579, "log_odds_ratio": -0.524219274520874, "logits/chosen": 0.1805240660905838, "logits/rejected": -0.5619311928749084, "logps/chosen": -1.6790175437927246, "logps/rejected": -2.00699782371521, "loss": 1.8352, "nll_loss": 1.782778263092041, "rewards/accuracies": 1.0, "rewards/chosen": -0.16790175437927246, "rewards/margins": 0.03279803693294525, "rewards/rejected": -0.2006998062133789, "step": 201 }, { "epoch": 0.31836091410559497, "grad_norm": 0.25001904368400574, "learning_rate": 3.958773826336977e-06, "log_odds_chosen": 0.27317214012145996, "log_odds_ratio": -0.5676698088645935, "logits/chosen": 0.27888408303260803, "logits/rejected": -0.415998250246048, "logps/chosen": -1.7196669578552246, "logps/rejected": -1.9488410949707031, "loss": 1.8963, "nll_loss": 1.8395037651062012, "rewards/accuracies": 1.0, "rewards/chosen": -0.17196668684482574, "rewards/margins": 0.02291741594672203, "rewards/rejected": -0.19488412141799927, "step": 202 }, { "epoch": 0.3199369582348306, "grad_norm": 0.2554585933685303, "learning_rate": 3.957656051821592e-06, "log_odds_chosen": 0.23165369033813477, "log_odds_ratio": -0.5892881751060486, "logits/chosen": 0.3021984100341797, "logits/rejected": -0.6948502063751221, "logps/chosen": -1.7531200647354126, "logps/rejected": -1.9506298303604126, "loss": 1.9028, "nll_loss": 1.8439098596572876, "rewards/accuracies": 0.875, "rewards/chosen": -0.17531201243400574, "rewards/margins": 0.019750984385609627, "rewards/rejected": -0.19506299495697021, "step": 203 }, { "epoch": 0.3215130023640662, "grad_norm": 0.26098933815956116, "learning_rate": 3.956523488149783e-06, "log_odds_chosen": 0.3122096061706543, "log_odds_ratio": -0.5535508394241333, "logits/chosen": 0.3232036828994751, "logits/rejected": -0.47777220606803894, "logps/chosen": -1.8531510829925537, "logps/rejected": -2.122044563293457, "loss": 2.0121, "nll_loss": 1.9567644596099854, "rewards/accuracies": 1.0, "rewards/chosen": -0.1853151172399521, "rewards/margins": 0.02688935585319996, "rewards/rejected": -0.2122044712305069, "step": 204 }, { "epoch": 0.32308904649330183, "grad_norm": 0.2530037462711334, "learning_rate": 3.9553761438775285e-06, "log_odds_chosen": 0.5448867678642273, "log_odds_ratio": -0.4623796045780182, "logits/chosen": 0.1774434596300125, "logits/rejected": -0.509428083896637, "logps/chosen": -1.5850623846054077, "logps/rejected": -2.0427145957946777, "loss": 1.747, "nll_loss": 1.7007546424865723, "rewards/accuracies": 1.0, "rewards/chosen": -0.15850622951984406, "rewards/margins": 0.04576525092124939, "rewards/rejected": -0.20427148044109344, "step": 205 }, { "epoch": 0.32466509062253746, "grad_norm": 0.23750121891498566, "learning_rate": 3.954214027672465e-06, "log_odds_chosen": 0.48206406831741333, "log_odds_ratio": -0.48811841011047363, "logits/chosen": 0.18673132359981537, "logits/rejected": -0.9018082022666931, "logps/chosen": -1.67806077003479, "logps/rejected": -2.0901315212249756, "loss": 1.8315, "nll_loss": 1.7826416492462158, "rewards/accuracies": 1.0, "rewards/chosen": -0.16780607402324677, "rewards/margins": 0.041207075119018555, "rewards/rejected": -0.2090131640434265, "step": 206 }, { "epoch": 0.3262411347517731, "grad_norm": 0.25891193747520447, "learning_rate": 3.953037148313825e-06, "log_odds_chosen": 0.28911662101745605, "log_odds_ratio": -0.5642296075820923, "logits/chosen": 0.27622562646865845, "logits/rejected": -0.581394612789154, "logps/chosen": -1.6668041944503784, "logps/rejected": -1.9111988544464111, "loss": 1.8538, "nll_loss": 1.7974015474319458, "rewards/accuracies": 0.875, "rewards/chosen": -0.16668042540550232, "rewards/margins": 0.024439461529254913, "rewards/rejected": -0.19111987948417664, "step": 207 }, { "epoch": 0.32781717888100864, "grad_norm": 0.2777508497238159, "learning_rate": 3.951845514692371e-06, "log_odds_chosen": 0.39269790053367615, "log_odds_ratio": -0.5250096321105957, "logits/chosen": 0.2141711413860321, "logits/rejected": -0.47686973214149475, "logps/chosen": -1.729377269744873, "logps/rejected": -2.064727783203125, "loss": 1.9033, "nll_loss": 1.8507862091064453, "rewards/accuracies": 1.0, "rewards/chosen": -0.17293773591518402, "rewards/margins": 0.03353503346443176, "rewards/rejected": -0.20647276937961578, "step": 208 }, { "epoch": 0.32939322301024426, "grad_norm": 0.2944742441177368, "learning_rate": 3.950639135810325e-06, "log_odds_chosen": 0.3137563467025757, "log_odds_ratio": -0.5539580583572388, "logits/chosen": 0.3174862265586853, "logits/rejected": -0.45376986265182495, "logps/chosen": -1.850959062576294, "logps/rejected": -2.1199288368225098, "loss": 1.9963, "nll_loss": 1.9408817291259766, "rewards/accuracies": 0.875, "rewards/chosen": -0.1850959062576294, "rewards/margins": 0.02689695730805397, "rewards/rejected": -0.21199287474155426, "step": 209 }, { "epoch": 0.3309692671394799, "grad_norm": 0.3148304224014282, "learning_rate": 3.9494180207813044e-06, "log_odds_chosen": 0.23787932097911835, "log_odds_ratio": -0.5915074348449707, "logits/chosen": 0.30660662055015564, "logits/rejected": -0.7329879403114319, "logps/chosen": -1.717742919921875, "logps/rejected": -1.916438102722168, "loss": 1.8905, "nll_loss": 1.8313184976577759, "rewards/accuracies": 0.875, "rewards/chosen": -0.17177429795265198, "rewards/margins": 0.019869530573487282, "rewards/rejected": -0.1916438341140747, "step": 210 }, { "epoch": 0.3325453112687155, "grad_norm": 0.2588319778442383, "learning_rate": 3.948182178830249e-06, "log_odds_chosen": 0.29779791831970215, "log_odds_ratio": -0.5746859908103943, "logits/chosen": 0.2758365273475647, "logits/rejected": -0.8558934926986694, "logps/chosen": -1.7486122846603394, "logps/rejected": -2.0061802864074707, "loss": 1.904, "nll_loss": 1.8465510606765747, "rewards/accuracies": 0.75, "rewards/chosen": -0.17486125230789185, "rewards/margins": 0.025756794959306717, "rewards/rejected": -0.20061802864074707, "step": 211 }, { "epoch": 0.3341213553979511, "grad_norm": 0.27347490191459656, "learning_rate": 3.9469316192933545e-06, "log_odds_chosen": 0.5377534627914429, "log_odds_ratio": -0.4662073850631714, "logits/chosen": 0.16830803453922272, "logits/rejected": -0.490761935710907, "logps/chosen": -1.6018506288528442, "logps/rejected": -2.05351185798645, "loss": 1.7708, "nll_loss": 1.7242186069488525, "rewards/accuracies": 1.0, "rewards/chosen": -0.1601850688457489, "rewards/margins": 0.04516611993312836, "rewards/rejected": -0.20535118877887726, "step": 212 }, { "epoch": 0.33569739952718675, "grad_norm": 0.2533399164676666, "learning_rate": 3.945666351618001e-06, "log_odds_chosen": 0.2882734537124634, "log_odds_ratio": -0.562584638595581, "logits/chosen": 0.2559697926044464, "logits/rejected": -0.5387569069862366, "logps/chosen": -1.6552236080169678, "logps/rejected": -1.8957173824310303, "loss": 1.823, "nll_loss": 1.7667841911315918, "rewards/accuracies": 1.0, "rewards/chosen": -0.16552235186100006, "rewards/margins": 0.02404937893152237, "rewards/rejected": -0.18957173824310303, "step": 213 }, { "epoch": 0.33727344365642237, "grad_norm": 0.25932711362838745, "learning_rate": 3.9443863853626825e-06, "log_odds_chosen": 0.3108881711959839, "log_odds_ratio": -0.5589509010314941, "logits/chosen": 0.2859116494655609, "logits/rejected": -0.6345618367195129, "logps/chosen": -1.6215680837631226, "logps/rejected": -1.8729337453842163, "loss": 1.8125, "nll_loss": 1.7566089630126953, "rewards/accuracies": 0.875, "rewards/chosen": -0.1621568202972412, "rewards/margins": 0.025136563926935196, "rewards/rejected": -0.1872933804988861, "step": 214 }, { "epoch": 0.338849487785658, "grad_norm": 0.250918984413147, "learning_rate": 3.943091730196931e-06, "log_odds_chosen": 0.2744632363319397, "log_odds_ratio": -0.5730749368667603, "logits/chosen": 0.3415977954864502, "logits/rejected": -0.41705650091171265, "logps/chosen": -1.6209944486618042, "logps/rejected": -1.8511853218078613, "loss": 1.7975, "nll_loss": 1.740174651145935, "rewards/accuracies": 1.0, "rewards/chosen": -0.1620994359254837, "rewards/margins": 0.023019105195999146, "rewards/rejected": -0.18511852622032166, "step": 215 }, { "epoch": 0.3404255319148936, "grad_norm": 0.27474015951156616, "learning_rate": 3.941782395901249e-06, "log_odds_chosen": 0.3491838574409485, "log_odds_ratio": -0.5398426055908203, "logits/chosen": 0.2213699072599411, "logits/rejected": -0.549167811870575, "logps/chosen": -1.69877028465271, "logps/rejected": -1.9932255744934082, "loss": 1.8755, "nll_loss": 1.8215399980545044, "rewards/accuracies": 0.875, "rewards/chosen": -0.16987702250480652, "rewards/margins": 0.02944553829729557, "rewards/rejected": -0.19932258129119873, "step": 216 }, { "epoch": 0.34200157604412923, "grad_norm": 0.25679028034210205, "learning_rate": 3.940458392367032e-06, "log_odds_chosen": 0.3104505240917206, "log_odds_ratio": -0.5546755194664001, "logits/chosen": 0.2595285475254059, "logits/rejected": -0.8533272743225098, "logps/chosen": -1.7510833740234375, "logps/rejected": -2.0138750076293945, "loss": 1.8934, "nll_loss": 1.8379460573196411, "rewards/accuracies": 1.0, "rewards/chosen": -0.17510835826396942, "rewards/margins": 0.026279138401150703, "rewards/rejected": -0.20138749480247498, "step": 217 }, { "epoch": 0.34357762017336485, "grad_norm": 0.2611715495586395, "learning_rate": 3.939119729596493e-06, "log_odds_chosen": 0.35739466547966003, "log_odds_ratio": -0.5335213541984558, "logits/chosen": 0.3199889659881592, "logits/rejected": -0.6550436019897461, "logps/chosen": -1.7533951997756958, "logps/rejected": -2.057891368865967, "loss": 1.9075, "nll_loss": 1.8541964292526245, "rewards/accuracies": 1.0, "rewards/chosen": -0.17533953487873077, "rewards/margins": 0.03044959530234337, "rewards/rejected": -0.20578913390636444, "step": 218 }, { "epoch": 0.34515366430260047, "grad_norm": 0.26392099261283875, "learning_rate": 3.93776641770259e-06, "log_odds_chosen": 0.2948194742202759, "log_odds_ratio": -0.562857449054718, "logits/chosen": 0.28813865780830383, "logits/rejected": -0.7187209129333496, "logps/chosen": -1.7230334281921387, "logps/rejected": -1.973144292831421, "loss": 1.8747, "nll_loss": 1.8184067010879517, "rewards/accuracies": 1.0, "rewards/chosen": -0.17230333387851715, "rewards/margins": 0.025011096149683, "rewards/rejected": -0.19731444120407104, "step": 219 }, { "epoch": 0.3467297084318361, "grad_norm": 0.26364782452583313, "learning_rate": 3.93639846690895e-06, "log_odds_chosen": 0.4254459738731384, "log_odds_ratio": -0.5071126222610474, "logits/chosen": 0.31134355068206787, "logits/rejected": -0.9344062805175781, "logps/chosen": -1.7416664361953735, "logps/rejected": -2.10628080368042, "loss": 1.8893, "nll_loss": 1.8385860919952393, "rewards/accuracies": 1.0, "rewards/chosen": -0.17416664958000183, "rewards/margins": 0.03646141290664673, "rewards/rejected": -0.21062806248664856, "step": 220 }, { "epoch": 0.3483057525610717, "grad_norm": 0.2517707943916321, "learning_rate": 3.9350158875497855e-06, "log_odds_chosen": 0.5023624300956726, "log_odds_ratio": -0.4771158695220947, "logits/chosen": 0.31009235978126526, "logits/rejected": -0.771135687828064, "logps/chosen": -1.6524683237075806, "logps/rejected": -2.075108289718628, "loss": 1.8136, "nll_loss": 1.765908122062683, "rewards/accuracies": 1.0, "rewards/chosen": -0.165246844291687, "rewards/margins": 0.04226400703191757, "rewards/rejected": -0.20751085877418518, "step": 221 }, { "epoch": 0.34988179669030733, "grad_norm": 0.24314001202583313, "learning_rate": 3.933618690069824e-06, "log_odds_chosen": 0.4676588773727417, "log_odds_ratio": -0.4905795454978943, "logits/chosen": 0.17770220339298248, "logits/rejected": -0.7084282636642456, "logps/chosen": -1.6020326614379883, "logps/rejected": -1.9884483814239502, "loss": 1.7733, "nll_loss": 1.7242058515548706, "rewards/accuracies": 1.0, "rewards/chosen": -0.16020327806472778, "rewards/margins": 0.038641560822725296, "rewards/rejected": -0.19884483516216278, "step": 222 }, { "epoch": 0.35145784081954295, "grad_norm": 0.2550933361053467, "learning_rate": 3.932206885024226e-06, "log_odds_chosen": 0.23466067016124725, "log_odds_ratio": -0.5852788090705872, "logits/chosen": 0.22302168607711792, "logits/rejected": -0.6709792017936707, "logps/chosen": -1.7109097242355347, "logps/rejected": -1.9076846837997437, "loss": 1.8546, "nll_loss": 1.7960247993469238, "rewards/accuracies": 0.875, "rewards/chosen": -0.1710909754037857, "rewards/margins": 0.019677501171827316, "rewards/rejected": -0.19076848030090332, "step": 223 }, { "epoch": 0.3530338849487786, "grad_norm": 0.22619383037090302, "learning_rate": 3.930780483078502e-06, "log_odds_chosen": 0.3614213466644287, "log_odds_ratio": -0.5334374904632568, "logits/chosen": 0.1996021270751953, "logits/rejected": -0.7123557329177856, "logps/chosen": -1.6352108716964722, "logps/rejected": -1.9351240396499634, "loss": 1.807, "nll_loss": 1.7537031173706055, "rewards/accuracies": 1.0, "rewards/chosen": -0.16352108120918274, "rewards/margins": 0.02999131567776203, "rewards/rejected": -0.19351240992546082, "step": 224 }, { "epoch": 0.3546099290780142, "grad_norm": 0.2543198764324188, "learning_rate": 3.92933949500844e-06, "log_odds_chosen": 0.39015451073646545, "log_odds_ratio": -0.5326002836227417, "logits/chosen": 0.28930386900901794, "logits/rejected": -0.7000013589859009, "logps/chosen": -1.609468936920166, "logps/rejected": -1.9277451038360596, "loss": 1.7804, "nll_loss": 1.727099895477295, "rewards/accuracies": 0.75, "rewards/chosen": -0.16094687581062317, "rewards/margins": 0.031827617436647415, "rewards/rejected": -0.19277450442314148, "step": 225 }, { "epoch": 0.3561859732072498, "grad_norm": 0.23875081539154053, "learning_rate": 3.9278839317000155e-06, "log_odds_chosen": 0.40506240725517273, "log_odds_ratio": -0.5156422853469849, "logits/chosen": 0.30614709854125977, "logits/rejected": -0.6908950805664062, "logps/chosen": -1.635833978652954, "logps/rejected": -1.9742740392684937, "loss": 1.7931, "nll_loss": 1.7415223121643066, "rewards/accuracies": 1.0, "rewards/chosen": -0.1635833978652954, "rewards/margins": 0.0338440015912056, "rewards/rejected": -0.1974273920059204, "step": 226 }, { "epoch": 0.35776201733648544, "grad_norm": 0.24680069088935852, "learning_rate": 3.926413804149314e-06, "log_odds_chosen": 0.1316530406475067, "log_odds_ratio": -0.6378493309020996, "logits/chosen": 0.28156548738479614, "logits/rejected": -0.6600248217582703, "logps/chosen": -1.7210869789123535, "logps/rejected": -1.8304848670959473, "loss": 1.8788, "nll_loss": 1.815049171447754, "rewards/accuracies": 0.625, "rewards/chosen": -0.1721086949110031, "rewards/margins": 0.010939793661236763, "rewards/rejected": -0.18304848670959473, "step": 227 }, { "epoch": 0.35933806146572106, "grad_norm": 0.2515822649002075, "learning_rate": 3.924929123462447e-06, "log_odds_chosen": 0.2854158878326416, "log_odds_ratio": -0.5666006207466125, "logits/chosen": 0.33641237020492554, "logits/rejected": -0.7290589809417725, "logps/chosen": -1.7506561279296875, "logps/rejected": -1.9922963380813599, "loss": 1.8996, "nll_loss": 1.84292733669281, "rewards/accuracies": 0.875, "rewards/chosen": -0.17506560683250427, "rewards/margins": 0.024164030328392982, "rewards/rejected": -0.1992296427488327, "step": 228 }, { "epoch": 0.3609141055949567, "grad_norm": 0.26427891850471497, "learning_rate": 3.923429900855468e-06, "log_odds_chosen": 0.21544109284877777, "log_odds_ratio": -0.5983462333679199, "logits/chosen": 0.2909620702266693, "logits/rejected": -0.438875675201416, "logps/chosen": -1.8233458995819092, "logps/rejected": -2.004505157470703, "loss": 1.9767, "nll_loss": 1.9168180227279663, "rewards/accuracies": 0.75, "rewards/chosen": -0.18233460187911987, "rewards/margins": 0.018115926533937454, "rewards/rejected": -0.20045052468776703, "step": 229 }, { "epoch": 0.3624901497241923, "grad_norm": 0.241230309009552, "learning_rate": 3.921916147654287e-06, "log_odds_chosen": 0.4597613215446472, "log_odds_ratio": -0.49515679478645325, "logits/chosen": 0.24242226779460907, "logits/rejected": -0.7508520483970642, "logps/chosen": -1.6597049236297607, "logps/rejected": -2.044846773147583, "loss": 1.7986, "nll_loss": 1.7491000890731812, "rewards/accuracies": 1.0, "rewards/chosen": -0.16597048938274384, "rewards/margins": 0.03851418197154999, "rewards/rejected": -0.20448468625545502, "step": 230 }, { "epoch": 0.3640661938534279, "grad_norm": 0.2590387761592865, "learning_rate": 3.920387875294588e-06, "log_odds_chosen": 0.30678579211235046, "log_odds_ratio": -0.5545358657836914, "logits/chosen": 0.2767443060874939, "logits/rejected": -0.5900214910507202, "logps/chosen": -1.6316543817520142, "logps/rejected": -1.8850141763687134, "loss": 1.7885, "nll_loss": 1.7330236434936523, "rewards/accuracies": 1.0, "rewards/chosen": -0.16316545009613037, "rewards/margins": 0.02533598057925701, "rewards/rejected": -0.18850143253803253, "step": 231 }, { "epoch": 0.3656422379826635, "grad_norm": 0.24193865060806274, "learning_rate": 3.918845095321737e-06, "log_odds_chosen": 0.29498574137687683, "log_odds_ratio": -0.5670905113220215, "logits/chosen": 0.23279045522212982, "logits/rejected": -0.841462254524231, "logps/chosen": -1.6641700267791748, "logps/rejected": -1.9037166833877563, "loss": 1.8125, "nll_loss": 1.755805492401123, "rewards/accuracies": 0.875, "rewards/chosen": -0.16641701757907867, "rewards/margins": 0.023954641073942184, "rewards/rejected": -0.19037166237831116, "step": 232 }, { "epoch": 0.3672182821118991, "grad_norm": 0.2594136595726013, "learning_rate": 3.9172878193907004e-06, "log_odds_chosen": 0.3280632793903351, "log_odds_ratio": -0.551827609539032, "logits/chosen": 0.18091654777526855, "logits/rejected": -0.6931451559066772, "logps/chosen": -1.7367044687271118, "logps/rejected": -2.016188859939575, "loss": 1.8981, "nll_loss": 1.8429385423660278, "rewards/accuracies": 0.75, "rewards/chosen": -0.1736704558134079, "rewards/margins": 0.02794845588505268, "rewards/rejected": -0.20161890983581543, "step": 233 }, { "epoch": 0.36879432624113473, "grad_norm": 0.2600264549255371, "learning_rate": 3.915716059265955e-06, "log_odds_chosen": 0.34488314390182495, "log_odds_ratio": -0.543495774269104, "logits/chosen": 0.25614839792251587, "logits/rejected": -0.785013735294342, "logps/chosen": -1.6906204223632812, "logps/rejected": -1.9822051525115967, "loss": 1.8406, "nll_loss": 1.7862180471420288, "rewards/accuracies": 0.75, "rewards/chosen": -0.1690620481967926, "rewards/margins": 0.02915847674012184, "rewards/rejected": -0.19822052121162415, "step": 234 }, { "epoch": 0.37037037037037035, "grad_norm": 0.22871002554893494, "learning_rate": 3.9141298268213966e-06, "log_odds_chosen": 0.4361989200115204, "log_odds_ratio": -0.5028459429740906, "logits/chosen": 0.2875756621360779, "logits/rejected": -0.6794713735580444, "logps/chosen": -1.6101529598236084, "logps/rejected": -1.9754250049591064, "loss": 1.7655, "nll_loss": 1.7152522802352905, "rewards/accuracies": 1.0, "rewards/chosen": -0.16101528704166412, "rewards/margins": 0.03652720898389816, "rewards/rejected": -0.19754250347614288, "step": 235 }, { "epoch": 0.37194641449960597, "grad_norm": 0.261535108089447, "learning_rate": 3.912529134040255e-06, "log_odds_chosen": 0.296495646238327, "log_odds_ratio": -0.5591344833374023, "logits/chosen": 0.27751684188842773, "logits/rejected": -0.5663548707962036, "logps/chosen": -1.6922770738601685, "logps/rejected": -1.9417719841003418, "loss": 1.8539, "nll_loss": 1.7979625463485718, "rewards/accuracies": 1.0, "rewards/chosen": -0.1692277193069458, "rewards/margins": 0.02494947426021099, "rewards/rejected": -0.19417718052864075, "step": 236 }, { "epoch": 0.3735224586288416, "grad_norm": 0.2609320878982544, "learning_rate": 3.910913993014998e-06, "log_odds_chosen": 0.4593818187713623, "log_odds_ratio": -0.4922163188457489, "logits/chosen": 0.3688610792160034, "logits/rejected": -0.7247602939605713, "logps/chosen": -1.687050223350525, "logps/rejected": -2.0763425827026367, "loss": 1.8363, "nll_loss": 1.7870500087738037, "rewards/accuracies": 1.0, "rewards/chosen": -0.1687050312757492, "rewards/margins": 0.038929231464862823, "rewards/rejected": -0.20763425529003143, "step": 237 }, { "epoch": 0.3750985027580772, "grad_norm": 0.25336939096450806, "learning_rate": 3.909284415947246e-06, "log_odds_chosen": 0.4314580261707306, "log_odds_ratio": -0.5070245862007141, "logits/chosen": 0.32482269406318665, "logits/rejected": -0.8647500276565552, "logps/chosen": -1.73568594455719, "logps/rejected": -2.101022243499756, "loss": 1.87, "nll_loss": 1.819305419921875, "rewards/accuracies": 1.0, "rewards/chosen": -0.17356860637664795, "rewards/margins": 0.036533623933792114, "rewards/rejected": -0.21010223031044006, "step": 238 }, { "epoch": 0.37667454688731283, "grad_norm": 0.24124158918857574, "learning_rate": 3.907640415147674e-06, "log_odds_chosen": 0.4658309817314148, "log_odds_ratio": -0.49067068099975586, "logits/chosen": 0.21756987273693085, "logits/rejected": -0.9139111042022705, "logps/chosen": -1.6312497854232788, "logps/rejected": -2.020339250564575, "loss": 1.788, "nll_loss": 1.7389440536499023, "rewards/accuracies": 1.0, "rewards/chosen": -0.16312497854232788, "rewards/margins": 0.038908950984478, "rewards/rejected": -0.20203393697738647, "step": 239 }, { "epoch": 0.37825059101654845, "grad_norm": 0.2780405282974243, "learning_rate": 3.905982003035924e-06, "log_odds_chosen": 0.3844011127948761, "log_odds_ratio": -0.5241358280181885, "logits/chosen": 0.23867914080619812, "logits/rejected": -0.8461136817932129, "logps/chosen": -1.7408959865570068, "logps/rejected": -2.067878246307373, "loss": 1.8775, "nll_loss": 1.825044870376587, "rewards/accuracies": 1.0, "rewards/chosen": -0.17408961057662964, "rewards/margins": 0.03269820660352707, "rewards/rejected": -0.2067878246307373, "step": 240 }, { "epoch": 0.3798266351457841, "grad_norm": 0.24185331165790558, "learning_rate": 3.904309192140506e-06, "log_odds_chosen": 0.278605192899704, "log_odds_ratio": -0.571584165096283, "logits/chosen": 0.24360942840576172, "logits/rejected": -0.4919203519821167, "logps/chosen": -1.6554006338119507, "logps/rejected": -1.8880599737167358, "loss": 1.8086, "nll_loss": 1.7514057159423828, "rewards/accuracies": 0.875, "rewards/chosen": -0.16554008424282074, "rewards/margins": 0.023265928030014038, "rewards/rejected": -0.18880601227283478, "step": 241 }, { "epoch": 0.3814026792750197, "grad_norm": 0.26548993587493896, "learning_rate": 3.90262199509871e-06, "log_odds_chosen": 0.2826857268810272, "log_odds_ratio": -0.5648576021194458, "logits/chosen": 0.22797901928424835, "logits/rejected": -0.5838393568992615, "logps/chosen": -1.6858417987823486, "logps/rejected": -1.9206804037094116, "loss": 1.8445, "nll_loss": 1.7879817485809326, "rewards/accuracies": 1.0, "rewards/chosen": -0.1685841828584671, "rewards/margins": 0.023483866825699806, "rewards/rejected": -0.19206805527210236, "step": 242 }, { "epoch": 0.3829787234042553, "grad_norm": 0.2655971944332123, "learning_rate": 3.900920424656501e-06, "log_odds_chosen": 0.4220297932624817, "log_odds_ratio": -0.51133131980896, "logits/chosen": 0.2471286654472351, "logits/rejected": -0.7664991021156311, "logps/chosen": -1.6024655103683472, "logps/rejected": -1.953262209892273, "loss": 1.7496, "nll_loss": 1.6984376907348633, "rewards/accuracies": 0.875, "rewards/chosen": -0.16024655103683472, "rewards/margins": 0.035079680383205414, "rewards/rejected": -0.19532622396945953, "step": 243 }, { "epoch": 0.38455476753349094, "grad_norm": 0.2719763517379761, "learning_rate": 3.899204493668432e-06, "log_odds_chosen": 0.34531235694885254, "log_odds_ratio": -0.545647382736206, "logits/chosen": 0.2916204631328583, "logits/rejected": -0.6753207445144653, "logps/chosen": -1.760868787765503, "logps/rejected": -2.058903694152832, "loss": 1.8996, "nll_loss": 1.8450438976287842, "rewards/accuracies": 0.75, "rewards/chosen": -0.17608687281608582, "rewards/margins": 0.029803497716784477, "rewards/rejected": -0.20589037239551544, "step": 244 }, { "epoch": 0.38613081166272656, "grad_norm": 0.26759397983551025, "learning_rate": 3.897474215097542e-06, "log_odds_chosen": 0.34969383478164673, "log_odds_ratio": -0.5378445386886597, "logits/chosen": 0.30676940083503723, "logits/rejected": -0.9074739217758179, "logps/chosen": -1.7014522552490234, "logps/rejected": -1.9977072477340698, "loss": 1.8594, "nll_loss": 1.805631160736084, "rewards/accuracies": 1.0, "rewards/chosen": -0.1701452136039734, "rewards/margins": 0.029625503346323967, "rewards/rejected": -0.1997707188129425, "step": 245 }, { "epoch": 0.3877068557919622, "grad_norm": 0.24695803225040436, "learning_rate": 3.8957296020152596e-06, "log_odds_chosen": 0.4582657217979431, "log_odds_ratio": -0.4948864281177521, "logits/chosen": 0.3029223680496216, "logits/rejected": -0.7880758047103882, "logps/chosen": -1.6669402122497559, "logps/rejected": -2.054736614227295, "loss": 1.8203, "nll_loss": 1.7708466053009033, "rewards/accuracies": 1.0, "rewards/chosen": -0.1666940301656723, "rewards/margins": 0.03877962380647659, "rewards/rejected": -0.2054736316204071, "step": 246 }, { "epoch": 0.3892828999211978, "grad_norm": 0.2638940215110779, "learning_rate": 3.893970667601303e-06, "log_odds_chosen": 0.3049715757369995, "log_odds_ratio": -0.5559093952178955, "logits/chosen": 0.20208218693733215, "logits/rejected": -1.0700570344924927, "logps/chosen": -1.7513179779052734, "logps/rejected": -2.0092613697052, "loss": 1.8901, "nll_loss": 1.83454430103302, "rewards/accuracies": 1.0, "rewards/chosen": -0.17513179779052734, "rewards/margins": 0.02579433098435402, "rewards/rejected": -0.20092612504959106, "step": 247 }, { "epoch": 0.3908589440504334, "grad_norm": 0.260355681180954, "learning_rate": 3.892197425143581e-06, "log_odds_chosen": 0.4682731330394745, "log_odds_ratio": -0.48940151929855347, "logits/chosen": 0.255657821893692, "logits/rejected": -0.671928882598877, "logps/chosen": -1.6768563985824585, "logps/rejected": -2.071852922439575, "loss": 1.8127, "nll_loss": 1.763710618019104, "rewards/accuracies": 1.0, "rewards/chosen": -0.16768565773963928, "rewards/margins": 0.03949964791536331, "rewards/rejected": -0.2071852833032608, "step": 248 }, { "epoch": 0.39243498817966904, "grad_norm": 0.2769114673137665, "learning_rate": 3.890409888038094e-06, "log_odds_chosen": 0.35800671577453613, "log_odds_ratio": -0.5353755354881287, "logits/chosen": 0.27717655897140503, "logits/rejected": -0.6495774984359741, "logps/chosen": -1.718163013458252, "logps/rejected": -2.023691415786743, "loss": 1.8664, "nll_loss": 1.812865138053894, "rewards/accuracies": 1.0, "rewards/chosen": -0.17181627452373505, "rewards/margins": 0.030552847310900688, "rewards/rejected": -0.20236913859844208, "step": 249 }, { "epoch": 0.39401103230890466, "grad_norm": 0.2523289620876312, "learning_rate": 3.888608069788831e-06, "log_odds_chosen": 0.3482479751110077, "log_odds_ratio": -0.5456973314285278, "logits/chosen": 0.18573682010173798, "logits/rejected": -0.5659343600273132, "logps/chosen": -1.672256350517273, "logps/rejected": -1.9685872793197632, "loss": 1.8214, "nll_loss": 1.7668198347091675, "rewards/accuracies": 0.875, "rewards/chosen": -0.16722562909126282, "rewards/margins": 0.029633095487952232, "rewards/rejected": -0.1968587189912796, "step": 250 }, { "epoch": 0.3955870764381403, "grad_norm": 0.23957766592502594, "learning_rate": 3.8867919840076685e-06, "log_odds_chosen": 0.4184243083000183, "log_odds_ratio": -0.5095526576042175, "logits/chosen": 0.22751504182815552, "logits/rejected": -0.8524928092956543, "logps/chosen": -1.6037871837615967, "logps/rejected": -1.9516103267669678, "loss": 1.7432, "nll_loss": 1.692209005355835, "rewards/accuracies": 1.0, "rewards/chosen": -0.16037872433662415, "rewards/margins": 0.03478231281042099, "rewards/rejected": -0.19516104459762573, "step": 251 }, { "epoch": 0.3971631205673759, "grad_norm": 0.2576284110546112, "learning_rate": 3.884961644414267e-06, "log_odds_chosen": 0.34130245447158813, "log_odds_ratio": -0.5387775897979736, "logits/chosen": 0.24311238527297974, "logits/rejected": -0.3957245647907257, "logps/chosen": -1.7268153429031372, "logps/rejected": -2.0152997970581055, "loss": 1.848, "nll_loss": 1.794105887413025, "rewards/accuracies": 1.0, "rewards/chosen": -0.1726815402507782, "rewards/margins": 0.028848443180322647, "rewards/rejected": -0.20152997970581055, "step": 252 }, { "epoch": 0.3987391646966115, "grad_norm": 0.24223408102989197, "learning_rate": 3.883117064835967e-06, "log_odds_chosen": 0.3959362506866455, "log_odds_ratio": -0.5255994200706482, "logits/chosen": 0.1985481083393097, "logits/rejected": -0.7414513230323792, "logps/chosen": -1.6674610376358032, "logps/rejected": -2.0037710666656494, "loss": 1.8186, "nll_loss": 1.7660025358200073, "rewards/accuracies": 0.875, "rewards/chosen": -0.1667460948228836, "rewards/margins": 0.033631011843681335, "rewards/rejected": -0.20037710666656494, "step": 253 }, { "epoch": 0.40031520882584715, "grad_norm": 0.26332396268844604, "learning_rate": 3.881258259207688e-06, "log_odds_chosen": 0.3393678367137909, "log_odds_ratio": -0.5432149171829224, "logits/chosen": 0.30877983570098877, "logits/rejected": -0.7882847785949707, "logps/chosen": -1.6376413106918335, "logps/rejected": -1.919863224029541, "loss": 1.7661, "nll_loss": 1.7118016481399536, "rewards/accuracies": 0.875, "rewards/chosen": -0.1637641191482544, "rewards/margins": 0.028222184628248215, "rewards/rejected": -0.1919863075017929, "step": 254 }, { "epoch": 0.40189125295508277, "grad_norm": 0.25575923919677734, "learning_rate": 3.8793852415718165e-06, "log_odds_chosen": 0.4229394495487213, "log_odds_ratio": -0.5054378509521484, "logits/chosen": 0.21786652505397797, "logits/rejected": -1.0462470054626465, "logps/chosen": -1.561218023300171, "logps/rejected": -1.9078154563903809, "loss": 1.694, "nll_loss": 1.6435015201568604, "rewards/accuracies": 1.0, "rewards/chosen": -0.15612182021141052, "rewards/margins": 0.0346597358584404, "rewards/rejected": -0.19078153371810913, "step": 255 }, { "epoch": 0.4034672970843184, "grad_norm": 0.24122343957424164, "learning_rate": 3.877498026078107e-06, "log_odds_chosen": 0.2585289180278778, "log_odds_ratio": -0.5783178210258484, "logits/chosen": 0.22445496916770935, "logits/rejected": -0.9274032711982727, "logps/chosen": -1.7019916772842407, "logps/rejected": -1.9216302633285522, "loss": 1.8365, "nll_loss": 1.7786757946014404, "rewards/accuracies": 0.75, "rewards/chosen": -0.1701991707086563, "rewards/margins": 0.02196386270225048, "rewards/rejected": -0.19216305017471313, "step": 256 }, { "epoch": 0.40504334121355395, "grad_norm": 0.25378501415252686, "learning_rate": 3.875596626983573e-06, "log_odds_chosen": 0.48093894124031067, "log_odds_ratio": -0.4933563768863678, "logits/chosen": 0.24534080922603607, "logits/rejected": -0.61388099193573, "logps/chosen": -1.6251795291900635, "logps/rejected": -2.02630352973938, "loss": 1.7574, "nll_loss": 1.708074688911438, "rewards/accuracies": 1.0, "rewards/chosen": -0.1625179499387741, "rewards/margins": 0.040112413465976715, "rewards/rejected": -0.20263037085533142, "step": 257 }, { "epoch": 0.4066193853427896, "grad_norm": 0.26203182339668274, "learning_rate": 3.873681058652374e-06, "log_odds_chosen": 0.27626505494117737, "log_odds_ratio": -0.5716174244880676, "logits/chosen": 0.2242291271686554, "logits/rejected": -0.705990731716156, "logps/chosen": -1.742016077041626, "logps/rejected": -1.976877212524414, "loss": 1.8763, "nll_loss": 1.819146990776062, "rewards/accuracies": 1.0, "rewards/chosen": -0.1742016226053238, "rewards/margins": 0.023486101999878883, "rewards/rejected": -0.19768773019313812, "step": 258 }, { "epoch": 0.4081954294720252, "grad_norm": 0.26595333218574524, "learning_rate": 3.871751335555715e-06, "log_odds_chosen": 0.26442596316337585, "log_odds_ratio": -0.5714837312698364, "logits/chosen": 0.20778290927410126, "logits/rejected": -0.8641613125801086, "logps/chosen": -1.6800938844680786, "logps/rejected": -1.9005059003829956, "loss": 1.809, "nll_loss": 1.7518802881240845, "rewards/accuracies": 1.0, "rewards/chosen": -0.16800937056541443, "rewards/margins": 0.0220412015914917, "rewards/rejected": -0.19005057215690613, "step": 259 }, { "epoch": 0.4097714736012608, "grad_norm": 0.23981213569641113, "learning_rate": 3.869807472271731e-06, "log_odds_chosen": 0.3423271179199219, "log_odds_ratio": -0.5396283864974976, "logits/chosen": 0.23563726246356964, "logits/rejected": -0.7463378310203552, "logps/chosen": -1.6491752862930298, "logps/rejected": -1.9362696409225464, "loss": 1.8109, "nll_loss": 1.7569705247879028, "rewards/accuracies": 1.0, "rewards/chosen": -0.16491752862930298, "rewards/margins": 0.028709445148706436, "rewards/rejected": -0.19362697005271912, "step": 260 }, { "epoch": 0.41134751773049644, "grad_norm": 0.24014748632907867, "learning_rate": 3.8678494834853826e-06, "log_odds_chosen": 0.34554576873779297, "log_odds_ratio": -0.5426000356674194, "logits/chosen": 0.20350177586078644, "logits/rejected": -0.6877405643463135, "logps/chosen": -1.6484317779541016, "logps/rejected": -1.934998631477356, "loss": 1.7826, "nll_loss": 1.728297472000122, "rewards/accuracies": 0.875, "rewards/chosen": -0.16484320163726807, "rewards/margins": 0.028656674548983574, "rewards/rejected": -0.1934998780488968, "step": 261 }, { "epoch": 0.41292356185973206, "grad_norm": 0.27845731377601624, "learning_rate": 3.865877383988339e-06, "log_odds_chosen": 0.41938862204551697, "log_odds_ratio": -0.5120880603790283, "logits/chosen": 0.20454849302768707, "logits/rejected": -0.6759002208709717, "logps/chosen": -1.6775566339492798, "logps/rejected": -2.0273048877716064, "loss": 1.8329, "nll_loss": 1.7817234992980957, "rewards/accuracies": 1.0, "rewards/chosen": -0.16775566339492798, "rewards/margins": 0.0349748432636261, "rewards/rejected": -0.20273049175739288, "step": 262 }, { "epoch": 0.4144996059889677, "grad_norm": 0.2552221417427063, "learning_rate": 3.863891188678869e-06, "log_odds_chosen": 0.16435928642749786, "log_odds_ratio": -0.6247628927230835, "logits/chosen": 0.17906969785690308, "logits/rejected": -0.8283835649490356, "logps/chosen": -1.5928330421447754, "logps/rejected": -1.7304943799972534, "loss": 1.7381, "nll_loss": 1.6756339073181152, "rewards/accuracies": 0.75, "rewards/chosen": -0.1592833399772644, "rewards/margins": 0.013766113668680191, "rewards/rejected": -0.1730494201183319, "step": 263 }, { "epoch": 0.4160756501182033, "grad_norm": 0.24987082183361053, "learning_rate": 3.8618909125617305e-06, "log_odds_chosen": 0.5285029411315918, "log_odds_ratio": -0.4670139253139496, "logits/chosen": 0.16765527427196503, "logits/rejected": -1.05913507938385, "logps/chosen": -1.5894949436187744, "logps/rejected": -2.031071662902832, "loss": 1.7407, "nll_loss": 1.693982720375061, "rewards/accuracies": 1.0, "rewards/chosen": -0.15894947946071625, "rewards/margins": 0.04415770620107651, "rewards/rejected": -0.20310717821121216, "step": 264 }, { "epoch": 0.4176516942474389, "grad_norm": 0.24382364749908447, "learning_rate": 3.859876570748054e-06, "log_odds_chosen": 0.3537558913230896, "log_odds_ratio": -0.5339797735214233, "logits/chosen": 0.2316458523273468, "logits/rejected": -0.3801443874835968, "logps/chosen": -1.6417627334594727, "logps/rejected": -1.9363051652908325, "loss": 1.792, "nll_loss": 1.7385859489440918, "rewards/accuracies": 1.0, "rewards/chosen": -0.16417627036571503, "rewards/margins": 0.02945425920188427, "rewards/rejected": -0.19363053143024445, "step": 265 }, { "epoch": 0.41922773837667454, "grad_norm": 0.23967225849628448, "learning_rate": 3.857848178455231e-06, "log_odds_chosen": 0.11959446966648102, "log_odds_ratio": -0.6412428617477417, "logits/chosen": 0.2559909522533417, "logits/rejected": -1.0443730354309082, "logps/chosen": -1.656022071838379, "logps/rejected": -1.7599290609359741, "loss": 1.8213, "nll_loss": 1.7571834325790405, "rewards/accuracies": 0.75, "rewards/chosen": -0.16560222208499908, "rewards/margins": 0.010390684939920902, "rewards/rejected": -0.1759929060935974, "step": 266 }, { "epoch": 0.42080378250591016, "grad_norm": 0.27286192774772644, "learning_rate": 3.855805751006794e-06, "log_odds_chosen": 0.4806629419326782, "log_odds_ratio": -0.49105098843574524, "logits/chosen": 0.2224351465702057, "logits/rejected": -1.0106374025344849, "logps/chosen": -1.6792700290679932, "logps/rejected": -2.0848042964935303, "loss": 1.8049, "nll_loss": 1.7558059692382812, "rewards/accuracies": 1.0, "rewards/chosen": -0.16792699694633484, "rewards/margins": 0.04055342823266983, "rewards/rejected": -0.20848044753074646, "step": 267 }, { "epoch": 0.4223798266351458, "grad_norm": 0.22964340448379517, "learning_rate": 3.853749303832308e-06, "log_odds_chosen": 0.34756800532341003, "log_odds_ratio": -0.5398483872413635, "logits/chosen": 0.19423283636569977, "logits/rejected": -0.5895228981971741, "logps/chosen": -1.6056160926818848, "logps/rejected": -1.892835021018982, "loss": 1.7439, "nll_loss": 1.6899384260177612, "rewards/accuracies": 1.0, "rewards/chosen": -0.16056160628795624, "rewards/margins": 0.02872188575565815, "rewards/rejected": -0.18928351998329163, "step": 268 }, { "epoch": 0.4239558707643814, "grad_norm": 0.2715766429901123, "learning_rate": 3.8516788524672495e-06, "log_odds_chosen": 0.2077798843383789, "log_odds_ratio": -0.6008598804473877, "logits/chosen": 0.22610792517662048, "logits/rejected": -0.9308719038963318, "logps/chosen": -1.6854466199874878, "logps/rejected": -1.8608953952789307, "loss": 1.8208, "nll_loss": 1.7607052326202393, "rewards/accuracies": 0.625, "rewards/chosen": -0.16854466497898102, "rewards/margins": 0.017544886097311974, "rewards/rejected": -0.18608956038951874, "step": 269 }, { "epoch": 0.425531914893617, "grad_norm": 0.23874664306640625, "learning_rate": 3.849594412552889e-06, "log_odds_chosen": 0.3525705933570862, "log_odds_ratio": -0.5381215810775757, "logits/chosen": 0.24739238619804382, "logits/rejected": -0.6722042560577393, "logps/chosen": -1.587384581565857, "logps/rejected": -1.871896505355835, "loss": 1.727, "nll_loss": 1.673226237297058, "rewards/accuracies": 1.0, "rewards/chosen": -0.15873846411705017, "rewards/margins": 0.028451191261410713, "rewards/rejected": -0.18718963861465454, "step": 270 }, { "epoch": 0.42710795902285265, "grad_norm": 0.2558384835720062, "learning_rate": 3.847495999836175e-06, "log_odds_chosen": 0.3543975353240967, "log_odds_ratio": -0.5356498956680298, "logits/chosen": 0.2624046504497528, "logits/rejected": -0.9314246773719788, "logps/chosen": -1.666176438331604, "logps/rejected": -1.9635179042816162, "loss": 1.7931, "nll_loss": 1.7394909858703613, "rewards/accuracies": 1.0, "rewards/chosen": -0.16661766171455383, "rewards/margins": 0.029734138399362564, "rewards/rejected": -0.1963518112897873, "step": 271 }, { "epoch": 0.42868400315208827, "grad_norm": 0.23578688502311707, "learning_rate": 3.845383630169613e-06, "log_odds_chosen": 0.3167238235473633, "log_odds_ratio": -0.5535402894020081, "logits/chosen": 0.11884280294179916, "logits/rejected": -0.9415445923805237, "logps/chosen": -1.5773344039916992, "logps/rejected": -1.837627649307251, "loss": 1.7049, "nll_loss": 1.6495603322982788, "rewards/accuracies": 0.875, "rewards/chosen": -0.15773345530033112, "rewards/margins": 0.026029333472251892, "rewards/rejected": -0.18376277387142181, "step": 272 }, { "epoch": 0.4302600472813239, "grad_norm": 0.2651219367980957, "learning_rate": 3.843257319511147e-06, "log_odds_chosen": 0.5530175566673279, "log_odds_ratio": -0.46196067333221436, "logits/chosen": 0.17332234978675842, "logits/rejected": -1.021928310394287, "logps/chosen": -1.5769469738006592, "logps/rejected": -2.0343515872955322, "loss": 1.7041, "nll_loss": 1.6578803062438965, "rewards/accuracies": 1.0, "rewards/chosen": -0.15769469738006592, "rewards/margins": 0.04574044048786163, "rewards/rejected": -0.20343513786792755, "step": 273 }, { "epoch": 0.4318360914105595, "grad_norm": 0.24971356987953186, "learning_rate": 3.841117083924039e-06, "log_odds_chosen": 0.32874542474746704, "log_odds_ratio": -0.5475614666938782, "logits/chosen": 0.16287937760353088, "logits/rejected": -1.0936428308486938, "logps/chosen": -1.6155405044555664, "logps/rejected": -1.888677716255188, "loss": 1.7473, "nll_loss": 1.692505121231079, "rewards/accuracies": 1.0, "rewards/chosen": -0.16155406832695007, "rewards/margins": 0.0273137167096138, "rewards/rejected": -0.18886777758598328, "step": 274 }, { "epoch": 0.43341213553979513, "grad_norm": 0.2504657506942749, "learning_rate": 3.838962939576746e-06, "log_odds_chosen": 0.4173721373081207, "log_odds_ratio": -0.51390141248703, "logits/chosen": 0.13147510588169098, "logits/rejected": -0.8347434401512146, "logps/chosen": -1.5679432153701782, "logps/rejected": -1.9134660959243774, "loss": 1.72, "nll_loss": 1.668624758720398, "rewards/accuracies": 1.0, "rewards/chosen": -0.15679430961608887, "rewards/margins": 0.03455227613449097, "rewards/rejected": -0.19134658575057983, "step": 275 }, { "epoch": 0.43498817966903075, "grad_norm": 0.2594338357448578, "learning_rate": 3.8367949027427985e-06, "log_odds_chosen": 0.3050197958946228, "log_odds_ratio": -0.5566756129264832, "logits/chosen": 0.19317705929279327, "logits/rejected": -0.7772097587585449, "logps/chosen": -1.683677077293396, "logps/rejected": -1.9412704706192017, "loss": 1.8395, "nll_loss": 1.7838023900985718, "rewards/accuracies": 1.0, "rewards/chosen": -0.16836771368980408, "rewards/margins": 0.02575933374464512, "rewards/rejected": -0.19412705302238464, "step": 276 }, { "epoch": 0.43656422379826637, "grad_norm": 0.26758840680122375, "learning_rate": 3.834612989800681e-06, "log_odds_chosen": 0.5289045572280884, "log_odds_ratio": -0.46756529808044434, "logits/chosen": 0.21106746792793274, "logits/rejected": -1.300022006034851, "logps/chosen": -1.6692185401916504, "logps/rejected": -2.1159589290618896, "loss": 1.7865, "nll_loss": 1.7397515773773193, "rewards/accuracies": 1.0, "rewards/chosen": -0.16692185401916504, "rewards/margins": 0.04467405378818512, "rewards/rejected": -0.21159592270851135, "step": 277 }, { "epoch": 0.438140267927502, "grad_norm": 0.37664347887039185, "learning_rate": 3.832417217233703e-06, "log_odds_chosen": 0.3042469024658203, "log_odds_ratio": -0.555115282535553, "logits/chosen": 0.17117249965667725, "logits/rejected": -0.787204921245575, "logps/chosen": -1.6579128503799438, "logps/rejected": -1.9126213788986206, "loss": 1.7924, "nll_loss": 1.7368648052215576, "rewards/accuracies": 1.0, "rewards/chosen": -0.16579128801822662, "rewards/margins": 0.02547084540128708, "rewards/rejected": -0.1912621259689331, "step": 278 }, { "epoch": 0.4397163120567376, "grad_norm": 0.2642952501773834, "learning_rate": 3.8302076016298775e-06, "log_odds_chosen": 0.4446471929550171, "log_odds_ratio": -0.49887239933013916, "logits/chosen": 0.11896737664937973, "logits/rejected": -1.0080976486206055, "logps/chosen": -1.5889983177185059, "logps/rejected": -1.9574190378189087, "loss": 1.7293, "nll_loss": 1.6794246435165405, "rewards/accuracies": 1.0, "rewards/chosen": -0.15889984369277954, "rewards/margins": 0.036842066794633865, "rewards/rejected": -0.1957419216632843, "step": 279 }, { "epoch": 0.44129235618597323, "grad_norm": 0.2610625922679901, "learning_rate": 3.827984159681796e-06, "log_odds_chosen": 0.34331244230270386, "log_odds_ratio": -0.541645884513855, "logits/chosen": 0.10684026777744293, "logits/rejected": -0.8201386332511902, "logps/chosen": -1.6439259052276611, "logps/rejected": -1.9289183616638184, "loss": 1.7716, "nll_loss": 1.7174153327941895, "rewards/accuracies": 0.875, "rewards/chosen": -0.1643926203250885, "rewards/margins": 0.028499236330389977, "rewards/rejected": -0.19289185106754303, "step": 280 }, { "epoch": 0.4428684003152088, "grad_norm": 0.24843472242355347, "learning_rate": 3.825746908186498e-06, "log_odds_chosen": 0.3994872272014618, "log_odds_ratio": -0.5212578773498535, "logits/chosen": 0.2376633733510971, "logits/rejected": -1.0532779693603516, "logps/chosen": -1.6814634799957275, "logps/rejected": -2.018228769302368, "loss": 1.7922, "nll_loss": 1.740039348602295, "rewards/accuracies": 1.0, "rewards/chosen": -0.16814635694026947, "rewards/margins": 0.03367652744054794, "rewards/rejected": -0.201822891831398, "step": 281 }, { "epoch": 0.4444444444444444, "grad_norm": 0.2748637795448303, "learning_rate": 3.823495864045352e-06, "log_odds_chosen": 0.4453040361404419, "log_odds_ratio": -0.4974423050880432, "logits/chosen": 0.20578338205814362, "logits/rejected": -0.5144822001457214, "logps/chosen": -1.6322267055511475, "logps/rejected": -2.0018556118011475, "loss": 1.7625, "nll_loss": 1.7127240896224976, "rewards/accuracies": 1.0, "rewards/chosen": -0.16322267055511475, "rewards/margins": 0.036962881684303284, "rewards/rejected": -0.20018555223941803, "step": 282 }, { "epoch": 0.44602048857368004, "grad_norm": 0.2273551970720291, "learning_rate": 3.8212310442639205e-06, "log_odds_chosen": 0.33169132471084595, "log_odds_ratio": -0.5450347065925598, "logits/chosen": 0.2068972885608673, "logits/rejected": -0.695864200592041, "logps/chosen": -1.5796043872833252, "logps/rejected": -1.8531723022460938, "loss": 1.7234, "nll_loss": 1.6689225435256958, "rewards/accuracies": 0.875, "rewards/chosen": -0.1579604595899582, "rewards/margins": 0.027356795966625214, "rewards/rejected": -0.1853172332048416, "step": 283 }, { "epoch": 0.44759653270291566, "grad_norm": 0.24065622687339783, "learning_rate": 3.8189524659518355e-06, "log_odds_chosen": 0.40192341804504395, "log_odds_ratio": -0.5161522626876831, "logits/chosen": 0.2697753310203552, "logits/rejected": -0.8584244847297668, "logps/chosen": -1.6505751609802246, "logps/rejected": -1.9848883152008057, "loss": 1.7755, "nll_loss": 1.7239261865615845, "rewards/accuracies": 0.875, "rewards/chosen": -0.16505752503871918, "rewards/margins": 0.03343129903078079, "rewards/rejected": -0.19848881661891937, "step": 284 }, { "epoch": 0.4491725768321513, "grad_norm": 0.244610995054245, "learning_rate": 3.816660146322667e-06, "log_odds_chosen": 0.36820292472839355, "log_odds_ratio": -0.5295414328575134, "logits/chosen": 0.1906885802745819, "logits/rejected": -0.700128436088562, "logps/chosen": -1.5470272302627563, "logps/rejected": -1.847809076309204, "loss": 1.6794, "nll_loss": 1.626416802406311, "rewards/accuracies": 1.0, "rewards/chosen": -0.15470272302627563, "rewards/margins": 0.03007819503545761, "rewards/rejected": -0.18478091061115265, "step": 285 }, { "epoch": 0.4507486209613869, "grad_norm": 0.23016570508480072, "learning_rate": 3.814354102693797e-06, "log_odds_chosen": 0.3836941719055176, "log_odds_ratio": -0.5216916799545288, "logits/chosen": 0.21484431624412537, "logits/rejected": -0.9419076442718506, "logps/chosen": -1.6132217645645142, "logps/rejected": -1.9288195371627808, "loss": 1.7432, "nll_loss": 1.6910346746444702, "rewards/accuracies": 1.0, "rewards/chosen": -0.16132217645645142, "rewards/margins": 0.0315597802400589, "rewards/rejected": -0.19288195669651031, "step": 286 }, { "epoch": 0.4523246650906225, "grad_norm": 0.23653464019298553, "learning_rate": 3.8120343524862814e-06, "log_odds_chosen": 0.3473433256149292, "log_odds_ratio": -0.535927951335907, "logits/chosen": 0.18108825385570526, "logits/rejected": -0.833530843257904, "logps/chosen": -1.6525256633758545, "logps/rejected": -1.9398300647735596, "loss": 1.7645, "nll_loss": 1.710868239402771, "rewards/accuracies": 1.0, "rewards/chosen": -0.16525256633758545, "rewards/margins": 0.028730444610118866, "rewards/rejected": -0.1939830183982849, "step": 287 }, { "epoch": 0.45390070921985815, "grad_norm": 0.23629866540431976, "learning_rate": 3.809700913224726e-06, "log_odds_chosen": 0.36637431383132935, "log_odds_ratio": -0.5327513217926025, "logits/chosen": 0.15463852882385254, "logits/rejected": -1.287902593612671, "logps/chosen": -1.5792927742004395, "logps/rejected": -1.8798011541366577, "loss": 1.7056, "nll_loss": 1.6522890329360962, "rewards/accuracies": 1.0, "rewards/chosen": -0.15792928636074066, "rewards/margins": 0.03005082532763481, "rewards/rejected": -0.18798011541366577, "step": 288 }, { "epoch": 0.45547675334909377, "grad_norm": 0.23628529906272888, "learning_rate": 3.8073538025371494e-06, "log_odds_chosen": 0.4771001935005188, "log_odds_ratio": -0.48583680391311646, "logits/chosen": 0.2631601393222809, "logits/rejected": -1.0782270431518555, "logps/chosen": -1.551235318183899, "logps/rejected": -1.9436652660369873, "loss": 1.6886, "nll_loss": 1.6399903297424316, "rewards/accuracies": 1.0, "rewards/chosen": -0.1551235318183899, "rewards/margins": 0.03924299776554108, "rewards/rejected": -0.19436652958393097, "step": 289 }, { "epoch": 0.4570527974783294, "grad_norm": 0.2573354244232178, "learning_rate": 3.804993038154852e-06, "log_odds_chosen": 0.3571556806564331, "log_odds_ratio": -0.5380254983901978, "logits/chosen": 0.18856900930404663, "logits/rejected": -0.9549089670181274, "logps/chosen": -1.5852500200271606, "logps/rejected": -1.88179349899292, "loss": 1.7309, "nll_loss": 1.6771280765533447, "rewards/accuracies": 0.875, "rewards/chosen": -0.1585249900817871, "rewards/margins": 0.029654357582330704, "rewards/rejected": -0.1881793588399887, "step": 290 }, { "epoch": 0.458628841607565, "grad_norm": 0.2339908331632614, "learning_rate": 3.8026186379122816e-06, "log_odds_chosen": 0.2612074613571167, "log_odds_ratio": -0.5736123323440552, "logits/chosen": 0.1575014591217041, "logits/rejected": -0.9461207985877991, "logps/chosen": -1.6632664203643799, "logps/rejected": -1.8798828125, "loss": 1.7845, "nll_loss": 1.72710120677948, "rewards/accuracies": 1.0, "rewards/chosen": -0.16632665693759918, "rewards/margins": 0.02166163921356201, "rewards/rejected": -0.18798828125, "step": 291 }, { "epoch": 0.46020488573680063, "grad_norm": 0.232927143573761, "learning_rate": 3.8002306197468983e-06, "log_odds_chosen": 0.16624774038791656, "log_odds_ratio": -0.6159506440162659, "logits/chosen": 0.15579693019390106, "logits/rejected": -1.0309805870056152, "logps/chosen": -1.6420388221740723, "logps/rejected": -1.777790904045105, "loss": 1.7816, "nll_loss": 1.7199894189834595, "rewards/accuracies": 0.75, "rewards/chosen": -0.16420388221740723, "rewards/margins": 0.013575192540884018, "rewards/rejected": -0.17777907848358154, "step": 292 }, { "epoch": 0.46178092986603625, "grad_norm": 0.26111435890197754, "learning_rate": 3.7978290016990367e-06, "log_odds_chosen": 0.4844040274620056, "log_odds_ratio": -0.4840275049209595, "logits/chosen": 0.12877169251441956, "logits/rejected": -1.0049679279327393, "logps/chosen": -1.6433277130126953, "logps/rejected": -2.0521345138549805, "loss": 1.7643, "nll_loss": 1.7158782482147217, "rewards/accuracies": 1.0, "rewards/chosen": -0.16433276236057281, "rewards/margins": 0.040880680084228516, "rewards/rejected": -0.20521345734596252, "step": 293 }, { "epoch": 0.46335697399527187, "grad_norm": 0.2526698112487793, "learning_rate": 3.795413801911776e-06, "log_odds_chosen": 0.5316009521484375, "log_odds_ratio": -0.46823519468307495, "logits/chosen": 0.1174880787730217, "logits/rejected": -1.0741727352142334, "logps/chosen": -1.5681180953979492, "logps/rejected": -2.0108184814453125, "loss": 1.7117, "nll_loss": 1.6648805141448975, "rewards/accuracies": 1.0, "rewards/chosen": -0.15681181848049164, "rewards/margins": 0.04427003860473633, "rewards/rejected": -0.20108187198638916, "step": 294 }, { "epoch": 0.4649330181245075, "grad_norm": 0.26307472586631775, "learning_rate": 3.7929850386307965e-06, "log_odds_chosen": 0.32403603196144104, "log_odds_ratio": -0.549573540687561, "logits/chosen": 0.2209596186876297, "logits/rejected": -0.8648887872695923, "logps/chosen": -1.775384783744812, "logps/rejected": -2.0520851612091064, "loss": 1.8883, "nll_loss": 1.8333498239517212, "rewards/accuracies": 0.875, "rewards/chosen": -0.17753848433494568, "rewards/margins": 0.02767005003988743, "rewards/rejected": -0.20520853996276855, "step": 295 }, { "epoch": 0.4665090622537431, "grad_norm": 0.24105559289455414, "learning_rate": 3.790542730204245e-06, "log_odds_chosen": 0.391379177570343, "log_odds_ratio": -0.5190234184265137, "logits/chosen": 0.17468759417533875, "logits/rejected": -1.050643801689148, "logps/chosen": -1.705775260925293, "logps/rejected": -2.037628412246704, "loss": 1.8141, "nll_loss": 1.7621614933013916, "rewards/accuracies": 1.0, "rewards/chosen": -0.1705775409936905, "rewards/margins": 0.033185333013534546, "rewards/rejected": -0.20376285910606384, "step": 296 }, { "epoch": 0.46808510638297873, "grad_norm": 0.2376202493906021, "learning_rate": 3.7880868950825935e-06, "log_odds_chosen": 0.40684062242507935, "log_odds_ratio": -0.510847806930542, "logits/chosen": 0.13849994540214539, "logits/rejected": -1.200305700302124, "logps/chosen": -1.6610805988311768, "logps/rejected": -2.001574754714966, "loss": 1.7604, "nll_loss": 1.7093130350112915, "rewards/accuracies": 1.0, "rewards/chosen": -0.16610805690288544, "rewards/margins": 0.034049421548843384, "rewards/rejected": -0.20015747845172882, "step": 297 }, { "epoch": 0.46966115051221435, "grad_norm": 0.2616525888442993, "learning_rate": 3.7856175518185058e-06, "log_odds_chosen": 0.3999539613723755, "log_odds_ratio": -0.5190368294715881, "logits/chosen": 0.015357280150055885, "logits/rejected": -1.1991338729858398, "logps/chosen": -1.6416277885437012, "logps/rejected": -1.9745447635650635, "loss": 1.7682, "nll_loss": 1.7162597179412842, "rewards/accuracies": 1.0, "rewards/chosen": -0.1641627848148346, "rewards/margins": 0.033291686326265335, "rewards/rejected": -0.19745448231697083, "step": 298 }, { "epoch": 0.47123719464145, "grad_norm": 0.23752158880233765, "learning_rate": 3.7831347190666883e-06, "log_odds_chosen": 0.4781672954559326, "log_odds_ratio": -0.488000750541687, "logits/chosen": 0.08399657905101776, "logits/rejected": -1.1212432384490967, "logps/chosen": -1.6346526145935059, "logps/rejected": -2.035963773727417, "loss": 1.7627, "nll_loss": 1.713881015777588, "rewards/accuracies": 0.875, "rewards/chosen": -0.16346527636051178, "rewards/margins": 0.0401310995221138, "rewards/rejected": -0.20359636843204498, "step": 299 }, { "epoch": 0.4728132387706856, "grad_norm": 0.25213325023651123, "learning_rate": 3.780638415583759e-06, "log_odds_chosen": 0.2852388620376587, "log_odds_ratio": -0.5705251097679138, "logits/chosen": 0.21283775568008423, "logits/rejected": -0.935249924659729, "logps/chosen": -1.6187589168548584, "logps/rejected": -1.8579552173614502, "loss": 1.7571, "nll_loss": 1.7000482082366943, "rewards/accuracies": 0.75, "rewards/chosen": -0.1618758887052536, "rewards/margins": 0.023919638246297836, "rewards/rejected": -0.18579553067684174, "step": 300 }, { "epoch": 0.4743892828999212, "grad_norm": 0.24251310527324677, "learning_rate": 3.7781286602280967e-06, "log_odds_chosen": 0.17071868479251862, "log_odds_ratio": -0.6216680407524109, "logits/chosen": 0.14868459105491638, "logits/rejected": -1.1415449380874634, "logps/chosen": -1.5997627973556519, "logps/rejected": -1.7335293292999268, "loss": 1.7454, "nll_loss": 1.6831833124160767, "rewards/accuracies": 0.75, "rewards/chosen": -0.1599762886762619, "rewards/margins": 0.013376658782362938, "rewards/rejected": -0.1733529418706894, "step": 301 }, { "epoch": 0.47596532702915684, "grad_norm": 0.22967545688152313, "learning_rate": 3.7756054719597044e-06, "log_odds_chosen": 0.26898688077926636, "log_odds_ratio": -0.5931567549705505, "logits/chosen": 0.033872295171022415, "logits/rejected": -1.1282514333724976, "logps/chosen": -1.6345136165618896, "logps/rejected": -1.8664180040359497, "loss": 1.7516, "nll_loss": 1.6922358274459839, "rewards/accuracies": 0.75, "rewards/chosen": -0.16345134377479553, "rewards/margins": 0.023190462961792946, "rewards/rejected": -0.18664182722568512, "step": 302 }, { "epoch": 0.47754137115839246, "grad_norm": 0.2190561145544052, "learning_rate": 3.773068869840066e-06, "log_odds_chosen": 0.19828103482723236, "log_odds_ratio": -0.6074354648590088, "logits/chosen": 0.2555277347564697, "logits/rejected": -1.1666932106018066, "logps/chosen": -1.6585826873779297, "logps/rejected": -1.8199939727783203, "loss": 1.7779, "nll_loss": 1.7171512842178345, "rewards/accuracies": 0.75, "rewards/chosen": -0.16585825383663177, "rewards/margins": 0.016141142696142197, "rewards/rejected": -0.18199938535690308, "step": 303 }, { "epoch": 0.4791174152876281, "grad_norm": 0.21842867136001587, "learning_rate": 3.770518873031997e-06, "log_odds_chosen": 0.44869300723075867, "log_odds_ratio": -0.4964669644832611, "logits/chosen": 0.03254036605358124, "logits/rejected": -0.8346494436264038, "logps/chosen": -1.5328748226165771, "logps/rejected": -1.8997446298599243, "loss": 1.674, "nll_loss": 1.6243867874145508, "rewards/accuracies": 1.0, "rewards/chosen": -0.15328750014305115, "rewards/margins": 0.03668695688247681, "rewards/rejected": -0.18997445702552795, "step": 304 }, { "epoch": 0.4806934594168637, "grad_norm": 0.253165602684021, "learning_rate": 3.7679555007995065e-06, "log_odds_chosen": 0.41193148493766785, "log_odds_ratio": -0.5260132551193237, "logits/chosen": 0.14702853560447693, "logits/rejected": -1.0447622537612915, "logps/chosen": -1.6558337211608887, "logps/rejected": -2.0012173652648926, "loss": 1.7843, "nll_loss": 1.731735110282898, "rewards/accuracies": 0.875, "rewards/chosen": -0.16558335721492767, "rewards/margins": 0.0345383882522583, "rewards/rejected": -0.20012176036834717, "step": 305 }, { "epoch": 0.48226950354609927, "grad_norm": 0.24374498426914215, "learning_rate": 3.7653787725076464e-06, "log_odds_chosen": 0.2701460123062134, "log_odds_ratio": -0.5720412731170654, "logits/chosen": 0.1489667445421219, "logits/rejected": -0.9628247618675232, "logps/chosen": -1.6143256425857544, "logps/rejected": -1.8385647535324097, "loss": 1.7374, "nll_loss": 1.680199146270752, "rewards/accuracies": 0.875, "rewards/chosen": -0.16143256425857544, "rewards/margins": 0.022423917427659035, "rewards/rejected": -0.18385647237300873, "step": 306 }, { "epoch": 0.4838455476753349, "grad_norm": 0.2575761675834656, "learning_rate": 3.7627887076223685e-06, "log_odds_chosen": 0.3698280155658722, "log_odds_ratio": -0.5293493270874023, "logits/chosen": 0.17162802815437317, "logits/rejected": -0.7795068621635437, "logps/chosen": -1.6772853136062622, "logps/rejected": -1.9871926307678223, "loss": 1.783, "nll_loss": 1.7300152778625488, "rewards/accuracies": 1.0, "rewards/chosen": -0.16772854328155518, "rewards/margins": 0.03099072352051735, "rewards/rejected": -0.19871927797794342, "step": 307 }, { "epoch": 0.4854215918045705, "grad_norm": 0.2139917016029358, "learning_rate": 3.7601853257103765e-06, "log_odds_chosen": 0.22644855082035065, "log_odds_ratio": -0.5928743481636047, "logits/chosen": 0.06793497502803802, "logits/rejected": -1.0903844833374023, "logps/chosen": -1.582783579826355, "logps/rejected": -1.7714552879333496, "loss": 1.6918, "nll_loss": 1.632529616355896, "rewards/accuracies": 0.875, "rewards/chosen": -0.15827836096286774, "rewards/margins": 0.018867187201976776, "rewards/rejected": -0.17714554071426392, "step": 308 }, { "epoch": 0.48699763593380613, "grad_norm": 0.21651345491409302, "learning_rate": 3.7575686464389767e-06, "log_odds_chosen": 0.3462998867034912, "log_odds_ratio": -0.5444170832633972, "logits/chosen": 0.10276569426059723, "logits/rejected": -1.1056041717529297, "logps/chosen": -1.5598326921463013, "logps/rejected": -1.8447059392929077, "loss": 1.6921, "nll_loss": 1.6376224756240845, "rewards/accuracies": 0.875, "rewards/chosen": -0.15598325431346893, "rewards/margins": 0.02848733589053154, "rewards/rejected": -0.18447057902812958, "step": 309 }, { "epoch": 0.48857368006304175, "grad_norm": 0.2089070826768875, "learning_rate": 3.7549386895759315e-06, "log_odds_chosen": 0.38229963183403015, "log_odds_ratio": -0.5227848291397095, "logits/chosen": 0.06746693700551987, "logits/rejected": -1.028963327407837, "logps/chosen": -1.5149016380310059, "logps/rejected": -1.8246971368789673, "loss": 1.6366, "nll_loss": 1.5843473672866821, "rewards/accuracies": 1.0, "rewards/chosen": -0.15149016678333282, "rewards/margins": 0.030979545786976814, "rewards/rejected": -0.1824697107076645, "step": 310 }, { "epoch": 0.49014972419227737, "grad_norm": 0.20956042408943176, "learning_rate": 3.7522954749893086e-06, "log_odds_chosen": 0.5883792042732239, "log_odds_ratio": -0.45304739475250244, "logits/chosen": -0.004524541087448597, "logits/rejected": -1.15907621383667, "logps/chosen": -1.5441385507583618, "logps/rejected": -2.0315380096435547, "loss": 1.6764, "nll_loss": 1.631089448928833, "rewards/accuracies": 1.0, "rewards/chosen": -0.1544138640165329, "rewards/margins": 0.048739951103925705, "rewards/rejected": -0.2031538188457489, "step": 311 }, { "epoch": 0.491725768321513, "grad_norm": 0.23161104321479797, "learning_rate": 3.749639022647332e-06, "log_odds_chosen": 0.35890865325927734, "log_odds_ratio": -0.5340113639831543, "logits/chosen": 0.06597714126110077, "logits/rejected": -1.0388400554656982, "logps/chosen": -1.6328809261322021, "logps/rejected": -1.932662010192871, "loss": 1.7561, "nll_loss": 1.7027454376220703, "rewards/accuracies": 1.0, "rewards/chosen": -0.16328811645507812, "rewards/margins": 0.02997809275984764, "rewards/rejected": -0.19326619803905487, "step": 312 }, { "epoch": 0.4933018124507486, "grad_norm": 0.20770005881786346, "learning_rate": 3.7469693526182304e-06, "log_odds_chosen": 0.513086199760437, "log_odds_ratio": -0.485725075006485, "logits/chosen": 0.0205635167658329, "logits/rejected": -1.4314470291137695, "logps/chosen": -1.581476092338562, "logps/rejected": -2.0070858001708984, "loss": 1.6904, "nll_loss": 1.6417973041534424, "rewards/accuracies": 0.875, "rewards/chosen": -0.15814761817455292, "rewards/margins": 0.04256095737218857, "rewards/rejected": -0.2007085680961609, "step": 313 }, { "epoch": 0.49487785657998423, "grad_norm": 0.21436423063278198, "learning_rate": 3.744286485070085e-06, "log_odds_chosen": 0.5086329579353333, "log_odds_ratio": -0.47319746017456055, "logits/chosen": 0.03913354501128197, "logits/rejected": -1.1685231924057007, "logps/chosen": -1.5282073020935059, "logps/rejected": -1.9466618299484253, "loss": 1.6421, "nll_loss": 1.594788908958435, "rewards/accuracies": 1.0, "rewards/chosen": -0.15282073616981506, "rewards/margins": 0.041845470666885376, "rewards/rejected": -0.19466620683670044, "step": 314 }, { "epoch": 0.49645390070921985, "grad_norm": 0.23891815543174744, "learning_rate": 3.7415904402706795e-06, "log_odds_chosen": 0.4192996621131897, "log_odds_ratio": -0.514187216758728, "logits/chosen": 0.06727111339569092, "logits/rejected": -1.1843537092208862, "logps/chosen": -1.7258471250534058, "logps/rejected": -2.082641363143921, "loss": 1.835, "nll_loss": 1.7836283445358276, "rewards/accuracies": 1.0, "rewards/chosen": -0.17258471250534058, "rewards/margins": 0.03567943722009659, "rewards/rejected": -0.20826414227485657, "step": 315 }, { "epoch": 0.4980299448384555, "grad_norm": 0.21085584163665771, "learning_rate": 3.7388812385873435e-06, "log_odds_chosen": 0.3644852042198181, "log_odds_ratio": -0.5296192169189453, "logits/chosen": 0.030755888670682907, "logits/rejected": -1.189257264137268, "logps/chosen": -1.5624669790267944, "logps/rejected": -1.8610371351242065, "loss": 1.6768, "nll_loss": 1.6238601207733154, "rewards/accuracies": 1.0, "rewards/chosen": -0.15624670684337616, "rewards/margins": 0.02985702082514763, "rewards/rejected": -0.1861037313938141, "step": 316 }, { "epoch": 0.4996059889676911, "grad_norm": 0.23275373876094818, "learning_rate": 3.7361589004868033e-06, "log_odds_chosen": 0.5013652443885803, "log_odds_ratio": -0.479345440864563, "logits/chosen": 0.14082355797290802, "logits/rejected": -1.2382307052612305, "logps/chosen": -1.6419178247451782, "logps/rejected": -2.0661187171936035, "loss": 1.7418, "nll_loss": 1.6938456296920776, "rewards/accuracies": 1.0, "rewards/chosen": -0.16419179737567902, "rewards/margins": 0.042420096695423126, "rewards/rejected": -0.20661188662052155, "step": 317 }, { "epoch": 0.5011820330969267, "grad_norm": 0.2070867121219635, "learning_rate": 3.733423446535022e-06, "log_odds_chosen": 0.5261310338973999, "log_odds_ratio": -0.47375503182411194, "logits/chosen": 0.12559077143669128, "logits/rejected": -1.3008148670196533, "logps/chosen": -1.634326457977295, "logps/rejected": -2.0824835300445557, "loss": 1.7359, "nll_loss": 1.688564658164978, "rewards/accuracies": 1.0, "rewards/chosen": -0.16343267261981964, "rewards/margins": 0.044815681874752045, "rewards/rejected": -0.20824836194515228, "step": 318 }, { "epoch": 0.5027580772261623, "grad_norm": 0.20222921669483185, "learning_rate": 3.7306748973970476e-06, "log_odds_chosen": 0.39474886655807495, "log_odds_ratio": -0.5250571370124817, "logits/chosen": 0.03667742758989334, "logits/rejected": -1.4291538000106812, "logps/chosen": -1.5048575401306152, "logps/rejected": -1.823891282081604, "loss": 1.6483, "nll_loss": 1.5958224534988403, "rewards/accuracies": 0.875, "rewards/chosen": -0.15048575401306152, "rewards/margins": 0.03190337494015694, "rewards/rejected": -0.18238912522792816, "step": 319 }, { "epoch": 0.5043341213553979, "grad_norm": 0.27620604634284973, "learning_rate": 3.7279132738368564e-06, "log_odds_chosen": 0.18474048376083374, "log_odds_ratio": -0.6092777252197266, "logits/chosen": 0.1035664826631546, "logits/rejected": -1.2585889101028442, "logps/chosen": -1.7019753456115723, "logps/rejected": -1.8543686866760254, "loss": 1.8124, "nll_loss": 1.7514902353286743, "rewards/accuracies": 0.75, "rewards/chosen": -0.17019754648208618, "rewards/margins": 0.015239320695400238, "rewards/rejected": -0.18543685972690582, "step": 320 }, { "epoch": 0.5059101654846335, "grad_norm": 0.21710968017578125, "learning_rate": 3.725138596717195e-06, "log_odds_chosen": 0.44385021924972534, "log_odds_ratio": -0.4991372227668762, "logits/chosen": 0.023607883602380753, "logits/rejected": -1.283747911453247, "logps/chosen": -1.5559569597244263, "logps/rejected": -1.9218320846557617, "loss": 1.6659, "nll_loss": 1.6159745454788208, "rewards/accuracies": 1.0, "rewards/chosen": -0.15559569001197815, "rewards/margins": 0.03658752888441086, "rewards/rejected": -0.1921832263469696, "step": 321 }, { "epoch": 0.5074862096138691, "grad_norm": 0.20364168286323547, "learning_rate": 3.7223508869994244e-06, "log_odds_chosen": 0.4654800295829773, "log_odds_ratio": -0.4937525987625122, "logits/chosen": 0.04612987861037254, "logits/rejected": -1.3605108261108398, "logps/chosen": -1.6008849143981934, "logps/rejected": -1.9879066944122314, "loss": 1.7137, "nll_loss": 1.6643142700195312, "rewards/accuracies": 1.0, "rewards/chosen": -0.16008850932121277, "rewards/margins": 0.038702160120010376, "rewards/rejected": -0.19879068434238434, "step": 322 }, { "epoch": 0.5090622537431048, "grad_norm": 0.23145321011543274, "learning_rate": 3.7195501657433594e-06, "log_odds_chosen": 0.39249351620674133, "log_odds_ratio": -0.5205338597297668, "logits/chosen": -0.0046775080263614655, "logits/rejected": -1.3155083656311035, "logps/chosen": -1.6052358150482178, "logps/rejected": -1.9290968179702759, "loss": 1.7238, "nll_loss": 1.6717445850372314, "rewards/accuracies": 1.0, "rewards/chosen": -0.16052357852458954, "rewards/margins": 0.03238610923290253, "rewards/rejected": -0.19290968775749207, "step": 323 }, { "epoch": 0.5106382978723404, "grad_norm": 0.19913478195667267, "learning_rate": 3.716736454107111e-06, "log_odds_chosen": 0.5085100531578064, "log_odds_ratio": -0.47588035464286804, "logits/chosen": 0.002479949500411749, "logits/rejected": -1.0518862009048462, "logps/chosen": -1.5002402067184448, "logps/rejected": -1.9133625030517578, "loss": 1.6117, "nll_loss": 1.5641216039657593, "rewards/accuracies": 1.0, "rewards/chosen": -0.15002401173114777, "rewards/margins": 0.04131225496530533, "rewards/rejected": -0.1913362741470337, "step": 324 }, { "epoch": 0.512214342001576, "grad_norm": 0.21510443091392517, "learning_rate": 3.7139097733469277e-06, "log_odds_chosen": 0.5286773443222046, "log_odds_ratio": -0.46826720237731934, "logits/chosen": 0.02427489310503006, "logits/rejected": -1.2997654676437378, "logps/chosen": -1.54619562625885, "logps/rejected": -1.982952356338501, "loss": 1.6601, "nll_loss": 1.6132692098617554, "rewards/accuracies": 1.0, "rewards/chosen": -0.15461957454681396, "rewards/margins": 0.04367566481232643, "rewards/rejected": -0.1982952207326889, "step": 325 }, { "epoch": 0.5137903861308116, "grad_norm": 0.22013559937477112, "learning_rate": 3.711070144817032e-06, "log_odds_chosen": 0.4152149558067322, "log_odds_ratio": -0.512770414352417, "logits/chosen": 0.005936339497566223, "logits/rejected": -1.122101068496704, "logps/chosen": -1.633888840675354, "logps/rejected": -1.9839468002319336, "loss": 1.754, "nll_loss": 1.7027238607406616, "rewards/accuracies": 1.0, "rewards/chosen": -0.16338886320590973, "rewards/margins": 0.03500579297542572, "rewards/rejected": -0.19839467108249664, "step": 326 }, { "epoch": 0.5153664302600472, "grad_norm": 0.24476923048496246, "learning_rate": 3.708217589969461e-06, "log_odds_chosen": 0.5117456912994385, "log_odds_ratio": -0.4778427481651306, "logits/chosen": -0.02480306476354599, "logits/rejected": -1.3842414617538452, "logps/chosen": -1.6081162691116333, "logps/rejected": -2.0358729362487793, "loss": 1.7307, "nll_loss": 1.682942509651184, "rewards/accuracies": 1.0, "rewards/chosen": -0.160811647772789, "rewards/margins": 0.042775679379701614, "rewards/rejected": -0.20358730852603912, "step": 327 }, { "epoch": 0.5169424743892829, "grad_norm": 0.2023211419582367, "learning_rate": 3.705352130353904e-06, "log_odds_chosen": 0.4642333984375, "log_odds_ratio": -0.4990030825138092, "logits/chosen": 0.04158155620098114, "logits/rejected": -1.2979916334152222, "logps/chosen": -1.5249441862106323, "logps/rejected": -1.9054869413375854, "loss": 1.6543, "nll_loss": 1.6043576002120972, "rewards/accuracies": 1.0, "rewards/chosen": -0.152494415640831, "rewards/margins": 0.038054272532463074, "rewards/rejected": -0.19054868817329407, "step": 328 }, { "epoch": 0.5185185185185185, "grad_norm": 0.2157369703054428, "learning_rate": 3.7024737876175404e-06, "log_odds_chosen": 0.4267793893814087, "log_odds_ratio": -0.5041587352752686, "logits/chosen": -0.02120812237262726, "logits/rejected": -1.1328731775283813, "logps/chosen": -1.5772178173065186, "logps/rejected": -1.9269858598709106, "loss": 1.683, "nll_loss": 1.6326076984405518, "rewards/accuracies": 1.0, "rewards/chosen": -0.15772177278995514, "rewards/margins": 0.03497680649161339, "rewards/rejected": -0.19269859790802002, "step": 329 }, { "epoch": 0.5200945626477541, "grad_norm": 0.22613677382469177, "learning_rate": 3.699582583504874e-06, "log_odds_chosen": 0.2962914705276489, "log_odds_ratio": -0.5632017254829407, "logits/chosen": 0.033639900386333466, "logits/rejected": -1.3186163902282715, "logps/chosen": -1.7170500755310059, "logps/rejected": -1.966399908065796, "loss": 1.815, "nll_loss": 1.7586567401885986, "rewards/accuracies": 0.875, "rewards/chosen": -0.17170500755310059, "rewards/margins": 0.024934988468885422, "rewards/rejected": -0.1966399997472763, "step": 330 }, { "epoch": 0.5216706067769897, "grad_norm": 0.20476558804512024, "learning_rate": 3.696678539857571e-06, "log_odds_chosen": 0.40065276622772217, "log_odds_ratio": -0.5229502320289612, "logits/chosen": -0.00485864095389843, "logits/rejected": -1.3043968677520752, "logps/chosen": -1.6962597370147705, "logps/rejected": -2.0372161865234375, "loss": 1.8021, "nll_loss": 1.7498358488082886, "rewards/accuracies": 0.875, "rewards/chosen": -0.16962596774101257, "rewards/margins": 0.034095652401447296, "rewards/rejected": -0.20372162759304047, "step": 331 }, { "epoch": 0.5232466509062254, "grad_norm": 0.20712663233280182, "learning_rate": 3.6937616786142956e-06, "log_odds_chosen": 0.5233447551727295, "log_odds_ratio": -0.47018271684646606, "logits/chosen": 0.010211546905338764, "logits/rejected": -1.0574318170547485, "logps/chosen": -1.5134122371673584, "logps/rejected": -1.9436025619506836, "loss": 1.6422, "nll_loss": 1.595203161239624, "rewards/accuracies": 1.0, "rewards/chosen": -0.15134122967720032, "rewards/margins": 0.04301903396844864, "rewards/rejected": -0.19436024129390717, "step": 332 }, { "epoch": 0.524822695035461, "grad_norm": 0.18907472491264343, "learning_rate": 3.6908320218105393e-06, "log_odds_chosen": 0.3291381895542145, "log_odds_ratio": -0.5515700578689575, "logits/chosen": -0.01022535003721714, "logits/rejected": -1.2733728885650635, "logps/chosen": -1.5107743740081787, "logps/rejected": -1.7811356782913208, "loss": 1.6236, "nll_loss": 1.5684043169021606, "rewards/accuracies": 0.875, "rewards/chosen": -0.15107741951942444, "rewards/margins": 0.027036139741539955, "rewards/rejected": -0.17811356484889984, "step": 333 }, { "epoch": 0.5263987391646966, "grad_norm": 0.19910985231399536, "learning_rate": 3.6878895915784607e-06, "log_odds_chosen": 0.48929572105407715, "log_odds_ratio": -0.483676016330719, "logits/chosen": 0.03388974070549011, "logits/rejected": -1.230672836303711, "logps/chosen": -1.4776809215545654, "logps/rejected": -1.8740699291229248, "loss": 1.5862, "nll_loss": 1.5378473997116089, "rewards/accuracies": 1.0, "rewards/chosen": -0.14776809513568878, "rewards/margins": 0.03963891416788101, "rewards/rejected": -0.187406986951828, "step": 334 }, { "epoch": 0.5279747832939322, "grad_norm": 0.19112317264080048, "learning_rate": 3.6849344101467147e-06, "log_odds_chosen": 0.3993302285671234, "log_odds_ratio": -0.5199868679046631, "logits/chosen": 0.02075035311281681, "logits/rejected": -1.4165470600128174, "logps/chosen": -1.5604709386825562, "logps/rejected": -1.890925407409668, "loss": 1.6795, "nll_loss": 1.6274938583374023, "rewards/accuracies": 0.875, "rewards/chosen": -0.15604707598686218, "rewards/margins": 0.03304546698927879, "rewards/rejected": -0.18909254670143127, "step": 335 }, { "epoch": 0.5295508274231678, "grad_norm": 0.2065410017967224, "learning_rate": 3.6819664998402857e-06, "log_odds_chosen": 0.3870427906513214, "log_odds_ratio": -0.521834671497345, "logits/chosen": 0.007402241230010986, "logits/rejected": -1.2406339645385742, "logps/chosen": -1.597013235092163, "logps/rejected": -1.9189107418060303, "loss": 1.7091, "nll_loss": 1.65691339969635, "rewards/accuracies": 1.0, "rewards/chosen": -0.15970134735107422, "rewards/margins": 0.032189756631851196, "rewards/rejected": -0.19189107418060303, "step": 336 }, { "epoch": 0.5311268715524035, "grad_norm": 0.22015894949436188, "learning_rate": 3.6789858830803186e-06, "log_odds_chosen": 0.4236854314804077, "log_odds_ratio": -0.5088356733322144, "logits/chosen": 0.009572651237249374, "logits/rejected": -1.2554011344909668, "logps/chosen": -1.649095058441162, "logps/rejected": -2.0025815963745117, "loss": 1.7505, "nll_loss": 1.6996192932128906, "rewards/accuracies": 1.0, "rewards/chosen": -0.16490954160690308, "rewards/margins": 0.035348646342754364, "rewards/rejected": -0.20025816559791565, "step": 337 }, { "epoch": 0.5327029156816391, "grad_norm": 0.20396317541599274, "learning_rate": 3.6759925823839486e-06, "log_odds_chosen": 0.3307921886444092, "log_odds_ratio": -0.5461194515228271, "logits/chosen": -0.017404936254024506, "logits/rejected": -1.1315640211105347, "logps/chosen": -1.5301204919815063, "logps/rejected": -1.7987494468688965, "loss": 1.6634, "nll_loss": 1.6087586879730225, "rewards/accuracies": 1.0, "rewards/chosen": -0.15301203727722168, "rewards/margins": 0.02686290442943573, "rewards/rejected": -0.1798749417066574, "step": 338 }, { "epoch": 0.5342789598108747, "grad_norm": 0.19404453039169312, "learning_rate": 3.672986620364134e-06, "log_odds_chosen": 0.4503750205039978, "log_odds_ratio": -0.49645406007766724, "logits/chosen": 0.02146240696310997, "logits/rejected": -1.14137601852417, "logps/chosen": -1.6056911945343018, "logps/rejected": -1.9799296855926514, "loss": 1.7114, "nll_loss": 1.6618030071258545, "rewards/accuracies": 1.0, "rewards/chosen": -0.16056913137435913, "rewards/margins": 0.03742384910583496, "rewards/rejected": -0.1979929804801941, "step": 339 }, { "epoch": 0.5358550039401103, "grad_norm": 0.19405515491962433, "learning_rate": 3.669968019729481e-06, "log_odds_chosen": 0.5453786849975586, "log_odds_ratio": -0.46696317195892334, "logits/chosen": -0.03269782289862633, "logits/rejected": -1.4044153690338135, "logps/chosen": -1.4841482639312744, "logps/rejected": -1.9323790073394775, "loss": 1.5994, "nll_loss": 1.5526580810546875, "rewards/accuracies": 1.0, "rewards/chosen": -0.14841482043266296, "rewards/margins": 0.04482308030128479, "rewards/rejected": -0.19323790073394775, "step": 340 }, { "epoch": 0.5374310480693459, "grad_norm": 0.20472969114780426, "learning_rate": 3.666936803284076e-06, "log_odds_chosen": 0.47615405917167664, "log_odds_ratio": -0.4855659306049347, "logits/chosen": 0.01596236228942871, "logits/rejected": -1.2273820638656616, "logps/chosen": -1.6264029741287231, "logps/rejected": -2.0267691612243652, "loss": 1.7196, "nll_loss": 1.6710734367370605, "rewards/accuracies": 1.0, "rewards/chosen": -0.1626402884721756, "rewards/margins": 0.0400366336107254, "rewards/rejected": -0.202676922082901, "step": 341 }, { "epoch": 0.5390070921985816, "grad_norm": 0.21434363722801208, "learning_rate": 3.663892993927312e-06, "log_odds_chosen": 0.5617402791976929, "log_odds_ratio": -0.4550952911376953, "logits/chosen": -0.059917159378528595, "logits/rejected": -1.359694004058838, "logps/chosen": -1.575050950050354, "logps/rejected": -2.043088674545288, "loss": 1.6888, "nll_loss": 1.643282175064087, "rewards/accuracies": 1.0, "rewards/chosen": -0.1575051099061966, "rewards/margins": 0.046803757548332214, "rewards/rejected": -0.20430885255336761, "step": 342 }, { "epoch": 0.5405831363278172, "grad_norm": 0.20112687349319458, "learning_rate": 3.6608366146537136e-06, "log_odds_chosen": 0.6060886383056641, "log_odds_ratio": -0.45338305830955505, "logits/chosen": -0.08892233669757843, "logits/rejected": -1.231791377067566, "logps/chosen": -1.5024844408035278, "logps/rejected": -2.009472608566284, "loss": 1.6204, "nll_loss": 1.575110912322998, "rewards/accuracies": 1.0, "rewards/chosen": -0.1502484530210495, "rewards/margins": 0.05069882422685623, "rewards/rejected": -0.20094728469848633, "step": 343 }, { "epoch": 0.5421591804570528, "grad_norm": 0.20276731252670288, "learning_rate": 3.6577676885527674e-06, "log_odds_chosen": 0.47172704339027405, "log_odds_ratio": -0.4901059567928314, "logits/chosen": -0.064692422747612, "logits/rejected": -1.493216633796692, "logps/chosen": -1.5645720958709717, "logps/rejected": -1.954676866531372, "loss": 1.664, "nll_loss": 1.6149814128875732, "rewards/accuracies": 1.0, "rewards/chosen": -0.15645720064640045, "rewards/margins": 0.03901049122214317, "rewards/rejected": -0.19546771049499512, "step": 344 }, { "epoch": 0.5437352245862884, "grad_norm": 0.19876694679260254, "learning_rate": 3.654686238808744e-06, "log_odds_chosen": 0.4601633548736572, "log_odds_ratio": -0.49714383482933044, "logits/chosen": -0.09634008258581161, "logits/rejected": -1.3992743492126465, "logps/chosen": -1.657270908355713, "logps/rejected": -2.0440926551818848, "loss": 1.7581, "nll_loss": 1.7084193229675293, "rewards/accuracies": 1.0, "rewards/chosen": -0.16572707891464233, "rewards/margins": 0.03868217021226883, "rewards/rejected": -0.20440925657749176, "step": 345 }, { "epoch": 0.545311268715524, "grad_norm": 0.18044152855873108, "learning_rate": 3.6515922887005245e-06, "log_odds_chosen": 0.6581941843032837, "log_odds_ratio": -0.42069223523139954, "logits/chosen": -0.19447211921215057, "logits/rejected": -1.4484854936599731, "logps/chosen": -1.4948885440826416, "logps/rejected": -2.034649133682251, "loss": 1.6052, "nll_loss": 1.563119888305664, "rewards/accuracies": 1.0, "rewards/chosen": -0.14948883652687073, "rewards/margins": 0.053976062685251236, "rewards/rejected": -0.20346491038799286, "step": 346 }, { "epoch": 0.5468873128447597, "grad_norm": 0.1821555495262146, "learning_rate": 3.6484858616014236e-06, "log_odds_chosen": 0.29319724440574646, "log_odds_ratio": -0.5593742728233337, "logits/chosen": -0.02489875629544258, "logits/rejected": -1.4301215410232544, "logps/chosen": -1.6183066368103027, "logps/rejected": -1.8595659732818604, "loss": 1.7166, "nll_loss": 1.6606316566467285, "rewards/accuracies": 1.0, "rewards/chosen": -0.16183066368103027, "rewards/margins": 0.024125942960381508, "rewards/rejected": -0.18595659732818604, "step": 347 }, { "epoch": 0.5484633569739953, "grad_norm": 0.20152583718299866, "learning_rate": 3.6453669809790154e-06, "log_odds_chosen": 0.34444230794906616, "log_odds_ratio": -0.5457963943481445, "logits/chosen": 0.003659643232822418, "logits/rejected": -1.1108278036117554, "logps/chosen": -1.548649549484253, "logps/rejected": -1.8318045139312744, "loss": 1.6556, "nll_loss": 1.6010490655899048, "rewards/accuracies": 1.0, "rewards/chosen": -0.15486496686935425, "rewards/margins": 0.02831549569964409, "rewards/rejected": -0.18318045139312744, "step": 348 }, { "epoch": 0.5500394011032309, "grad_norm": 0.19325992465019226, "learning_rate": 3.642235670394952e-06, "log_odds_chosen": 0.43656274676322937, "log_odds_ratio": -0.5032880902290344, "logits/chosen": -0.02493971772491932, "logits/rejected": -1.0908689498901367, "logps/chosen": -1.5585966110229492, "logps/rejected": -1.9134851694107056, "loss": 1.6744, "nll_loss": 1.6240770816802979, "rewards/accuracies": 1.0, "rewards/chosen": -0.15585967898368835, "rewards/margins": 0.035488829016685486, "rewards/rejected": -0.19134849309921265, "step": 349 }, { "epoch": 0.5516154452324665, "grad_norm": 0.19222001731395721, "learning_rate": 3.63909195350479e-06, "log_odds_chosen": 0.41014277935028076, "log_odds_ratio": -0.5183743238449097, "logits/chosen": 0.04076732322573662, "logits/rejected": -1.2510112524032593, "logps/chosen": -1.6457445621490479, "logps/rejected": -1.9860320091247559, "loss": 1.7474, "nll_loss": 1.6955746412277222, "rewards/accuracies": 0.875, "rewards/chosen": -0.16457447409629822, "rewards/margins": 0.03402874246239662, "rewards/rejected": -0.19860321283340454, "step": 350 }, { "epoch": 0.5531914893617021, "grad_norm": 0.19920876622200012, "learning_rate": 3.635935854057809e-06, "log_odds_chosen": 0.3992159962654114, "log_odds_ratio": -0.5221322774887085, "logits/chosen": -0.14892584085464478, "logits/rejected": -0.9987085461616516, "logps/chosen": -1.5974595546722412, "logps/rejected": -1.9293639659881592, "loss": 1.7044, "nll_loss": 1.6521823406219482, "rewards/accuracies": 0.875, "rewards/chosen": -0.1597459614276886, "rewards/margins": 0.03319043666124344, "rewards/rejected": -0.19293639063835144, "step": 351 }, { "epoch": 0.5547675334909378, "grad_norm": 0.19184619188308716, "learning_rate": 3.6327673958968327e-06, "log_odds_chosen": 0.5156201720237732, "log_odds_ratio": -0.4716494679450989, "logits/chosen": -0.10240821540355682, "logits/rejected": -1.2988749742507935, "logps/chosen": -1.4805288314819336, "logps/rejected": -1.8984272480010986, "loss": 1.6073, "nll_loss": 1.5601266622543335, "rewards/accuracies": 1.0, "rewards/chosen": -0.1480528861284256, "rewards/margins": 0.04178984463214874, "rewards/rejected": -0.18984271585941315, "step": 352 }, { "epoch": 0.5563435776201734, "grad_norm": 0.17695419490337372, "learning_rate": 3.6295866029580483e-06, "log_odds_chosen": 0.49165239930152893, "log_odds_ratio": -0.4805663824081421, "logits/chosen": -0.09917198866605759, "logits/rejected": -1.3583862781524658, "logps/chosen": -1.5266773700714111, "logps/rejected": -1.9281859397888184, "loss": 1.623, "nll_loss": 1.5749820470809937, "rewards/accuracies": 1.0, "rewards/chosen": -0.15266773104667664, "rewards/margins": 0.04015086218714714, "rewards/rejected": -0.19281861186027527, "step": 353 }, { "epoch": 0.557919621749409, "grad_norm": 0.18373258411884308, "learning_rate": 3.626393499270829e-06, "log_odds_chosen": 0.45253658294677734, "log_odds_ratio": -0.5001296997070312, "logits/chosen": -0.09345138818025589, "logits/rejected": -1.326229453086853, "logps/chosen": -1.4635684490203857, "logps/rejected": -1.8293596506118774, "loss": 1.5919, "nll_loss": 1.541857361793518, "rewards/accuracies": 1.0, "rewards/chosen": -0.14635683596134186, "rewards/margins": 0.03657911717891693, "rewards/rejected": -0.18293596804141998, "step": 354 }, { "epoch": 0.5594956658786446, "grad_norm": 0.19679243862628937, "learning_rate": 3.6231881089575466e-06, "log_odds_chosen": 0.6074280142784119, "log_odds_ratio": -0.43782979249954224, "logits/chosen": -0.16901959478855133, "logits/rejected": -1.4631741046905518, "logps/chosen": -1.5110644102096558, "logps/rejected": -2.0135817527770996, "loss": 1.5938, "nll_loss": 1.550012230873108, "rewards/accuracies": 1.0, "rewards/chosen": -0.15110644698143005, "rewards/margins": 0.05025171861052513, "rewards/rejected": -0.2013581544160843, "step": 355 }, { "epoch": 0.5610717100078803, "grad_norm": 0.2090955525636673, "learning_rate": 3.6199704562333945e-06, "log_odds_chosen": 0.4590110778808594, "log_odds_ratio": -0.4973413050174713, "logits/chosen": -0.13931547105312347, "logits/rejected": -1.3538612127304077, "logps/chosen": -1.4821869134902954, "logps/rejected": -1.8512073755264282, "loss": 1.5913, "nll_loss": 1.5415163040161133, "rewards/accuracies": 1.0, "rewards/chosen": -0.14821870625019073, "rewards/margins": 0.03690203279256821, "rewards/rejected": -0.18512074649333954, "step": 356 }, { "epoch": 0.5626477541371159, "grad_norm": 0.20289309322834015, "learning_rate": 3.6167405654062024e-06, "log_odds_chosen": 0.4794601798057556, "log_odds_ratio": -0.49567416310310364, "logits/chosen": -0.07716728746891022, "logits/rejected": -1.2874313592910767, "logps/chosen": -1.541725516319275, "logps/rejected": -1.9342597723007202, "loss": 1.6395, "nll_loss": 1.589914321899414, "rewards/accuracies": 1.0, "rewards/chosen": -0.15417256951332092, "rewards/margins": 0.03925342112779617, "rewards/rejected": -0.1934259682893753, "step": 357 }, { "epoch": 0.5642237982663515, "grad_norm": 0.1778980940580368, "learning_rate": 3.6134984608762515e-06, "log_odds_chosen": 0.5081315636634827, "log_odds_ratio": -0.4739688038825989, "logits/chosen": -0.08499579131603241, "logits/rejected": -1.3757938146591187, "logps/chosen": -1.4640223979949951, "logps/rejected": -1.8742594718933105, "loss": 1.5833, "nll_loss": 1.5359253883361816, "rewards/accuracies": 1.0, "rewards/chosen": -0.1464022397994995, "rewards/margins": 0.041023701429367065, "rewards/rejected": -0.18742592632770538, "step": 358 }, { "epoch": 0.5657998423955871, "grad_norm": 0.2062489539384842, "learning_rate": 3.6102441671360945e-06, "log_odds_chosen": 0.4876347780227661, "log_odds_ratio": -0.48857545852661133, "logits/chosen": -0.12350551038980484, "logits/rejected": -1.2674330472946167, "logps/chosen": -1.6090378761291504, "logps/rejected": -2.016418218612671, "loss": 1.7003, "nll_loss": 1.651473879814148, "rewards/accuracies": 1.0, "rewards/chosen": -0.16090378165245056, "rewards/margins": 0.04073803871870041, "rewards/rejected": -0.20164184272289276, "step": 359 }, { "epoch": 0.5673758865248227, "grad_norm": 0.19282633066177368, "learning_rate": 3.6069777087703654e-06, "log_odds_chosen": 0.5007855296134949, "log_odds_ratio": -0.48002350330352783, "logits/chosen": -0.10314866900444031, "logits/rejected": -1.2184945344924927, "logps/chosen": -1.533939242362976, "logps/rejected": -1.9453692436218262, "loss": 1.6501, "nll_loss": 1.602098822593689, "rewards/accuracies": 1.0, "rewards/chosen": -0.1533939242362976, "rewards/margins": 0.041142985224723816, "rewards/rejected": -0.19453692436218262, "step": 360 }, { "epoch": 0.5689519306540584, "grad_norm": 0.1999576985836029, "learning_rate": 3.6036991104555973e-06, "log_odds_chosen": 0.4542830288410187, "log_odds_ratio": -0.5044661164283752, "logits/chosen": -0.08808690309524536, "logits/rejected": -1.3358922004699707, "logps/chosen": -1.6718225479125977, "logps/rejected": -2.0570645332336426, "loss": 1.7592, "nll_loss": 1.7087852954864502, "rewards/accuracies": 0.875, "rewards/chosen": -0.16718226671218872, "rewards/margins": 0.03852420300245285, "rewards/rejected": -0.20570647716522217, "step": 361 }, { "epoch": 0.570527974783294, "grad_norm": 0.19183827936649323, "learning_rate": 3.600408396960034e-06, "log_odds_chosen": 0.5055266618728638, "log_odds_ratio": -0.4799540042877197, "logits/chosen": -0.06735289096832275, "logits/rejected": -0.9921278953552246, "logps/chosen": -1.4769415855407715, "logps/rejected": -1.8809959888458252, "loss": 1.5713, "nll_loss": 1.523301601409912, "rewards/accuracies": 1.0, "rewards/chosen": -0.1476941704750061, "rewards/margins": 0.04040544107556343, "rewards/rejected": -0.18809959292411804, "step": 362 }, { "epoch": 0.5721040189125296, "grad_norm": 0.19298037886619568, "learning_rate": 3.5971055931434447e-06, "log_odds_chosen": 0.4434927701950073, "log_odds_ratio": -0.5033714771270752, "logits/chosen": -0.04916198179125786, "logits/rejected": -1.233707308769226, "logps/chosen": -1.5274627208709717, "logps/rejected": -1.8934059143066406, "loss": 1.6346, "nll_loss": 1.5842169523239136, "rewards/accuracies": 0.875, "rewards/chosen": -0.1527462899684906, "rewards/margins": 0.03659432381391525, "rewards/rejected": -0.18934059143066406, "step": 363 }, { "epoch": 0.5736800630417652, "grad_norm": 0.1897989809513092, "learning_rate": 3.5937907239569343e-06, "log_odds_chosen": 0.47198542952537537, "log_odds_ratio": -0.4953606128692627, "logits/chosen": -0.1299748420715332, "logits/rejected": -1.3614236116409302, "logps/chosen": -1.579548954963684, "logps/rejected": -1.9681391716003418, "loss": 1.6868, "nll_loss": 1.6372454166412354, "rewards/accuracies": 0.875, "rewards/chosen": -0.15795490145683289, "rewards/margins": 0.038859013468027115, "rewards/rejected": -0.1968139261007309, "step": 364 }, { "epoch": 0.5752561071710008, "grad_norm": 0.22458185255527496, "learning_rate": 3.5904638144427572e-06, "log_odds_chosen": 0.2747001647949219, "log_odds_ratio": -0.5726035833358765, "logits/chosen": -0.09000087529420853, "logits/rejected": -1.1089969873428345, "logps/chosen": -1.6369290351867676, "logps/rejected": -1.8634740114212036, "loss": 1.7333, "nll_loss": 1.6760823726654053, "rewards/accuracies": 0.75, "rewards/chosen": -0.1636928915977478, "rewards/margins": 0.02265450730919838, "rewards/rejected": -0.18634741008281708, "step": 365 }, { "epoch": 0.5768321513002365, "grad_norm": 0.2086506485939026, "learning_rate": 3.5871248897341246e-06, "log_odds_chosen": 0.5135898590087891, "log_odds_ratio": -0.4752338230609894, "logits/chosen": -0.12942443788051605, "logits/rejected": -1.0795626640319824, "logps/chosen": -1.4760322570800781, "logps/rejected": -1.8992903232574463, "loss": 1.598, "nll_loss": 1.5504556894302368, "rewards/accuracies": 1.0, "rewards/chosen": -0.14760322868824005, "rewards/margins": 0.042325813323259354, "rewards/rejected": -0.1899290531873703, "step": 366 }, { "epoch": 0.578408195429472, "grad_norm": 0.18183429539203644, "learning_rate": 3.5837739750550182e-06, "log_odds_chosen": 0.4922761619091034, "log_odds_ratio": -0.4857975244522095, "logits/chosen": -0.15072286128997803, "logits/rejected": -1.6683162450790405, "logps/chosen": -1.5550795793533325, "logps/rejected": -1.9653193950653076, "loss": 1.6554, "nll_loss": 1.6068187952041626, "rewards/accuracies": 1.0, "rewards/chosen": -0.15550795197486877, "rewards/margins": 0.04102398827672005, "rewards/rejected": -0.19653193652629852, "step": 367 }, { "epoch": 0.5799842395587076, "grad_norm": 0.1786677986383438, "learning_rate": 3.5804110957199977e-06, "log_odds_chosen": 0.5304347276687622, "log_odds_ratio": -0.4684828817844391, "logits/chosen": -0.031741030514240265, "logits/rejected": -1.2201794385910034, "logps/chosen": -1.533761739730835, "logps/rejected": -1.97342848777771, "loss": 1.6311, "nll_loss": 1.584226369857788, "rewards/accuracies": 1.0, "rewards/chosen": -0.15337617695331573, "rewards/margins": 0.04396669566631317, "rewards/rejected": -0.1973428726196289, "step": 368 }, { "epoch": 0.5815602836879432, "grad_norm": 0.19358626008033752, "learning_rate": 3.577036277134011e-06, "log_odds_chosen": 0.6033509373664856, "log_odds_ratio": -0.44030019640922546, "logits/chosen": -0.12189745157957077, "logits/rejected": -1.4489309787750244, "logps/chosen": -1.5189951658248901, "logps/rejected": -2.0174660682678223, "loss": 1.6305, "nll_loss": 1.5864982604980469, "rewards/accuracies": 1.0, "rewards/chosen": -0.1518995314836502, "rewards/margins": 0.04984709620475769, "rewards/rejected": -0.2017466127872467, "step": 369 }, { "epoch": 0.5831363278171788, "grad_norm": 0.20722126960754395, "learning_rate": 3.5736495447922e-06, "log_odds_chosen": 0.38122087717056274, "log_odds_ratio": -0.5253958106040955, "logits/chosen": -0.11854588240385056, "logits/rejected": -1.445725440979004, "logps/chosen": -1.643795371055603, "logps/rejected": -1.960480809211731, "loss": 1.7461, "nll_loss": 1.693605899810791, "rewards/accuracies": 1.0, "rewards/chosen": -0.1643795371055603, "rewards/margins": 0.031668562442064285, "rewards/rejected": -0.19604811072349548, "step": 370 }, { "epoch": 0.5847123719464145, "grad_norm": 0.1782001107931137, "learning_rate": 3.5702509242797096e-06, "log_odds_chosen": 0.7012959718704224, "log_odds_ratio": -0.41842737793922424, "logits/chosen": -0.13191047310829163, "logits/rejected": -1.4372180700302124, "logps/chosen": -1.4734841585159302, "logps/rejected": -2.0532171726226807, "loss": 1.5859, "nll_loss": 1.5440880060195923, "rewards/accuracies": 1.0, "rewards/chosen": -0.14734841883182526, "rewards/margins": 0.05797329545021057, "rewards/rejected": -0.20532171428203583, "step": 371 }, { "epoch": 0.5862884160756501, "grad_norm": 0.189020574092865, "learning_rate": 3.566840441271495e-06, "log_odds_chosen": 0.663593053817749, "log_odds_ratio": -0.43108314275741577, "logits/chosen": -0.09653455764055252, "logits/rejected": -1.4999427795410156, "logps/chosen": -1.4899102449417114, "logps/rejected": -2.0343017578125, "loss": 1.578, "nll_loss": 1.5348646640777588, "rewards/accuracies": 1.0, "rewards/chosen": -0.14899101853370667, "rewards/margins": 0.05443914607167244, "rewards/rejected": -0.2034301608800888, "step": 372 }, { "epoch": 0.5878644602048857, "grad_norm": 0.1943143755197525, "learning_rate": 3.5634181215321265e-06, "log_odds_chosen": 0.6287661790847778, "log_odds_ratio": -0.43060097098350525, "logits/chosen": -0.07774099707603455, "logits/rejected": -1.3174422979354858, "logps/chosen": -1.5436866283416748, "logps/rejected": -2.067762613296509, "loss": 1.6544, "nll_loss": 1.6113276481628418, "rewards/accuracies": 1.0, "rewards/chosen": -0.15436868369579315, "rewards/margins": 0.052407585084438324, "rewards/rejected": -0.20677624642848969, "step": 373 }, { "epoch": 0.5894405043341213, "grad_norm": 0.1871887594461441, "learning_rate": 3.5599839909155947e-06, "log_odds_chosen": 0.49737420678138733, "log_odds_ratio": -0.4843059182167053, "logits/chosen": -0.10240314900875092, "logits/rejected": -1.391071081161499, "logps/chosen": -1.5068795680999756, "logps/rejected": -1.9151724576950073, "loss": 1.6079, "nll_loss": 1.5594788789749146, "rewards/accuracies": 0.875, "rewards/chosen": -0.15068796277046204, "rewards/margins": 0.04082927852869034, "rewards/rejected": -0.19151723384857178, "step": 374 }, { "epoch": 0.5910165484633569, "grad_norm": 0.1926075965166092, "learning_rate": 3.556538075365116e-06, "log_odds_chosen": 0.5647552013397217, "log_odds_ratio": -0.4604189991950989, "logits/chosen": -0.05626612901687622, "logits/rejected": -1.3970637321472168, "logps/chosen": -1.5002104043960571, "logps/rejected": -1.962066411972046, "loss": 1.6008, "nll_loss": 1.5547971725463867, "rewards/accuracies": 1.0, "rewards/chosen": -0.1500210165977478, "rewards/margins": 0.046185605227947235, "rewards/rejected": -0.19620664417743683, "step": 375 }, { "epoch": 0.5925925925925926, "grad_norm": 0.20022441446781158, "learning_rate": 3.5530804009129367e-06, "log_odds_chosen": 0.5052685737609863, "log_odds_ratio": -0.4745987355709076, "logits/chosen": -0.11242527514696121, "logits/rejected": -1.191476583480835, "logps/chosen": -1.5387141704559326, "logps/rejected": -1.9558299779891968, "loss": 1.6266, "nll_loss": 1.5791561603546143, "rewards/accuracies": 1.0, "rewards/chosen": -0.15387141704559326, "rewards/margins": 0.041711580008268356, "rewards/rejected": -0.19558300077915192, "step": 376 }, { "epoch": 0.5941686367218282, "grad_norm": 0.1937384456396103, "learning_rate": 3.5496109936801368e-06, "log_odds_chosen": 0.49315646290779114, "log_odds_ratio": -0.48415568470954895, "logits/chosen": -0.17054055631160736, "logits/rejected": -1.4667962789535522, "logps/chosen": -1.5606714487075806, "logps/rejected": -1.9743766784667969, "loss": 1.6571, "nll_loss": 1.6086863279342651, "rewards/accuracies": 1.0, "rewards/chosen": -0.1560671329498291, "rewards/margins": 0.04137052595615387, "rewards/rejected": -0.19743765890598297, "step": 377 }, { "epoch": 0.5957446808510638, "grad_norm": 0.20795175433158875, "learning_rate": 3.546129879876429e-06, "log_odds_chosen": 0.3628261685371399, "log_odds_ratio": -0.5408048629760742, "logits/chosen": -0.05689922347664833, "logits/rejected": -1.130873203277588, "logps/chosen": -1.6209430694580078, "logps/rejected": -1.9227240085601807, "loss": 1.7149, "nll_loss": 1.6608681678771973, "rewards/accuracies": 0.875, "rewards/chosen": -0.1620943248271942, "rewards/margins": 0.03017808124423027, "rewards/rejected": -0.19227240979671478, "step": 378 }, { "epoch": 0.5973207249802994, "grad_norm": 0.19484388828277588, "learning_rate": 3.5426370857999662e-06, "log_odds_chosen": 0.3801731467247009, "log_odds_ratio": -0.5251167416572571, "logits/chosen": -0.10485132038593292, "logits/rejected": -1.2549301385879517, "logps/chosen": -1.6024380922317505, "logps/rejected": -1.9180117845535278, "loss": 1.7071, "nll_loss": 1.6545681953430176, "rewards/accuracies": 1.0, "rewards/chosen": -0.16024382412433624, "rewards/margins": 0.03155737742781639, "rewards/rejected": -0.19180117547512054, "step": 379 }, { "epoch": 0.598896769109535, "grad_norm": 0.19403113424777985, "learning_rate": 3.53913263783714e-06, "log_odds_chosen": 0.4171554744243622, "log_odds_ratio": -0.5190439820289612, "logits/chosen": -0.12759403884410858, "logits/rejected": -1.392691731452942, "logps/chosen": -1.555301308631897, "logps/rejected": -1.904205083847046, "loss": 1.6481, "nll_loss": 1.5961991548538208, "rewards/accuracies": 0.875, "rewards/chosen": -0.15553012490272522, "rewards/margins": 0.03489039093255997, "rewards/rejected": -0.1904205083847046, "step": 380 }, { "epoch": 0.6004728132387707, "grad_norm": 0.1852397322654724, "learning_rate": 3.53561656246238e-06, "log_odds_chosen": 0.6395785808563232, "log_odds_ratio": -0.42720136046409607, "logits/chosen": -0.2312593162059784, "logits/rejected": -1.6027368307113647, "logps/chosen": -1.5752849578857422, "logps/rejected": -2.1103007793426514, "loss": 1.6683, "nll_loss": 1.6255991458892822, "rewards/accuracies": 1.0, "rewards/chosen": -0.15752847492694855, "rewards/margins": 0.05350159481167793, "rewards/rejected": -0.21103009581565857, "step": 381 }, { "epoch": 0.6020488573680063, "grad_norm": 0.18580475449562073, "learning_rate": 3.532088886237956e-06, "log_odds_chosen": 0.539408802986145, "log_odds_ratio": -0.4628356993198395, "logits/chosen": -0.14560621976852417, "logits/rejected": -1.546494483947754, "logps/chosen": -1.6014196872711182, "logps/rejected": -2.0526225566864014, "loss": 1.6812, "nll_loss": 1.634964942932129, "rewards/accuracies": 1.0, "rewards/chosen": -0.1601419895887375, "rewards/margins": 0.045120254158973694, "rewards/rejected": -0.20526225864887238, "step": 382 }, { "epoch": 0.6036249014972419, "grad_norm": 0.17321372032165527, "learning_rate": 3.528549635813778e-06, "log_odds_chosen": 0.5498687028884888, "log_odds_ratio": -0.46246442198753357, "logits/chosen": -0.18676355481147766, "logits/rejected": -1.3866394758224487, "logps/chosen": -1.4379013776779175, "logps/rejected": -1.884263277053833, "loss": 1.5412, "nll_loss": 1.494981288909912, "rewards/accuracies": 1.0, "rewards/chosen": -0.143790140748024, "rewards/margins": 0.04463617503643036, "rewards/rejected": -0.18842631578445435, "step": 383 }, { "epoch": 0.6052009456264775, "grad_norm": 0.20354455709457397, "learning_rate": 3.524998837927192e-06, "log_odds_chosen": 0.587373673915863, "log_odds_ratio": -0.4441196024417877, "logits/chosen": -0.14221185445785522, "logits/rejected": -1.3197717666625977, "logps/chosen": -1.5544791221618652, "logps/rejected": -2.0435311794281006, "loss": 1.6329, "nll_loss": 1.5884504318237305, "rewards/accuracies": 1.0, "rewards/chosen": -0.15544790029525757, "rewards/margins": 0.04890521243214607, "rewards/rejected": -0.20435310900211334, "step": 384 }, { "epoch": 0.6067769897557131, "grad_norm": 0.1994301825761795, "learning_rate": 3.5214365194027797e-06, "log_odds_chosen": 0.5964666604995728, "log_odds_ratio": -0.44221487641334534, "logits/chosen": -0.15480360388755798, "logits/rejected": -1.4440878629684448, "logps/chosen": -1.4780986309051514, "logps/rejected": -1.9680850505828857, "loss": 1.57, "nll_loss": 1.5257560014724731, "rewards/accuracies": 1.0, "rewards/chosen": -0.14780986309051514, "rewards/margins": 0.048998646438121796, "rewards/rejected": -0.19680851697921753, "step": 385 }, { "epoch": 0.6083530338849488, "grad_norm": 0.21158069372177124, "learning_rate": 3.517862707152157e-06, "log_odds_chosen": 0.45025360584259033, "log_odds_ratio": -0.5009865164756775, "logits/chosen": -0.06984852999448776, "logits/rejected": -1.1637235879898071, "logps/chosen": -1.6143461465835571, "logps/rejected": -1.9897408485412598, "loss": 1.695, "nll_loss": 1.6448723077774048, "rewards/accuracies": 1.0, "rewards/chosen": -0.1614346206188202, "rewards/margins": 0.03753947466611862, "rewards/rejected": -0.1989741027355194, "step": 386 }, { "epoch": 0.6099290780141844, "grad_norm": 0.21827860176563263, "learning_rate": 3.5142774281737674e-06, "log_odds_chosen": 0.6315152645111084, "log_odds_ratio": -0.4288613796234131, "logits/chosen": -0.15267856419086456, "logits/rejected": -1.3205287456512451, "logps/chosen": -1.6036657094955444, "logps/rejected": -2.1343271732330322, "loss": 1.6804, "nll_loss": 1.6375137567520142, "rewards/accuracies": 1.0, "rewards/chosen": -0.16036657989025116, "rewards/margins": 0.053066130727529526, "rewards/rejected": -0.21343271434307098, "step": 387 }, { "epoch": 0.61150512214342, "grad_norm": 0.17510953545570374, "learning_rate": 3.5106807095526817e-06, "log_odds_chosen": 0.6356069445610046, "log_odds_ratio": -0.4359210133552551, "logits/chosen": -0.1717139482498169, "logits/rejected": -1.565706729888916, "logps/chosen": -1.5656462907791138, "logps/rejected": -2.093924045562744, "loss": 1.6578, "nll_loss": 1.6142207384109497, "rewards/accuracies": 1.0, "rewards/chosen": -0.1565646231174469, "rewards/margins": 0.05282779037952423, "rewards/rejected": -0.20939242839813232, "step": 388 }, { "epoch": 0.6130811662726556, "grad_norm": 0.18727731704711914, "learning_rate": 3.5070725784603905e-06, "log_odds_chosen": 0.537490963935852, "log_odds_ratio": -0.4669988751411438, "logits/chosen": -0.24123258888721466, "logits/rejected": -1.2403115034103394, "logps/chosen": -1.439449429512024, "logps/rejected": -1.8756340742111206, "loss": 1.5259, "nll_loss": 1.4791667461395264, "rewards/accuracies": 1.0, "rewards/chosen": -0.14394494891166687, "rewards/margins": 0.04361846297979355, "rewards/rejected": -0.18756340444087982, "step": 389 }, { "epoch": 0.6146572104018913, "grad_norm": 0.22596481442451477, "learning_rate": 3.503453062154602e-06, "log_odds_chosen": 0.4628780484199524, "log_odds_ratio": -0.49970224499702454, "logits/chosen": -0.1650674045085907, "logits/rejected": -1.2386726140975952, "logps/chosen": -1.602417230606079, "logps/rejected": -1.9950282573699951, "loss": 1.6994, "nll_loss": 1.6494615077972412, "rewards/accuracies": 0.875, "rewards/chosen": -0.1602417379617691, "rewards/margins": 0.0392610989511013, "rewards/rejected": -0.1995028257369995, "step": 390 }, { "epoch": 0.6162332545311269, "grad_norm": 0.19225400686264038, "learning_rate": 3.499822187979032e-06, "log_odds_chosen": 0.45917797088623047, "log_odds_ratio": -0.4991348385810852, "logits/chosen": -0.09038020670413971, "logits/rejected": -1.4409539699554443, "logps/chosen": -1.5594383478164673, "logps/rejected": -1.9403319358825684, "loss": 1.6617, "nll_loss": 1.6117753982543945, "rewards/accuracies": 0.875, "rewards/chosen": -0.1559438407421112, "rewards/margins": 0.03808935359120369, "rewards/rejected": -0.1940331906080246, "step": 391 }, { "epoch": 0.6178092986603625, "grad_norm": 0.1856825351715088, "learning_rate": 3.496179983363202e-06, "log_odds_chosen": 0.41265982389450073, "log_odds_ratio": -0.5119627118110657, "logits/chosen": -0.11190656572580338, "logits/rejected": -1.3574274778366089, "logps/chosen": -1.5668977499008179, "logps/rejected": -1.9084656238555908, "loss": 1.6514, "nll_loss": 1.6002510786056519, "rewards/accuracies": 1.0, "rewards/chosen": -0.15668979287147522, "rewards/margins": 0.034156784415245056, "rewards/rejected": -0.19084656238555908, "step": 392 }, { "epoch": 0.6193853427895981, "grad_norm": 0.20805980265140533, "learning_rate": 3.4925264758222268e-06, "log_odds_chosen": 0.6294342279434204, "log_odds_ratio": -0.43072307109832764, "logits/chosen": -0.16953016817569733, "logits/rejected": -1.2341816425323486, "logps/chosen": -1.5001755952835083, "logps/rejected": -2.0215494632720947, "loss": 1.5961, "nll_loss": 1.5530593395233154, "rewards/accuracies": 1.0, "rewards/chosen": -0.15001757442951202, "rewards/margins": 0.05213739350438118, "rewards/rejected": -0.2021549493074417, "step": 393 }, { "epoch": 0.6209613869188337, "grad_norm": 0.24518869817256927, "learning_rate": 3.488861692956611e-06, "log_odds_chosen": 0.5471794009208679, "log_odds_ratio": -0.4630282521247864, "logits/chosen": -0.19086423516273499, "logits/rejected": -1.339902639389038, "logps/chosen": -1.5402213335037231, "logps/rejected": -1.9952548742294312, "loss": 1.6296, "nll_loss": 1.5832523107528687, "rewards/accuracies": 1.0, "rewards/chosen": -0.15402214229106903, "rewards/margins": 0.04550333693623543, "rewards/rejected": -0.19952546060085297, "step": 394 }, { "epoch": 0.6225374310480694, "grad_norm": 0.2510071098804474, "learning_rate": 3.4851856624520394e-06, "log_odds_chosen": 0.5228825807571411, "log_odds_ratio": -0.46843814849853516, "logits/chosen": -0.13727201521396637, "logits/rejected": -1.3861788511276245, "logps/chosen": -1.572906494140625, "logps/rejected": -2.005622386932373, "loss": 1.6575, "nll_loss": 1.6106876134872437, "rewards/accuracies": 1.0, "rewards/chosen": -0.15729066729545593, "rewards/margins": 0.04327157884836197, "rewards/rejected": -0.2005622535943985, "step": 395 }, { "epoch": 0.624113475177305, "grad_norm": 0.20526158809661865, "learning_rate": 3.4814984120791664e-06, "log_odds_chosen": 0.5153719782829285, "log_odds_ratio": -0.4714086055755615, "logits/chosen": -0.13660681247711182, "logits/rejected": -1.2547574043273926, "logps/chosen": -1.5410287380218506, "logps/rejected": -1.964991807937622, "loss": 1.636, "nll_loss": 1.5889039039611816, "rewards/accuracies": 1.0, "rewards/chosen": -0.1541028618812561, "rewards/margins": 0.04239630699157715, "rewards/rejected": -0.19649919867515564, "step": 396 }, { "epoch": 0.6256895193065406, "grad_norm": 0.18158891797065735, "learning_rate": 3.477799969693407e-06, "log_odds_chosen": 0.47999995946884155, "log_odds_ratio": -0.490679532289505, "logits/chosen": -0.09590557217597961, "logits/rejected": -1.3949567079544067, "logps/chosen": -1.541025996208191, "logps/rejected": -1.9312851428985596, "loss": 1.6301, "nll_loss": 1.5809931755065918, "rewards/accuracies": 1.0, "rewards/chosen": -0.15410259366035461, "rewards/margins": 0.039025940001010895, "rewards/rejected": -0.1931285411119461, "step": 397 }, { "epoch": 0.6272655634357762, "grad_norm": 0.2004203349351883, "learning_rate": 3.474090363234728e-06, "log_odds_chosen": 0.766968309879303, "log_odds_ratio": -0.39646148681640625, "logits/chosen": -0.2562759518623352, "logits/rejected": -1.4337137937545776, "logps/chosen": -1.4752384424209595, "logps/rejected": -2.112752914428711, "loss": 1.5586, "nll_loss": 1.5189671516418457, "rewards/accuracies": 1.0, "rewards/chosen": -0.14752383530139923, "rewards/margins": 0.0637514516711235, "rewards/rejected": -0.21127529442310333, "step": 398 }, { "epoch": 0.6288416075650118, "grad_norm": 0.19742122292518616, "learning_rate": 3.4703696207274325e-06, "log_odds_chosen": 0.5179776549339294, "log_odds_ratio": -0.474680095911026, "logits/chosen": -0.14589636027812958, "logits/rejected": -1.2598799467086792, "logps/chosen": -1.556343913078308, "logps/rejected": -1.9830336570739746, "loss": 1.6301, "nll_loss": 1.5826400518417358, "rewards/accuracies": 1.0, "rewards/chosen": -0.15563438832759857, "rewards/margins": 0.042668960988521576, "rewards/rejected": -0.19830335676670074, "step": 399 }, { "epoch": 0.6304176516942475, "grad_norm": 0.18192359805107117, "learning_rate": 3.4666377702799545e-06, "log_odds_chosen": 0.5299201011657715, "log_odds_ratio": -0.4676084518432617, "logits/chosen": -0.24987564980983734, "logits/rejected": -1.5932269096374512, "logps/chosen": -1.46696138381958, "logps/rejected": -1.8967633247375488, "loss": 1.568, "nll_loss": 1.521193265914917, "rewards/accuracies": 1.0, "rewards/chosen": -0.14669615030288696, "rewards/margins": 0.042980194091796875, "rewards/rejected": -0.18967632949352264, "step": 400 }, { "epoch": 0.6319936958234831, "grad_norm": 0.19527654349803925, "learning_rate": 3.4628948400846417e-06, "log_odds_chosen": 0.6314361691474915, "log_odds_ratio": -0.4365447759628296, "logits/chosen": -0.1739773005247116, "logits/rejected": -1.4769560098648071, "logps/chosen": -1.5479360818862915, "logps/rejected": -2.0758628845214844, "loss": 1.637, "nll_loss": 1.5933518409729004, "rewards/accuracies": 1.0, "rewards/chosen": -0.1547936052083969, "rewards/margins": 0.05279267579317093, "rewards/rejected": -0.20758628845214844, "step": 401 }, { "epoch": 0.6335697399527187, "grad_norm": 0.2035856693983078, "learning_rate": 3.4591408584175426e-06, "log_odds_chosen": 0.5278856158256531, "log_odds_ratio": -0.4733438193798065, "logits/chosen": -0.18014143407344818, "logits/rejected": -1.3621004819869995, "logps/chosen": -1.5849114656448364, "logps/rejected": -2.027892589569092, "loss": 1.6766, "nll_loss": 1.6292688846588135, "rewards/accuracies": 1.0, "rewards/chosen": -0.1584911346435547, "rewards/margins": 0.044298142194747925, "rewards/rejected": -0.2027892768383026, "step": 402 }, { "epoch": 0.6351457840819543, "grad_norm": 0.19495651125907898, "learning_rate": 3.4553758536381974e-06, "log_odds_chosen": 0.5365288853645325, "log_odds_ratio": -0.4719133973121643, "logits/chosen": -0.1622263491153717, "logits/rejected": -1.3355566263198853, "logps/chosen": -1.5399608612060547, "logps/rejected": -1.984842300415039, "loss": 1.6272, "nll_loss": 1.580039381980896, "rewards/accuracies": 1.0, "rewards/chosen": -0.153996080160141, "rewards/margins": 0.04448813945055008, "rewards/rejected": -0.19848422706127167, "step": 403 }, { "epoch": 0.6367218282111899, "grad_norm": 0.20605534315109253, "learning_rate": 3.451599854189418e-06, "log_odds_chosen": 0.5477701425552368, "log_odds_ratio": -0.46024447679519653, "logits/chosen": -0.13585253059864044, "logits/rejected": -1.0337761640548706, "logps/chosen": -1.54197359085083, "logps/rejected": -1.995017647743225, "loss": 1.6418, "nll_loss": 1.5958125591278076, "rewards/accuracies": 1.0, "rewards/chosen": -0.15419737994670868, "rewards/margins": 0.045304395258426666, "rewards/rejected": -0.19950176775455475, "step": 404 }, { "epoch": 0.6382978723404256, "grad_norm": 0.19387783110141754, "learning_rate": 3.4478128885970765e-06, "log_odds_chosen": 0.6080644130706787, "log_odds_ratio": -0.4360560178756714, "logits/chosen": -0.179177924990654, "logits/rejected": -1.3908740282058716, "logps/chosen": -1.604873776435852, "logps/rejected": -2.1161084175109863, "loss": 1.6909, "nll_loss": 1.6472656726837158, "rewards/accuracies": 1.0, "rewards/chosen": -0.16048739850521088, "rewards/margins": 0.051123470067977905, "rewards/rejected": -0.21161086857318878, "step": 405 }, { "epoch": 0.6398739164696612, "grad_norm": 0.190069779753685, "learning_rate": 3.44401498546989e-06, "log_odds_chosen": 0.42473104596138, "log_odds_ratio": -0.5118191242218018, "logits/chosen": -0.1188054233789444, "logits/rejected": -1.49931001663208, "logps/chosen": -1.5570849180221558, "logps/rejected": -1.9073197841644287, "loss": 1.6278, "nll_loss": 1.5765697956085205, "rewards/accuracies": 1.0, "rewards/chosen": -0.15570849180221558, "rewards/margins": 0.035023488104343414, "rewards/rejected": -0.1907319873571396, "step": 406 }, { "epoch": 0.6414499605988968, "grad_norm": 0.19107168912887573, "learning_rate": 3.4402061734992005e-06, "log_odds_chosen": 0.5350978374481201, "log_odds_ratio": -0.46566373109817505, "logits/chosen": -0.15249407291412354, "logits/rejected": -1.402602195739746, "logps/chosen": -1.5002124309539795, "logps/rejected": -1.9390612840652466, "loss": 1.5941, "nll_loss": 1.5475372076034546, "rewards/accuracies": 1.0, "rewards/chosen": -0.1500212401151657, "rewards/margins": 0.043884895741939545, "rewards/rejected": -0.19390614330768585, "step": 407 }, { "epoch": 0.6430260047281324, "grad_norm": 0.2040640264749527, "learning_rate": 3.4363864814587656e-06, "log_odds_chosen": 0.49293413758277893, "log_odds_ratio": -0.48022550344467163, "logits/chosen": -0.2637179493904114, "logits/rejected": -0.9641510248184204, "logps/chosen": -1.475205421447754, "logps/rejected": -1.8777835369110107, "loss": 1.5716, "nll_loss": 1.5235683917999268, "rewards/accuracies": 1.0, "rewards/chosen": -0.1475205421447754, "rewards/margins": 0.04025781527161598, "rewards/rejected": -0.18777838349342346, "step": 408 }, { "epoch": 0.644602048857368, "grad_norm": 0.20765496790409088, "learning_rate": 3.4325559382045343e-06, "log_odds_chosen": 0.4098273515701294, "log_odds_ratio": -0.5131589770317078, "logits/chosen": -0.23338492214679718, "logits/rejected": -1.4860385656356812, "logps/chosen": -1.4819140434265137, "logps/rejected": -1.8101638555526733, "loss": 1.5729, "nll_loss": 1.5215588808059692, "rewards/accuracies": 1.0, "rewards/chosen": -0.1481914222240448, "rewards/margins": 0.03282497450709343, "rewards/rejected": -0.18101638555526733, "step": 409 }, { "epoch": 0.6461780929866037, "grad_norm": 0.19961215555667877, "learning_rate": 3.4287145726744295e-06, "log_odds_chosen": 0.5392709374427795, "log_odds_ratio": -0.4628680646419525, "logits/chosen": -0.26481306552886963, "logits/rejected": -1.3813103437423706, "logps/chosen": -1.4664371013641357, "logps/rejected": -1.904196858406067, "loss": 1.5695, "nll_loss": 1.523188829421997, "rewards/accuracies": 1.0, "rewards/chosen": -0.146643728017807, "rewards/margins": 0.04377596825361252, "rewards/rejected": -0.19041968882083893, "step": 410 }, { "epoch": 0.6477541371158393, "grad_norm": 0.20400294661521912, "learning_rate": 3.4248624138881335e-06, "log_odds_chosen": 0.42437130212783813, "log_odds_ratio": -0.5254440307617188, "logits/chosen": -0.1493159532546997, "logits/rejected": -1.1513835191726685, "logps/chosen": -1.6029716730117798, "logps/rejected": -1.9682285785675049, "loss": 1.6887, "nll_loss": 1.6361618041992188, "rewards/accuracies": 0.875, "rewards/chosen": -0.16029717028141022, "rewards/margins": 0.03652569651603699, "rewards/rejected": -0.1968228816986084, "step": 411 }, { "epoch": 0.6493301812450749, "grad_norm": 0.20339736342430115, "learning_rate": 3.4209994909468672e-06, "log_odds_chosen": 0.6561870574951172, "log_odds_ratio": -0.42825421690940857, "logits/chosen": -0.28305602073669434, "logits/rejected": -1.04371976852417, "logps/chosen": -1.5072612762451172, "logps/rejected": -2.0518369674682617, "loss": 1.5933, "nll_loss": 1.5505071878433228, "rewards/accuracies": 1.0, "rewards/chosen": -0.15072615444660187, "rewards/margins": 0.054457567632198334, "rewards/rejected": -0.2051836997270584, "step": 412 }, { "epoch": 0.6509062253743105, "grad_norm": 0.19428101181983948, "learning_rate": 3.4171258330331667e-06, "log_odds_chosen": 0.43498852849006653, "log_odds_ratio": -0.5014755129814148, "logits/chosen": -0.12146922200918198, "logits/rejected": -1.1085965633392334, "logps/chosen": -1.593203067779541, "logps/rejected": -1.9516777992248535, "loss": 1.6774, "nll_loss": 1.627271056175232, "rewards/accuracies": 1.0, "rewards/chosen": -0.15932030975818634, "rewards/margins": 0.03584747388958931, "rewards/rejected": -0.19516779482364655, "step": 413 }, { "epoch": 0.6524822695035462, "grad_norm": 0.19590893387794495, "learning_rate": 3.4132414694106684e-06, "log_odds_chosen": 0.6712747812271118, "log_odds_ratio": -0.4207912087440491, "logits/chosen": -0.16028505563735962, "logits/rejected": -1.4717087745666504, "logps/chosen": -1.498020052909851, "logps/rejected": -2.0545201301574707, "loss": 1.5811, "nll_loss": 1.5389834642410278, "rewards/accuracies": 1.0, "rewards/chosen": -0.14980201423168182, "rewards/margins": 0.055649999529123306, "rewards/rejected": -0.20545199513435364, "step": 414 }, { "epoch": 0.6540583136327817, "grad_norm": 0.20174475014209747, "learning_rate": 3.409346429423884e-06, "log_odds_chosen": 0.4537242650985718, "log_odds_ratio": -0.4941532015800476, "logits/chosen": -0.010996952652931213, "logits/rejected": -1.3585155010223389, "logps/chosen": -1.5672545433044434, "logps/rejected": -1.9409823417663574, "loss": 1.6455, "nll_loss": 1.5961326360702515, "rewards/accuracies": 1.0, "rewards/chosen": -0.1567254513502121, "rewards/margins": 0.03737279772758484, "rewards/rejected": -0.19409826397895813, "step": 415 }, { "epoch": 0.6556343577620173, "grad_norm": 0.19159899652004242, "learning_rate": 3.40544074249798e-06, "log_odds_chosen": 0.6167906522750854, "log_odds_ratio": -0.4354286193847656, "logits/chosen": -0.18299099802970886, "logits/rejected": -1.46063232421875, "logps/chosen": -1.5565929412841797, "logps/rejected": -2.0718507766723633, "loss": 1.6407, "nll_loss": 1.5971307754516602, "rewards/accuracies": 1.0, "rewards/chosen": -0.15565930306911469, "rewards/margins": 0.0515257902443409, "rewards/rejected": -0.20718510448932648, "step": 416 }, { "epoch": 0.6572104018912529, "grad_norm": 0.20851117372512817, "learning_rate": 3.401524438138556e-06, "log_odds_chosen": 0.45928677916526794, "log_odds_ratio": -0.49514341354370117, "logits/chosen": -0.13402409851551056, "logits/rejected": -1.1145453453063965, "logps/chosen": -1.5616354942321777, "logps/rejected": -1.9419609308242798, "loss": 1.6266, "nll_loss": 1.5770922899246216, "rewards/accuracies": 1.0, "rewards/chosen": -0.1561635434627533, "rewards/margins": 0.03803255409002304, "rewards/rejected": -0.19419609010219574, "step": 417 }, { "epoch": 0.6587864460204885, "grad_norm": 0.19792215526103973, "learning_rate": 3.39759754593142e-06, "log_odds_chosen": 0.4961496889591217, "log_odds_ratio": -0.4824981689453125, "logits/chosen": -0.1806434839963913, "logits/rejected": -1.2893097400665283, "logps/chosen": -1.6215399503707886, "logps/rejected": -2.039522409439087, "loss": 1.6951, "nll_loss": 1.646848440170288, "rewards/accuracies": 1.0, "rewards/chosen": -0.16215398907661438, "rewards/margins": 0.04179824888706207, "rewards/rejected": -0.20395225286483765, "step": 418 }, { "epoch": 0.6603624901497241, "grad_norm": 0.2001093477010727, "learning_rate": 3.3936600955423683e-06, "log_odds_chosen": 0.7080036997795105, "log_odds_ratio": -0.4158882200717926, "logits/chosen": -0.10217966884374619, "logits/rejected": -1.1086851358413696, "logps/chosen": -1.5155251026153564, "logps/rejected": -2.1040117740631104, "loss": 1.603, "nll_loss": 1.5614415407180786, "rewards/accuracies": 1.0, "rewards/chosen": -0.15155251324176788, "rewards/margins": 0.05884869024157524, "rewards/rejected": -0.21040120720863342, "step": 419 }, { "epoch": 0.6619385342789598, "grad_norm": 0.1921870857477188, "learning_rate": 3.3897121167169573e-06, "log_odds_chosen": 0.39313969016075134, "log_odds_ratio": -0.5258656144142151, "logits/chosen": -0.22022226452827454, "logits/rejected": -1.3231024742126465, "logps/chosen": -1.4893752336502075, "logps/rejected": -1.808854341506958, "loss": 1.5882, "nll_loss": 1.535656452178955, "rewards/accuracies": 0.875, "rewards/chosen": -0.14893752336502075, "rewards/margins": 0.03194789960980415, "rewards/rejected": -0.1808854341506958, "step": 420 }, { "epoch": 0.6635145784081954, "grad_norm": 0.20021358132362366, "learning_rate": 3.38575363928028e-06, "log_odds_chosen": 0.6059412360191345, "log_odds_ratio": -0.4419128894805908, "logits/chosen": -0.14556629955768585, "logits/rejected": -1.1496225595474243, "logps/chosen": -1.4978344440460205, "logps/rejected": -1.997003197669983, "loss": 1.5713, "nll_loss": 1.52711820602417, "rewards/accuracies": 1.0, "rewards/chosen": -0.1497834473848343, "rewards/margins": 0.04991687089204788, "rewards/rejected": -0.19970029592514038, "step": 421 }, { "epoch": 0.665090622537431, "grad_norm": 0.19506679475307465, "learning_rate": 3.3817846931367452e-06, "log_odds_chosen": 0.415115624666214, "log_odds_ratio": -0.5117157101631165, "logits/chosen": -0.22510936856269836, "logits/rejected": -1.2167201042175293, "logps/chosen": -1.4807124137878418, "logps/rejected": -1.8138527870178223, "loss": 1.5701, "nll_loss": 1.5189181566238403, "rewards/accuracies": 1.0, "rewards/chosen": -0.1480712592601776, "rewards/margins": 0.033314019441604614, "rewards/rejected": -0.18138529360294342, "step": 422 }, { "epoch": 0.6666666666666666, "grad_norm": 0.19481457769870758, "learning_rate": 3.377805308269844e-06, "log_odds_chosen": 0.6872407793998718, "log_odds_ratio": -0.42233163118362427, "logits/chosen": -0.11342249810695648, "logits/rejected": -1.5072932243347168, "logps/chosen": -1.5372941493988037, "logps/rejected": -2.1134226322174072, "loss": 1.6132, "nll_loss": 1.5709176063537598, "rewards/accuracies": 1.0, "rewards/chosen": -0.15372943878173828, "rewards/margins": 0.05761285126209259, "rewards/rejected": -0.21134227514266968, "step": 423 }, { "epoch": 0.6682427107959023, "grad_norm": 0.18579819798469543, "learning_rate": 3.3738155147419275e-06, "log_odds_chosen": 0.6220219135284424, "log_odds_ratio": -0.44024914503097534, "logits/chosen": -0.235686257481575, "logits/rejected": -1.2404701709747314, "logps/chosen": -1.5340425968170166, "logps/rejected": -2.0529518127441406, "loss": 1.5954, "nll_loss": 1.5513544082641602, "rewards/accuracies": 1.0, "rewards/chosen": -0.15340426564216614, "rewards/margins": 0.05189090967178345, "rewards/rejected": -0.2052951604127884, "step": 424 }, { "epoch": 0.6698187549251379, "grad_norm": 0.19320693612098694, "learning_rate": 3.3698153426939824e-06, "log_odds_chosen": 0.7084161639213562, "log_odds_ratio": -0.4107830226421356, "logits/chosen": -0.2280699759721756, "logits/rejected": -1.0708644390106201, "logps/chosen": -1.463280439376831, "logps/rejected": -2.0446677207946777, "loss": 1.5378, "nll_loss": 1.4967448711395264, "rewards/accuracies": 1.0, "rewards/chosen": -0.14632803201675415, "rewards/margins": 0.05813872069120407, "rewards/rejected": -0.2044667750597, "step": 425 }, { "epoch": 0.6713947990543735, "grad_norm": 0.20736993849277496, "learning_rate": 3.3658048223453954e-06, "log_odds_chosen": 0.662844717502594, "log_odds_ratio": -0.4233693480491638, "logits/chosen": -0.1891903281211853, "logits/rejected": -1.40634024143219, "logps/chosen": -1.6040951013565063, "logps/rejected": -2.1638996601104736, "loss": 1.6514, "nll_loss": 1.6090670824050903, "rewards/accuracies": 1.0, "rewards/chosen": -0.16040951013565063, "rewards/margins": 0.05598045140504837, "rewards/rejected": -0.2163899689912796, "step": 426 }, { "epoch": 0.6729708431836091, "grad_norm": 0.1907954066991806, "learning_rate": 3.3617839839937337e-06, "log_odds_chosen": 0.6645872592926025, "log_odds_ratio": -0.4254325330257416, "logits/chosen": -0.11419974267482758, "logits/rejected": -1.4858274459838867, "logps/chosen": -1.5667697191238403, "logps/rejected": -2.1278765201568604, "loss": 1.6334, "nll_loss": 1.5908530950546265, "rewards/accuracies": 1.0, "rewards/chosen": -0.15667694807052612, "rewards/margins": 0.056110695004463196, "rewards/rejected": -0.2127876579761505, "step": 427 }, { "epoch": 0.6745468873128447, "grad_norm": 0.20678722858428955, "learning_rate": 3.3577528580145107e-06, "log_odds_chosen": 0.3481862545013428, "log_odds_ratio": -0.5371396541595459, "logits/chosen": -0.14534202218055725, "logits/rejected": -1.29691481590271, "logps/chosen": -1.585597276687622, "logps/rejected": -1.8742812871932983, "loss": 1.6676, "nll_loss": 1.6138746738433838, "rewards/accuracies": 1.0, "rewards/chosen": -0.15855972468852997, "rewards/margins": 0.028868405148386955, "rewards/rejected": -0.18742814660072327, "step": 428 }, { "epoch": 0.6761229314420804, "grad_norm": 0.19353458285331726, "learning_rate": 3.353711474860956e-06, "log_odds_chosen": 0.5981341600418091, "log_odds_ratio": -0.45493775606155396, "logits/chosen": -0.1791481226682663, "logits/rejected": -1.231849193572998, "logps/chosen": -1.5348036289215088, "logps/rejected": -2.0394744873046875, "loss": 1.597, "nll_loss": 1.5514580011367798, "rewards/accuracies": 1.0, "rewards/chosen": -0.15348035097122192, "rewards/margins": 0.05046708881855011, "rewards/rejected": -0.20394745469093323, "step": 429 }, { "epoch": 0.677698975571316, "grad_norm": 0.18463017046451569, "learning_rate": 3.3496598650637916e-06, "log_odds_chosen": 0.569009006023407, "log_odds_ratio": -0.45516982674598694, "logits/chosen": -0.20293018221855164, "logits/rejected": -1.1590790748596191, "logps/chosen": -1.435080647468567, "logps/rejected": -1.8941256999969482, "loss": 1.5186, "nll_loss": 1.4731093645095825, "rewards/accuracies": 1.0, "rewards/chosen": -0.14350807666778564, "rewards/margins": 0.0459044985473156, "rewards/rejected": -0.18941256403923035, "step": 430 }, { "epoch": 0.6792750197005516, "grad_norm": 0.18710088729858398, "learning_rate": 3.3455980592309923e-06, "log_odds_chosen": 0.619581401348114, "log_odds_ratio": -0.4386385977268219, "logits/chosen": -0.23650380969047546, "logits/rejected": -1.2405811548233032, "logps/chosen": -1.4119963645935059, "logps/rejected": -1.9160082340240479, "loss": 1.5042, "nll_loss": 1.460310459136963, "rewards/accuracies": 1.0, "rewards/chosen": -0.14119963347911835, "rewards/margins": 0.05040118098258972, "rewards/rejected": -0.19160079956054688, "step": 431 }, { "epoch": 0.6808510638297872, "grad_norm": 0.1992560178041458, "learning_rate": 3.341526088047562e-06, "log_odds_chosen": 0.5567487478256226, "log_odds_ratio": -0.4619826078414917, "logits/chosen": -0.17194384336471558, "logits/rejected": -1.293556571006775, "logps/chosen": -1.6019841432571411, "logps/rejected": -2.070882558822632, "loss": 1.662, "nll_loss": 1.6158294677734375, "rewards/accuracies": 1.0, "rewards/chosen": -0.1601984202861786, "rewards/margins": 0.046889837831258774, "rewards/rejected": -0.20708826184272766, "step": 432 }, { "epoch": 0.6824271079590228, "grad_norm": 0.226112961769104, "learning_rate": 3.3374439822752972e-06, "log_odds_chosen": 0.44421204924583435, "log_odds_ratio": -0.49906378984451294, "logits/chosen": -0.12680430710315704, "logits/rejected": -1.1431465148925781, "logps/chosen": -1.5594537258148193, "logps/rejected": -1.9281821250915527, "loss": 1.649, "nll_loss": 1.5991100072860718, "rewards/accuracies": 1.0, "rewards/chosen": -0.15594536066055298, "rewards/margins": 0.036872848868370056, "rewards/rejected": -0.19281822443008423, "step": 433 }, { "epoch": 0.6840031520882585, "grad_norm": 0.207797572016716, "learning_rate": 3.333351772752559e-06, "log_odds_chosen": 0.5869032144546509, "log_odds_ratio": -0.4558618366718292, "logits/chosen": -0.23912350833415985, "logits/rejected": -1.287145972251892, "logps/chosen": -1.6291857957839966, "logps/rejected": -2.120633602142334, "loss": 1.6932, "nll_loss": 1.6475987434387207, "rewards/accuracies": 1.0, "rewards/chosen": -0.1629185825586319, "rewards/margins": 0.049144770950078964, "rewards/rejected": -0.21206337213516235, "step": 434 }, { "epoch": 0.6855791962174941, "grad_norm": 0.20063838362693787, "learning_rate": 3.3292494903940338e-06, "log_odds_chosen": 0.7061201333999634, "log_odds_ratio": -0.40912342071533203, "logits/chosen": -0.1544758826494217, "logits/rejected": -1.3016716241836548, "logps/chosen": -1.5344663858413696, "logps/rejected": -2.1211295127868652, "loss": 1.6071, "nll_loss": 1.5661424398422241, "rewards/accuracies": 1.0, "rewards/chosen": -0.15344664454460144, "rewards/margins": 0.05866629630327225, "rewards/rejected": -0.2121129333972931, "step": 435 }, { "epoch": 0.6871552403467297, "grad_norm": 0.1838790476322174, "learning_rate": 3.3251371661905063e-06, "log_odds_chosen": 0.6065340638160706, "log_odds_ratio": -0.44656994938850403, "logits/chosen": -0.1865832805633545, "logits/rejected": -1.140378713607788, "logps/chosen": -1.3614064455032349, "logps/rejected": -1.8547477722167969, "loss": 1.4488, "nll_loss": 1.4041305780410767, "rewards/accuracies": 1.0, "rewards/chosen": -0.1361406445503235, "rewards/margins": 0.04933411255478859, "rewards/rejected": -0.18547475337982178, "step": 436 }, { "epoch": 0.6887312844759653, "grad_norm": 0.21491163969039917, "learning_rate": 3.321014831208622e-06, "log_odds_chosen": 0.5981911420822144, "log_odds_ratio": -0.44269564747810364, "logits/chosen": -0.19441677629947662, "logits/rejected": -1.1200268268585205, "logps/chosen": -1.4213942289352417, "logps/rejected": -1.9019947052001953, "loss": 1.5169, "nll_loss": 1.4725940227508545, "rewards/accuracies": 1.0, "rewards/chosen": -0.14213941991329193, "rewards/margins": 0.04806005209684372, "rewards/rejected": -0.19019947946071625, "step": 437 }, { "epoch": 0.6903073286052009, "grad_norm": 0.2163185477256775, "learning_rate": 3.316882516590652e-06, "log_odds_chosen": 0.6079857349395752, "log_odds_ratio": -0.43692946434020996, "logits/chosen": -0.18331696093082428, "logits/rejected": -1.3628792762756348, "logps/chosen": -1.4688149690628052, "logps/rejected": -1.966677188873291, "loss": 1.5508, "nll_loss": 1.5071358680725098, "rewards/accuracies": 1.0, "rewards/chosen": -0.14688150584697723, "rewards/margins": 0.04978622496128082, "rewards/rejected": -0.19666773080825806, "step": 438 }, { "epoch": 0.6918833727344366, "grad_norm": 0.21325580775737762, "learning_rate": 3.31274025355426e-06, "log_odds_chosen": 0.5819729566574097, "log_odds_ratio": -0.4491526484489441, "logits/chosen": -0.22322604060173035, "logits/rejected": -1.189731478691101, "logps/chosen": -1.5592323541641235, "logps/rejected": -2.043937921524048, "loss": 1.6116, "nll_loss": 1.5666409730911255, "rewards/accuracies": 1.0, "rewards/chosen": -0.15592323243618011, "rewards/margins": 0.04847054183483124, "rewards/rejected": -0.20439377427101135, "step": 439 }, { "epoch": 0.6934594168636722, "grad_norm": 0.20833250880241394, "learning_rate": 3.308588073392265e-06, "log_odds_chosen": 0.5521525144577026, "log_odds_ratio": -0.46361684799194336, "logits/chosen": -0.2142220437526703, "logits/rejected": -1.1978386640548706, "logps/chosen": -1.5905332565307617, "logps/rejected": -2.054898738861084, "loss": 1.6463, "nll_loss": 1.599968433380127, "rewards/accuracies": 1.0, "rewards/chosen": -0.15905332565307617, "rewards/margins": 0.046436551958322525, "rewards/rejected": -0.2054898738861084, "step": 440 }, { "epoch": 0.6950354609929078, "grad_norm": 0.19477160274982452, "learning_rate": 3.3044260074724035e-06, "log_odds_chosen": 0.6352304220199585, "log_odds_ratio": -0.43725699186325073, "logits/chosen": -0.20722348988056183, "logits/rejected": -1.4436430931091309, "logps/chosen": -1.5010461807250977, "logps/rejected": -2.0235297679901123, "loss": 1.5782, "nll_loss": 1.5344798564910889, "rewards/accuracies": 1.0, "rewards/chosen": -0.15010464191436768, "rewards/margins": 0.05224834755063057, "rewards/rejected": -0.20235297083854675, "step": 441 }, { "epoch": 0.6966115051221434, "grad_norm": 0.18534111976623535, "learning_rate": 3.300254087237097e-06, "log_odds_chosen": 0.5580976605415344, "log_odds_ratio": -0.457084059715271, "logits/chosen": -0.16514423489570618, "logits/rejected": -1.3200604915618896, "logps/chosen": -1.3929381370544434, "logps/rejected": -1.8415474891662598, "loss": 1.4993, "nll_loss": 1.4535483121871948, "rewards/accuracies": 1.0, "rewards/chosen": -0.1392938196659088, "rewards/margins": 0.044860921800136566, "rewards/rejected": -0.18415474891662598, "step": 442 }, { "epoch": 0.698187549251379, "grad_norm": 0.26748111844062805, "learning_rate": 3.2960723442032105e-06, "log_odds_chosen": 0.7100386619567871, "log_odds_ratio": -0.4068644642829895, "logits/chosen": -0.23274515569210052, "logits/rejected": -1.637979507446289, "logps/chosen": -1.590896725654602, "logps/rejected": -2.1930923461914062, "loss": 1.6465, "nll_loss": 1.6058528423309326, "rewards/accuracies": 1.0, "rewards/chosen": -0.15908968448638916, "rewards/margins": 0.06021953374147415, "rewards/rejected": -0.21930919587612152, "step": 443 }, { "epoch": 0.6997635933806147, "grad_norm": 0.1990683227777481, "learning_rate": 3.291880809961814e-06, "log_odds_chosen": 0.6279516220092773, "log_odds_ratio": -0.4397643506526947, "logits/chosen": -0.15238967537879944, "logits/rejected": -1.2192305326461792, "logps/chosen": -1.5531084537506104, "logps/rejected": -2.074535369873047, "loss": 1.6241, "nll_loss": 1.5800902843475342, "rewards/accuracies": 1.0, "rewards/chosen": -0.15531083941459656, "rewards/margins": 0.0521426796913147, "rewards/rejected": -0.20745351910591125, "step": 444 }, { "epoch": 0.7013396375098503, "grad_norm": 0.19820798933506012, "learning_rate": 3.2876795161779473e-06, "log_odds_chosen": 0.7250336408615112, "log_odds_ratio": -0.3975331783294678, "logits/chosen": -0.18375059962272644, "logits/rejected": -1.543222188949585, "logps/chosen": -1.5645023584365845, "logps/rejected": -2.1766436100006104, "loss": 1.6162, "nll_loss": 1.5764946937561035, "rewards/accuracies": 1.0, "rewards/chosen": -0.15645024180412292, "rewards/margins": 0.0612141489982605, "rewards/rejected": -0.21766439080238342, "step": 445 }, { "epoch": 0.7029156816390859, "grad_norm": 0.19689838588237762, "learning_rate": 3.2834684945903776e-06, "log_odds_chosen": 0.5597304105758667, "log_odds_ratio": -0.45593225955963135, "logits/chosen": -0.2338670790195465, "logits/rejected": -1.2486504316329956, "logps/chosen": -1.4915810823440552, "logps/rejected": -1.9466543197631836, "loss": 1.572, "nll_loss": 1.5264508724212646, "rewards/accuracies": 1.0, "rewards/chosen": -0.14915812015533447, "rewards/margins": 0.045507319271564484, "rewards/rejected": -0.19466543197631836, "step": 446 }, { "epoch": 0.7044917257683215, "grad_norm": 0.2056231051683426, "learning_rate": 3.2792477770113624e-06, "log_odds_chosen": 0.5060315728187561, "log_odds_ratio": -0.47748908400535583, "logits/chosen": -0.32436949014663696, "logits/rejected": -1.376452922821045, "logps/chosen": -1.5770741701126099, "logps/rejected": -1.9976955652236938, "loss": 1.6348, "nll_loss": 1.5870327949523926, "rewards/accuracies": 1.0, "rewards/chosen": -0.15770742297172546, "rewards/margins": 0.04206214100122452, "rewards/rejected": -0.19976955652236938, "step": 447 }, { "epoch": 0.7060677698975572, "grad_norm": 0.2036747932434082, "learning_rate": 3.275017395326407e-06, "log_odds_chosen": 0.4934311509132385, "log_odds_ratio": -0.48520928621292114, "logits/chosen": -0.13775332272052765, "logits/rejected": -1.1801694631576538, "logps/chosen": -1.4680533409118652, "logps/rejected": -1.8637882471084595, "loss": 1.5528, "nll_loss": 1.5042613744735718, "rewards/accuracies": 0.875, "rewards/chosen": -0.14680534601211548, "rewards/margins": 0.039573490619659424, "rewards/rejected": -0.1863788366317749, "step": 448 }, { "epoch": 0.7076438140267928, "grad_norm": 0.1903351992368698, "learning_rate": 3.2707773814940244e-06, "log_odds_chosen": 0.6554431915283203, "log_odds_ratio": -0.42446404695510864, "logits/chosen": -0.15731389820575714, "logits/rejected": -1.3429402112960815, "logps/chosen": -1.4967751502990723, "logps/rejected": -2.0305981636047363, "loss": 1.5792, "nll_loss": 1.5367555618286133, "rewards/accuracies": 1.0, "rewards/chosen": -0.14967751502990723, "rewards/margins": 0.05338229984045029, "rewards/rejected": -0.20305980741977692, "step": 449 }, { "epoch": 0.7092198581560284, "grad_norm": 0.18632066249847412, "learning_rate": 3.2665277675454935e-06, "log_odds_chosen": 0.6589217185974121, "log_odds_ratio": -0.43469709157943726, "logits/chosen": -0.22264309227466583, "logits/rejected": -1.58710515499115, "logps/chosen": -1.5806881189346313, "logps/rejected": -2.1421871185302734, "loss": 1.6344, "nll_loss": 1.590897798538208, "rewards/accuracies": 1.0, "rewards/chosen": -0.15806882083415985, "rewards/margins": 0.05614989995956421, "rewards/rejected": -0.21421872079372406, "step": 450 }, { "epoch": 0.710795902285264, "grad_norm": 0.1898472160100937, "learning_rate": 3.262268585584619e-06, "log_odds_chosen": 0.6008990406990051, "log_odds_ratio": -0.440044105052948, "logits/chosen": -0.1463158279657364, "logits/rejected": -1.3233999013900757, "logps/chosen": -1.5258179903030396, "logps/rejected": -2.0252106189727783, "loss": 1.5941, "nll_loss": 1.5501309633255005, "rewards/accuracies": 1.0, "rewards/chosen": -0.15258179605007172, "rewards/margins": 0.04993927478790283, "rewards/rejected": -0.20252105593681335, "step": 451 }, { "epoch": 0.7123719464144996, "grad_norm": 0.20325587689876556, "learning_rate": 3.2579998677874853e-06, "log_odds_chosen": 0.691182017326355, "log_odds_ratio": -0.41538918018341064, "logits/chosen": -0.23037710785865784, "logits/rejected": -1.2760132551193237, "logps/chosen": -1.5629483461380005, "logps/rejected": -2.1457886695861816, "loss": 1.6173, "nll_loss": 1.5757336616516113, "rewards/accuracies": 1.0, "rewards/chosen": -0.1562948226928711, "rewards/margins": 0.05828403681516647, "rewards/rejected": -0.21457885205745697, "step": 452 }, { "epoch": 0.7139479905437353, "grad_norm": 0.18862774968147278, "learning_rate": 3.2537216464022155e-06, "log_odds_chosen": 0.6802084445953369, "log_odds_ratio": -0.41921448707580566, "logits/chosen": -0.20463165640830994, "logits/rejected": -1.2661035060882568, "logps/chosen": -1.5623295307159424, "logps/rejected": -2.129488945007324, "loss": 1.6273, "nll_loss": 1.585338830947876, "rewards/accuracies": 1.0, "rewards/chosen": -0.15623293817043304, "rewards/margins": 0.0567159429192543, "rewards/rejected": -0.21294888854026794, "step": 453 }, { "epoch": 0.7155240346729709, "grad_norm": 0.1871948093175888, "learning_rate": 3.2494339537487314e-06, "log_odds_chosen": 0.5408557653427124, "log_odds_ratio": -0.46073442697525024, "logits/chosen": -0.23007997870445251, "logits/rejected": -1.5315394401550293, "logps/chosen": -1.5641494989395142, "logps/rejected": -2.0117013454437256, "loss": 1.6397, "nll_loss": 1.5936379432678223, "rewards/accuracies": 1.0, "rewards/chosen": -0.1564149558544159, "rewards/margins": 0.04475518316030502, "rewards/rejected": -0.2011701464653015, "step": 454 }, { "epoch": 0.7171000788022065, "grad_norm": 0.18707512319087982, "learning_rate": 3.2451368222185006e-06, "log_odds_chosen": 0.454687237739563, "log_odds_ratio": -0.4977053105831146, "logits/chosen": -0.2190241664648056, "logits/rejected": -1.1657060384750366, "logps/chosen": -1.5094616413116455, "logps/rejected": -1.8778409957885742, "loss": 1.5921, "nll_loss": 1.5422812700271606, "rewards/accuracies": 1.0, "rewards/chosen": -0.15094618499279022, "rewards/margins": 0.03683791682124138, "rewards/rejected": -0.1877840906381607, "step": 455 }, { "epoch": 0.7186761229314421, "grad_norm": 0.17670530080795288, "learning_rate": 3.2408302842743007e-06, "log_odds_chosen": 0.6599798798561096, "log_odds_ratio": -0.4245462715625763, "logits/chosen": -0.1710800975561142, "logits/rejected": -1.3067679405212402, "logps/chosen": -1.4607981443405151, "logps/rejected": -2.002277374267578, "loss": 1.5484, "nll_loss": 1.505940556526184, "rewards/accuracies": 1.0, "rewards/chosen": -0.14607983827590942, "rewards/margins": 0.05414789542555809, "rewards/rejected": -0.20022772252559662, "step": 456 }, { "epoch": 0.7202521670606777, "grad_norm": 0.18531948328018188, "learning_rate": 3.2365143724499684e-06, "log_odds_chosen": 0.590911865234375, "log_odds_ratio": -0.4425351023674011, "logits/chosen": -0.1856268346309662, "logits/rejected": -1.3595974445343018, "logps/chosen": -1.5068074464797974, "logps/rejected": -1.9935933351516724, "loss": 1.5948, "nll_loss": 1.5505702495574951, "rewards/accuracies": 1.0, "rewards/chosen": -0.1506807506084442, "rewards/margins": 0.04867858439683914, "rewards/rejected": -0.19935932755470276, "step": 457 }, { "epoch": 0.7218282111899134, "grad_norm": 0.19168265163898468, "learning_rate": 3.2321891193501564e-06, "log_odds_chosen": 0.5883606672286987, "log_odds_ratio": -0.4528968632221222, "logits/chosen": -0.16482576727867126, "logits/rejected": -0.8930314779281616, "logps/chosen": -1.510907530784607, "logps/rejected": -1.9958248138427734, "loss": 1.5783, "nll_loss": 1.532994031906128, "rewards/accuracies": 1.0, "rewards/chosen": -0.15109075605869293, "rewards/margins": 0.04849172383546829, "rewards/rejected": -0.19958247244358063, "step": 458 }, { "epoch": 0.723404255319149, "grad_norm": 0.18463407456874847, "learning_rate": 3.2278545576500858e-06, "log_odds_chosen": 0.8210570812225342, "log_odds_ratio": -0.37329068779945374, "logits/chosen": -0.08877343684434891, "logits/rejected": -1.078304648399353, "logps/chosen": -1.3643009662628174, "logps/rejected": -2.0353879928588867, "loss": 1.4512, "nll_loss": 1.4138658046722412, "rewards/accuracies": 1.0, "rewards/chosen": -0.13643008470535278, "rewards/margins": 0.06710872799158096, "rewards/rejected": -0.20353882014751434, "step": 459 }, { "epoch": 0.7249802994483846, "grad_norm": 0.19575412571430206, "learning_rate": 3.223510720095299e-06, "log_odds_chosen": 0.785904049873352, "log_odds_ratio": -0.38319867849349976, "logits/chosen": -0.2679174542427063, "logits/rejected": -1.3926105499267578, "logps/chosen": -1.5412414073944092, "logps/rejected": -2.202934980392456, "loss": 1.5979, "nll_loss": 1.5595486164093018, "rewards/accuracies": 1.0, "rewards/chosen": -0.15412414073944092, "rewards/margins": 0.0661693587899208, "rewards/rejected": -0.22029350697994232, "step": 460 }, { "epoch": 0.7265563435776202, "grad_norm": 0.18656474351882935, "learning_rate": 3.2191576395014158e-06, "log_odds_chosen": 0.7210904359817505, "log_odds_ratio": -0.3992398679256439, "logits/chosen": -0.20938719809055328, "logits/rejected": -1.283248782157898, "logps/chosen": -1.4764115810394287, "logps/rejected": -2.072150707244873, "loss": 1.5461, "nll_loss": 1.5061570405960083, "rewards/accuracies": 1.0, "rewards/chosen": -0.1476411670446396, "rewards/margins": 0.059573911130428314, "rewards/rejected": -0.2072150707244873, "step": 461 }, { "epoch": 0.7281323877068558, "grad_norm": 0.18474119901657104, "learning_rate": 3.2147953487538794e-06, "log_odds_chosen": 0.6938648223876953, "log_odds_ratio": -0.4105943739414215, "logits/chosen": -0.14119039475917816, "logits/rejected": -1.3704811334609985, "logps/chosen": -1.4421080350875854, "logps/rejected": -2.0102922916412354, "loss": 1.514, "nll_loss": 1.4729448556900024, "rewards/accuracies": 1.0, "rewards/chosen": -0.1442108154296875, "rewards/margins": 0.05681842565536499, "rewards/rejected": -0.2010292410850525, "step": 462 }, { "epoch": 0.7297084318360915, "grad_norm": 0.19482626020908356, "learning_rate": 3.2104238808077133e-06, "log_odds_chosen": 0.5768538117408752, "log_odds_ratio": -0.45441049337387085, "logits/chosen": -0.169452965259552, "logits/rejected": -1.0683661699295044, "logps/chosen": -1.4814167022705078, "logps/rejected": -1.9551403522491455, "loss": 1.5563, "nll_loss": 1.5108129978179932, "rewards/accuracies": 1.0, "rewards/chosen": -0.14814168214797974, "rewards/margins": 0.04737236350774765, "rewards/rejected": -0.1955140382051468, "step": 463 }, { "epoch": 0.731284475965327, "grad_norm": 0.18169742822647095, "learning_rate": 3.2060432686872704e-06, "log_odds_chosen": 0.8345743417739868, "log_odds_ratio": -0.3699982464313507, "logits/chosen": -0.2313491553068161, "logits/rejected": -1.2104275226593018, "logps/chosen": -1.387764811515808, "logps/rejected": -2.067291736602783, "loss": 1.4733, "nll_loss": 1.436316728591919, "rewards/accuracies": 1.0, "rewards/chosen": -0.138776496052742, "rewards/margins": 0.06795267760753632, "rewards/rejected": -0.20672915875911713, "step": 464 }, { "epoch": 0.7328605200945626, "grad_norm": 0.18539521098136902, "learning_rate": 3.201653545485982e-06, "log_odds_chosen": 0.6590836048126221, "log_odds_ratio": -0.42436298727989197, "logits/chosen": -0.14550940692424774, "logits/rejected": -1.2948503494262695, "logps/chosen": -1.5323055982589722, "logps/rejected": -2.083099365234375, "loss": 1.5889, "nll_loss": 1.5465004444122314, "rewards/accuracies": 1.0, "rewards/chosen": -0.15323056280612946, "rewards/margins": 0.0550793781876564, "rewards/rejected": -0.20830994844436646, "step": 465 }, { "epoch": 0.7344365642237982, "grad_norm": 0.2062307447195053, "learning_rate": 3.197254744366111e-06, "log_odds_chosen": 0.67624831199646, "log_odds_ratio": -0.41811689734458923, "logits/chosen": -0.1269284039735794, "logits/rejected": -1.2436720132827759, "logps/chosen": -1.442671775817871, "logps/rejected": -1.994502305984497, "loss": 1.519, "nll_loss": 1.4771640300750732, "rewards/accuracies": 1.0, "rewards/chosen": -0.14426717162132263, "rewards/margins": 0.0551830530166626, "rewards/rejected": -0.19945020973682404, "step": 466 }, { "epoch": 0.7360126083530338, "grad_norm": 0.19510377943515778, "learning_rate": 3.192846898558498e-06, "log_odds_chosen": 0.5479187369346619, "log_odds_ratio": -0.4584289491176605, "logits/chosen": -0.17342276871204376, "logits/rejected": -1.2843399047851562, "logps/chosen": -1.5753339529037476, "logps/rejected": -2.0328989028930664, "loss": 1.6463, "nll_loss": 1.600473403930664, "rewards/accuracies": 1.0, "rewards/chosen": -0.15753339231014252, "rewards/margins": 0.045756496489048004, "rewards/rejected": -0.20328989624977112, "step": 467 }, { "epoch": 0.7375886524822695, "grad_norm": 0.19615499675273895, "learning_rate": 3.188430041362313e-06, "log_odds_chosen": 0.5022854804992676, "log_odds_ratio": -0.4808230698108673, "logits/chosen": -0.1259315460920334, "logits/rejected": -1.3108328580856323, "logps/chosen": -1.5695524215698242, "logps/rejected": -1.9884154796600342, "loss": 1.6285, "nll_loss": 1.580439567565918, "rewards/accuracies": 1.0, "rewards/chosen": -0.15695525705814362, "rewards/margins": 0.04188628867268562, "rewards/rejected": -0.19884154200553894, "step": 468 }, { "epoch": 0.7391646966115051, "grad_norm": 0.20162400603294373, "learning_rate": 3.184004206144803e-06, "log_odds_chosen": 0.7329556941986084, "log_odds_ratio": -0.4027223289012909, "logits/chosen": -0.23247480392456055, "logits/rejected": -1.2680878639221191, "logps/chosen": -1.4653428792953491, "logps/rejected": -2.0662801265716553, "loss": 1.5357, "nll_loss": 1.4954301118850708, "rewards/accuracies": 1.0, "rewards/chosen": -0.1465342789888382, "rewards/margins": 0.06009373068809509, "rewards/rejected": -0.20662802457809448, "step": 469 }, { "epoch": 0.7407407407407407, "grad_norm": 0.18583819270133972, "learning_rate": 3.1795694263410386e-06, "log_odds_chosen": 0.7982565760612488, "log_odds_ratio": -0.38432639837265015, "logits/chosen": -0.19295667111873627, "logits/rejected": -1.293751835823059, "logps/chosen": -1.4557034969329834, "logps/rejected": -2.122990608215332, "loss": 1.5197, "nll_loss": 1.48124098777771, "rewards/accuracies": 1.0, "rewards/chosen": -0.14557035267353058, "rewards/margins": 0.06672872602939606, "rewards/rejected": -0.21229907870292664, "step": 470 }, { "epoch": 0.7423167848699763, "grad_norm": 0.1854601353406906, "learning_rate": 3.1751257354536634e-06, "log_odds_chosen": 0.552662193775177, "log_odds_ratio": -0.4665309488773346, "logits/chosen": -0.1528901308774948, "logits/rejected": -1.371885061264038, "logps/chosen": -1.4518961906433105, "logps/rejected": -1.9063466787338257, "loss": 1.5332, "nll_loss": 1.4865120649337769, "rewards/accuracies": 0.875, "rewards/chosen": -0.14518961310386658, "rewards/margins": 0.04544505476951599, "rewards/rejected": -0.19063468277454376, "step": 471 }, { "epoch": 0.7438928289992119, "grad_norm": 0.1899978667497635, "learning_rate": 3.1706731670526394e-06, "log_odds_chosen": 0.6217374205589294, "log_odds_ratio": -0.43196773529052734, "logits/chosen": -0.2525237500667572, "logits/rejected": -1.3043723106384277, "logps/chosen": -1.5042320489883423, "logps/rejected": -2.0150396823883057, "loss": 1.5598, "nll_loss": 1.5165841579437256, "rewards/accuracies": 1.0, "rewards/chosen": -0.15042319893836975, "rewards/margins": 0.05108076333999634, "rewards/rejected": -0.2015039622783661, "step": 472 }, { "epoch": 0.7454688731284476, "grad_norm": 0.1951638162136078, "learning_rate": 3.166211754774994e-06, "log_odds_chosen": 0.6629724502563477, "log_odds_ratio": -0.422730028629303, "logits/chosen": -0.20018966495990753, "logits/rejected": -1.4212433099746704, "logps/chosen": -1.5557457208633423, "logps/rejected": -2.1103403568267822, "loss": 1.6437, "nll_loss": 1.6013872623443604, "rewards/accuracies": 1.0, "rewards/chosen": -0.15557457506656647, "rewards/margins": 0.055459462106227875, "rewards/rejected": -0.21103402972221375, "step": 473 }, { "epoch": 0.7470449172576832, "grad_norm": 0.19168664515018463, "learning_rate": 3.1617415323245665e-06, "log_odds_chosen": 0.6726161241531372, "log_odds_ratio": -0.42290619015693665, "logits/chosen": -0.2400115728378296, "logits/rejected": -1.3783491849899292, "logps/chosen": -1.4658328294754028, "logps/rejected": -2.0105373859405518, "loss": 1.5266, "nll_loss": 1.4843122959136963, "rewards/accuracies": 1.0, "rewards/chosen": -0.14658328890800476, "rewards/margins": 0.054470453411340714, "rewards/rejected": -0.20105375349521637, "step": 474 }, { "epoch": 0.7486209613869188, "grad_norm": 0.18189279735088348, "learning_rate": 3.157262533471752e-06, "log_odds_chosen": 0.7140947580337524, "log_odds_ratio": -0.40235432982444763, "logits/chosen": -0.15209892392158508, "logits/rejected": -1.3825089931488037, "logps/chosen": -1.5169684886932373, "logps/rejected": -2.1144258975982666, "loss": 1.5855, "nll_loss": 1.54523503780365, "rewards/accuracies": 1.0, "rewards/chosen": -0.15169686079025269, "rewards/margins": 0.05974572151899338, "rewards/rejected": -0.21144257485866547, "step": 475 }, { "epoch": 0.7501970055161544, "grad_norm": 0.19609789550304413, "learning_rate": 3.1527747920532468e-06, "log_odds_chosen": 0.565768837928772, "log_odds_ratio": -0.4514068365097046, "logits/chosen": -0.16712833940982819, "logits/rejected": -1.1320550441741943, "logps/chosen": -1.4766864776611328, "logps/rejected": -1.9391474723815918, "loss": 1.5527, "nll_loss": 1.5075533390045166, "rewards/accuracies": 1.0, "rewards/chosen": -0.14766864478588104, "rewards/margins": 0.04624609276652336, "rewards/rejected": -0.1939147412776947, "step": 476 }, { "epoch": 0.75177304964539, "grad_norm": 0.19328206777572632, "learning_rate": 3.148278341971795e-06, "log_odds_chosen": 0.65244460105896, "log_odds_ratio": -0.4249870181083679, "logits/chosen": -0.22436018288135529, "logits/rejected": -1.223185420036316, "logps/chosen": -1.5077931880950928, "logps/rejected": -2.0486738681793213, "loss": 1.5739, "nll_loss": 1.5314006805419922, "rewards/accuracies": 1.0, "rewards/chosen": -0.15077932178974152, "rewards/margins": 0.054088056087493896, "rewards/rejected": -0.2048673778772354, "step": 477 }, { "epoch": 0.7533490937746257, "grad_norm": 0.20173610746860504, "learning_rate": 3.143773217195929e-06, "log_odds_chosen": 0.73219895362854, "log_odds_ratio": -0.4004945158958435, "logits/chosen": -0.2401634305715561, "logits/rejected": -1.3643122911453247, "logps/chosen": -1.5717616081237793, "logps/rejected": -2.18953800201416, "loss": 1.6288, "nll_loss": 1.5887385606765747, "rewards/accuracies": 1.0, "rewards/chosen": -0.1571761518716812, "rewards/margins": 0.06177765130996704, "rewards/rejected": -0.21895381808280945, "step": 478 }, { "epoch": 0.7549251379038613, "grad_norm": 0.19656141102313995, "learning_rate": 3.139259451759714e-06, "log_odds_chosen": 0.5744882225990295, "log_odds_ratio": -0.4510755240917206, "logits/chosen": -0.24257460236549377, "logits/rejected": -1.3083908557891846, "logps/chosen": -1.5712709426879883, "logps/rejected": -2.050100564956665, "loss": 1.644, "nll_loss": 1.5989316701889038, "rewards/accuracies": 1.0, "rewards/chosen": -0.15712709724903107, "rewards/margins": 0.047882966697216034, "rewards/rejected": -0.2050100713968277, "step": 479 }, { "epoch": 0.7565011820330969, "grad_norm": 0.18602579832077026, "learning_rate": 3.134737079762493e-06, "log_odds_chosen": 0.6003292202949524, "log_odds_ratio": -0.4424300193786621, "logits/chosen": -0.1941242814064026, "logits/rejected": -1.311800479888916, "logps/chosen": -1.501556634902954, "logps/rejected": -1.9944710731506348, "loss": 1.5673, "nll_loss": 1.5230939388275146, "rewards/accuracies": 1.0, "rewards/chosen": -0.15015564858913422, "rewards/margins": 0.04929143935441971, "rewards/rejected": -0.19944709539413452, "step": 480 }, { "epoch": 0.7580772261623325, "grad_norm": 0.20337559282779694, "learning_rate": 3.130206135368626e-06, "log_odds_chosen": 0.6041734218597412, "log_odds_ratio": -0.4474312365055084, "logits/chosen": -0.24175474047660828, "logits/rejected": -0.9721249341964722, "logps/chosen": -1.425898790359497, "logps/rejected": -1.9149250984191895, "loss": 1.5167, "nll_loss": 1.471928596496582, "rewards/accuracies": 1.0, "rewards/chosen": -0.14258988201618195, "rewards/margins": 0.04890260472893715, "rewards/rejected": -0.19149249792099, "step": 481 }, { "epoch": 0.7596532702915682, "grad_norm": 0.19193829596042633, "learning_rate": 3.1256666528072327e-06, "log_odds_chosen": 0.7867165803909302, "log_odds_ratio": -0.38077130913734436, "logits/chosen": -0.23528993129730225, "logits/rejected": -1.075020670890808, "logps/chosen": -1.472687840461731, "logps/rejected": -2.124319553375244, "loss": 1.5375, "nll_loss": 1.499396800994873, "rewards/accuracies": 1.0, "rewards/chosen": -0.14726878702640533, "rewards/margins": 0.06516318768262863, "rewards/rejected": -0.21243198215961456, "step": 482 }, { "epoch": 0.7612293144208038, "grad_norm": 0.18599654734134674, "learning_rate": 3.121118666371937e-06, "log_odds_chosen": 0.595000147819519, "log_odds_ratio": -0.4452923834323883, "logits/chosen": -0.15184305608272552, "logits/rejected": -1.538995623588562, "logps/chosen": -1.5383219718933105, "logps/rejected": -2.033515453338623, "loss": 1.5951, "nll_loss": 1.550559639930725, "rewards/accuracies": 1.0, "rewards/chosen": -0.15383221209049225, "rewards/margins": 0.049519333988428116, "rewards/rejected": -0.20335155725479126, "step": 483 }, { "epoch": 0.7628053585500394, "grad_norm": 0.21291442215442657, "learning_rate": 3.1165622104206034e-06, "log_odds_chosen": 0.770659863948822, "log_odds_ratio": -0.3848019242286682, "logits/chosen": -0.3678995370864868, "logits/rejected": -1.2183688879013062, "logps/chosen": -1.4637348651885986, "logps/rejected": -2.1029365062713623, "loss": 1.5321, "nll_loss": 1.4936531782150269, "rewards/accuracies": 1.0, "rewards/chosen": -0.14637349545955658, "rewards/margins": 0.06392017006874084, "rewards/rejected": -0.21029365062713623, "step": 484 }, { "epoch": 0.764381402679275, "grad_norm": 0.18177950382232666, "learning_rate": 3.1119973193750816e-06, "log_odds_chosen": 0.6704604029655457, "log_odds_ratio": -0.41766875982284546, "logits/chosen": -0.2663368880748749, "logits/rejected": -1.2851604223251343, "logps/chosen": -1.475534439086914, "logps/rejected": -2.027827739715576, "loss": 1.536, "nll_loss": 1.4942355155944824, "rewards/accuracies": 1.0, "rewards/chosen": -0.1475534588098526, "rewards/margins": 0.05522932484745979, "rewards/rejected": -0.2027827799320221, "step": 485 }, { "epoch": 0.7659574468085106, "grad_norm": 0.21737127006053925, "learning_rate": 3.1074240277209408e-06, "log_odds_chosen": 0.6194983124732971, "log_odds_ratio": -0.4379619061946869, "logits/chosen": -0.24437181651592255, "logits/rejected": -1.2440481185913086, "logps/chosen": -1.494814157485962, "logps/rejected": -2.00154709815979, "loss": 1.5423, "nll_loss": 1.4985466003417969, "rewards/accuracies": 1.0, "rewards/chosen": -0.1494814157485962, "rewards/margins": 0.05067329481244087, "rewards/rejected": -0.20015469193458557, "step": 486 }, { "epoch": 0.7675334909377463, "grad_norm": 0.20863457024097443, "learning_rate": 3.102842370007217e-06, "log_odds_chosen": 0.6833222508430481, "log_odds_ratio": -0.41733595728874207, "logits/chosen": -0.16676893830299377, "logits/rejected": -1.15752375125885, "logps/chosen": -1.5041608810424805, "logps/rejected": -2.0647218227386475, "loss": 1.5703, "nll_loss": 1.5285258293151855, "rewards/accuracies": 1.0, "rewards/chosen": -0.1504160761833191, "rewards/margins": 0.05605611205101013, "rewards/rejected": -0.20647220313549042, "step": 487 }, { "epoch": 0.7691095350669819, "grad_norm": 0.1944776177406311, "learning_rate": 3.0982523808461454e-06, "log_odds_chosen": 0.5529102683067322, "log_odds_ratio": -0.4605258107185364, "logits/chosen": -0.15431943535804749, "logits/rejected": -1.2589623928070068, "logps/chosen": -1.5484925508499146, "logps/rejected": -2.0021567344665527, "loss": 1.6075, "nll_loss": 1.561496376991272, "rewards/accuracies": 1.0, "rewards/chosen": -0.15484926104545593, "rewards/margins": 0.04536642134189606, "rewards/rejected": -0.2002156674861908, "step": 488 }, { "epoch": 0.7706855791962175, "grad_norm": 0.18545880913734436, "learning_rate": 3.0936540949129006e-06, "log_odds_chosen": 0.6402296423912048, "log_odds_ratio": -0.4339551031589508, "logits/chosen": -0.2207900583744049, "logits/rejected": -1.1871285438537598, "logps/chosen": -1.5241451263427734, "logps/rejected": -2.057253122329712, "loss": 1.5905, "nll_loss": 1.5471105575561523, "rewards/accuracies": 1.0, "rewards/chosen": -0.15241453051567078, "rewards/margins": 0.053310781717300415, "rewards/rejected": -0.20572529733181, "step": 489 }, { "epoch": 0.7722616233254531, "grad_norm": 0.178094744682312, "learning_rate": 3.0890475469453378e-06, "log_odds_chosen": 0.7427234053611755, "log_odds_ratio": -0.40040361881256104, "logits/chosen": -0.2081877589225769, "logits/rejected": -1.3924274444580078, "logps/chosen": -1.4031827449798584, "logps/rejected": -2.006999969482422, "loss": 1.4721, "nll_loss": 1.4321045875549316, "rewards/accuracies": 1.0, "rewards/chosen": -0.14031827449798584, "rewards/margins": 0.06038173660635948, "rewards/rejected": -0.20069998502731323, "step": 490 }, { "epoch": 0.7738376674546887, "grad_norm": 0.18201249837875366, "learning_rate": 3.0844327717437263e-06, "log_odds_chosen": 0.6974368691444397, "log_odds_ratio": -0.41516321897506714, "logits/chosen": -0.21113747358322144, "logits/rejected": -1.2565526962280273, "logps/chosen": -1.4272940158843994, "logps/rejected": -2.0010313987731934, "loss": 1.4995, "nll_loss": 1.4580097198486328, "rewards/accuracies": 1.0, "rewards/chosen": -0.14272941648960114, "rewards/margins": 0.057373758405447006, "rewards/rejected": -0.20010316371917725, "step": 491 }, { "epoch": 0.7754137115839244, "grad_norm": 0.1853957176208496, "learning_rate": 3.0798098041704892e-06, "log_odds_chosen": 0.5092182159423828, "log_odds_ratio": -0.47541776299476624, "logits/chosen": -0.15838466584682465, "logits/rejected": -1.3052377700805664, "logps/chosen": -1.361024260520935, "logps/rejected": -1.7661519050598145, "loss": 1.4535, "nll_loss": 1.405916690826416, "rewards/accuracies": 1.0, "rewards/chosen": -0.13610242307186127, "rewards/margins": 0.04051277041435242, "rewards/rejected": -0.1766151785850525, "step": 492 }, { "epoch": 0.77698975571316, "grad_norm": 0.1893599033355713, "learning_rate": 3.0751786791499368e-06, "log_odds_chosen": 0.6547857522964478, "log_odds_ratio": -0.42588385939598083, "logits/chosen": -0.17713405191898346, "logits/rejected": -1.2708368301391602, "logps/chosen": -1.5448169708251953, "logps/rejected": -2.0935890674591064, "loss": 1.6139, "nll_loss": 1.5712815523147583, "rewards/accuracies": 1.0, "rewards/chosen": -0.1544817090034485, "rewards/margins": 0.05487719178199768, "rewards/rejected": -0.20935890078544617, "step": 493 }, { "epoch": 0.7785657998423956, "grad_norm": 0.18722397089004517, "learning_rate": 3.070539431668008e-06, "log_odds_chosen": 0.6233894228935242, "log_odds_ratio": -0.43421441316604614, "logits/chosen": -0.1989862620830536, "logits/rejected": -1.2033601999282837, "logps/chosen": -1.5463478565216064, "logps/rejected": -2.0686848163604736, "loss": 1.6068, "nll_loss": 1.5633586645126343, "rewards/accuracies": 1.0, "rewards/chosen": -0.15463480353355408, "rewards/margins": 0.05223367363214493, "rewards/rejected": -0.2068684697151184, "step": 494 }, { "epoch": 0.7801418439716312, "grad_norm": 0.19007954001426697, "learning_rate": 3.0658920967720018e-06, "log_odds_chosen": 0.7926431894302368, "log_odds_ratio": -0.38211071491241455, "logits/chosen": -0.3403100073337555, "logits/rejected": -1.2223634719848633, "logps/chosen": -1.4851934909820557, "logps/rejected": -2.1482720375061035, "loss": 1.5559, "nll_loss": 1.5176681280136108, "rewards/accuracies": 1.0, "rewards/chosen": -0.148519366979599, "rewards/margins": 0.06630785018205643, "rewards/rejected": -0.21482720971107483, "step": 495 }, { "epoch": 0.7817178881008668, "grad_norm": 0.1839301884174347, "learning_rate": 3.0612367095703116e-06, "log_odds_chosen": 0.7492038607597351, "log_odds_ratio": -0.39107295870780945, "logits/chosen": -0.18261493742465973, "logits/rejected": -1.4831030368804932, "logps/chosen": -1.5249770879745483, "logps/rejected": -2.153897762298584, "loss": 1.5805, "nll_loss": 1.5413737297058105, "rewards/accuracies": 1.0, "rewards/chosen": -0.15249772369861603, "rewards/margins": 0.06289205700159073, "rewards/rejected": -0.21538978815078735, "step": 496 }, { "epoch": 0.7832939322301025, "grad_norm": 0.19066324830055237, "learning_rate": 3.056573305232167e-06, "log_odds_chosen": 0.7923998832702637, "log_odds_ratio": -0.380237877368927, "logits/chosen": -0.20253872871398926, "logits/rejected": -1.41769278049469, "logps/chosen": -1.4783601760864258, "logps/rejected": -2.1322264671325684, "loss": 1.5446, "nll_loss": 1.5065717697143555, "rewards/accuracies": 1.0, "rewards/chosen": -0.14783601462841034, "rewards/margins": 0.06538661569356918, "rewards/rejected": -0.21322263777256012, "step": 497 }, { "epoch": 0.7848699763593381, "grad_norm": 0.19244952499866486, "learning_rate": 3.051901918987359e-06, "log_odds_chosen": 0.7435587048530579, "log_odds_ratio": -0.39330264925956726, "logits/chosen": -0.33268722891807556, "logits/rejected": -1.3394025564193726, "logps/chosen": -1.4223885536193848, "logps/rejected": -2.0310678482055664, "loss": 1.4913, "nll_loss": 1.4519734382629395, "rewards/accuracies": 1.0, "rewards/chosen": -0.14223885536193848, "rewards/margins": 0.06086793541908264, "rewards/rejected": -0.20310677587985992, "step": 498 }, { "epoch": 0.7864460204885737, "grad_norm": 0.22321587800979614, "learning_rate": 3.047222586125979e-06, "log_odds_chosen": 0.7735339999198914, "log_odds_ratio": -0.3840080797672272, "logits/chosen": -0.1548488885164261, "logits/rejected": -0.9053974151611328, "logps/chosen": -1.431384801864624, "logps/rejected": -2.068115234375, "loss": 1.51, "nll_loss": 1.4715591669082642, "rewards/accuracies": 1.0, "rewards/chosen": -0.14313849806785583, "rewards/margins": 0.06367303431034088, "rewards/rejected": -0.20681151747703552, "step": 499 }, { "epoch": 0.7880220646178093, "grad_norm": 0.18041643500328064, "learning_rate": 3.042535341998152e-06, "log_odds_chosen": 0.5752748847007751, "log_odds_ratio": -0.4517236649990082, "logits/chosen": -0.07348179817199707, "logits/rejected": -1.2616822719573975, "logps/chosen": -1.578834056854248, "logps/rejected": -2.061535120010376, "loss": 1.6247, "nll_loss": 1.579504370689392, "rewards/accuracies": 1.0, "rewards/chosen": -0.1578833907842636, "rewards/margins": 0.04827011376619339, "rewards/rejected": -0.2061535120010376, "step": 500 }, { "epoch": 0.789598108747045, "grad_norm": 0.19028055667877197, "learning_rate": 3.037840222013769e-06, "log_odds_chosen": 0.6691651344299316, "log_odds_ratio": -0.42311063408851624, "logits/chosen": -0.16774022579193115, "logits/rejected": -0.9798950552940369, "logps/chosen": -1.51216459274292, "logps/rejected": -2.0689010620117188, "loss": 1.5686, "nll_loss": 1.526324987411499, "rewards/accuracies": 1.0, "rewards/chosen": -0.15121646225452423, "rewards/margins": 0.05567363277077675, "rewards/rejected": -0.20689009130001068, "step": 501 }, { "epoch": 0.7911741528762806, "grad_norm": 0.18513523042201996, "learning_rate": 3.033137261642219e-06, "log_odds_chosen": 0.8165162801742554, "log_odds_ratio": -0.3758889436721802, "logits/chosen": -0.19638784229755402, "logits/rejected": -1.2416471242904663, "logps/chosen": -1.4641826152801514, "logps/rejected": -2.142982006072998, "loss": 1.5202, "nll_loss": 1.4826549291610718, "rewards/accuracies": 1.0, "rewards/chosen": -0.1464182585477829, "rewards/margins": 0.06787993758916855, "rewards/rejected": -0.21429818868637085, "step": 502 }, { "epoch": 0.7927501970055162, "grad_norm": 0.18089807033538818, "learning_rate": 3.02842649641212e-06, "log_odds_chosen": 0.7593850493431091, "log_odds_ratio": -0.3924046754837036, "logits/chosen": -0.10785488039255142, "logits/rejected": -1.0916216373443604, "logps/chosen": -1.5069478750228882, "logps/rejected": -2.1434710025787354, "loss": 1.5697, "nll_loss": 1.5304501056671143, "rewards/accuracies": 1.0, "rewards/chosen": -0.15069478750228882, "rewards/margins": 0.06365231424570084, "rewards/rejected": -0.21434709429740906, "step": 503 }, { "epoch": 0.7943262411347518, "grad_norm": 0.18175731599330902, "learning_rate": 3.0237079619110554e-06, "log_odds_chosen": 0.8134419918060303, "log_odds_ratio": -0.37328994274139404, "logits/chosen": -0.22376924753189087, "logits/rejected": -1.4354217052459717, "logps/chosen": -1.4759913682937622, "logps/rejected": -2.146923303604126, "loss": 1.5336, "nll_loss": 1.496294617652893, "rewards/accuracies": 1.0, "rewards/chosen": -0.14759913086891174, "rewards/margins": 0.06709320843219757, "rewards/rejected": -0.21469233930110931, "step": 504 }, { "epoch": 0.7959022852639874, "grad_norm": 0.2029658555984497, "learning_rate": 3.0189816937852976e-06, "log_odds_chosen": 0.7895228266716003, "log_odds_ratio": -0.38117578625679016, "logits/chosen": -0.28800487518310547, "logits/rejected": -1.3646546602249146, "logps/chosen": -1.4555811882019043, "logps/rejected": -2.1068646907806396, "loss": 1.5037, "nll_loss": 1.465606689453125, "rewards/accuracies": 1.0, "rewards/chosen": -0.14555811882019043, "rewards/margins": 0.06512835621833801, "rewards/rejected": -0.21068646013736725, "step": 505 }, { "epoch": 0.797478329393223, "grad_norm": 0.18529628217220306, "learning_rate": 3.014247727739546e-06, "log_odds_chosen": 0.9279834628105164, "log_odds_ratio": -0.3403416872024536, "logits/chosen": -0.2272339165210724, "logits/rejected": -1.315860390663147, "logps/chosen": -1.4553430080413818, "logps/rejected": -2.2307143211364746, "loss": 1.5119, "nll_loss": 1.4778820276260376, "rewards/accuracies": 1.0, "rewards/chosen": -0.14553430676460266, "rewards/margins": 0.07753713428974152, "rewards/rejected": -0.22307144105434418, "step": 506 }, { "epoch": 0.7990543735224587, "grad_norm": 0.19259987771511078, "learning_rate": 3.009506099536653e-06, "log_odds_chosen": 0.67513507604599, "log_odds_ratio": -0.4143328070640564, "logits/chosen": -0.17503556609153748, "logits/rejected": -1.3449033498764038, "logps/chosen": -1.5120768547058105, "logps/rejected": -2.0729129314422607, "loss": 1.5515, "nll_loss": 1.510113000869751, "rewards/accuracies": 1.0, "rewards/chosen": -0.15120768547058105, "rewards/margins": 0.056083619594573975, "rewards/rejected": -0.20729129016399384, "step": 507 }, { "epoch": 0.8006304176516943, "grad_norm": 0.18156689405441284, "learning_rate": 3.0047568449973544e-06, "log_odds_chosen": 0.9034937620162964, "log_odds_ratio": -0.3491254448890686, "logits/chosen": -0.2639719247817993, "logits/rejected": -1.3573088645935059, "logps/chosen": -1.362982153892517, "logps/rejected": -2.101900339126587, "loss": 1.4246, "nll_loss": 1.3897302150726318, "rewards/accuracies": 1.0, "rewards/chosen": -0.13629822432994843, "rewards/margins": 0.07389181852340698, "rewards/rejected": -0.2101900279521942, "step": 508 }, { "epoch": 0.8022064617809299, "grad_norm": 0.19325587153434753, "learning_rate": 3e-06, "log_odds_chosen": 0.5838625431060791, "log_odds_ratio": -0.45025166869163513, "logits/chosen": -0.22336238622665405, "logits/rejected": -1.168703317642212, "logps/chosen": -1.4419538974761963, "logps/rejected": -1.9156994819641113, "loss": 1.5075, "nll_loss": 1.4625194072723389, "rewards/accuracies": 1.0, "rewards/chosen": -0.14419539272785187, "rewards/margins": 0.047374557703733444, "rewards/rejected": -0.1915699541568756, "step": 509 }, { "epoch": 0.8037825059101655, "grad_norm": 0.1798471063375473, "learning_rate": 2.9952356004802813e-06, "log_odds_chosen": 0.5265605449676514, "log_odds_ratio": -0.4680787920951843, "logits/chosen": -0.17943690717220306, "logits/rejected": -1.2646390199661255, "logps/chosen": -1.5416685342788696, "logps/rejected": -1.97737455368042, "loss": 1.5931, "nll_loss": 1.546276569366455, "rewards/accuracies": 1.0, "rewards/chosen": -0.15416686236858368, "rewards/margins": 0.04357059299945831, "rewards/rejected": -0.197737455368042, "step": 510 }, { "epoch": 0.8053585500394012, "grad_norm": 0.17970964312553406, "learning_rate": 2.9904636824309625e-06, "log_odds_chosen": 0.5488556623458862, "log_odds_ratio": -0.4573056697845459, "logits/chosen": -0.20830032229423523, "logits/rejected": -0.967231273651123, "logps/chosen": -1.4516727924346924, "logps/rejected": -1.8982791900634766, "loss": 1.517, "nll_loss": 1.4712245464324951, "rewards/accuracies": 1.0, "rewards/chosen": -0.145167276263237, "rewards/margins": 0.04466064274311066, "rewards/rejected": -0.18982790410518646, "step": 511 }, { "epoch": 0.8069345941686368, "grad_norm": 0.20201221108436584, "learning_rate": 2.985684281901603e-06, "log_odds_chosen": 0.5933498740196228, "log_odds_ratio": -0.44515395164489746, "logits/chosen": -0.1818694919347763, "logits/rejected": -1.109740972518921, "logps/chosen": -1.5751166343688965, "logps/rejected": -2.0681002140045166, "loss": 1.6217, "nll_loss": 1.5771454572677612, "rewards/accuracies": 1.0, "rewards/chosen": -0.15751168131828308, "rewards/margins": 0.04929835721850395, "rewards/rejected": -0.20681002736091614, "step": 512 }, { "epoch": 0.8085106382978723, "grad_norm": 0.19857411086559296, "learning_rate": 2.980897434998293e-06, "log_odds_chosen": 0.8362709283828735, "log_odds_ratio": -0.36665236949920654, "logits/chosen": -0.26140159368515015, "logits/rejected": -1.1785284280776978, "logps/chosen": -1.4174448251724243, "logps/rejected": -2.10254168510437, "loss": 1.4779, "nll_loss": 1.4412541389465332, "rewards/accuracies": 1.0, "rewards/chosen": -0.1417444795370102, "rewards/margins": 0.06850968301296234, "rewards/rejected": -0.21025416254997253, "step": 513 }, { "epoch": 0.8100866824271079, "grad_norm": 0.2068711668252945, "learning_rate": 2.976103177883374e-06, "log_odds_chosen": 0.6137781143188477, "log_odds_ratio": -0.44573453068733215, "logits/chosen": -0.20057058334350586, "logits/rejected": -1.0517152547836304, "logps/chosen": -1.5677953958511353, "logps/rejected": -2.0858044624328613, "loss": 1.622, "nll_loss": 1.5774500370025635, "rewards/accuracies": 1.0, "rewards/chosen": -0.15677955746650696, "rewards/margins": 0.05180090665817261, "rewards/rejected": -0.20858046412467957, "step": 514 }, { "epoch": 0.8116627265563435, "grad_norm": 0.1855117529630661, "learning_rate": 2.971301546775167e-06, "log_odds_chosen": 0.7630009651184082, "log_odds_ratio": -0.3854103684425354, "logits/chosen": -0.2538478374481201, "logits/rejected": -1.3625380992889404, "logps/chosen": -1.476394772529602, "logps/rejected": -2.106713056564331, "loss": 1.538, "nll_loss": 1.4994207620620728, "rewards/accuracies": 1.0, "rewards/chosen": -0.1476394683122635, "rewards/margins": 0.06303183734416962, "rewards/rejected": -0.2106713056564331, "step": 515 }, { "epoch": 0.8132387706855791, "grad_norm": 0.19397298991680145, "learning_rate": 2.966492577947704e-06, "log_odds_chosen": 0.6858218908309937, "log_odds_ratio": -0.411394327878952, "logits/chosen": -0.237920880317688, "logits/rejected": -1.3812801837921143, "logps/chosen": -1.584381341934204, "logps/rejected": -2.1624438762664795, "loss": 1.6503, "nll_loss": 1.6091707944869995, "rewards/accuracies": 1.0, "rewards/chosen": -0.15843814611434937, "rewards/margins": 0.05780624598264694, "rewards/rejected": -0.2162443995475769, "step": 516 }, { "epoch": 0.8148148148148148, "grad_norm": 0.184777170419693, "learning_rate": 2.9616763077304457e-06, "log_odds_chosen": 0.8034108877182007, "log_odds_ratio": -0.3745085597038269, "logits/chosen": -0.1650959551334381, "logits/rejected": -1.287904143333435, "logps/chosen": -1.512897253036499, "logps/rejected": -2.18365216255188, "loss": 1.5811, "nll_loss": 1.5436216592788696, "rewards/accuracies": 1.0, "rewards/chosen": -0.15128973126411438, "rewards/margins": 0.06707549095153809, "rewards/rejected": -0.21836523711681366, "step": 517 }, { "epoch": 0.8163908589440504, "grad_norm": 0.18303681910037994, "learning_rate": 2.956852772508014e-06, "log_odds_chosen": 0.6197808384895325, "log_odds_ratio": -0.44592535495758057, "logits/chosen": -0.17690420150756836, "logits/rejected": -0.9530738592147827, "logps/chosen": -1.4160338640213013, "logps/rejected": -1.9253976345062256, "loss": 1.477, "nll_loss": 1.4324525594711304, "rewards/accuracies": 1.0, "rewards/chosen": -0.14160338044166565, "rewards/margins": 0.050936371088027954, "rewards/rejected": -0.1925397515296936, "step": 518 }, { "epoch": 0.817966903073286, "grad_norm": 0.20301342010498047, "learning_rate": 2.952022008719914e-06, "log_odds_chosen": 0.4952373206615448, "log_odds_ratio": -0.4794991910457611, "logits/chosen": -0.17073026299476624, "logits/rejected": -0.9922412633895874, "logps/chosen": -1.5578581094741821, "logps/rejected": -1.967944622039795, "loss": 1.6136, "nll_loss": 1.5656940937042236, "rewards/accuracies": 1.0, "rewards/chosen": -0.15578582882881165, "rewards/margins": 0.041008636355400085, "rewards/rejected": -0.19679445028305054, "step": 519 }, { "epoch": 0.8195429472025216, "grad_norm": 0.18457186222076416, "learning_rate": 2.9471840528602573e-06, "log_odds_chosen": 0.6501960158348083, "log_odds_ratio": -0.4303664565086365, "logits/chosen": -0.3231818675994873, "logits/rejected": -1.0612550973892212, "logps/chosen": -1.5010353326797485, "logps/rejected": -2.0381951332092285, "loss": 1.5551, "nll_loss": 1.51203191280365, "rewards/accuracies": 1.0, "rewards/chosen": -0.15010352432727814, "rewards/margins": 0.053715985268354416, "rewards/rejected": -0.20381951332092285, "step": 520 }, { "epoch": 0.8211189913317573, "grad_norm": 0.2020522803068161, "learning_rate": 2.9423389414774914e-06, "log_odds_chosen": 0.7233219742774963, "log_odds_ratio": -0.4016837179660797, "logits/chosen": -0.2887951135635376, "logits/rejected": -1.3780571222305298, "logps/chosen": -1.4648807048797607, "logps/rejected": -2.063239812850952, "loss": 1.5239, "nll_loss": 1.4837396144866943, "rewards/accuracies": 1.0, "rewards/chosen": -0.14648807048797607, "rewards/margins": 0.05983591079711914, "rewards/rejected": -0.20632398128509521, "step": 521 }, { "epoch": 0.8226950354609929, "grad_norm": 0.1893676072359085, "learning_rate": 2.9374867111741174e-06, "log_odds_chosen": 0.7161247134208679, "log_odds_ratio": -0.40280526876449585, "logits/chosen": -0.17054280638694763, "logits/rejected": -1.3533788919448853, "logps/chosen": -1.4935866594314575, "logps/rejected": -2.0865988731384277, "loss": 1.5451, "nll_loss": 1.5048449039459229, "rewards/accuracies": 1.0, "rewards/chosen": -0.14935867488384247, "rewards/margins": 0.059301216155290604, "rewards/rejected": -0.20865988731384277, "step": 522 }, { "epoch": 0.8242710795902285, "grad_norm": 0.20184732973575592, "learning_rate": 2.9326273986064177e-06, "log_odds_chosen": 0.7819587588310242, "log_odds_ratio": -0.3818580210208893, "logits/chosen": -0.22239316999912262, "logits/rejected": -1.2218939065933228, "logps/chosen": -1.511752724647522, "logps/rejected": -2.164085865020752, "loss": 1.5754, "nll_loss": 1.5372285842895508, "rewards/accuracies": 1.0, "rewards/chosen": -0.15117527544498444, "rewards/margins": 0.06523331999778748, "rewards/rejected": -0.21640858054161072, "step": 523 }, { "epoch": 0.8258471237194641, "grad_norm": 0.20823800563812256, "learning_rate": 2.9277610404841787e-06, "log_odds_chosen": 0.6158959865570068, "log_odds_ratio": -0.43306228518486023, "logits/chosen": -0.26541417837142944, "logits/rejected": -1.056063175201416, "logps/chosen": -1.48806893825531, "logps/rejected": -1.9945056438446045, "loss": 1.5477, "nll_loss": 1.5044422149658203, "rewards/accuracies": 1.0, "rewards/chosen": -0.14880691468715668, "rewards/margins": 0.0506436824798584, "rewards/rejected": -0.19945058226585388, "step": 524 }, { "epoch": 0.8274231678486997, "grad_norm": 0.1981971710920334, "learning_rate": 2.9228876735704107e-06, "log_odds_chosen": 0.5651233196258545, "log_odds_ratio": -0.4532637894153595, "logits/chosen": -0.23406416177749634, "logits/rejected": -1.1756788492202759, "logps/chosen": -1.3835755586624146, "logps/rejected": -1.8319984674453735, "loss": 1.4524, "nll_loss": 1.40702486038208, "rewards/accuracies": 1.0, "rewards/chosen": -0.13835756480693817, "rewards/margins": 0.044842299073934555, "rewards/rejected": -0.18319985270500183, "step": 525 }, { "epoch": 0.8289992119779354, "grad_norm": 0.20701991021633148, "learning_rate": 2.9180073346810738e-06, "log_odds_chosen": 0.5971174240112305, "log_odds_ratio": -0.44226890802383423, "logits/chosen": -0.2447807490825653, "logits/rejected": -1.2080211639404297, "logps/chosen": -1.6501985788345337, "logps/rejected": -2.156846284866333, "loss": 1.6873, "nll_loss": 1.6431210041046143, "rewards/accuracies": 1.0, "rewards/chosen": -0.16501986980438232, "rewards/margins": 0.05066476762294769, "rewards/rejected": -0.21568462252616882, "step": 526 }, { "epoch": 0.830575256107171, "grad_norm": 0.1888459324836731, "learning_rate": 2.9131200606847957e-06, "log_odds_chosen": 0.6569997668266296, "log_odds_ratio": -0.4262959361076355, "logits/chosen": -0.1883625090122223, "logits/rejected": -1.2298457622528076, "logps/chosen": -1.518366813659668, "logps/rejected": -2.064444065093994, "loss": 1.5756, "nll_loss": 1.5329397916793823, "rewards/accuracies": 1.0, "rewards/chosen": -0.15183669328689575, "rewards/margins": 0.054607708007097244, "rewards/rejected": -0.2064443975687027, "step": 527 }, { "epoch": 0.8321513002364066, "grad_norm": 0.19664834439754486, "learning_rate": 2.9082258885025995e-06, "log_odds_chosen": 0.7270923256874084, "log_odds_ratio": -0.4016813337802887, "logits/chosen": -0.34029343724250793, "logits/rejected": -1.2684504985809326, "logps/chosen": -1.5683590173721313, "logps/rejected": -2.1794798374176025, "loss": 1.6142, "nll_loss": 1.5739948749542236, "rewards/accuracies": 1.0, "rewards/chosen": -0.1568359136581421, "rewards/margins": 0.06111207604408264, "rewards/rejected": -0.21794798970222473, "step": 528 }, { "epoch": 0.8337273443656422, "grad_norm": 0.2009236365556717, "learning_rate": 2.9033248551076167e-06, "log_odds_chosen": 0.5915142893791199, "log_odds_ratio": -0.4430904686450958, "logits/chosen": -0.19656020402908325, "logits/rejected": -1.0940089225769043, "logps/chosen": -1.6268205642700195, "logps/rejected": -2.125175714492798, "loss": 1.6809, "nll_loss": 1.6366134881973267, "rewards/accuracies": 1.0, "rewards/chosen": -0.16268205642700195, "rewards/margins": 0.04983552545309067, "rewards/rejected": -0.21251758933067322, "step": 529 }, { "epoch": 0.8353033884948778, "grad_norm": 0.19963550567626953, "learning_rate": 2.8984169975248138e-06, "log_odds_chosen": 0.7648955583572388, "log_odds_ratio": -0.38624370098114014, "logits/chosen": -0.23580212891101837, "logits/rejected": -1.1444816589355469, "logps/chosen": -1.4914474487304688, "logps/rejected": -2.1245110034942627, "loss": 1.558, "nll_loss": 1.5194083452224731, "rewards/accuracies": 1.0, "rewards/chosen": -0.1491447538137436, "rewards/margins": 0.06330635398626328, "rewards/rejected": -0.21245113015174866, "step": 530 }, { "epoch": 0.8368794326241135, "grad_norm": 0.18562045693397522, "learning_rate": 2.893502352830712e-06, "log_odds_chosen": 0.9332537055015564, "log_odds_ratio": -0.33736011385917664, "logits/chosen": -0.26882943511009216, "logits/rejected": -1.3374062776565552, "logps/chosen": -1.3956873416900635, "logps/rejected": -2.1667256355285645, "loss": 1.4433, "nll_loss": 1.409551739692688, "rewards/accuracies": 1.0, "rewards/chosen": -0.1395687609910965, "rewards/margins": 0.07710380852222443, "rewards/rejected": -0.21667256951332092, "step": 531 }, { "epoch": 0.8384554767533491, "grad_norm": 0.18261106312274933, "learning_rate": 2.888580958153103e-06, "log_odds_chosen": 0.7341670989990234, "log_odds_ratio": -0.39641058444976807, "logits/chosen": -0.27277103066444397, "logits/rejected": -1.2492622137069702, "logps/chosen": -1.444199562072754, "logps/rejected": -2.0478129386901855, "loss": 1.5137, "nll_loss": 1.4740546941757202, "rewards/accuracies": 1.0, "rewards/chosen": -0.14441995322704315, "rewards/margins": 0.06036132946610451, "rewards/rejected": -0.20478127896785736, "step": 532 }, { "epoch": 0.8400315208825847, "grad_norm": 0.21797138452529907, "learning_rate": 2.8836528506707733e-06, "log_odds_chosen": 0.8146068453788757, "log_odds_ratio": -0.37162038683891296, "logits/chosen": -0.2771569490432739, "logits/rejected": -1.206723928451538, "logps/chosen": -1.5148285627365112, "logps/rejected": -2.1994688510894775, "loss": 1.5648, "nll_loss": 1.5275968313217163, "rewards/accuracies": 1.0, "rewards/chosen": -0.15148288011550903, "rewards/margins": 0.06846403330564499, "rewards/rejected": -0.21994687616825104, "step": 533 }, { "epoch": 0.8416075650118203, "grad_norm": 0.18425217270851135, "learning_rate": 2.878718067613222e-06, "log_odds_chosen": 0.6143471598625183, "log_odds_ratio": -0.43677818775177, "logits/chosen": -0.22780869901180267, "logits/rejected": -1.186183214187622, "logps/chosen": -1.566484808921814, "logps/rejected": -2.0800623893737793, "loss": 1.6194, "nll_loss": 1.5757222175598145, "rewards/accuracies": 1.0, "rewards/chosen": -0.15664850175380707, "rewards/margins": 0.05135776102542877, "rewards/rejected": -0.20800624787807465, "step": 534 }, { "epoch": 0.843183609141056, "grad_norm": 0.20346811413764954, "learning_rate": 2.8737766462603763e-06, "log_odds_chosen": 0.757498562335968, "log_odds_ratio": -0.39962002635002136, "logits/chosen": -0.2183956801891327, "logits/rejected": -1.0292649269104004, "logps/chosen": -1.5032674074172974, "logps/rejected": -2.140925407409668, "loss": 1.5535, "nll_loss": 1.5135215520858765, "rewards/accuracies": 1.0, "rewards/chosen": -0.15032674372196198, "rewards/margins": 0.06376579403877258, "rewards/rejected": -0.21409253776073456, "step": 535 }, { "epoch": 0.8447596532702916, "grad_norm": 0.18760064244270325, "learning_rate": 2.8688286239423167e-06, "log_odds_chosen": 0.6436713337898254, "log_odds_ratio": -0.4325043261051178, "logits/chosen": -0.21478521823883057, "logits/rejected": -1.2076549530029297, "logps/chosen": -1.544159173965454, "logps/rejected": -2.0843026638031006, "loss": 1.5881, "nll_loss": 1.544856071472168, "rewards/accuracies": 1.0, "rewards/chosen": -0.15441590547561646, "rewards/margins": 0.05401436612010002, "rewards/rejected": -0.20843026041984558, "step": 536 }, { "epoch": 0.8463356973995272, "grad_norm": 0.18854977190494537, "learning_rate": 2.8638740380389862e-06, "log_odds_chosen": 0.7302975058555603, "log_odds_ratio": -0.39930465817451477, "logits/chosen": -0.2619031071662903, "logits/rejected": -1.3238346576690674, "logps/chosen": -1.4519808292388916, "logps/rejected": -2.0523629188537598, "loss": 1.5152, "nll_loss": 1.4752285480499268, "rewards/accuracies": 1.0, "rewards/chosen": -0.14519809186458588, "rewards/margins": 0.06003819406032562, "rewards/rejected": -0.2052362859249115, "step": 537 }, { "epoch": 0.8479117415287628, "grad_norm": 0.20675887167453766, "learning_rate": 2.8589129259799164e-06, "log_odds_chosen": 0.8192201852798462, "log_odds_ratio": -0.3788171708583832, "logits/chosen": -0.2638776898384094, "logits/rejected": -1.114193320274353, "logps/chosen": -1.5262513160705566, "logps/rejected": -2.216820240020752, "loss": 1.5718, "nll_loss": 1.5339343547821045, "rewards/accuracies": 1.0, "rewards/chosen": -0.15262514352798462, "rewards/margins": 0.0690569132566452, "rewards/rejected": -0.22168205678462982, "step": 538 }, { "epoch": 0.8494877856579984, "grad_norm": 0.1934932917356491, "learning_rate": 2.853945325243938e-06, "log_odds_chosen": 0.6721277832984924, "log_odds_ratio": -0.4139913022518158, "logits/chosen": -0.26754871010780334, "logits/rejected": -1.1163392066955566, "logps/chosen": -1.5401967763900757, "logps/rejected": -2.0996975898742676, "loss": 1.587, "nll_loss": 1.5455737113952637, "rewards/accuracies": 1.0, "rewards/chosen": -0.15401966869831085, "rewards/margins": 0.055950067937374115, "rewards/rejected": -0.20996975898742676, "step": 539 }, { "epoch": 0.851063829787234, "grad_norm": 1.8241883516311646, "learning_rate": 2.848971273358903e-06, "log_odds_chosen": 0.8321257829666138, "log_odds_ratio": -0.3642383813858032, "logits/chosen": -0.3249572813510895, "logits/rejected": -1.1298644542694092, "logps/chosen": -1.4748423099517822, "logps/rejected": -2.1679956912994385, "loss": 1.5084, "nll_loss": 1.472002387046814, "rewards/accuracies": 1.0, "rewards/chosen": -0.14748422801494598, "rewards/margins": 0.0693153589963913, "rewards/rejected": -0.2167995721101761, "step": 540 }, { "epoch": 0.8526398739164697, "grad_norm": 0.20980137586593628, "learning_rate": 2.843990807901397e-06, "log_odds_chosen": 0.6258372068405151, "log_odds_ratio": -0.4339146912097931, "logits/chosen": -0.2298620641231537, "logits/rejected": -0.9028375744819641, "logps/chosen": -1.5608817338943481, "logps/rejected": -2.083118438720703, "loss": 1.6158, "nll_loss": 1.5724146366119385, "rewards/accuracies": 1.0, "rewards/chosen": -0.15608817338943481, "rewards/margins": 0.052223674952983856, "rewards/rejected": -0.20831184089183807, "step": 541 }, { "epoch": 0.8542159180457053, "grad_norm": 0.20256595313549042, "learning_rate": 2.839003966496458e-06, "log_odds_chosen": 0.8089483976364136, "log_odds_ratio": -0.38441747426986694, "logits/chosen": -0.14752182364463806, "logits/rejected": -1.2424575090408325, "logps/chosen": -1.526769757270813, "logps/rejected": -2.205688953399658, "loss": 1.5839, "nll_loss": 1.5454857349395752, "rewards/accuracies": 1.0, "rewards/chosen": -0.15267698466777802, "rewards/margins": 0.067891925573349, "rewards/rejected": -0.22056889533996582, "step": 542 }, { "epoch": 0.8557919621749409, "grad_norm": 0.18522503972053528, "learning_rate": 2.8340107868172905e-06, "log_odds_chosen": 0.7727735638618469, "log_odds_ratio": -0.3861379623413086, "logits/chosen": -0.26055678725242615, "logits/rejected": -1.236580491065979, "logps/chosen": -1.369378685951233, "logps/rejected": -1.999671459197998, "loss": 1.428, "nll_loss": 1.3893834352493286, "rewards/accuracies": 1.0, "rewards/chosen": -0.13693787157535553, "rewards/margins": 0.06302928924560547, "rewards/rejected": -0.1999671459197998, "step": 543 }, { "epoch": 0.8573680063041765, "grad_norm": 0.18243786692619324, "learning_rate": 2.8290113065849826e-06, "log_odds_chosen": 0.8464156985282898, "log_odds_ratio": -0.36339235305786133, "logits/chosen": -0.22719234228134155, "logits/rejected": -1.3813166618347168, "logps/chosen": -1.4748934507369995, "logps/rejected": -2.179795742034912, "loss": 1.5313, "nll_loss": 1.4949686527252197, "rewards/accuracies": 1.0, "rewards/chosen": -0.14748935401439667, "rewards/margins": 0.07049023360013962, "rewards/rejected": -0.2179795652627945, "step": 544 }, { "epoch": 0.8589440504334122, "grad_norm": 0.20985384285449982, "learning_rate": 2.8240055635682193e-06, "log_odds_chosen": 0.971196174621582, "log_odds_ratio": -0.33422982692718506, "logits/chosen": -0.2657662630081177, "logits/rejected": -1.2640752792358398, "logps/chosen": -1.4483425617218018, "logps/rejected": -2.2618367671966553, "loss": 1.4861, "nll_loss": 1.4526761770248413, "rewards/accuracies": 1.0, "rewards/chosen": -0.1448342651128769, "rewards/margins": 0.08134942501783371, "rewards/rejected": -0.2261836677789688, "step": 545 }, { "epoch": 0.8605200945626478, "grad_norm": 0.19999882578849792, "learning_rate": 2.8189935955829973e-06, "log_odds_chosen": 0.8773307204246521, "log_odds_ratio": -0.35459497570991516, "logits/chosen": -0.25905877351760864, "logits/rejected": -1.07989501953125, "logps/chosen": -1.5043911933898926, "logps/rejected": -2.2429165840148926, "loss": 1.5494, "nll_loss": 1.5139833688735962, "rewards/accuracies": 1.0, "rewards/chosen": -0.15043911337852478, "rewards/margins": 0.0738525539636612, "rewards/rejected": -0.22429165244102478, "step": 546 }, { "epoch": 0.8620961386918834, "grad_norm": 0.19614462554454803, "learning_rate": 2.813975440492342e-06, "log_odds_chosen": 0.7755805253982544, "log_odds_ratio": -0.387691855430603, "logits/chosen": -0.28992438316345215, "logits/rejected": -1.2587565183639526, "logps/chosen": -1.501508116722107, "logps/rejected": -2.146627902984619, "loss": 1.5625, "nll_loss": 1.5237247943878174, "rewards/accuracies": 1.0, "rewards/chosen": -0.1501508206129074, "rewards/margins": 0.0645119845867157, "rewards/rejected": -0.2146628051996231, "step": 547 }, { "epoch": 0.863672182821119, "grad_norm": 0.22099299728870392, "learning_rate": 2.8089511362060182e-06, "log_odds_chosen": 0.6048762798309326, "log_odds_ratio": -0.43883612751960754, "logits/chosen": -0.3003285229206085, "logits/rejected": -1.1491910219192505, "logps/chosen": -1.6390552520751953, "logps/rejected": -2.1528773307800293, "loss": 1.6681, "nll_loss": 1.624230980873108, "rewards/accuracies": 1.0, "rewards/chosen": -0.163905531167984, "rewards/margins": 0.05138222128152847, "rewards/rejected": -0.21528775990009308, "step": 548 }, { "epoch": 0.8652482269503546, "grad_norm": 0.21315144002437592, "learning_rate": 2.803920720680244e-06, "log_odds_chosen": 0.7596380710601807, "log_odds_ratio": -0.3877980411052704, "logits/chosen": -0.2508217394351959, "logits/rejected": -1.285441279411316, "logps/chosen": -1.497770071029663, "logps/rejected": -2.128038167953491, "loss": 1.5514, "nll_loss": 1.5126042366027832, "rewards/accuracies": 1.0, "rewards/chosen": -0.14977701008319855, "rewards/margins": 0.0630268082022667, "rewards/rejected": -0.21280381083488464, "step": 549 }, { "epoch": 0.8668242710795903, "grad_norm": 0.20469704270362854, "learning_rate": 2.7988842319174075e-06, "log_odds_chosen": 0.8056274056434631, "log_odds_ratio": -0.37798529863357544, "logits/chosen": -0.3726116418838501, "logits/rejected": -1.3260782957077026, "logps/chosen": -1.5661325454711914, "logps/rejected": -2.247863292694092, "loss": 1.603, "nll_loss": 1.5651633739471436, "rewards/accuracies": 1.0, "rewards/chosen": -0.15661326050758362, "rewards/margins": 0.06817308068275452, "rewards/rejected": -0.22478632628917694, "step": 550 }, { "epoch": 0.8684003152088259, "grad_norm": 0.1801159828901291, "learning_rate": 2.7938417079657743e-06, "log_odds_chosen": 0.8063299655914307, "log_odds_ratio": -0.3746653199195862, "logits/chosen": -0.3093138635158539, "logits/rejected": -1.38007652759552, "logps/chosen": -1.4094195365905762, "logps/rejected": -2.07232403755188, "loss": 1.4644, "nll_loss": 1.426937222480774, "rewards/accuracies": 1.0, "rewards/chosen": -0.14094194769859314, "rewards/margins": 0.06629044562578201, "rewards/rejected": -0.20723240077495575, "step": 551 }, { "epoch": 0.8699763593380615, "grad_norm": 0.18272462487220764, "learning_rate": 2.7887931869192047e-06, "log_odds_chosen": 0.7081438302993774, "log_odds_ratio": -0.40384554862976074, "logits/chosen": -0.3084084391593933, "logits/rejected": -1.2090731859207153, "logps/chosen": -1.4903384447097778, "logps/rejected": -2.0772554874420166, "loss": 1.5403, "nll_loss": 1.4998891353607178, "rewards/accuracies": 1.0, "rewards/chosen": -0.14903384447097778, "rewards/margins": 0.05869169905781746, "rewards/rejected": -0.20772555470466614, "step": 552 }, { "epoch": 0.8715524034672971, "grad_norm": 0.19639180600643158, "learning_rate": 2.783738706916865e-06, "log_odds_chosen": 0.847165584564209, "log_odds_ratio": -0.3647855520248413, "logits/chosen": -0.28008660674095154, "logits/rejected": -1.3739715814590454, "logps/chosen": -1.526125431060791, "logps/rejected": -2.23525333404541, "loss": 1.566, "nll_loss": 1.5295474529266357, "rewards/accuracies": 1.0, "rewards/chosen": -0.152612566947937, "rewards/margins": 0.07091278582811356, "rewards/rejected": -0.22352533042430878, "step": 553 }, { "epoch": 0.8731284475965327, "grad_norm": 0.19625112414360046, "learning_rate": 2.7786783061429356e-06, "log_odds_chosen": 0.6983753442764282, "log_odds_ratio": -0.40515345335006714, "logits/chosen": -0.34866851568222046, "logits/rejected": -1.0440998077392578, "logps/chosen": -1.4631657600402832, "logps/rejected": -2.0370101928710938, "loss": 1.4995, "nll_loss": 1.459031105041504, "rewards/accuracies": 1.0, "rewards/chosen": -0.14631657302379608, "rewards/margins": 0.05738444626331329, "rewards/rejected": -0.20370101928710938, "step": 554 }, { "epoch": 0.8747044917257684, "grad_norm": 0.19040443003177643, "learning_rate": 2.7736120228263287e-06, "log_odds_chosen": 0.8665981292724609, "log_odds_ratio": -0.35975712537765503, "logits/chosen": -0.26248475909233093, "logits/rejected": -1.2065774202346802, "logps/chosen": -1.4502075910568237, "logps/rejected": -2.1716394424438477, "loss": 1.5014, "nll_loss": 1.4654600620269775, "rewards/accuracies": 1.0, "rewards/chosen": -0.1450207531452179, "rewards/margins": 0.07214318215847015, "rewards/rejected": -0.21716395020484924, "step": 555 }, { "epoch": 0.876280535855004, "grad_norm": 0.17877773940563202, "learning_rate": 2.768539895240394e-06, "log_odds_chosen": 0.7663182616233826, "log_odds_ratio": -0.3947910964488983, "logits/chosen": -0.2686600387096405, "logits/rejected": -1.2789851427078247, "logps/chosen": -1.4189510345458984, "logps/rejected": -2.044057607650757, "loss": 1.4688, "nll_loss": 1.4293001890182495, "rewards/accuracies": 1.0, "rewards/chosen": -0.1418951004743576, "rewards/margins": 0.0625106617808342, "rewards/rejected": -0.2044057548046112, "step": 556 }, { "epoch": 0.8778565799842396, "grad_norm": 0.19811755418777466, "learning_rate": 2.763461961702633e-06, "log_odds_chosen": 0.796636164188385, "log_odds_ratio": -0.3811954855918884, "logits/chosen": -0.30146193504333496, "logits/rejected": -1.3820403814315796, "logps/chosen": -1.5293595790863037, "logps/rejected": -2.1986844539642334, "loss": 1.5534, "nll_loss": 1.515283465385437, "rewards/accuracies": 1.0, "rewards/chosen": -0.1529359519481659, "rewards/margins": 0.06693252176046371, "rewards/rejected": -0.219868466258049, "step": 557 }, { "epoch": 0.8794326241134752, "grad_norm": 0.19135232269763947, "learning_rate": 2.758378260574409e-06, "log_odds_chosen": 0.721428632736206, "log_odds_ratio": -0.39849963784217834, "logits/chosen": -0.28630053997039795, "logits/rejected": -1.2492754459381104, "logps/chosen": -1.5141559839248657, "logps/rejected": -2.1138787269592285, "loss": 1.5738, "nll_loss": 1.533995509147644, "rewards/accuracies": 1.0, "rewards/chosen": -0.15141557157039642, "rewards/margins": 0.05997228994965553, "rewards/rejected": -0.21138787269592285, "step": 558 }, { "epoch": 0.8810086682427108, "grad_norm": 0.18617720901966095, "learning_rate": 2.753288830260655e-06, "log_odds_chosen": 0.9730425477027893, "log_odds_ratio": -0.3354828953742981, "logits/chosen": -0.271095335483551, "logits/rejected": -1.4628548622131348, "logps/chosen": -1.5113807916641235, "logps/rejected": -2.336665391921997, "loss": 1.5499, "nll_loss": 1.516315221786499, "rewards/accuracies": 1.0, "rewards/chosen": -0.1511380821466446, "rewards/margins": 0.08252845704555511, "rewards/rejected": -0.2336665242910385, "step": 559 }, { "epoch": 0.8825847123719465, "grad_norm": 0.19468240439891815, "learning_rate": 2.7481937092095866e-06, "log_odds_chosen": 0.5405460596084595, "log_odds_ratio": -0.46741983294487, "logits/chosen": -0.22300955653190613, "logits/rejected": -1.1993566751480103, "logps/chosen": -1.5644901990890503, "logps/rejected": -2.020087957382202, "loss": 1.5992, "nll_loss": 1.5524640083312988, "rewards/accuracies": 1.0, "rewards/chosen": -0.15644903481006622, "rewards/margins": 0.045559756457805634, "rewards/rejected": -0.20200878381729126, "step": 560 }, { "epoch": 0.8841607565011821, "grad_norm": 0.1859883815050125, "learning_rate": 2.7430929359124086e-06, "log_odds_chosen": 0.6600494980812073, "log_odds_ratio": -0.42028987407684326, "logits/chosen": -0.2863801121711731, "logits/rejected": -0.9506353139877319, "logps/chosen": -1.410943627357483, "logps/rejected": -1.9470175504684448, "loss": 1.4807, "nll_loss": 1.4386284351348877, "rewards/accuracies": 1.0, "rewards/chosen": -0.141094371676445, "rewards/margins": 0.053607381880283356, "rewards/rejected": -0.19470174610614777, "step": 561 }, { "epoch": 0.8857368006304176, "grad_norm": 0.20268099009990692, "learning_rate": 2.737986548903029e-06, "log_odds_chosen": 1.0620503425598145, "log_odds_ratio": -0.3001374900341034, "logits/chosen": -0.4660804271697998, "logits/rejected": -1.3468518257141113, "logps/chosen": -1.3900612592697144, "logps/rejected": -2.2740349769592285, "loss": 1.448, "nll_loss": 1.4180266857147217, "rewards/accuracies": 1.0, "rewards/chosen": -0.1390061229467392, "rewards/margins": 0.08839737623929977, "rewards/rejected": -0.22740350663661957, "step": 562 }, { "epoch": 0.8873128447596532, "grad_norm": 0.19392478466033936, "learning_rate": 2.7328745867577604e-06, "log_odds_chosen": 0.6651920080184937, "log_odds_ratio": -0.4168819785118103, "logits/chosen": -0.2711959779262543, "logits/rejected": -1.2294808626174927, "logps/chosen": -1.545215368270874, "logps/rejected": -2.1002511978149414, "loss": 1.6007, "nll_loss": 1.5590217113494873, "rewards/accuracies": 1.0, "rewards/chosen": -0.15452155470848083, "rewards/margins": 0.055503591895103455, "rewards/rejected": -0.2100251317024231, "step": 563 }, { "epoch": 0.8888888888888888, "grad_norm": 0.20134912431240082, "learning_rate": 2.727757088095037e-06, "log_odds_chosen": 0.8869103193283081, "log_odds_ratio": -0.3482133150100708, "logits/chosen": -0.27464380860328674, "logits/rejected": -1.2576364278793335, "logps/chosen": -1.535430908203125, "logps/rejected": -2.284431219100952, "loss": 1.5755, "nll_loss": 1.5406620502471924, "rewards/accuracies": 1.0, "rewards/chosen": -0.15354308485984802, "rewards/margins": 0.07490003854036331, "rewards/rejected": -0.22844311594963074, "step": 564 }, { "epoch": 0.8904649330181245, "grad_norm": 0.19171306490898132, "learning_rate": 2.7226340915751156e-06, "log_odds_chosen": 0.7525812387466431, "log_odds_ratio": -0.3937699794769287, "logits/chosen": -0.26140308380126953, "logits/rejected": -1.071201205253601, "logps/chosen": -1.5400971174240112, "logps/rejected": -2.1722888946533203, "loss": 1.5952, "nll_loss": 1.5558509826660156, "rewards/accuracies": 1.0, "rewards/chosen": -0.15400969982147217, "rewards/margins": 0.06321915239095688, "rewards/rejected": -0.21722885966300964, "step": 565 }, { "epoch": 0.8920409771473601, "grad_norm": 0.1916339099407196, "learning_rate": 2.7175056358997887e-06, "log_odds_chosen": 0.6478245258331299, "log_odds_ratio": -0.43004921078681946, "logits/chosen": -0.18337209522724152, "logits/rejected": -1.3477016687393188, "logps/chosen": -1.5133692026138306, "logps/rejected": -2.0519278049468994, "loss": 1.5588, "nll_loss": 1.5158239603042603, "rewards/accuracies": 1.0, "rewards/chosen": -0.1513369381427765, "rewards/margins": 0.05385584756731987, "rewards/rejected": -0.20519277453422546, "step": 566 }, { "epoch": 0.8936170212765957, "grad_norm": 0.19772540032863617, "learning_rate": 2.7123717598120892e-06, "log_odds_chosen": 0.8925088047981262, "log_odds_ratio": -0.35744139552116394, "logits/chosen": -0.3828020691871643, "logits/rejected": -1.2839354276657104, "logps/chosen": -1.4594718217849731, "logps/rejected": -2.210188627243042, "loss": 1.5056, "nll_loss": 1.4698803424835205, "rewards/accuracies": 1.0, "rewards/chosen": -0.1459471881389618, "rewards/margins": 0.07507168501615524, "rewards/rejected": -0.22101886570453644, "step": 567 }, { "epoch": 0.8951930654058313, "grad_norm": 0.18732528388500214, "learning_rate": 2.7072325020959985e-06, "log_odds_chosen": 0.8665810227394104, "log_odds_ratio": -0.35743066668510437, "logits/chosen": -0.20690904557704926, "logits/rejected": -1.0224376916885376, "logps/chosen": -1.361433744430542, "logps/rejected": -2.068009376525879, "loss": 1.4292, "nll_loss": 1.3934705257415771, "rewards/accuracies": 1.0, "rewards/chosen": -0.13614337146282196, "rewards/margins": 0.07065757364034653, "rewards/rejected": -0.2068009376525879, "step": 568 }, { "epoch": 0.8967691095350669, "grad_norm": 0.22601142525672913, "learning_rate": 2.702087901576155e-06, "log_odds_chosen": 0.8955207467079163, "log_odds_ratio": -0.345583975315094, "logits/chosen": -0.16073869168758392, "logits/rejected": -1.188117265701294, "logps/chosen": -1.5008628368377686, "logps/rejected": -2.2542965412139893, "loss": 1.5314, "nll_loss": 1.4968734979629517, "rewards/accuracies": 1.0, "rewards/chosen": -0.15008629858493805, "rewards/margins": 0.07534339278936386, "rewards/rejected": -0.2254296839237213, "step": 569 }, { "epoch": 0.8983451536643026, "grad_norm": 0.1993560940027237, "learning_rate": 2.6969379971175576e-06, "log_odds_chosen": 0.84205162525177, "log_odds_ratio": -0.3631736934185028, "logits/chosen": -0.2777113616466522, "logits/rejected": -1.3776825666427612, "logps/chosen": -1.5447388887405396, "logps/rejected": -2.2568342685699463, "loss": 1.5938, "nll_loss": 1.5574393272399902, "rewards/accuracies": 1.0, "rewards/chosen": -0.1544739156961441, "rewards/margins": 0.07120953500270844, "rewards/rejected": -0.22568343579769135, "step": 570 }, { "epoch": 0.8999211977935382, "grad_norm": 0.20275285840034485, "learning_rate": 2.6917828276252745e-06, "log_odds_chosen": 0.9280831217765808, "log_odds_ratio": -0.3389042019844055, "logits/chosen": -0.2606070041656494, "logits/rejected": -1.232492208480835, "logps/chosen": -1.456214427947998, "logps/rejected": -2.229267120361328, "loss": 1.5004, "nll_loss": 1.4664775133132935, "rewards/accuracies": 1.0, "rewards/chosen": -0.1456214338541031, "rewards/margins": 0.07730529457330704, "rewards/rejected": -0.22292673587799072, "step": 571 }, { "epoch": 0.9014972419227738, "grad_norm": 0.1964682787656784, "learning_rate": 2.686622432044149e-06, "log_odds_chosen": 0.7510740756988525, "log_odds_ratio": -0.39093631505966187, "logits/chosen": -0.2591177225112915, "logits/rejected": -1.2564524412155151, "logps/chosen": -1.4025276899337769, "logps/rejected": -2.0166800022125244, "loss": 1.463, "nll_loss": 1.4239420890808105, "rewards/accuracies": 1.0, "rewards/chosen": -0.14025276899337769, "rewards/margins": 0.061415232717990875, "rewards/rejected": -0.20166799426078796, "step": 572 }, { "epoch": 0.9030732860520094, "grad_norm": 0.1793847680091858, "learning_rate": 2.681456849358505e-06, "log_odds_chosen": 0.8104532957077026, "log_odds_ratio": -0.3840184509754181, "logits/chosen": -0.34616154432296753, "logits/rejected": -1.3318803310394287, "logps/chosen": -1.4472362995147705, "logps/rejected": -2.120614528656006, "loss": 1.4976, "nll_loss": 1.4592398405075073, "rewards/accuracies": 1.0, "rewards/chosen": -0.14472362399101257, "rewards/margins": 0.06733782589435577, "rewards/rejected": -0.21206147968769073, "step": 573 }, { "epoch": 0.904649330181245, "grad_norm": 0.18348178267478943, "learning_rate": 2.6762861185918528e-06, "log_odds_chosen": 1.182490348815918, "log_odds_ratio": -0.29787686467170715, "logits/chosen": -0.3689255714416504, "logits/rejected": -1.3221290111541748, "logps/chosen": -1.3581138849258423, "logps/rejected": -2.352473735809326, "loss": 1.4083, "nll_loss": 1.378502368927002, "rewards/accuracies": 1.0, "rewards/chosen": -0.13581138849258423, "rewards/margins": 0.09943599998950958, "rewards/rejected": -0.235247403383255, "step": 574 }, { "epoch": 0.9062253743104807, "grad_norm": 0.20440001785755157, "learning_rate": 2.6711102788065934e-06, "log_odds_chosen": 0.9215613603591919, "log_odds_ratio": -0.34810373187065125, "logits/chosen": -0.2908223867416382, "logits/rejected": -1.2476472854614258, "logps/chosen": -1.5398906469345093, "logps/rejected": -2.327841281890869, "loss": 1.5889, "nll_loss": 1.5541001558303833, "rewards/accuracies": 1.0, "rewards/chosen": -0.1539890617132187, "rewards/margins": 0.07879504561424255, "rewards/rejected": -0.23278410732746124, "step": 575 }, { "epoch": 0.9078014184397163, "grad_norm": 0.210665762424469, "learning_rate": 2.665929369103724e-06, "log_odds_chosen": 0.6061529517173767, "log_odds_ratio": -0.44262316823005676, "logits/chosen": -0.2395247519016266, "logits/rejected": -0.9772445559501648, "logps/chosen": -1.5880591869354248, "logps/rejected": -2.0922789573669434, "loss": 1.6249, "nll_loss": 1.580668330192566, "rewards/accuracies": 1.0, "rewards/chosen": -0.1588059365749359, "rewards/margins": 0.050421975553035736, "rewards/rejected": -0.20922791957855225, "step": 576 }, { "epoch": 0.9093774625689519, "grad_norm": 0.19389891624450684, "learning_rate": 2.6607434286225427e-06, "log_odds_chosen": 0.8256452083587646, "log_odds_ratio": -0.3714563846588135, "logits/chosen": -0.2425473928451538, "logits/rejected": -1.2359596490859985, "logps/chosen": -1.4768764972686768, "logps/rejected": -2.1682870388031006, "loss": 1.5302, "nll_loss": 1.493094801902771, "rewards/accuracies": 1.0, "rewards/chosen": -0.1476876437664032, "rewards/margins": 0.06914106011390686, "rewards/rejected": -0.21682868897914886, "step": 577 }, { "epoch": 0.9109535066981875, "grad_norm": 0.21000936627388, "learning_rate": 2.6555524965403533e-06, "log_odds_chosen": 1.0111608505249023, "log_odds_ratio": -0.31302177906036377, "logits/chosen": -0.3497047424316406, "logits/rejected": -1.2955108880996704, "logps/chosen": -1.4033231735229492, "logps/rejected": -2.244431734085083, "loss": 1.462, "nll_loss": 1.4307024478912354, "rewards/accuracies": 1.0, "rewards/chosen": -0.14033232629299164, "rewards/margins": 0.08411087095737457, "rewards/rejected": -0.22444318234920502, "step": 578 }, { "epoch": 0.9125295508274232, "grad_norm": 0.18803854286670685, "learning_rate": 2.6503566120721683e-06, "log_odds_chosen": 0.982942521572113, "log_odds_ratio": -0.3207738697528839, "logits/chosen": -0.28768619894981384, "logits/rejected": -1.302692174911499, "logps/chosen": -1.4879767894744873, "logps/rejected": -2.316041946411133, "loss": 1.5368, "nll_loss": 1.5047372579574585, "rewards/accuracies": 1.0, "rewards/chosen": -0.1487976759672165, "rewards/margins": 0.0828065350651741, "rewards/rejected": -0.2316042184829712, "step": 579 }, { "epoch": 0.9141055949566588, "grad_norm": 0.18727664649486542, "learning_rate": 2.6451558144704126e-06, "log_odds_chosen": 0.9223566055297852, "log_odds_ratio": -0.3405495584011078, "logits/chosen": -0.40365201234817505, "logits/rejected": -1.2617590427398682, "logps/chosen": -1.4504380226135254, "logps/rejected": -2.2192819118499756, "loss": 1.4889, "nll_loss": 1.4548324346542358, "rewards/accuracies": 1.0, "rewards/chosen": -0.14504380524158478, "rewards/margins": 0.07688435912132263, "rewards/rejected": -0.2219281643629074, "step": 580 }, { "epoch": 0.9156816390858944, "grad_norm": 0.1898116171360016, "learning_rate": 2.6399501430246286e-06, "log_odds_chosen": 0.8261522650718689, "log_odds_ratio": -0.36379167437553406, "logits/chosen": -0.2834393084049225, "logits/rejected": -1.21560537815094, "logps/chosen": -1.4255337715148926, "logps/rejected": -2.1033647060394287, "loss": 1.469, "nll_loss": 1.4326211214065552, "rewards/accuracies": 1.0, "rewards/chosen": -0.14255337417125702, "rewards/margins": 0.06778310239315033, "rewards/rejected": -0.21033647656440735, "step": 581 }, { "epoch": 0.91725768321513, "grad_norm": 0.19628842175006866, "learning_rate": 2.634739637061177e-06, "log_odds_chosen": 0.6971665620803833, "log_odds_ratio": -0.40822935104370117, "logits/chosen": -0.30961042642593384, "logits/rejected": -1.1811354160308838, "logps/chosen": -1.4803777933120728, "logps/rejected": -2.0561718940734863, "loss": 1.5319, "nll_loss": 1.4910557270050049, "rewards/accuracies": 1.0, "rewards/chosen": -0.14803776144981384, "rewards/margins": 0.05757942050695419, "rewards/rejected": -0.20561718940734863, "step": 582 }, { "epoch": 0.9188337273443656, "grad_norm": 0.19777609407901764, "learning_rate": 2.6295243359429423e-06, "log_odds_chosen": 0.9527621269226074, "log_odds_ratio": -0.33162370324134827, "logits/chosen": -0.2951660752296448, "logits/rejected": -1.293708086013794, "logps/chosen": -1.4976961612701416, "logps/rejected": -2.3028030395507812, "loss": 1.5405, "nll_loss": 1.5073657035827637, "rewards/accuracies": 1.0, "rewards/chosen": -0.1497696191072464, "rewards/margins": 0.0805106908082962, "rewards/rejected": -0.2302803099155426, "step": 583 }, { "epoch": 0.9204097714736013, "grad_norm": 0.19010689854621887, "learning_rate": 2.624304279069033e-06, "log_odds_chosen": 0.954330563545227, "log_odds_ratio": -0.3304038941860199, "logits/chosen": -0.262991726398468, "logits/rejected": -1.220430612564087, "logps/chosen": -1.4275193214416504, "logps/rejected": -2.21905517578125, "loss": 1.4945, "nll_loss": 1.461504578590393, "rewards/accuracies": 1.0, "rewards/chosen": -0.14275193214416504, "rewards/margins": 0.07915358990430832, "rewards/rejected": -0.22190551459789276, "step": 584 }, { "epoch": 0.9219858156028369, "grad_norm": 0.23942263424396515, "learning_rate": 2.6190795058744854e-06, "log_odds_chosen": 0.9014804363250732, "log_odds_ratio": -0.35250112414360046, "logits/chosen": -0.32867133617401123, "logits/rejected": -1.2367173433303833, "logps/chosen": -1.51943838596344, "logps/rejected": -2.2841804027557373, "loss": 1.5637, "nll_loss": 1.5284459590911865, "rewards/accuracies": 1.0, "rewards/chosen": -0.1519438624382019, "rewards/margins": 0.07647417485713959, "rewards/rejected": -0.2284180372953415, "step": 585 }, { "epoch": 0.9235618597320725, "grad_norm": 0.21306301653385162, "learning_rate": 2.6138500558299664e-06, "log_odds_chosen": 0.7792106866836548, "log_odds_ratio": -0.3837544918060303, "logits/chosen": -0.30056121945381165, "logits/rejected": -1.131338357925415, "logps/chosen": -1.4999313354492188, "logps/rejected": -2.1518428325653076, "loss": 1.5432, "nll_loss": 1.504861831665039, "rewards/accuracies": 1.0, "rewards/chosen": -0.1499931514263153, "rewards/margins": 0.06519114971160889, "rewards/rejected": -0.215184286236763, "step": 586 }, { "epoch": 0.9251379038613081, "grad_norm": 0.20388370752334595, "learning_rate": 2.608615968441472e-06, "log_odds_chosen": 0.8730366230010986, "log_odds_ratio": -0.35827386379241943, "logits/chosen": -0.3451239764690399, "logits/rejected": -1.2428234815597534, "logps/chosen": -1.541178584098816, "logps/rejected": -2.2841219902038574, "loss": 1.5874, "nll_loss": 1.551594614982605, "rewards/accuracies": 1.0, "rewards/chosen": -0.1541178673505783, "rewards/margins": 0.0742943286895752, "rewards/rejected": -0.2284121960401535, "step": 587 }, { "epoch": 0.9267139479905437, "grad_norm": 0.19729666411876678, "learning_rate": 2.6033772832500333e-06, "log_odds_chosen": 0.8212614059448242, "log_odds_ratio": -0.3690330982208252, "logits/chosen": -0.3706430196762085, "logits/rejected": -1.1535141468048096, "logps/chosen": -1.5374579429626465, "logps/rejected": -2.2309741973876953, "loss": 1.5836, "nll_loss": 1.5466666221618652, "rewards/accuracies": 1.0, "rewards/chosen": -0.15374578535556793, "rewards/margins": 0.06935164332389832, "rewards/rejected": -0.22309744358062744, "step": 588 }, { "epoch": 0.9282899921197794, "grad_norm": 0.18803627789020538, "learning_rate": 2.5981340398314146e-06, "log_odds_chosen": 0.8717142343521118, "log_odds_ratio": -0.35949403047561646, "logits/chosen": -0.3949166536331177, "logits/rejected": -1.278796672821045, "logps/chosen": -1.4226505756378174, "logps/rejected": -2.1424102783203125, "loss": 1.4741, "nll_loss": 1.4381307363510132, "rewards/accuracies": 1.0, "rewards/chosen": -0.14226505160331726, "rewards/margins": 0.07197597622871399, "rewards/rejected": -0.21424104273319244, "step": 589 }, { "epoch": 0.929866036249015, "grad_norm": 0.200876384973526, "learning_rate": 2.592886277795815e-06, "log_odds_chosen": 0.8502908945083618, "log_odds_ratio": -0.36463305354118347, "logits/chosen": -0.34665432572364807, "logits/rejected": -1.574115514755249, "logps/chosen": -1.545432209968567, "logps/rejected": -2.2671260833740234, "loss": 1.6, "nll_loss": 1.5635088682174683, "rewards/accuracies": 1.0, "rewards/chosen": -0.1545432060956955, "rewards/margins": 0.07216940075159073, "rewards/rejected": -0.22671261429786682, "step": 590 }, { "epoch": 0.9314420803782506, "grad_norm": 0.20108841359615326, "learning_rate": 2.5876340367875706e-06, "log_odds_chosen": 1.0153826475143433, "log_odds_ratio": -0.31579023599624634, "logits/chosen": -0.34771737456321716, "logits/rejected": -1.486975908279419, "logps/chosen": -1.4971017837524414, "logps/rejected": -2.3579812049865723, "loss": 1.5475, "nll_loss": 1.5159598588943481, "rewards/accuracies": 1.0, "rewards/chosen": -0.14971019327640533, "rewards/margins": 0.08608793467283249, "rewards/rejected": -0.23579810559749603, "step": 591 }, { "epoch": 0.9330181245074862, "grad_norm": 0.18956692516803741, "learning_rate": 2.582377356484853e-06, "log_odds_chosen": 0.837839663028717, "log_odds_ratio": -0.3628246784210205, "logits/chosen": -0.37122786045074463, "logits/rejected": -1.2203317880630493, "logps/chosen": -1.3939440250396729, "logps/rejected": -2.0772705078125, "loss": 1.4546, "nll_loss": 1.4183294773101807, "rewards/accuracies": 1.0, "rewards/chosen": -0.13939440250396729, "rewards/margins": 0.06833265721797943, "rewards/rejected": -0.20772705972194672, "step": 592 }, { "epoch": 0.9345941686367218, "grad_norm": 0.19498895108699799, "learning_rate": 2.577116276599373e-06, "log_odds_chosen": 0.7960597276687622, "log_odds_ratio": -0.37910643219947815, "logits/chosen": -0.34351596236228943, "logits/rejected": -1.0250897407531738, "logps/chosen": -1.4233973026275635, "logps/rejected": -2.075965404510498, "loss": 1.4954, "nll_loss": 1.4574451446533203, "rewards/accuracies": 1.0, "rewards/chosen": -0.14233973622322083, "rewards/margins": 0.06525681167840958, "rewards/rejected": -0.2075965404510498, "step": 593 }, { "epoch": 0.9361702127659575, "grad_norm": 0.19851456582546234, "learning_rate": 2.5718508368760737e-06, "log_odds_chosen": 0.8462440371513367, "log_odds_ratio": -0.3784284293651581, "logits/chosen": -0.3738991916179657, "logits/rejected": -1.237477421760559, "logps/chosen": -1.4328763484954834, "logps/rejected": -2.136536121368408, "loss": 1.4951, "nll_loss": 1.4572863578796387, "rewards/accuracies": 1.0, "rewards/chosen": -0.14328764379024506, "rewards/margins": 0.07036596536636353, "rewards/rejected": -0.21365360915660858, "step": 594 }, { "epoch": 0.9377462568951931, "grad_norm": 0.18769197165966034, "learning_rate": 2.5665810770928386e-06, "log_odds_chosen": 0.9634343385696411, "log_odds_ratio": -0.3288772702217102, "logits/chosen": -0.3851248621940613, "logits/rejected": -1.3970948457717896, "logps/chosen": -1.4221051931381226, "logps/rejected": -2.2232911586761475, "loss": 1.4897, "nll_loss": 1.4568034410476685, "rewards/accuracies": 1.0, "rewards/chosen": -0.14221051335334778, "rewards/margins": 0.08011861145496368, "rewards/rejected": -0.22232912480831146, "step": 595 }, { "epoch": 0.9393223010244287, "grad_norm": 0.20603147149085999, "learning_rate": 2.5613070370601863e-06, "log_odds_chosen": 0.9289364218711853, "log_odds_ratio": -0.335774302482605, "logits/chosen": -0.31814393401145935, "logits/rejected": -1.2499024868011475, "logps/chosen": -1.4785757064819336, "logps/rejected": -2.2590699195861816, "loss": 1.5156, "nll_loss": 1.4820557832717896, "rewards/accuracies": 1.0, "rewards/chosen": -0.14785756170749664, "rewards/margins": 0.07804940640926361, "rewards/rejected": -0.22590698301792145, "step": 596 }, { "epoch": 0.9408983451536643, "grad_norm": 0.20211490988731384, "learning_rate": 2.556028756620969e-06, "log_odds_chosen": 0.856348991394043, "log_odds_ratio": -0.3590443730354309, "logits/chosen": -0.33854496479034424, "logits/rejected": -1.3129234313964844, "logps/chosen": -1.5026987791061401, "logps/rejected": -2.2209367752075195, "loss": 1.5329, "nll_loss": 1.4969648122787476, "rewards/accuracies": 1.0, "rewards/chosen": -0.15026986598968506, "rewards/margins": 0.07182382047176361, "rewards/rejected": -0.22209370136260986, "step": 597 }, { "epoch": 0.9424743892829, "grad_norm": 0.19991546869277954, "learning_rate": 2.5507462756500747e-06, "log_odds_chosen": 0.9981376528739929, "log_odds_ratio": -0.33000436425209045, "logits/chosen": -0.23800677061080933, "logits/rejected": -1.2048419713974, "logps/chosen": -1.5049721002578735, "logps/rejected": -2.351780652999878, "loss": 1.5433, "nll_loss": 1.5103166103363037, "rewards/accuracies": 1.0, "rewards/chosen": -0.1504971981048584, "rewards/margins": 0.08468086272478104, "rewards/rejected": -0.23517806828022003, "step": 598 }, { "epoch": 0.9440504334121356, "grad_norm": 0.2179379016160965, "learning_rate": 2.5454596340541245e-06, "log_odds_chosen": 0.8718122839927673, "log_odds_ratio": -0.3630530834197998, "logits/chosen": -0.4387681186199188, "logits/rejected": -1.3172277212142944, "logps/chosen": -1.5390511751174927, "logps/rejected": -2.2787020206451416, "loss": 1.5792, "nll_loss": 1.5428839921951294, "rewards/accuracies": 1.0, "rewards/chosen": -0.15390512347221375, "rewards/margins": 0.07396508753299713, "rewards/rejected": -0.22787019610404968, "step": 599 }, { "epoch": 0.9456264775413712, "grad_norm": 0.19863368570804596, "learning_rate": 2.5401688717711702e-06, "log_odds_chosen": 0.7998033165931702, "log_odds_ratio": -0.3759251832962036, "logits/chosen": -0.3329557776451111, "logits/rejected": -1.2800992727279663, "logps/chosen": -1.4736813306808472, "logps/rejected": -2.1393368244171143, "loss": 1.5281, "nll_loss": 1.4904803037643433, "rewards/accuracies": 1.0, "rewards/chosen": -0.14736813306808472, "rewards/margins": 0.06656555086374283, "rewards/rejected": -0.21393369138240814, "step": 600 }, { "epoch": 0.9472025216706068, "grad_norm": 0.19201233983039856, "learning_rate": 2.5348740287703937e-06, "log_odds_chosen": 1.0029901266098022, "log_odds_ratio": -0.3156769871711731, "logits/chosen": -0.34973573684692383, "logits/rejected": -1.1656208038330078, "logps/chosen": -1.3755903244018555, "logps/rejected": -2.201836347579956, "loss": 1.4415, "nll_loss": 1.4099783897399902, "rewards/accuracies": 1.0, "rewards/chosen": -0.13755902647972107, "rewards/margins": 0.08262462913990021, "rewards/rejected": -0.22018365561962128, "step": 601 }, { "epoch": 0.9487785657998424, "grad_norm": 0.2115398347377777, "learning_rate": 2.529575145051805e-06, "log_odds_chosen": 0.9399322867393494, "log_odds_ratio": -0.3423665761947632, "logits/chosen": -0.37769484519958496, "logits/rejected": -1.224244236946106, "logps/chosen": -1.4493341445922852, "logps/rejected": -2.236774444580078, "loss": 1.5193, "nll_loss": 1.485029697418213, "rewards/accuracies": 1.0, "rewards/chosen": -0.14493340253829956, "rewards/margins": 0.07874403148889542, "rewards/rejected": -0.22367745637893677, "step": 602 }, { "epoch": 0.950354609929078, "grad_norm": 0.19805483520030975, "learning_rate": 2.52427226064594e-06, "log_odds_chosen": 0.987168550491333, "log_odds_ratio": -0.3237678110599518, "logits/chosen": -0.44117000699043274, "logits/rejected": -1.3285408020019531, "logps/chosen": -1.4890186786651611, "logps/rejected": -2.3211069107055664, "loss": 1.5198, "nll_loss": 1.487433910369873, "rewards/accuracies": 1.0, "rewards/chosen": -0.14890186488628387, "rewards/margins": 0.0832088366150856, "rewards/rejected": -0.23211072385311127, "step": 603 }, { "epoch": 0.9519306540583137, "grad_norm": 0.18601743876934052, "learning_rate": 2.518965415613557e-06, "log_odds_chosen": 1.137446641921997, "log_odds_ratio": -0.2949294149875641, "logits/chosen": -0.37900564074516296, "logits/rejected": -1.2488760948181152, "logps/chosen": -1.3822598457336426, "logps/rejected": -2.332451105117798, "loss": 1.4432, "nll_loss": 1.4137518405914307, "rewards/accuracies": 1.0, "rewards/chosen": -0.1382259875535965, "rewards/margins": 0.0950191393494606, "rewards/rejected": -0.2332451194524765, "step": 604 }, { "epoch": 0.9535066981875493, "grad_norm": 0.19979554414749146, "learning_rate": 2.513654650045336e-06, "log_odds_chosen": 0.9225171208381653, "log_odds_ratio": -0.3389977216720581, "logits/chosen": -0.38119933009147644, "logits/rejected": -1.5180516242980957, "logps/chosen": -1.4174445867538452, "logps/rejected": -2.1828620433807373, "loss": 1.4656, "nll_loss": 1.4316853284835815, "rewards/accuracies": 1.0, "rewards/chosen": -0.1417444497346878, "rewards/margins": 0.07654174417257309, "rewards/rejected": -0.2182862013578415, "step": 605 }, { "epoch": 0.9550827423167849, "grad_norm": 0.1927998960018158, "learning_rate": 2.508340004061574e-06, "log_odds_chosen": 1.0575662851333618, "log_odds_ratio": -0.3081508278846741, "logits/chosen": -0.3997096121311188, "logits/rejected": -1.0927016735076904, "logps/chosen": -1.4310603141784668, "logps/rejected": -2.318833112716675, "loss": 1.4849, "nll_loss": 1.454042673110962, "rewards/accuracies": 1.0, "rewards/chosen": -0.14310602843761444, "rewards/margins": 0.08877727389335632, "rewards/rejected": -0.23188331723213196, "step": 606 }, { "epoch": 0.9566587864460205, "grad_norm": 0.19888868927955627, "learning_rate": 2.503021517811882e-06, "log_odds_chosen": 0.9682137370109558, "log_odds_ratio": -0.3253340423107147, "logits/chosen": -0.4364151358604431, "logits/rejected": -1.3818237781524658, "logps/chosen": -1.5068976879119873, "logps/rejected": -2.3259003162384033, "loss": 1.5557, "nll_loss": 1.5231314897537231, "rewards/accuracies": 1.0, "rewards/chosen": -0.1506897658109665, "rewards/margins": 0.08190026879310608, "rewards/rejected": -0.23259004950523376, "step": 607 }, { "epoch": 0.9582348305752562, "grad_norm": 0.19276244938373566, "learning_rate": 2.497699231474885e-06, "log_odds_chosen": 1.0809459686279297, "log_odds_ratio": -0.3042202889919281, "logits/chosen": -0.4018801152706146, "logits/rejected": -1.3968017101287842, "logps/chosen": -1.400070071220398, "logps/rejected": -2.3094584941864014, "loss": 1.4403, "nll_loss": 1.4098955392837524, "rewards/accuracies": 1.0, "rewards/chosen": -0.14000701904296875, "rewards/margins": 0.09093883633613586, "rewards/rejected": -0.23094584047794342, "step": 608 }, { "epoch": 0.9598108747044918, "grad_norm": 0.20758643746376038, "learning_rate": 2.4923731852579127e-06, "log_odds_chosen": 1.003240704536438, "log_odds_ratio": -0.32600492238998413, "logits/chosen": -0.41749560832977295, "logits/rejected": -1.0546338558197021, "logps/chosen": -1.4264339208602905, "logps/rejected": -2.2706849575042725, "loss": 1.4808, "nll_loss": 1.448164701461792, "rewards/accuracies": 1.0, "rewards/chosen": -0.14264339208602905, "rewards/margins": 0.08442509919404984, "rewards/rejected": -0.22706851363182068, "step": 609 }, { "epoch": 0.9613869188337274, "grad_norm": 0.19250842928886414, "learning_rate": 2.4870434193967017e-06, "log_odds_chosen": 1.1638853549957275, "log_odds_ratio": -0.28101596236228943, "logits/chosen": -0.3525513708591461, "logits/rejected": -1.4325730800628662, "logps/chosen": -1.4275925159454346, "logps/rejected": -2.4097812175750732, "loss": 1.457, "nll_loss": 1.4288923740386963, "rewards/accuracies": 1.0, "rewards/chosen": -0.1427592635154724, "rewards/margins": 0.0982188731431961, "rewards/rejected": -0.24097813665866852, "step": 610 }, { "epoch": 0.9629629629629629, "grad_norm": 0.21151085197925568, "learning_rate": 2.481709974155086e-06, "log_odds_chosen": 1.1808512210845947, "log_odds_ratio": -0.28556308150291443, "logits/chosen": -0.3375462591648102, "logits/rejected": -1.2200756072998047, "logps/chosen": -1.4453498125076294, "logps/rejected": -2.448586940765381, "loss": 1.4846, "nll_loss": 1.456089973449707, "rewards/accuracies": 1.0, "rewards/chosen": -0.14453496038913727, "rewards/margins": 0.10032372176647186, "rewards/rejected": -0.24485869705677032, "step": 611 }, { "epoch": 0.9645390070921985, "grad_norm": 0.22557130455970764, "learning_rate": 2.4763728898246983e-06, "log_odds_chosen": 1.2929182052612305, "log_odds_ratio": -0.251591295003891, "logits/chosen": -0.23864808678627014, "logits/rejected": -1.2782857418060303, "logps/chosen": -1.3319588899612427, "logps/rejected": -2.397937774658203, "loss": 1.3652, "nll_loss": 1.340023398399353, "rewards/accuracies": 1.0, "rewards/chosen": -0.1331958919763565, "rewards/margins": 0.1065979078412056, "rewards/rejected": -0.2397937923669815, "step": 612 }, { "epoch": 0.9661150512214342, "grad_norm": 0.18006299436092377, "learning_rate": 2.4710322067246607e-06, "log_odds_chosen": 0.9720537066459656, "log_odds_ratio": -0.3259618282318115, "logits/chosen": -0.30720773339271545, "logits/rejected": -1.2487545013427734, "logps/chosen": -1.461777687072754, "logps/rejected": -2.275256395339966, "loss": 1.4946, "nll_loss": 1.4619730710983276, "rewards/accuracies": 1.0, "rewards/chosen": -0.1461777687072754, "rewards/margins": 0.08134786784648895, "rewards/rejected": -0.22752563655376434, "step": 613 }, { "epoch": 0.9676910953506698, "grad_norm": 0.17892460525035858, "learning_rate": 2.465687965201283e-06, "log_odds_chosen": 1.0376200675964355, "log_odds_ratio": -0.31072306632995605, "logits/chosen": -0.35413774847984314, "logits/rejected": -1.2665860652923584, "logps/chosen": -1.3643009662628174, "logps/rejected": -2.222691535949707, "loss": 1.4241, "nll_loss": 1.3930423259735107, "rewards/accuracies": 1.0, "rewards/chosen": -0.13643009960651398, "rewards/margins": 0.08583903312683105, "rewards/rejected": -0.22226913273334503, "step": 614 }, { "epoch": 0.9692671394799054, "grad_norm": 0.19563743472099304, "learning_rate": 2.4603402056277577e-06, "log_odds_chosen": 0.9878734350204468, "log_odds_ratio": -0.32292407751083374, "logits/chosen": -0.4882684051990509, "logits/rejected": -1.2250369787216187, "logps/chosen": -1.4257678985595703, "logps/rejected": -2.247316837310791, "loss": 1.4822, "nll_loss": 1.4498778581619263, "rewards/accuracies": 1.0, "rewards/chosen": -0.14257679879665375, "rewards/margins": 0.08215488493442535, "rewards/rejected": -0.2247316688299179, "step": 615 }, { "epoch": 0.970843183609141, "grad_norm": 0.22117209434509277, "learning_rate": 2.454988968403854e-06, "log_odds_chosen": 0.7999638319015503, "log_odds_ratio": -0.3741395175457001, "logits/chosen": -0.3372759222984314, "logits/rejected": -1.3164011240005493, "logps/chosen": -1.479479193687439, "logps/rejected": -2.144162654876709, "loss": 1.5122, "nll_loss": 1.4747817516326904, "rewards/accuracies": 1.0, "rewards/chosen": -0.14794793725013733, "rewards/margins": 0.06646835803985596, "rewards/rejected": -0.2144162803888321, "step": 616 }, { "epoch": 0.9724192277383766, "grad_norm": 0.195278599858284, "learning_rate": 2.4496342939556133e-06, "log_odds_chosen": 1.010886549949646, "log_odds_ratio": -0.31771498918533325, "logits/chosen": -0.2681885361671448, "logits/rejected": -1.2319934368133545, "logps/chosen": -1.4390006065368652, "logps/rejected": -2.285721778869629, "loss": 1.4874, "nll_loss": 1.4556019306182861, "rewards/accuracies": 1.0, "rewards/chosen": -0.143900066614151, "rewards/margins": 0.08467209339141846, "rewards/rejected": -0.22857216000556946, "step": 617 }, { "epoch": 0.9739952718676123, "grad_norm": 0.19526216387748718, "learning_rate": 2.444276222735043e-06, "log_odds_chosen": 1.008836030960083, "log_odds_ratio": -0.3135777711868286, "logits/chosen": -0.4565156102180481, "logits/rejected": -1.3782358169555664, "logps/chosen": -1.3998832702636719, "logps/rejected": -2.2375426292419434, "loss": 1.4704, "nll_loss": 1.4390738010406494, "rewards/accuracies": 1.0, "rewards/chosen": -0.13998833298683167, "rewards/margins": 0.0837659239768982, "rewards/rejected": -0.22375425696372986, "step": 618 }, { "epoch": 0.9755713159968479, "grad_norm": 0.19684012234210968, "learning_rate": 2.4389147952198127e-06, "log_odds_chosen": 1.0080350637435913, "log_odds_ratio": -0.315501868724823, "logits/chosen": -0.46619752049446106, "logits/rejected": -1.2587003707885742, "logps/chosen": -1.443904161453247, "logps/rejected": -2.2875938415527344, "loss": 1.4711, "nll_loss": 1.4395172595977783, "rewards/accuracies": 1.0, "rewards/chosen": -0.14439040422439575, "rewards/margins": 0.08436896651983261, "rewards/rejected": -0.22875937819480896, "step": 619 }, { "epoch": 0.9771473601260835, "grad_norm": 0.18595977127552032, "learning_rate": 2.433550051912946e-06, "log_odds_chosen": 1.1093815565109253, "log_odds_ratio": -0.29099801182746887, "logits/chosen": -0.4006117284297943, "logits/rejected": -1.4136881828308105, "logps/chosen": -1.3357937335968018, "logps/rejected": -2.2475531101226807, "loss": 1.3835, "nll_loss": 1.3544049263000488, "rewards/accuracies": 1.0, "rewards/chosen": -0.13357935845851898, "rewards/margins": 0.09117594361305237, "rewards/rejected": -0.22475531697273254, "step": 620 }, { "epoch": 0.9787234042553191, "grad_norm": 0.21153992414474487, "learning_rate": 2.4281820333425167e-06, "log_odds_chosen": 1.0128014087677002, "log_odds_ratio": -0.3216671049594879, "logits/chosen": -0.39767417311668396, "logits/rejected": -1.2548097372055054, "logps/chosen": -1.5447418689727783, "logps/rejected": -2.411409854888916, "loss": 1.5688, "nll_loss": 1.5366218090057373, "rewards/accuracies": 1.0, "rewards/chosen": -0.1544741690158844, "rewards/margins": 0.08666681498289108, "rewards/rejected": -0.24114099144935608, "step": 621 }, { "epoch": 0.9802994483845547, "grad_norm": 0.20115748047828674, "learning_rate": 2.42281078006134e-06, "log_odds_chosen": 1.2284798622131348, "log_odds_ratio": -0.25919806957244873, "logits/chosen": -0.4328358769416809, "logits/rejected": -1.545608639717102, "logps/chosen": -1.5295909643173218, "logps/rejected": -2.5866165161132812, "loss": 1.5635, "nll_loss": 1.5375945568084717, "rewards/accuracies": 1.0, "rewards/chosen": -0.15295909345149994, "rewards/margins": 0.10570257902145386, "rewards/rejected": -0.2586616575717926, "step": 622 }, { "epoch": 0.9818754925137904, "grad_norm": 0.2268103063106537, "learning_rate": 2.4174363326466703e-06, "log_odds_chosen": 0.9978740811347961, "log_odds_ratio": -0.3261033892631531, "logits/chosen": -0.43390950560569763, "logits/rejected": -1.5303535461425781, "logps/chosen": -1.4596524238586426, "logps/rejected": -2.306164503097534, "loss": 1.5012, "nll_loss": 1.4685872793197632, "rewards/accuracies": 1.0, "rewards/chosen": -0.14596523344516754, "rewards/margins": 0.08465121686458588, "rewards/rejected": -0.23061645030975342, "step": 623 }, { "epoch": 0.983451536643026, "grad_norm": 0.2076517939567566, "learning_rate": 2.41205873169989e-06, "log_odds_chosen": 0.9155547618865967, "log_odds_ratio": -0.3434157967567444, "logits/chosen": -0.3254348635673523, "logits/rejected": -1.0746028423309326, "logps/chosen": -1.4498534202575684, "logps/rejected": -2.212179660797119, "loss": 1.4989, "nll_loss": 1.4645111560821533, "rewards/accuracies": 1.0, "rewards/chosen": -0.1449853479862213, "rewards/margins": 0.07623264193534851, "rewards/rejected": -0.22121797502040863, "step": 624 }, { "epoch": 0.9850275807722616, "grad_norm": 0.18755526840686798, "learning_rate": 2.4066780178462058e-06, "log_odds_chosen": 1.093489646911621, "log_odds_ratio": -0.30363929271698, "logits/chosen": -0.3011537492275238, "logits/rejected": -1.2025160789489746, "logps/chosen": -1.4793384075164795, "logps/rejected": -2.4127535820007324, "loss": 1.5154, "nll_loss": 1.4850668907165527, "rewards/accuracies": 1.0, "rewards/chosen": -0.14793384075164795, "rewards/margins": 0.09334155172109604, "rewards/rejected": -0.2412753701210022, "step": 625 }, { "epoch": 0.9866036249014972, "grad_norm": 0.19269263744354248, "learning_rate": 2.40129423173434e-06, "log_odds_chosen": 1.0034220218658447, "log_odds_ratio": -0.3211997449398041, "logits/chosen": -0.35841816663742065, "logits/rejected": -1.3340610265731812, "logps/chosen": -1.4337854385375977, "logps/rejected": -2.2714056968688965, "loss": 1.4894, "nll_loss": 1.4572951793670654, "rewards/accuracies": 1.0, "rewards/chosen": -0.14337855577468872, "rewards/margins": 0.08376200497150421, "rewards/rejected": -0.22714056074619293, "step": 626 }, { "epoch": 0.9881796690307328, "grad_norm": 0.20670926570892334, "learning_rate": 2.3959074140362274e-06, "log_odds_chosen": 1.0268993377685547, "log_odds_ratio": -0.3118314743041992, "logits/chosen": -0.4891017973423004, "logits/rejected": -1.2871733903884888, "logps/chosen": -1.4102925062179565, "logps/rejected": -2.26517391204834, "loss": 1.4519, "nll_loss": 1.4207662343978882, "rewards/accuracies": 1.0, "rewards/chosen": -0.1410292536020279, "rewards/margins": 0.08548815548419952, "rewards/rejected": -0.22651740908622742, "step": 627 }, { "epoch": 0.9897557131599685, "grad_norm": 0.22095555067062378, "learning_rate": 2.3905176054467007e-06, "log_odds_chosen": 1.2333874702453613, "log_odds_ratio": -0.2664095163345337, "logits/chosen": -0.47534334659576416, "logits/rejected": -1.3395469188690186, "logps/chosen": -1.39500892162323, "logps/rejected": -2.438161611557007, "loss": 1.4493, "nll_loss": 1.4226487874984741, "rewards/accuracies": 1.0, "rewards/chosen": -0.13950088620185852, "rewards/margins": 0.10431528836488724, "rewards/rejected": -0.24381616711616516, "step": 628 }, { "epoch": 0.9913317572892041, "grad_norm": 0.21743904054164886, "learning_rate": 2.3851248466831905e-06, "log_odds_chosen": 1.0874625444412231, "log_odds_ratio": -0.3082122206687927, "logits/chosen": -0.3590807020664215, "logits/rejected": -1.2268338203430176, "logps/chosen": -1.397624135017395, "logps/rejected": -2.311945676803589, "loss": 1.4329, "nll_loss": 1.4020652770996094, "rewards/accuracies": 1.0, "rewards/chosen": -0.13976241648197174, "rewards/margins": 0.09143215417861938, "rewards/rejected": -0.23119457066059113, "step": 629 }, { "epoch": 0.9929078014184397, "grad_norm": 0.2329930067062378, "learning_rate": 2.379729178485412e-06, "log_odds_chosen": 0.9880236387252808, "log_odds_ratio": -0.3203542232513428, "logits/chosen": -0.39962947368621826, "logits/rejected": -1.357712984085083, "logps/chosen": -1.5019173622131348, "logps/rejected": -2.340007781982422, "loss": 1.5417, "nll_loss": 1.5096969604492188, "rewards/accuracies": 1.0, "rewards/chosen": -0.1501917541027069, "rewards/margins": 0.08380904793739319, "rewards/rejected": -0.2340007871389389, "step": 630 }, { "epoch": 0.9944838455476753, "grad_norm": 0.2334175556898117, "learning_rate": 2.3743306416150636e-06, "log_odds_chosen": 1.0434774160385132, "log_odds_ratio": -0.3047768473625183, "logits/chosen": -0.3637744188308716, "logits/rejected": -1.3631768226623535, "logps/chosen": -1.4570072889328003, "logps/rejected": -2.332566738128662, "loss": 1.4981, "nll_loss": 1.467656135559082, "rewards/accuracies": 1.0, "rewards/chosen": -0.14570073783397675, "rewards/margins": 0.08755593746900558, "rewards/rejected": -0.23325668275356293, "step": 631 }, { "epoch": 0.996059889676911, "grad_norm": 0.22371363639831543, "learning_rate": 2.368929276855512e-06, "log_odds_chosen": 0.8800406455993652, "log_odds_ratio": -0.348661869764328, "logits/chosen": -0.3670199513435364, "logits/rejected": -1.2765319347381592, "logps/chosen": -1.5614867210388184, "logps/rejected": -2.3103034496307373, "loss": 1.5793, "nll_loss": 1.5443942546844482, "rewards/accuracies": 1.0, "rewards/chosen": -0.15614868700504303, "rewards/margins": 0.0748816654086113, "rewards/rejected": -0.23103034496307373, "step": 632 }, { "epoch": 0.9976359338061466, "grad_norm": 0.20589956641197205, "learning_rate": 2.363525125011487e-06, "log_odds_chosen": 0.9788938164710999, "log_odds_ratio": -0.33115172386169434, "logits/chosen": -0.4048473834991455, "logits/rejected": -1.3237719535827637, "logps/chosen": -1.5760952234268188, "logps/rejected": -2.4175729751586914, "loss": 1.6168, "nll_loss": 1.5837026834487915, "rewards/accuracies": 1.0, "rewards/chosen": -0.15760952234268188, "rewards/margins": 0.08414778858423233, "rewards/rejected": -0.24175730347633362, "step": 633 }, { "epoch": 0.9992119779353822, "grad_norm": 0.19094868004322052, "learning_rate": 2.3581182269087755e-06, "log_odds_chosen": 1.389491081237793, "log_odds_ratio": -0.26652011275291443, "logits/chosen": -0.37861159443855286, "logits/rejected": -1.4625403881072998, "logps/chosen": -1.385037899017334, "logps/rejected": -2.5498814582824707, "loss": 1.4362, "nll_loss": 1.4095302820205688, "rewards/accuracies": 1.0, "rewards/chosen": -0.1385038048028946, "rewards/margins": 0.11648434400558472, "rewards/rejected": -0.2549881637096405, "step": 634 }, { "epoch": 1.0, "grad_norm": 0.24677427113056183, "learning_rate": 2.3527086233939097e-06, "log_odds_chosen": 1.143390417098999, "log_odds_ratio": -0.28132104873657227, "logits/chosen": -0.38930320739746094, "logits/rejected": -1.5918021202087402, "logps/chosen": -1.4692234992980957, "logps/rejected": -2.4415197372436523, "loss": 1.5151, "nll_loss": 1.4869499206542969, "rewards/accuracies": 1.0, "rewards/chosen": -0.14692234992980957, "rewards/margins": 0.09722961485385895, "rewards/rejected": -0.2441519796848297, "step": 635 }, { "epoch": 1.0015760441292356, "grad_norm": 0.22635005414485931, "learning_rate": 2.347296355333861e-06, "log_odds_chosen": 1.0286297798156738, "log_odds_ratio": -0.30935704708099365, "logits/chosen": -0.48233625292778015, "logits/rejected": -1.2280995845794678, "logps/chosen": -1.4753402471542358, "logps/rejected": -2.3454596996307373, "loss": 1.5161, "nll_loss": 1.4851717948913574, "rewards/accuracies": 1.0, "rewards/chosen": -0.14753402769565582, "rewards/margins": 0.08701195567846298, "rewards/rejected": -0.2345459908246994, "step": 636 }, { "epoch": 1.0031520882584712, "grad_norm": 0.20084941387176514, "learning_rate": 2.3418814636157283e-06, "log_odds_chosen": 0.9146707653999329, "log_odds_ratio": -0.3415667712688446, "logits/chosen": -0.4143337607383728, "logits/rejected": -1.3947221040725708, "logps/chosen": -1.5152298212051392, "logps/rejected": -2.2845675945281982, "loss": 1.5458, "nll_loss": 1.5116223096847534, "rewards/accuracies": 1.0, "rewards/chosen": -0.15152299404144287, "rewards/margins": 0.07693378627300262, "rewards/rejected": -0.2284567654132843, "step": 637 }, { "epoch": 1.0047281323877069, "grad_norm": 0.1994769275188446, "learning_rate": 2.336463989146434e-06, "log_odds_chosen": 1.0365430116653442, "log_odds_ratio": -0.311045378446579, "logits/chosen": -0.4344879686832428, "logits/rejected": -1.1822190284729004, "logps/chosen": -1.4108353853225708, "logps/rejected": -2.2783117294311523, "loss": 1.4477, "nll_loss": 1.4165457487106323, "rewards/accuracies": 1.0, "rewards/chosen": -0.14108355343341827, "rewards/margins": 0.08674763143062592, "rewards/rejected": -0.2278311848640442, "step": 638 }, { "epoch": 1.0063041765169425, "grad_norm": 0.19928711652755737, "learning_rate": 2.3310439728524074e-06, "log_odds_chosen": 1.0843204259872437, "log_odds_ratio": -0.2937708795070648, "logits/chosen": -0.45674261450767517, "logits/rejected": -1.2861055135726929, "logps/chosen": -1.4331440925598145, "logps/rejected": -2.343839168548584, "loss": 1.4823, "nll_loss": 1.4529598951339722, "rewards/accuracies": 1.0, "rewards/chosen": -0.1433144211769104, "rewards/margins": 0.09106951951980591, "rewards/rejected": -0.23438391089439392, "step": 639 }, { "epoch": 1.007880220646178, "grad_norm": 0.19886772334575653, "learning_rate": 2.325621455679286e-06, "log_odds_chosen": 1.2139259576797485, "log_odds_ratio": -0.2702604830265045, "logits/chosen": -0.37458765506744385, "logits/rejected": -1.2667701244354248, "logps/chosen": -1.41457998752594, "logps/rejected": -2.4413113594055176, "loss": 1.449, "nll_loss": 1.4219905138015747, "rewards/accuracies": 1.0, "rewards/chosen": -0.14145800471305847, "rewards/margins": 0.10267314314842224, "rewards/rejected": -0.2441311478614807, "step": 640 }, { "epoch": 1.0094562647754137, "grad_norm": 0.19491668045520782, "learning_rate": 2.3201964785915953e-06, "log_odds_chosen": 0.919166088104248, "log_odds_ratio": -0.33753031492233276, "logits/chosen": -0.4605422019958496, "logits/rejected": -1.1579219102859497, "logps/chosen": -1.4780750274658203, "logps/rejected": -2.248640298843384, "loss": 1.5201, "nll_loss": 1.486314058303833, "rewards/accuracies": 1.0, "rewards/chosen": -0.1478075087070465, "rewards/margins": 0.07705653458833694, "rewards/rejected": -0.22486402094364166, "step": 641 }, { "epoch": 1.0110323089046493, "grad_norm": 0.21980389952659607, "learning_rate": 2.3147690825724457e-06, "log_odds_chosen": 0.859315037727356, "log_odds_ratio": -0.35994282364845276, "logits/chosen": -0.4084959626197815, "logits/rejected": -1.2679862976074219, "logps/chosen": -1.5238345861434937, "logps/rejected": -2.2519776821136475, "loss": 1.5578, "nll_loss": 1.5217998027801514, "rewards/accuracies": 1.0, "rewards/chosen": -0.1523834466934204, "rewards/margins": 0.07281431555747986, "rewards/rejected": -0.22519777715206146, "step": 642 }, { "epoch": 1.012608353033885, "grad_norm": 0.19822460412979126, "learning_rate": 2.3093393086232226e-06, "log_odds_chosen": 1.149411916732788, "log_odds_ratio": -0.27911868691444397, "logits/chosen": -0.3707679808139801, "logits/rejected": -1.2237002849578857, "logps/chosen": -1.3857028484344482, "logps/rejected": -2.3470818996429443, "loss": 1.4383, "nll_loss": 1.4103491306304932, "rewards/accuracies": 1.0, "rewards/chosen": -0.13857027888298035, "rewards/margins": 0.09613790363073349, "rewards/rejected": -0.23470818996429443, "step": 643 }, { "epoch": 1.0141843971631206, "grad_norm": 0.18972162902355194, "learning_rate": 2.3039071977632748e-06, "log_odds_chosen": 1.1466765403747559, "log_odds_ratio": -0.2902736961841583, "logits/chosen": -0.46120280027389526, "logits/rejected": -1.5394573211669922, "logps/chosen": -1.4458996057510376, "logps/rejected": -2.4237475395202637, "loss": 1.4804, "nll_loss": 1.4513590335845947, "rewards/accuracies": 1.0, "rewards/chosen": -0.14458994567394257, "rewards/margins": 0.09778478741645813, "rewards/rejected": -0.2423747479915619, "step": 644 }, { "epoch": 1.0157604412923562, "grad_norm": 0.1946789175271988, "learning_rate": 2.2984727910296044e-06, "log_odds_chosen": 0.935196042060852, "log_odds_ratio": -0.336031049489975, "logits/chosen": -0.46645069122314453, "logits/rejected": -1.354770302772522, "logps/chosen": -1.506697177886963, "logps/rejected": -2.299145221710205, "loss": 1.5601, "nll_loss": 1.5265041589736938, "rewards/accuracies": 1.0, "rewards/chosen": -0.15066972374916077, "rewards/margins": 0.07924479246139526, "rewards/rejected": -0.22991451621055603, "step": 645 }, { "epoch": 1.0173364854215918, "grad_norm": 0.23612742125988007, "learning_rate": 2.2930361294765594e-06, "log_odds_chosen": 1.0463613271713257, "log_odds_ratio": -0.3042789101600647, "logits/chosen": -0.3468957543373108, "logits/rejected": -1.4972220659255981, "logps/chosen": -1.5089102983474731, "logps/rejected": -2.399858236312866, "loss": 1.5605, "nll_loss": 1.5300790071487427, "rewards/accuracies": 1.0, "rewards/chosen": -0.15089105069637299, "rewards/margins": 0.08909478038549423, "rewards/rejected": -0.23998583853244781, "step": 646 }, { "epoch": 1.0189125295508275, "grad_norm": 0.19889263808727264, "learning_rate": 2.287597254175521e-06, "log_odds_chosen": 1.190199375152588, "log_odds_ratio": -0.28450313210487366, "logits/chosen": -0.40667012333869934, "logits/rejected": -1.3844438791275024, "logps/chosen": -1.3774466514587402, "logps/rejected": -2.3757009506225586, "loss": 1.4397, "nll_loss": 1.4112755060195923, "rewards/accuracies": 1.0, "rewards/chosen": -0.13774468004703522, "rewards/margins": 0.0998254269361496, "rewards/rejected": -0.23757009208202362, "step": 647 }, { "epoch": 1.020488573680063, "grad_norm": 0.21212461590766907, "learning_rate": 2.2821562062145938e-06, "log_odds_chosen": 1.2260313034057617, "log_odds_ratio": -0.27425557374954224, "logits/chosen": -0.4070795774459839, "logits/rejected": -1.502755045890808, "logps/chosen": -1.529916763305664, "logps/rejected": -2.596616506576538, "loss": 1.543, "nll_loss": 1.5155991315841675, "rewards/accuracies": 1.0, "rewards/chosen": -0.1529916673898697, "rewards/margins": 0.10667000710964203, "rewards/rejected": -0.2596616744995117, "step": 648 }, { "epoch": 1.0220646178092987, "grad_norm": 0.22124163806438446, "learning_rate": 2.2767130266982967e-06, "log_odds_chosen": 0.9073204398155212, "log_odds_ratio": -0.35024213790893555, "logits/chosen": -0.47316858172416687, "logits/rejected": -1.1234474182128906, "logps/chosen": -1.443469762802124, "logps/rejected": -2.2021484375, "loss": 1.4911, "nll_loss": 1.4561150074005127, "rewards/accuracies": 1.0, "rewards/chosen": -0.14434699714183807, "rewards/margins": 0.07586785405874252, "rewards/rejected": -0.22021484375, "step": 649 }, { "epoch": 1.0236406619385343, "grad_norm": 0.20071031153202057, "learning_rate": 2.271267756747251e-06, "log_odds_chosen": 1.093822956085205, "log_odds_ratio": -0.2981938123703003, "logits/chosen": -0.3957046866416931, "logits/rejected": -1.3798214197158813, "logps/chosen": -1.4157538414001465, "logps/rejected": -2.3325469493865967, "loss": 1.4483, "nll_loss": 1.4184434413909912, "rewards/accuracies": 1.0, "rewards/chosen": -0.1415753960609436, "rewards/margins": 0.09167931228876114, "rewards/rejected": -0.23325468599796295, "step": 650 }, { "epoch": 1.02521670606777, "grad_norm": 0.2593681514263153, "learning_rate": 2.265820437497871e-06, "log_odds_chosen": 0.9342566728591919, "log_odds_ratio": -0.34289321303367615, "logits/chosen": -0.3919522166252136, "logits/rejected": -1.1586806774139404, "logps/chosen": -1.410290241241455, "logps/rejected": -2.187276840209961, "loss": 1.4708, "nll_loss": 1.4364949464797974, "rewards/accuracies": 1.0, "rewards/chosen": -0.14102903008460999, "rewards/margins": 0.07769866287708282, "rewards/rejected": -0.2187276929616928, "step": 651 }, { "epoch": 1.0267927501970056, "grad_norm": 0.23078316450119019, "learning_rate": 2.26037111010205e-06, "log_odds_chosen": 0.9340367317199707, "log_odds_ratio": -0.3341951370239258, "logits/chosen": -0.4020829200744629, "logits/rejected": -1.2824766635894775, "logps/chosen": -1.405263900756836, "logps/rejected": -2.176396369934082, "loss": 1.4516, "nll_loss": 1.4181625843048096, "rewards/accuracies": 1.0, "rewards/chosen": -0.1405263990163803, "rewards/margins": 0.07711324095726013, "rewards/rejected": -0.21763963997364044, "step": 652 }, { "epoch": 1.0283687943262412, "grad_norm": 0.20919527113437653, "learning_rate": 2.254919815726856e-06, "log_odds_chosen": 0.9500594735145569, "log_odds_ratio": -0.33337053656578064, "logits/chosen": -0.45734572410583496, "logits/rejected": -1.2711719274520874, "logps/chosen": -1.4963735342025757, "logps/rejected": -2.2994837760925293, "loss": 1.5504, "nll_loss": 1.5170687437057495, "rewards/accuracies": 1.0, "rewards/chosen": -0.1496373564004898, "rewards/margins": 0.08031102269887924, "rewards/rejected": -0.22994837164878845, "step": 653 }, { "epoch": 1.0299448384554768, "grad_norm": 0.20019294321537018, "learning_rate": 2.2494665955542127e-06, "log_odds_chosen": 1.2779302597045898, "log_odds_ratio": -0.2577243149280548, "logits/chosen": -0.5304352045059204, "logits/rejected": -1.4261908531188965, "logps/chosen": -1.406901240348816, "logps/rejected": -2.486321210861206, "loss": 1.4461, "nll_loss": 1.4202921390533447, "rewards/accuracies": 1.0, "rewards/chosen": -0.14069011807441711, "rewards/margins": 0.10794198513031006, "rewards/rejected": -0.24863210320472717, "step": 654 }, { "epoch": 1.0315208825847124, "grad_norm": 0.22600282728672028, "learning_rate": 2.2440114907805942e-06, "log_odds_chosen": 1.0152065753936768, "log_odds_ratio": -0.3147715628147125, "logits/chosen": -0.5199546217918396, "logits/rejected": -1.3554050922393799, "logps/chosen": -1.538907527923584, "logps/rejected": -2.4036970138549805, "loss": 1.5585, "nll_loss": 1.527039885520935, "rewards/accuracies": 1.0, "rewards/chosen": -0.15389074385166168, "rewards/margins": 0.08647895604372025, "rewards/rejected": -0.24036970734596252, "step": 655 }, { "epoch": 1.033096926713948, "grad_norm": 0.21135546267032623, "learning_rate": 2.2385545426167112e-06, "log_odds_chosen": 0.9998137354850769, "log_odds_ratio": -0.3207527697086334, "logits/chosen": -0.34665676951408386, "logits/rejected": -1.5091569423675537, "logps/chosen": -1.5195560455322266, "logps/rejected": -2.3657279014587402, "loss": 1.5643, "nll_loss": 1.532178282737732, "rewards/accuracies": 1.0, "rewards/chosen": -0.15195560455322266, "rewards/margins": 0.08461718261241913, "rewards/rejected": -0.2365727722644806, "step": 656 }, { "epoch": 1.0346729708431837, "grad_norm": 0.1999482959508896, "learning_rate": 2.2330957922872016e-06, "log_odds_chosen": 1.3584487438201904, "log_odds_ratio": -0.24272942543029785, "logits/chosen": -0.5136032104492188, "logits/rejected": -1.5450217723846436, "logps/chosen": -1.4356147050857544, "logps/rejected": -2.6008501052856445, "loss": 1.4744, "nll_loss": 1.4501111507415771, "rewards/accuracies": 1.0, "rewards/chosen": -0.1435614824295044, "rewards/margins": 0.11652353405952454, "rewards/rejected": -0.26008501648902893, "step": 657 }, { "epoch": 1.0362490149724193, "grad_norm": 0.19058655202388763, "learning_rate": 2.2276352810303166e-06, "log_odds_chosen": 1.2888612747192383, "log_odds_ratio": -0.25243476033210754, "logits/chosen": -0.43797439336776733, "logits/rejected": -1.427096962928772, "logps/chosen": -1.3446942567825317, "logps/rejected": -2.425525426864624, "loss": 1.3929, "nll_loss": 1.367623209953308, "rewards/accuracies": 1.0, "rewards/chosen": -0.1344694197177887, "rewards/margins": 0.10808312147855759, "rewards/rejected": -0.24255254864692688, "step": 658 }, { "epoch": 1.037825059101655, "grad_norm": 0.20395569503307343, "learning_rate": 2.222173050097609e-06, "log_odds_chosen": 1.0824915170669556, "log_odds_ratio": -0.2948096692562103, "logits/chosen": -0.4529469609260559, "logits/rejected": -1.4607760906219482, "logps/chosen": -1.4559506177902222, "logps/rejected": -2.369581699371338, "loss": 1.5011, "nll_loss": 1.471666932106018, "rewards/accuracies": 1.0, "rewards/chosen": -0.14559505879878998, "rewards/margins": 0.09136311709880829, "rewards/rejected": -0.23695819079875946, "step": 659 }, { "epoch": 1.0394011032308905, "grad_norm": 0.21224889159202576, "learning_rate": 2.2167091407536272e-06, "log_odds_chosen": 1.037617564201355, "log_odds_ratio": -0.30816492438316345, "logits/chosen": -0.4756828248500824, "logits/rejected": -1.2282439470291138, "logps/chosen": -1.4746648073196411, "logps/rejected": -2.348268508911133, "loss": 1.5271, "nll_loss": 1.4963159561157227, "rewards/accuracies": 1.0, "rewards/chosen": -0.1474664807319641, "rewards/margins": 0.08736037462949753, "rewards/rejected": -0.23482683300971985, "step": 660 }, { "epoch": 1.0409771473601261, "grad_norm": 0.20707234740257263, "learning_rate": 2.211243594275594e-06, "log_odds_chosen": 1.0012534856796265, "log_odds_ratio": -0.3229444622993469, "logits/chosen": -0.47621166706085205, "logits/rejected": -1.1613657474517822, "logps/chosen": -1.496647596359253, "logps/rejected": -2.3439135551452637, "loss": 1.5196, "nll_loss": 1.4873143434524536, "rewards/accuracies": 1.0, "rewards/chosen": -0.1496647596359253, "rewards/margins": 0.08472661674022675, "rewards/rejected": -0.23439136147499084, "step": 661 }, { "epoch": 1.0425531914893618, "grad_norm": 0.2119607776403427, "learning_rate": 2.2057764519531034e-06, "log_odds_chosen": 1.0933870077133179, "log_odds_ratio": -0.2962178587913513, "logits/chosen": -0.4307108223438263, "logits/rejected": -1.3217809200286865, "logps/chosen": -1.4943647384643555, "logps/rejected": -2.426520347595215, "loss": 1.5165, "nll_loss": 1.4868603944778442, "rewards/accuracies": 1.0, "rewards/chosen": -0.14943647384643555, "rewards/margins": 0.09321555495262146, "rewards/rejected": -0.242652028799057, "step": 662 }, { "epoch": 1.0441292356185974, "grad_norm": 0.2170783281326294, "learning_rate": 2.2003077550878053e-06, "log_odds_chosen": 1.1991064548492432, "log_odds_ratio": -0.26581087708473206, "logits/chosen": -0.44469496607780457, "logits/rejected": -1.3662856817245483, "logps/chosen": -1.4822015762329102, "logps/rejected": -2.5079472064971924, "loss": 1.5001, "nll_loss": 1.473563313484192, "rewards/accuracies": 1.0, "rewards/chosen": -0.14822015166282654, "rewards/margins": 0.10257457941770554, "rewards/rejected": -0.25079473853111267, "step": 663 }, { "epoch": 1.045705279747833, "grad_norm": 0.21621710062026978, "learning_rate": 2.1948375449930915e-06, "log_odds_chosen": 1.0592302083969116, "log_odds_ratio": -0.3070540130138397, "logits/chosen": -0.4349663257598877, "logits/rejected": -1.4020671844482422, "logps/chosen": -1.5268006324768066, "logps/rejected": -2.436373233795166, "loss": 1.5509, "nll_loss": 1.5201623439788818, "rewards/accuracies": 1.0, "rewards/chosen": -0.15268008410930634, "rewards/margins": 0.09095728397369385, "rewards/rejected": -0.24363736808300018, "step": 664 }, { "epoch": 1.0472813238770686, "grad_norm": 0.20191088318824768, "learning_rate": 2.189365862993787e-06, "log_odds_chosen": 1.1319061517715454, "log_odds_ratio": -0.28574445843696594, "logits/chosen": -0.3920745551586151, "logits/rejected": -1.3230290412902832, "logps/chosen": -1.5087662935256958, "logps/rejected": -2.4781603813171387, "loss": 1.5468, "nll_loss": 1.5181792974472046, "rewards/accuracies": 1.0, "rewards/chosen": -0.15087662637233734, "rewards/margins": 0.09693944454193115, "rewards/rejected": -0.2478160858154297, "step": 665 }, { "epoch": 1.0488573680063042, "grad_norm": 0.2054862082004547, "learning_rate": 2.1838927504258354e-06, "log_odds_chosen": 1.120408296585083, "log_odds_ratio": -0.28678375482559204, "logits/chosen": -0.4047078788280487, "logits/rejected": -1.3636611700057983, "logps/chosen": -1.4629921913146973, "logps/rejected": -2.413411855697632, "loss": 1.495, "nll_loss": 1.4663009643554688, "rewards/accuracies": 1.0, "rewards/chosen": -0.14629922807216644, "rewards/margins": 0.09504196047782898, "rewards/rejected": -0.24134118854999542, "step": 666 }, { "epoch": 1.0504334121355399, "grad_norm": 0.22820371389389038, "learning_rate": 2.178418248635988e-06, "log_odds_chosen": 1.0012179613113403, "log_odds_ratio": -0.3233652412891388, "logits/chosen": -0.4469676613807678, "logits/rejected": -1.095099687576294, "logps/chosen": -1.534761667251587, "logps/rejected": -2.3932883739471436, "loss": 1.5999, "nll_loss": 1.5675679445266724, "rewards/accuracies": 1.0, "rewards/chosen": -0.15347616374492645, "rewards/margins": 0.08585266023874283, "rewards/rejected": -0.23932884633541107, "step": 667 }, { "epoch": 1.0520094562647755, "grad_norm": 0.20692212879657745, "learning_rate": 2.1729423989814912e-06, "log_odds_chosen": 0.9865133762359619, "log_odds_ratio": -0.323873370885849, "logits/chosen": -0.4800672233104706, "logits/rejected": -1.2725329399108887, "logps/chosen": -1.4586519002914429, "logps/rejected": -2.289828300476074, "loss": 1.4836, "nll_loss": 1.4511914253234863, "rewards/accuracies": 1.0, "rewards/chosen": -0.14586518704891205, "rewards/margins": 0.08311763405799866, "rewards/rejected": -0.2289828509092331, "step": 668 }, { "epoch": 1.053585500394011, "grad_norm": 0.2024935632944107, "learning_rate": 2.167465242829774e-06, "log_odds_chosen": 1.0498971939086914, "log_odds_ratio": -0.3074461817741394, "logits/chosen": -0.49104076623916626, "logits/rejected": -1.2772282361984253, "logps/chosen": -1.5097532272338867, "logps/rejected": -2.407884120941162, "loss": 1.5443, "nll_loss": 1.5135592222213745, "rewards/accuracies": 1.0, "rewards/chosen": -0.1509753167629242, "rewards/margins": 0.08981308341026306, "rewards/rejected": -0.24078840017318726, "step": 669 }, { "epoch": 1.0551615445232467, "grad_norm": 0.20358148217201233, "learning_rate": 2.1619868215581343e-06, "log_odds_chosen": 1.1426955461502075, "log_odds_ratio": -0.2820016145706177, "logits/chosen": -0.4818193316459656, "logits/rejected": -1.3105734586715698, "logps/chosen": -1.4363994598388672, "logps/rejected": -2.3980278968811035, "loss": 1.4916, "nll_loss": 1.4634190797805786, "rewards/accuracies": 1.0, "rewards/chosen": -0.1436399668455124, "rewards/margins": 0.0961628332734108, "rewards/rejected": -0.2398027926683426, "step": 670 }, { "epoch": 1.0567375886524824, "grad_norm": 0.20384307205677032, "learning_rate": 2.1565071765534287e-06, "log_odds_chosen": 1.0277100801467896, "log_odds_ratio": -0.31216877698898315, "logits/chosen": -0.4087562561035156, "logits/rejected": -1.33811616897583, "logps/chosen": -1.5160971879959106, "logps/rejected": -2.3919973373413086, "loss": 1.5464, "nll_loss": 1.5151844024658203, "rewards/accuracies": 1.0, "rewards/chosen": -0.15160971879959106, "rewards/margins": 0.08759000897407532, "rewards/rejected": -0.2391997128725052, "step": 671 }, { "epoch": 1.058313632781718, "grad_norm": 0.20280694961547852, "learning_rate": 2.1510263492117574e-06, "log_odds_chosen": 0.8706097602844238, "log_odds_ratio": -0.35547569394111633, "logits/chosen": -0.4099719822406769, "logits/rejected": -1.165898084640503, "logps/chosen": -1.5018154382705688, "logps/rejected": -2.2361526489257812, "loss": 1.5544, "nll_loss": 1.5188325643539429, "rewards/accuracies": 1.0, "rewards/chosen": -0.15018156170845032, "rewards/margins": 0.07343369722366333, "rewards/rejected": -0.22361525893211365, "step": 672 }, { "epoch": 1.0598896769109536, "grad_norm": 0.24474608898162842, "learning_rate": 2.1455443809381535e-06, "log_odds_chosen": 1.22650146484375, "log_odds_ratio": -0.26890575885772705, "logits/chosen": -0.49158066511154175, "logits/rejected": -1.5457218885421753, "logps/chosen": -1.4304301738739014, "logps/rejected": -2.4712085723876953, "loss": 1.4672, "nll_loss": 1.4402765035629272, "rewards/accuracies": 1.0, "rewards/chosen": -0.14304301142692566, "rewards/margins": 0.10407783836126328, "rewards/rejected": -0.24712085723876953, "step": 673 }, { "epoch": 1.0614657210401892, "grad_norm": 0.21208275854587555, "learning_rate": 2.1400613131462697e-06, "log_odds_chosen": 1.1482954025268555, "log_odds_ratio": -0.281281441450119, "logits/chosen": -0.41779255867004395, "logits/rejected": -1.416609764099121, "logps/chosen": -1.5766055583953857, "logps/rejected": -2.5737593173980713, "loss": 1.5968, "nll_loss": 1.5687103271484375, "rewards/accuracies": 1.0, "rewards/chosen": -0.157660573720932, "rewards/margins": 0.09971538186073303, "rewards/rejected": -0.25737592577934265, "step": 674 }, { "epoch": 1.0630417651694248, "grad_norm": 0.19848236441612244, "learning_rate": 2.1345771872580628e-06, "log_odds_chosen": 1.1715642213821411, "log_odds_ratio": -0.2934086322784424, "logits/chosen": -0.45807644724845886, "logits/rejected": -1.4059028625488281, "logps/chosen": -1.484837532043457, "logps/rejected": -2.4954147338867188, "loss": 1.5094, "nll_loss": 1.4800400733947754, "rewards/accuracies": 1.0, "rewards/chosen": -0.1484837383031845, "rewards/margins": 0.1010577380657196, "rewards/rejected": -0.2495414763689041, "step": 675 }, { "epoch": 1.0646178092986605, "grad_norm": 0.19303160905838013, "learning_rate": 2.1290920447034846e-06, "log_odds_chosen": 1.0953112840652466, "log_odds_ratio": -0.2924834191799164, "logits/chosen": -0.4072478115558624, "logits/rejected": -1.357849359512329, "logps/chosen": -1.461911916732788, "logps/rejected": -2.386472225189209, "loss": 1.4992, "nll_loss": 1.4699064493179321, "rewards/accuracies": 1.0, "rewards/chosen": -0.14619119465351105, "rewards/margins": 0.09245604276657104, "rewards/rejected": -0.2386472523212433, "step": 676 }, { "epoch": 1.066193853427896, "grad_norm": 0.22716405987739563, "learning_rate": 2.1236059269201683e-06, "log_odds_chosen": 0.9468636512756348, "log_odds_ratio": -0.334963858127594, "logits/chosen": -0.5059394240379333, "logits/rejected": -1.2632321119308472, "logps/chosen": -1.4986002445220947, "logps/rejected": -2.3012125492095947, "loss": 1.5213, "nll_loss": 1.4878305196762085, "rewards/accuracies": 1.0, "rewards/chosen": -0.14986002445220947, "rewards/margins": 0.0802612453699112, "rewards/rejected": -0.23012126982212067, "step": 677 }, { "epoch": 1.0677698975571317, "grad_norm": 0.25305354595184326, "learning_rate": 2.1181188753531124e-06, "log_odds_chosen": 1.0967220067977905, "log_odds_ratio": -0.29285427927970886, "logits/chosen": -0.5359491109848022, "logits/rejected": -1.3216474056243896, "logps/chosen": -1.632015585899353, "logps/rejected": -2.5898711681365967, "loss": 1.6498, "nll_loss": 1.6205224990844727, "rewards/accuracies": 1.0, "rewards/chosen": -0.16320157051086426, "rewards/margins": 0.09578555822372437, "rewards/rejected": -0.2589871287345886, "step": 678 }, { "epoch": 1.0693459416863673, "grad_norm": 0.20264583826065063, "learning_rate": 2.112630931454371e-06, "log_odds_chosen": 1.1878125667572021, "log_odds_ratio": -0.272763192653656, "logits/chosen": -0.4376254975795746, "logits/rejected": -1.2670501470565796, "logps/chosen": -1.4005696773529053, "logps/rejected": -2.396745204925537, "loss": 1.4444, "nll_loss": 1.417091965675354, "rewards/accuracies": 1.0, "rewards/chosen": -0.14005698263645172, "rewards/margins": 0.09961752593517303, "rewards/rejected": -0.23967449367046356, "step": 679 }, { "epoch": 1.070921985815603, "grad_norm": 0.19328324496746063, "learning_rate": 2.10714213668274e-06, "log_odds_chosen": 1.0389231443405151, "log_odds_ratio": -0.3068993389606476, "logits/chosen": -0.4587363302707672, "logits/rejected": -1.2263991832733154, "logps/chosen": -1.448409914970398, "logps/rejected": -2.3234221935272217, "loss": 1.4967, "nll_loss": 1.466004490852356, "rewards/accuracies": 1.0, "rewards/chosen": -0.14484098553657532, "rewards/margins": 0.08750123530626297, "rewards/rejected": -0.23234222829341888, "step": 680 }, { "epoch": 1.0724980299448386, "grad_norm": 0.21751874685287476, "learning_rate": 2.1016525325034403e-06, "log_odds_chosen": 1.3174195289611816, "log_odds_ratio": -0.2651524543762207, "logits/chosen": -0.45115169882774353, "logits/rejected": -1.3691192865371704, "logps/chosen": -1.4566643238067627, "logps/rejected": -2.591353416442871, "loss": 1.4862, "nll_loss": 1.4596649408340454, "rewards/accuracies": 1.0, "rewards/chosen": -0.1456664353609085, "rewards/margins": 0.11346893012523651, "rewards/rejected": -0.259135365486145, "step": 681 }, { "epoch": 1.074074074074074, "grad_norm": 0.21977226436138153, "learning_rate": 2.096162160387811e-06, "log_odds_chosen": 0.9342979192733765, "log_odds_ratio": -0.33705762028694153, "logits/chosen": -0.4070799648761749, "logits/rejected": -1.302762746810913, "logps/chosen": -1.393695592880249, "logps/rejected": -2.1565520763397217, "loss": 1.4537, "nll_loss": 1.4199846982955933, "rewards/accuracies": 1.0, "rewards/chosen": -0.13936956226825714, "rewards/margins": 0.07628564536571503, "rewards/rejected": -0.21565520763397217, "step": 682 }, { "epoch": 1.0756501182033098, "grad_norm": 0.18787585198879242, "learning_rate": 2.09067106181299e-06, "log_odds_chosen": 1.1954203844070435, "log_odds_ratio": -0.2733722925186157, "logits/chosen": -0.4470306634902954, "logits/rejected": -1.3488725423812866, "logps/chosen": -1.3385844230651855, "logps/rejected": -2.3301005363464355, "loss": 1.384, "nll_loss": 1.3566768169403076, "rewards/accuracies": 1.0, "rewards/chosen": -0.13385844230651855, "rewards/margins": 0.0991516038775444, "rewards/rejected": -0.23301005363464355, "step": 683 }, { "epoch": 1.0772261623325452, "grad_norm": 0.21121583878993988, "learning_rate": 2.0851792782616052e-06, "log_odds_chosen": 1.3500101566314697, "log_odds_ratio": -0.24266520142555237, "logits/chosen": -0.3705444931983948, "logits/rejected": -1.5028289556503296, "logps/chosen": -1.4343652725219727, "logps/rejected": -2.5927631855010986, "loss": 1.4733, "nll_loss": 1.4490647315979004, "rewards/accuracies": 1.0, "rewards/chosen": -0.14343653619289398, "rewards/margins": 0.11583980917930603, "rewards/rejected": -0.2592763304710388, "step": 684 }, { "epoch": 1.078802206461781, "grad_norm": 0.20326411724090576, "learning_rate": 2.0796868512214576e-06, "log_odds_chosen": 1.1894053220748901, "log_odds_ratio": -0.2836572229862213, "logits/chosen": -0.5265865325927734, "logits/rejected": -1.2781651020050049, "logps/chosen": -1.4270437955856323, "logps/rejected": -2.43418288230896, "loss": 1.4809, "nll_loss": 1.4525479078292847, "rewards/accuracies": 1.0, "rewards/chosen": -0.14270438253879547, "rewards/margins": 0.10071390867233276, "rewards/rejected": -0.24341829121112823, "step": 685 }, { "epoch": 1.0803782505910164, "grad_norm": 0.1927499771118164, "learning_rate": 2.0741938221852103e-06, "log_odds_chosen": 1.1493513584136963, "log_odds_ratio": -0.28375622630119324, "logits/chosen": -0.4246770143508911, "logits/rejected": -1.3402469158172607, "logps/chosen": -1.4302904605865479, "logps/rejected": -2.40423583984375, "loss": 1.4764, "nll_loss": 1.4480384588241577, "rewards/accuracies": 1.0, "rewards/chosen": -0.14302903413772583, "rewards/margins": 0.09739455580711365, "rewards/rejected": -0.24042358994483948, "step": 686 }, { "epoch": 1.081954294720252, "grad_norm": 0.2168681025505066, "learning_rate": 2.0687002326500743e-06, "log_odds_chosen": 1.2200334072113037, "log_odds_ratio": -0.26408666372299194, "logits/chosen": -0.5207791328430176, "logits/rejected": -1.4260272979736328, "logps/chosen": -1.4295238256454468, "logps/rejected": -2.4644484519958496, "loss": 1.4635, "nll_loss": 1.437109112739563, "rewards/accuracies": 1.0, "rewards/chosen": -0.14295236766338348, "rewards/margins": 0.10349246859550476, "rewards/rejected": -0.24644485116004944, "step": 687 }, { "epoch": 1.0835303388494877, "grad_norm": 0.2626137137413025, "learning_rate": 2.0632061241174942e-06, "log_odds_chosen": 1.1877446174621582, "log_odds_ratio": -0.28063517808914185, "logits/chosen": -0.5086207389831543, "logits/rejected": -1.3838472366333008, "logps/chosen": -1.4812184572219849, "logps/rejected": -2.4984750747680664, "loss": 1.5082, "nll_loss": 1.48015296459198, "rewards/accuracies": 1.0, "rewards/chosen": -0.14812184870243073, "rewards/margins": 0.10172563791275024, "rewards/rejected": -0.24984750151634216, "step": 688 }, { "epoch": 1.0851063829787233, "grad_norm": 0.2078801542520523, "learning_rate": 2.0577115380928364e-06, "log_odds_chosen": 1.1658936738967896, "log_odds_ratio": -0.28187406063079834, "logits/chosen": -0.46922481060028076, "logits/rejected": -1.3566447496414185, "logps/chosen": -1.4518340826034546, "logps/rejected": -2.446504831314087, "loss": 1.4766, "nll_loss": 1.448391318321228, "rewards/accuracies": 1.0, "rewards/chosen": -0.14518341422080994, "rewards/margins": 0.09946707636117935, "rewards/rejected": -0.24465049803256989, "step": 689 }, { "epoch": 1.086682427107959, "grad_norm": 0.2070070058107376, "learning_rate": 2.052216516085073e-06, "log_odds_chosen": 0.9943616390228271, "log_odds_ratio": -0.33048465847969055, "logits/chosen": -0.4671243727207184, "logits/rejected": -1.3567644357681274, "logps/chosen": -1.4065361022949219, "logps/rejected": -2.240896224975586, "loss": 1.4587, "nll_loss": 1.4256466627120972, "rewards/accuracies": 1.0, "rewards/chosen": -0.1406536102294922, "rewards/margins": 0.083436019718647, "rewards/rejected": -0.2240896373987198, "step": 690 }, { "epoch": 1.0882584712371945, "grad_norm": 0.2293894737958908, "learning_rate": 2.0467210996064707e-06, "log_odds_chosen": 0.9658500552177429, "log_odds_ratio": -0.3337486684322357, "logits/chosen": -0.5552599430084229, "logits/rejected": -0.977747917175293, "logps/chosen": -1.462015151977539, "logps/rejected": -2.276854991912842, "loss": 1.4938, "nll_loss": 1.4604203701019287, "rewards/accuracies": 1.0, "rewards/chosen": -0.14620152115821838, "rewards/margins": 0.08148398995399475, "rewards/rejected": -0.22768549621105194, "step": 691 }, { "epoch": 1.0898345153664302, "grad_norm": 0.1888820230960846, "learning_rate": 2.0412253301722774e-06, "log_odds_chosen": 1.2150609493255615, "log_odds_ratio": -0.2623293995857239, "logits/chosen": -0.43161195516586304, "logits/rejected": -1.4845757484436035, "logps/chosen": -1.512966275215149, "logps/rejected": -2.5592939853668213, "loss": 1.5458, "nll_loss": 1.519562840461731, "rewards/accuracies": 1.0, "rewards/chosen": -0.15129663050174713, "rewards/margins": 0.10463276505470276, "rewards/rejected": -0.2559293806552887, "step": 692 }, { "epoch": 1.0914105594956658, "grad_norm": 0.21559658646583557, "learning_rate": 2.0357292493004044e-06, "log_odds_chosen": 1.1779356002807617, "log_odds_ratio": -0.2732158601284027, "logits/chosen": -0.4328814446926117, "logits/rejected": -1.3747599124908447, "logps/chosen": -1.4345402717590332, "logps/rejected": -2.4330968856811523, "loss": 1.456, "nll_loss": 1.4286876916885376, "rewards/accuracies": 1.0, "rewards/chosen": -0.14345404505729675, "rewards/margins": 0.09985566139221191, "rewards/rejected": -0.24330970644950867, "step": 693 }, { "epoch": 1.0929866036249014, "grad_norm": 0.19508136808872223, "learning_rate": 2.0302328985111193e-06, "log_odds_chosen": 1.24747633934021, "log_odds_ratio": -0.27322325110435486, "logits/chosen": -0.37207263708114624, "logits/rejected": -1.3356413841247559, "logps/chosen": -1.3938806056976318, "logps/rejected": -2.4547274112701416, "loss": 1.4437, "nll_loss": 1.4163326025009155, "rewards/accuracies": 1.0, "rewards/chosen": -0.1393880695104599, "rewards/margins": 0.1060846745967865, "rewards/rejected": -0.2454727590084076, "step": 694 }, { "epoch": 1.094562647754137, "grad_norm": 0.22678373754024506, "learning_rate": 2.0247363193267256e-06, "log_odds_chosen": 1.120569109916687, "log_odds_ratio": -0.28781750798225403, "logits/chosen": -0.38653212785720825, "logits/rejected": -1.3823848962783813, "logps/chosen": -1.4960999488830566, "logps/rejected": -2.4544363021850586, "loss": 1.5224, "nll_loss": 1.4936158657073975, "rewards/accuracies": 1.0, "rewards/chosen": -0.1496100127696991, "rewards/margins": 0.0958336591720581, "rewards/rejected": -0.2454436719417572, "step": 695 }, { "epoch": 1.0961386918833727, "grad_norm": 0.22228041291236877, "learning_rate": 2.019239553271255e-06, "log_odds_chosen": 1.2738690376281738, "log_odds_ratio": -0.2555113732814789, "logits/chosen": -0.46568235754966736, "logits/rejected": -1.5240552425384521, "logps/chosen": -1.44100821018219, "logps/rejected": -2.527820587158203, "loss": 1.4779, "nll_loss": 1.4523212909698486, "rewards/accuracies": 1.0, "rewards/chosen": -0.1441008299589157, "rewards/margins": 0.10868123918771744, "rewards/rejected": -0.25278207659721375, "step": 696 }, { "epoch": 1.0977147360126083, "grad_norm": 0.20070266723632812, "learning_rate": 2.0137426418701488e-06, "log_odds_chosen": 1.274893045425415, "log_odds_ratio": -0.25972452759742737, "logits/chosen": -0.42226606607437134, "logits/rejected": -1.3668261766433716, "logps/chosen": -1.4452632665634155, "logps/rejected": -2.54134202003479, "loss": 1.4818, "nll_loss": 1.4557902812957764, "rewards/accuracies": 1.0, "rewards/chosen": -0.14452631771564484, "rewards/margins": 0.10960787534713745, "rewards/rejected": -0.2541341781616211, "step": 697 }, { "epoch": 1.099290780141844, "grad_norm": 0.19114573299884796, "learning_rate": 2.008245626649947e-06, "log_odds_chosen": 1.2128827571868896, "log_odds_ratio": -0.28301626443862915, "logits/chosen": -0.4753851592540741, "logits/rejected": -1.6074962615966797, "logps/chosen": -1.3890125751495361, "logps/rejected": -2.417355537414551, "loss": 1.4411, "nll_loss": 1.412771463394165, "rewards/accuracies": 1.0, "rewards/chosen": -0.1389012634754181, "rewards/margins": 0.10283426940441132, "rewards/rejected": -0.2417355477809906, "step": 698 }, { "epoch": 1.1008668242710795, "grad_norm": 0.24698396027088165, "learning_rate": 2.0027485491379746e-06, "log_odds_chosen": 1.299547791481018, "log_odds_ratio": -0.252640038728714, "logits/chosen": -0.6217135190963745, "logits/rejected": -1.2584782838821411, "logps/chosen": -1.5652302503585815, "logps/rejected": -2.6947779655456543, "loss": 1.5874, "nll_loss": 1.5620999336242676, "rewards/accuracies": 1.0, "rewards/chosen": -0.15652303397655487, "rewards/margins": 0.1129547655582428, "rewards/rejected": -0.2694777846336365, "step": 699 }, { "epoch": 1.1024428684003151, "grad_norm": 0.22090387344360352, "learning_rate": 1.9972514508620256e-06, "log_odds_chosen": 1.1659311056137085, "log_odds_ratio": -0.28398028016090393, "logits/chosen": -0.45424994826316833, "logits/rejected": -1.3774477243423462, "logps/chosen": -1.4243271350860596, "logps/rejected": -2.4104866981506348, "loss": 1.4658, "nll_loss": 1.4373921155929565, "rewards/accuracies": 1.0, "rewards/chosen": -0.14243271946907043, "rewards/margins": 0.09861597418785095, "rewards/rejected": -0.241048663854599, "step": 700 }, { "epoch": 1.1040189125295508, "grad_norm": 0.20541685819625854, "learning_rate": 1.991754373350053e-06, "log_odds_chosen": 0.9334115386009216, "log_odds_ratio": -0.340568870306015, "logits/chosen": -0.4905482828617096, "logits/rejected": -1.374817967414856, "logps/chosen": -1.484605312347412, "logps/rejected": -2.271920680999756, "loss": 1.529, "nll_loss": 1.4949533939361572, "rewards/accuracies": 1.0, "rewards/chosen": -0.14846055209636688, "rewards/margins": 0.07873153686523438, "rewards/rejected": -0.22719207406044006, "step": 701 }, { "epoch": 1.1055949566587864, "grad_norm": 0.19397854804992676, "learning_rate": 1.986257358129852e-06, "log_odds_chosen": 1.1067744493484497, "log_odds_ratio": -0.2919045388698578, "logits/chosen": -0.47195306420326233, "logits/rejected": -1.3318650722503662, "logps/chosen": -1.382297396659851, "logps/rejected": -2.2992775440216064, "loss": 1.4258, "nll_loss": 1.3965654373168945, "rewards/accuracies": 1.0, "rewards/chosen": -0.13822975754737854, "rewards/margins": 0.09169799834489822, "rewards/rejected": -0.22992774844169617, "step": 702 }, { "epoch": 1.107171000788022, "grad_norm": 0.2922585606575012, "learning_rate": 1.9807604467287453e-06, "log_odds_chosen": 1.085465908050537, "log_odds_ratio": -0.2986772060394287, "logits/chosen": -0.5647338628768921, "logits/rejected": -1.22553551197052, "logps/chosen": -1.4989755153656006, "logps/rejected": -2.421286106109619, "loss": 1.5251, "nll_loss": 1.4952261447906494, "rewards/accuracies": 1.0, "rewards/chosen": -0.14989756047725677, "rewards/margins": 0.09223109483718872, "rewards/rejected": -0.2421286404132843, "step": 703 }, { "epoch": 1.1087470449172576, "grad_norm": 0.23126116394996643, "learning_rate": 1.9752636806732742e-06, "log_odds_chosen": 1.1335054636001587, "log_odds_ratio": -0.28281736373901367, "logits/chosen": -0.41041299700737, "logits/rejected": -1.4300845861434937, "logps/chosen": -1.422570824623108, "logps/rejected": -2.3780839443206787, "loss": 1.4513, "nll_loss": 1.4230315685272217, "rewards/accuracies": 1.0, "rewards/chosen": -0.14225709438323975, "rewards/margins": 0.09555128961801529, "rewards/rejected": -0.23780837655067444, "step": 704 }, { "epoch": 1.1103230890464932, "grad_norm": 0.21834450960159302, "learning_rate": 1.9697671014888805e-06, "log_odds_chosen": 1.173021912574768, "log_odds_ratio": -0.2767985463142395, "logits/chosen": -0.5870257019996643, "logits/rejected": -1.2767181396484375, "logps/chosen": -1.4964110851287842, "logps/rejected": -2.500709056854248, "loss": 1.5257, "nll_loss": 1.497986912727356, "rewards/accuracies": 1.0, "rewards/chosen": -0.14964111149311066, "rewards/margins": 0.10042980313301086, "rewards/rejected": -0.2500708997249603, "step": 705 }, { "epoch": 1.1118991331757289, "grad_norm": 0.20281967520713806, "learning_rate": 1.9642707506995954e-06, "log_odds_chosen": 1.3563827276229858, "log_odds_ratio": -0.23615798354148865, "logits/chosen": -0.509278416633606, "logits/rejected": -1.42978835105896, "logps/chosen": -1.4364010095596313, "logps/rejected": -2.599912166595459, "loss": 1.4685, "nll_loss": 1.4448447227478027, "rewards/accuracies": 1.0, "rewards/chosen": -0.14364011585712433, "rewards/margins": 0.11635109782218933, "rewards/rejected": -0.25999119877815247, "step": 706 }, { "epoch": 1.1134751773049645, "grad_norm": 0.19448623061180115, "learning_rate": 1.9587746698277232e-06, "log_odds_chosen": 1.0534982681274414, "log_odds_ratio": -0.3130910098552704, "logits/chosen": -0.5047029852867126, "logits/rejected": -1.230925440788269, "logps/chosen": -1.4441640377044678, "logps/rejected": -2.335139274597168, "loss": 1.4782, "nll_loss": 1.4469194412231445, "rewards/accuracies": 1.0, "rewards/chosen": -0.1444164216518402, "rewards/margins": 0.0890975221991539, "rewards/rejected": -0.23351392149925232, "step": 707 }, { "epoch": 1.1150512214342, "grad_norm": 0.23463737964630127, "learning_rate": 1.953278900393529e-06, "log_odds_chosen": 1.1329559087753296, "log_odds_ratio": -0.294190376996994, "logits/chosen": -0.5475265979766846, "logits/rejected": -1.2021700143814087, "logps/chosen": -1.487652063369751, "logps/rejected": -2.456540584564209, "loss": 1.4983, "nll_loss": 1.4689185619354248, "rewards/accuracies": 1.0, "rewards/chosen": -0.1487652063369751, "rewards/margins": 0.09688883274793625, "rewards/rejected": -0.24565403163433075, "step": 708 }, { "epoch": 1.1166272655634357, "grad_norm": 0.20823369920253754, "learning_rate": 1.9477834839149274e-06, "log_odds_chosen": 1.1705896854400635, "log_odds_ratio": -0.27801841497421265, "logits/chosen": -0.5280231237411499, "logits/rejected": -1.339627742767334, "logps/chosen": -1.3890221118927002, "logps/rejected": -2.372847557067871, "loss": 1.4468, "nll_loss": 1.4190031290054321, "rewards/accuracies": 1.0, "rewards/chosen": -0.1389022022485733, "rewards/margins": 0.09838256239891052, "rewards/rejected": -0.23728476464748383, "step": 709 }, { "epoch": 1.1182033096926713, "grad_norm": 0.2220274657011032, "learning_rate": 1.942288461907164e-06, "log_odds_chosen": 1.2102402448654175, "log_odds_ratio": -0.27753397822380066, "logits/chosen": -0.5286645889282227, "logits/rejected": -1.386061429977417, "logps/chosen": -1.3907581567764282, "logps/rejected": -2.4172701835632324, "loss": 1.4442, "nll_loss": 1.4164468050003052, "rewards/accuracies": 1.0, "rewards/chosen": -0.13907580077648163, "rewards/margins": 0.1026512086391449, "rewards/rejected": -0.24172700941562653, "step": 710 }, { "epoch": 1.119779353821907, "grad_norm": 0.21669377386569977, "learning_rate": 1.936793875882505e-06, "log_odds_chosen": 1.1954429149627686, "log_odds_ratio": -0.2695174217224121, "logits/chosen": -0.34124839305877686, "logits/rejected": -1.3979601860046387, "logps/chosen": -1.5485085248947144, "logps/rejected": -2.5840556621551514, "loss": 1.5938, "nll_loss": 1.5668448209762573, "rewards/accuracies": 1.0, "rewards/chosen": -0.15485085546970367, "rewards/margins": 0.10355471074581146, "rewards/rejected": -0.25840556621551514, "step": 711 }, { "epoch": 1.1213553979511426, "grad_norm": 0.21096524596214294, "learning_rate": 1.931299767349926e-06, "log_odds_chosen": 1.2031110525131226, "log_odds_ratio": -0.27347713708877563, "logits/chosen": -0.5242701172828674, "logits/rejected": -1.194074034690857, "logps/chosen": -1.378991723060608, "logps/rejected": -2.381162643432617, "loss": 1.4227, "nll_loss": 1.3953834772109985, "rewards/accuracies": 1.0, "rewards/chosen": -0.13789916038513184, "rewards/margins": 0.10021708905696869, "rewards/rejected": -0.23811623454093933, "step": 712 }, { "epoch": 1.1229314420803782, "grad_norm": 0.23077386617660522, "learning_rate": 1.9258061778147895e-06, "log_odds_chosen": 1.1153610944747925, "log_odds_ratio": -0.29129940271377563, "logits/chosen": -0.5413553714752197, "logits/rejected": -1.4865033626556396, "logps/chosen": -1.4882041215896606, "logps/rejected": -2.4407083988189697, "loss": 1.5247, "nll_loss": 1.4956085681915283, "rewards/accuracies": 1.0, "rewards/chosen": -0.1488204151391983, "rewards/margins": 0.09525042027235031, "rewards/rejected": -0.24407082796096802, "step": 713 }, { "epoch": 1.1245074862096138, "grad_norm": 0.2107950747013092, "learning_rate": 1.9203131487785426e-06, "log_odds_chosen": 1.3729069232940674, "log_odds_ratio": -0.23002268373966217, "logits/chosen": -0.6425501108169556, "logits/rejected": -1.2580373287200928, "logps/chosen": -1.3680059909820557, "logps/rejected": -2.529069423675537, "loss": 1.4156, "nll_loss": 1.392630696296692, "rewards/accuracies": 1.0, "rewards/chosen": -0.1368006020784378, "rewards/margins": 0.11610635370016098, "rewards/rejected": -0.2529069483280182, "step": 714 }, { "epoch": 1.1260835303388494, "grad_norm": 0.21586111187934875, "learning_rate": 1.9148207217383946e-06, "log_odds_chosen": 1.5254570245742798, "log_odds_ratio": -0.2026120126247406, "logits/chosen": -0.4808170795440674, "logits/rejected": -1.5110218524932861, "logps/chosen": -1.4799153804779053, "logps/rejected": -2.810546398162842, "loss": 1.4902, "nll_loss": 1.469893217086792, "rewards/accuracies": 1.0, "rewards/chosen": -0.14799155294895172, "rewards/margins": 0.13306309282779694, "rewards/rejected": -0.28105467557907104, "step": 715 }, { "epoch": 1.127659574468085, "grad_norm": 0.2213343381881714, "learning_rate": 1.9093289381870094e-06, "log_odds_chosen": 1.044459581375122, "log_odds_ratio": -0.307096391916275, "logits/chosen": -0.5094771385192871, "logits/rejected": -1.4036896228790283, "logps/chosen": -1.473573088645935, "logps/rejected": -2.359524726867676, "loss": 1.5123, "nll_loss": 1.4815442562103271, "rewards/accuracies": 1.0, "rewards/chosen": -0.14735731482505798, "rewards/margins": 0.08859515190124512, "rewards/rejected": -0.2359524816274643, "step": 716 }, { "epoch": 1.1292356185973207, "grad_norm": 0.21230585873126984, "learning_rate": 1.9038378396121895e-06, "log_odds_chosen": 1.2633765935897827, "log_odds_ratio": -0.25486746430397034, "logits/chosen": -0.3245435953140259, "logits/rejected": -1.5540175437927246, "logps/chosen": -1.5153406858444214, "logps/rejected": -2.610530138015747, "loss": 1.5438, "nll_loss": 1.518311858177185, "rewards/accuracies": 1.0, "rewards/chosen": -0.1515340805053711, "rewards/margins": 0.10951897501945496, "rewards/rejected": -0.26105305552482605, "step": 717 }, { "epoch": 1.1308116627265563, "grad_norm": 0.1942850798368454, "learning_rate": 1.8983474674965597e-06, "log_odds_chosen": 1.1623995304107666, "log_odds_ratio": -0.28777241706848145, "logits/chosen": -0.49636614322662354, "logits/rejected": -1.5351665019989014, "logps/chosen": -1.449415683746338, "logps/rejected": -2.436007499694824, "loss": 1.4854, "nll_loss": 1.4566153287887573, "rewards/accuracies": 1.0, "rewards/chosen": -0.1449415683746338, "rewards/margins": 0.09865917265415192, "rewards/rejected": -0.2436007559299469, "step": 718 }, { "epoch": 1.132387706855792, "grad_norm": 0.21141377091407776, "learning_rate": 1.8928578633172603e-06, "log_odds_chosen": 1.3506557941436768, "log_odds_ratio": -0.2339305281639099, "logits/chosen": -0.4822527766227722, "logits/rejected": -1.5536284446716309, "logps/chosen": -1.4843451976776123, "logps/rejected": -2.6501717567443848, "loss": 1.5204, "nll_loss": 1.4970332384109497, "rewards/accuracies": 1.0, "rewards/chosen": -0.14843453466892242, "rewards/margins": 0.11658263951539993, "rewards/rejected": -0.26501715183258057, "step": 719 }, { "epoch": 1.1339637509850276, "grad_norm": 0.20757867395877838, "learning_rate": 1.8873690685456283e-06, "log_odds_chosen": 1.2760004997253418, "log_odds_ratio": -0.25464388728141785, "logits/chosen": -0.5191957950592041, "logits/rejected": -1.3725414276123047, "logps/chosen": -1.4265276193618774, "logps/rejected": -2.5132930278778076, "loss": 1.4593, "nll_loss": 1.433866262435913, "rewards/accuracies": 1.0, "rewards/chosen": -0.14265277981758118, "rewards/margins": 0.10867653042078018, "rewards/rejected": -0.25132930278778076, "step": 720 }, { "epoch": 1.1355397951142632, "grad_norm": 0.2010820060968399, "learning_rate": 1.8818811246468872e-06, "log_odds_chosen": 1.4654037952423096, "log_odds_ratio": -0.22392913699150085, "logits/chosen": -0.503913938999176, "logits/rejected": -1.5154976844787598, "logps/chosen": -1.415074348449707, "logps/rejected": -2.6756412982940674, "loss": 1.4484, "nll_loss": 1.425994634628296, "rewards/accuracies": 1.0, "rewards/chosen": -0.14150743186473846, "rewards/margins": 0.12605668604373932, "rewards/rejected": -0.2675641179084778, "step": 721 }, { "epoch": 1.1371158392434988, "grad_norm": 0.22116105258464813, "learning_rate": 1.876394073079832e-06, "log_odds_chosen": 1.3847888708114624, "log_odds_ratio": -0.23863618075847626, "logits/chosen": -0.5928460359573364, "logits/rejected": -1.412048578262329, "logps/chosen": -1.5432716608047485, "logps/rejected": -2.754733085632324, "loss": 1.5645, "nll_loss": 1.5405950546264648, "rewards/accuracies": 1.0, "rewards/chosen": -0.1543271839618683, "rewards/margins": 0.12114612758159637, "rewards/rejected": -0.27547329664230347, "step": 722 }, { "epoch": 1.1386918833727344, "grad_norm": 0.20633108913898468, "learning_rate": 1.8709079552965152e-06, "log_odds_chosen": 1.2970001697540283, "log_odds_ratio": -0.2590065002441406, "logits/chosen": -0.4998897612094879, "logits/rejected": -1.3825888633728027, "logps/chosen": -1.4237260818481445, "logps/rejected": -2.5296103954315186, "loss": 1.458, "nll_loss": 1.4320578575134277, "rewards/accuracies": 1.0, "rewards/chosen": -0.14237259328365326, "rewards/margins": 0.1105884537100792, "rewards/rejected": -0.25296103954315186, "step": 723 }, { "epoch": 1.14026792750197, "grad_norm": 0.21756547689437866, "learning_rate": 1.8654228127419375e-06, "log_odds_chosen": 1.1096466779708862, "log_odds_ratio": -0.29027462005615234, "logits/chosen": -0.5779076814651489, "logits/rejected": -1.3305450677871704, "logps/chosen": -1.437713623046875, "logps/rejected": -2.3763113021850586, "loss": 1.4718, "nll_loss": 1.4427886009216309, "rewards/accuracies": 1.0, "rewards/chosen": -0.14377135038375854, "rewards/margins": 0.09385980665683746, "rewards/rejected": -0.237631157040596, "step": 724 }, { "epoch": 1.1418439716312057, "grad_norm": 0.19929639995098114, "learning_rate": 1.8599386868537306e-06, "log_odds_chosen": 1.3706843852996826, "log_odds_ratio": -0.22976359724998474, "logits/chosen": -0.5125320553779602, "logits/rejected": -1.487987756729126, "logps/chosen": -1.3186641931533813, "logps/rejected": -2.464041233062744, "loss": 1.3672, "nll_loss": 1.344267725944519, "rewards/accuracies": 1.0, "rewards/chosen": -0.1318664252758026, "rewards/margins": 0.11453770846128464, "rewards/rejected": -0.24640414118766785, "step": 725 }, { "epoch": 1.1434200157604413, "grad_norm": 0.21918931603431702, "learning_rate": 1.8544556190618464e-06, "log_odds_chosen": 1.2842426300048828, "log_odds_ratio": -0.24738024175167084, "logits/chosen": -0.5883026719093323, "logits/rejected": -1.3024299144744873, "logps/chosen": -1.5094125270843506, "logps/rejected": -2.6171648502349854, "loss": 1.5315, "nll_loss": 1.5067555904388428, "rewards/accuracies": 1.0, "rewards/chosen": -0.15094126760959625, "rewards/margins": 0.11077524721622467, "rewards/rejected": -0.2617165148258209, "step": 726 }, { "epoch": 1.144996059889677, "grad_norm": 0.2221018373966217, "learning_rate": 1.8489736507882424e-06, "log_odds_chosen": 1.594733715057373, "log_odds_ratio": -0.20838479697704315, "logits/chosen": -0.5265178680419922, "logits/rejected": -1.4495673179626465, "logps/chosen": -1.4184130430221558, "logps/rejected": -2.80631685256958, "loss": 1.4437, "nll_loss": 1.4228373765945435, "rewards/accuracies": 1.0, "rewards/chosen": -0.14184130728244781, "rewards/margins": 0.13879039883613586, "rewards/rejected": -0.2806317210197449, "step": 727 }, { "epoch": 1.1465721040189125, "grad_norm": 0.1980506330728531, "learning_rate": 1.8434928234465716e-06, "log_odds_chosen": 1.3545522689819336, "log_odds_ratio": -0.23778237402439117, "logits/chosen": -0.6014202237129211, "logits/rejected": -1.3518621921539307, "logps/chosen": -1.3901243209838867, "logps/rejected": -2.542637825012207, "loss": 1.4309, "nll_loss": 1.4071658849716187, "rewards/accuracies": 1.0, "rewards/chosen": -0.1390124261379242, "rewards/margins": 0.11525137722492218, "rewards/rejected": -0.2542637884616852, "step": 728 }, { "epoch": 1.1481481481481481, "grad_norm": 0.22705380618572235, "learning_rate": 1.8380131784418657e-06, "log_odds_chosen": 1.3542594909667969, "log_odds_ratio": -0.24852502346038818, "logits/chosen": -0.46507954597473145, "logits/rejected": -1.4865875244140625, "logps/chosen": -1.4420115947723389, "logps/rejected": -2.6088292598724365, "loss": 1.4827, "nll_loss": 1.4578158855438232, "rewards/accuracies": 1.0, "rewards/chosen": -0.1442011594772339, "rewards/margins": 0.116681769490242, "rewards/rejected": -0.2608829140663147, "step": 729 }, { "epoch": 1.1497241922773838, "grad_norm": 0.2376445233821869, "learning_rate": 1.8325347571702259e-06, "log_odds_chosen": 1.357885479927063, "log_odds_ratio": -0.23886026442050934, "logits/chosen": -0.5753093957901001, "logits/rejected": -1.4901759624481201, "logps/chosen": -1.4975333213806152, "logps/rejected": -2.6757655143737793, "loss": 1.5096, "nll_loss": 1.4857057332992554, "rewards/accuracies": 1.0, "rewards/chosen": -0.14975333213806152, "rewards/margins": 0.11782322824001312, "rewards/rejected": -0.26757654547691345, "step": 730 }, { "epoch": 1.1513002364066194, "grad_norm": 0.22765523195266724, "learning_rate": 1.827057601018509e-06, "log_odds_chosen": 1.399768352508545, "log_odds_ratio": -0.23107658326625824, "logits/chosen": -0.6271060109138489, "logits/rejected": -1.4669194221496582, "logps/chosen": -1.3811513185501099, "logps/rejected": -2.567168951034546, "loss": 1.4307, "nll_loss": 1.4075775146484375, "rewards/accuracies": 1.0, "rewards/chosen": -0.13811513781547546, "rewards/margins": 0.11860179901123047, "rewards/rejected": -0.25671693682670593, "step": 731 }, { "epoch": 1.152876280535855, "grad_norm": 0.23158101737499237, "learning_rate": 1.8215817513640122e-06, "log_odds_chosen": 1.396857500076294, "log_odds_ratio": -0.23694464564323425, "logits/chosen": -0.5242437720298767, "logits/rejected": -1.471669316291809, "logps/chosen": -1.4370704889297485, "logps/rejected": -2.635478973388672, "loss": 1.4605, "nll_loss": 1.4368555545806885, "rewards/accuracies": 1.0, "rewards/chosen": -0.1437070667743683, "rewards/margins": 0.1198408305644989, "rewards/rejected": -0.2635478675365448, "step": 732 }, { "epoch": 1.1544523246650906, "grad_norm": 0.21417687833309174, "learning_rate": 1.8161072495741647e-06, "log_odds_chosen": 1.3921798467636108, "log_odds_ratio": -0.22892777621746063, "logits/chosen": -0.5019906759262085, "logits/rejected": -1.516732096672058, "logps/chosen": -1.3953202962875366, "logps/rejected": -2.57902455329895, "loss": 1.4339, "nll_loss": 1.4110405445098877, "rewards/accuracies": 1.0, "rewards/chosen": -0.13953202962875366, "rewards/margins": 0.1183704137802124, "rewards/rejected": -0.25790247321128845, "step": 733 }, { "epoch": 1.1560283687943262, "grad_norm": 0.22115546464920044, "learning_rate": 1.810634137006213e-06, "log_odds_chosen": 1.0537023544311523, "log_odds_ratio": -0.30371353030204773, "logits/chosen": -0.5922952890396118, "logits/rejected": -1.1409456729888916, "logps/chosen": -1.4845179319381714, "logps/rejected": -2.3795814514160156, "loss": 1.526, "nll_loss": 1.4956529140472412, "rewards/accuracies": 1.0, "rewards/chosen": -0.1484518051147461, "rewards/margins": 0.0895063579082489, "rewards/rejected": -0.237958163022995, "step": 734 }, { "epoch": 1.1576044129235619, "grad_norm": 0.21111159026622772, "learning_rate": 1.805162455006908e-06, "log_odds_chosen": 1.2015193700790405, "log_odds_ratio": -0.2717169523239136, "logits/chosen": -0.5222191214561462, "logits/rejected": -1.27971351146698, "logps/chosen": -1.4912288188934326, "logps/rejected": -2.5226693153381348, "loss": 1.5047, "nll_loss": 1.4774832725524902, "rewards/accuracies": 1.0, "rewards/chosen": -0.14912287890911102, "rewards/margins": 0.10314405709505081, "rewards/rejected": -0.25226691365242004, "step": 735 }, { "epoch": 1.1591804570527975, "grad_norm": 0.21160832047462463, "learning_rate": 1.799692244912195e-06, "log_odds_chosen": 1.1387715339660645, "log_odds_ratio": -0.3090120255947113, "logits/chosen": -0.5213903188705444, "logits/rejected": -1.130449891090393, "logps/chosen": -1.4878404140472412, "logps/rejected": -2.469958543777466, "loss": 1.5314, "nll_loss": 1.5005372762680054, "rewards/accuracies": 1.0, "rewards/chosen": -0.14878404140472412, "rewards/margins": 0.09821182489395142, "rewards/rejected": -0.24699586629867554, "step": 736 }, { "epoch": 1.160756501182033, "grad_norm": 0.21755388379096985, "learning_rate": 1.7942235480468964e-06, "log_odds_chosen": 1.2541836500167847, "log_odds_ratio": -0.26203587651252747, "logits/chosen": -0.560463011264801, "logits/rejected": -1.3185467720031738, "logps/chosen": -1.3833380937576294, "logps/rejected": -2.4418272972106934, "loss": 1.4227, "nll_loss": 1.3965139389038086, "rewards/accuracies": 1.0, "rewards/chosen": -0.13833379745483398, "rewards/margins": 0.10584891587495804, "rewards/rejected": -0.24418272078037262, "step": 737 }, { "epoch": 1.1623325453112687, "grad_norm": 0.20753788948059082, "learning_rate": 1.7887564057244062e-06, "log_odds_chosen": 1.2912025451660156, "log_odds_ratio": -0.2690528631210327, "logits/chosen": -0.5081381797790527, "logits/rejected": -1.401484727859497, "logps/chosen": -1.3404988050460815, "logps/rejected": -2.4288179874420166, "loss": 1.3837, "nll_loss": 1.3567984104156494, "rewards/accuracies": 1.0, "rewards/chosen": -0.1340498924255371, "rewards/margins": 0.10883191972970963, "rewards/rejected": -0.24288181960582733, "step": 738 }, { "epoch": 1.1639085894405043, "grad_norm": 0.22165168821811676, "learning_rate": 1.7832908592463732e-06, "log_odds_chosen": 1.221674919128418, "log_odds_ratio": -0.26628822088241577, "logits/chosen": -0.6422454714775085, "logits/rejected": -1.4249920845031738, "logps/chosen": -1.4700878858566284, "logps/rejected": -2.504058837890625, "loss": 1.5102, "nll_loss": 1.4835454225540161, "rewards/accuracies": 1.0, "rewards/chosen": -0.14700879156589508, "rewards/margins": 0.10339709371328354, "rewards/rejected": -0.250405877828598, "step": 739 }, { "epoch": 1.16548463356974, "grad_norm": 0.23696185648441315, "learning_rate": 1.7778269499023908e-06, "log_odds_chosen": 1.337630271911621, "log_odds_ratio": -0.24248188734054565, "logits/chosen": -0.5418239831924438, "logits/rejected": -1.4796379804611206, "logps/chosen": -1.507027506828308, "logps/rejected": -2.668131113052368, "loss": 1.5347, "nll_loss": 1.5104109048843384, "rewards/accuracies": 1.0, "rewards/chosen": -0.15070275962352753, "rewards/margins": 0.11611035466194153, "rewards/rejected": -0.26681309938430786, "step": 740 }, { "epoch": 1.1670606776989756, "grad_norm": 0.24330352246761322, "learning_rate": 1.7723647189696843e-06, "log_odds_chosen": 1.2329293489456177, "log_odds_ratio": -0.26677531003952026, "logits/chosen": -0.5035621523857117, "logits/rejected": -1.101192831993103, "logps/chosen": -1.393128514289856, "logps/rejected": -2.431424140930176, "loss": 1.4447, "nll_loss": 1.4179928302764893, "rewards/accuracies": 1.0, "rewards/chosen": -0.13931286334991455, "rewards/margins": 0.10382957756519318, "rewards/rejected": -0.24314244091510773, "step": 741 }, { "epoch": 1.1686367218282112, "grad_norm": 0.22313028573989868, "learning_rate": 1.7669042077127982e-06, "log_odds_chosen": 1.422914981842041, "log_odds_ratio": -0.23566770553588867, "logits/chosen": -0.5073877573013306, "logits/rejected": -1.4369897842407227, "logps/chosen": -1.465867280960083, "logps/rejected": -2.6980741024017334, "loss": 1.4886, "nll_loss": 1.4650605916976929, "rewards/accuracies": 1.0, "rewards/chosen": -0.14658673107624054, "rewards/margins": 0.12322070449590683, "rewards/rejected": -0.2698074281215668, "step": 742 }, { "epoch": 1.1702127659574468, "grad_norm": 0.20472803711891174, "learning_rate": 1.7614454573832886e-06, "log_odds_chosen": 1.4325921535491943, "log_odds_ratio": -0.21896687150001526, "logits/chosen": -0.6333247423171997, "logits/rejected": -1.4073162078857422, "logps/chosen": -1.3432472944259644, "logps/rejected": -2.550940990447998, "loss": 1.3907, "nll_loss": 1.3687745332717896, "rewards/accuracies": 1.0, "rewards/chosen": -0.13432474434375763, "rewards/margins": 0.12076936662197113, "rewards/rejected": -0.25509411096572876, "step": 743 }, { "epoch": 1.1717888100866825, "grad_norm": 0.2390693724155426, "learning_rate": 1.7559885092194058e-06, "log_odds_chosen": 1.0897796154022217, "log_odds_ratio": -0.2942361533641815, "logits/chosen": -0.5000759363174438, "logits/rejected": -1.45731782913208, "logps/chosen": -1.4935035705566406, "logps/rejected": -2.421919822692871, "loss": 1.5341, "nll_loss": 1.5046358108520508, "rewards/accuracies": 1.0, "rewards/chosen": -0.1493503749370575, "rewards/margins": 0.09284161031246185, "rewards/rejected": -0.24219197034835815, "step": 744 }, { "epoch": 1.173364854215918, "grad_norm": 0.23357681930065155, "learning_rate": 1.7505334044457871e-06, "log_odds_chosen": 1.3224742412567139, "log_odds_ratio": -0.239582359790802, "logits/chosen": -0.5304093360900879, "logits/rejected": -1.427680253982544, "logps/chosen": -1.3786001205444336, "logps/rejected": -2.4917540550231934, "loss": 1.4094, "nll_loss": 1.3854491710662842, "rewards/accuracies": 1.0, "rewards/chosen": -0.1378600001335144, "rewards/margins": 0.11131538450717926, "rewards/rejected": -0.24917539954185486, "step": 745 }, { "epoch": 1.1749408983451537, "grad_norm": 0.19663824141025543, "learning_rate": 1.7450801842731443e-06, "log_odds_chosen": 1.3291776180267334, "log_odds_ratio": -0.2390061318874359, "logits/chosen": -0.46576210856437683, "logits/rejected": -1.3635282516479492, "logps/chosen": -1.4895015954971313, "logps/rejected": -2.638615846633911, "loss": 1.5181, "nll_loss": 1.494153618812561, "rewards/accuracies": 1.0, "rewards/chosen": -0.14895015954971313, "rewards/margins": 0.11491142213344574, "rewards/rejected": -0.2638615667819977, "step": 746 }, { "epoch": 1.1765169424743893, "grad_norm": 0.21254904568195343, "learning_rate": 1.7396288898979497e-06, "log_odds_chosen": 1.4820916652679443, "log_odds_ratio": -0.21064743399620056, "logits/chosen": -0.5675363540649414, "logits/rejected": -1.4828413724899292, "logps/chosen": -1.3795688152313232, "logps/rejected": -2.646085739135742, "loss": 1.4106, "nll_loss": 1.3895608186721802, "rewards/accuracies": 1.0, "rewards/chosen": -0.1379568874835968, "rewards/margins": 0.12665165960788727, "rewards/rejected": -0.26460856199264526, "step": 747 }, { "epoch": 1.178092986603625, "grad_norm": 0.22074899077415466, "learning_rate": 1.7341795625021292e-06, "log_odds_chosen": 1.424844741821289, "log_odds_ratio": -0.22192633152008057, "logits/chosen": -0.5427080392837524, "logits/rejected": -1.6632615327835083, "logps/chosen": -1.4167243242263794, "logps/rejected": -2.6381478309631348, "loss": 1.4551, "nll_loss": 1.4328675270080566, "rewards/accuracies": 1.0, "rewards/chosen": -0.14167243242263794, "rewards/margins": 0.12214237451553345, "rewards/rejected": -0.2638148069381714, "step": 748 }, { "epoch": 1.1796690307328606, "grad_norm": 0.21415913105010986, "learning_rate": 1.7287322432527485e-06, "log_odds_chosen": 1.3086893558502197, "log_odds_ratio": -0.2472424954175949, "logits/chosen": -0.58354651927948, "logits/rejected": -1.566764235496521, "logps/chosen": -1.5024094581604004, "logps/rejected": -2.634434700012207, "loss": 1.5273, "nll_loss": 1.5025382041931152, "rewards/accuracies": 1.0, "rewards/chosen": -0.1502409428358078, "rewards/margins": 0.11320249736309052, "rewards/rejected": -0.2634434401988983, "step": 749 }, { "epoch": 1.1812450748620962, "grad_norm": 0.22282186150550842, "learning_rate": 1.7232869733017038e-06, "log_odds_chosen": 1.291513442993164, "log_odds_ratio": -0.25042369961738586, "logits/chosen": -0.5659542083740234, "logits/rejected": -1.5032849311828613, "logps/chosen": -1.4535952806472778, "logps/rejected": -2.5596208572387695, "loss": 1.4805, "nll_loss": 1.455505132675171, "rewards/accuracies": 1.0, "rewards/chosen": -0.14535953104496002, "rewards/margins": 0.11060254275798798, "rewards/rejected": -0.255962073802948, "step": 750 }, { "epoch": 1.1828211189913318, "grad_norm": 0.22748686373233795, "learning_rate": 1.7178437937854065e-06, "log_odds_chosen": 1.658095359802246, "log_odds_ratio": -0.17879220843315125, "logits/chosen": -0.6560875773429871, "logits/rejected": -1.786608338356018, "logps/chosen": -1.492921233177185, "logps/rejected": -2.9493677616119385, "loss": 1.5139, "nll_loss": 1.4960277080535889, "rewards/accuracies": 1.0, "rewards/chosen": -0.14929211139678955, "rewards/margins": 0.1456446647644043, "rewards/rejected": -0.29493677616119385, "step": 751 }, { "epoch": 1.1843971631205674, "grad_norm": 0.25552818179130554, "learning_rate": 1.7124027458244794e-06, "log_odds_chosen": 1.3929933309555054, "log_odds_ratio": -0.2336614727973938, "logits/chosen": -0.5347802042961121, "logits/rejected": -1.3094077110290527, "logps/chosen": -1.4163881540298462, "logps/rejected": -2.6132845878601074, "loss": 1.4573, "nll_loss": 1.4338853359222412, "rewards/accuracies": 1.0, "rewards/chosen": -0.1416388303041458, "rewards/margins": 0.11968961358070374, "rewards/rejected": -0.26132845878601074, "step": 752 }, { "epoch": 1.185973207249803, "grad_norm": 0.20070816576480865, "learning_rate": 1.7069638705234407e-06, "log_odds_chosen": 1.0706877708435059, "log_odds_ratio": -0.3014824092388153, "logits/chosen": -0.43309080600738525, "logits/rejected": -1.2945914268493652, "logps/chosen": -1.4858583211898804, "logps/rejected": -2.396928071975708, "loss": 1.5283, "nll_loss": 1.4981718063354492, "rewards/accuracies": 1.0, "rewards/chosen": -0.14858584105968475, "rewards/margins": 0.09110698103904724, "rewards/rejected": -0.2396928071975708, "step": 753 }, { "epoch": 1.1875492513790387, "grad_norm": 0.21234571933746338, "learning_rate": 1.7015272089703954e-06, "log_odds_chosen": 1.2921807765960693, "log_odds_ratio": -0.2518993616104126, "logits/chosen": -0.5934471487998962, "logits/rejected": -1.5490331649780273, "logps/chosen": -1.4556087255477905, "logps/rejected": -2.555610179901123, "loss": 1.4785, "nll_loss": 1.453262209892273, "rewards/accuracies": 1.0, "rewards/chosen": -0.14556089043617249, "rewards/margins": 0.1100001335144043, "rewards/rejected": -0.2555610239505768, "step": 754 }, { "epoch": 1.1891252955082743, "grad_norm": 0.21953803300857544, "learning_rate": 1.6960928022367261e-06, "log_odds_chosen": 1.2742233276367188, "log_odds_ratio": -0.26117175817489624, "logits/chosen": -0.5909973978996277, "logits/rejected": -1.424904704093933, "logps/chosen": -1.4837849140167236, "logps/rejected": -2.586916446685791, "loss": 1.5123, "nll_loss": 1.4862273931503296, "rewards/accuracies": 1.0, "rewards/chosen": -0.14837850630283356, "rewards/margins": 0.11031313240528107, "rewards/rejected": -0.2586916387081146, "step": 755 }, { "epoch": 1.19070133963751, "grad_norm": 0.2024756819009781, "learning_rate": 1.6906606913767776e-06, "log_odds_chosen": 1.249240517616272, "log_odds_ratio": -0.26217758655548096, "logits/chosen": -0.5883452892303467, "logits/rejected": -1.4659888744354248, "logps/chosen": -1.3932411670684814, "logps/rejected": -2.4510996341705322, "loss": 1.4315, "nll_loss": 1.4052568674087524, "rewards/accuracies": 1.0, "rewards/chosen": -0.1393241137266159, "rewards/margins": 0.10578584671020508, "rewards/rejected": -0.24510996043682098, "step": 756 }, { "epoch": 1.1922773837667455, "grad_norm": 0.22757992148399353, "learning_rate": 1.6852309174275543e-06, "log_odds_chosen": 1.2733908891677856, "log_odds_ratio": -0.2527121305465698, "logits/chosen": -0.6954429745674133, "logits/rejected": -1.4367382526397705, "logps/chosen": -1.3156039714813232, "logps/rejected": -2.373481512069702, "loss": 1.3517, "nll_loss": 1.3264782428741455, "rewards/accuracies": 1.0, "rewards/chosen": -0.13156040012836456, "rewards/margins": 0.10578775405883789, "rewards/rejected": -0.23734815418720245, "step": 757 }, { "epoch": 1.1938534278959811, "grad_norm": 0.2215537428855896, "learning_rate": 1.6798035214084047e-06, "log_odds_chosen": 1.345110297203064, "log_odds_ratio": -0.23759454488754272, "logits/chosen": -0.6308282017707825, "logits/rejected": -1.6211813688278198, "logps/chosen": -1.4159858226776123, "logps/rejected": -2.564734697341919, "loss": 1.4532, "nll_loss": 1.4294607639312744, "rewards/accuracies": 1.0, "rewards/chosen": -0.14159858226776123, "rewards/margins": 0.11487489938735962, "rewards/rejected": -0.25647348165512085, "step": 758 }, { "epoch": 1.1954294720252168, "grad_norm": 0.22554029524326324, "learning_rate": 1.674378544320714e-06, "log_odds_chosen": 1.590653419494629, "log_odds_ratio": -0.19607162475585938, "logits/chosen": -0.5363159775733948, "logits/rejected": -1.542907476425171, "logps/chosen": -1.3737821578979492, "logps/rejected": -2.734147787094116, "loss": 1.4116, "nll_loss": 1.3920382261276245, "rewards/accuracies": 1.0, "rewards/chosen": -0.13737823069095612, "rewards/margins": 0.13603655993938446, "rewards/rejected": -0.2734147906303406, "step": 759 }, { "epoch": 1.1970055161544524, "grad_norm": 0.20527444779872894, "learning_rate": 1.6689560271475922e-06, "log_odds_chosen": 1.46551513671875, "log_odds_ratio": -0.21081432700157166, "logits/chosen": -0.5736262798309326, "logits/rejected": -1.462472677230835, "logps/chosen": -1.3918555974960327, "logps/rejected": -2.643242359161377, "loss": 1.4212, "nll_loss": 1.4000998735427856, "rewards/accuracies": 1.0, "rewards/chosen": -0.1391855627298355, "rewards/margins": 0.12513871490955353, "rewards/rejected": -0.26432427763938904, "step": 760 }, { "epoch": 1.198581560283688, "grad_norm": 0.22382031381130219, "learning_rate": 1.6635360108535665e-06, "log_odds_chosen": 1.4534813165664673, "log_odds_ratio": -0.2228448987007141, "logits/chosen": -0.5872243046760559, "logits/rejected": -1.4932541847229004, "logps/chosen": -1.34547758102417, "logps/rejected": -2.580556869506836, "loss": 1.3918, "nll_loss": 1.3695387840270996, "rewards/accuracies": 1.0, "rewards/chosen": -0.13454777002334595, "rewards/margins": 0.12350792437791824, "rewards/rejected": -0.2580556869506836, "step": 761 }, { "epoch": 1.2001576044129236, "grad_norm": 0.2164224088191986, "learning_rate": 1.6581185363842717e-06, "log_odds_chosen": 1.4758771657943726, "log_odds_ratio": -0.22314564883708954, "logits/chosen": -0.648838996887207, "logits/rejected": -1.5060044527053833, "logps/chosen": -1.391494870185852, "logps/rejected": -2.658966541290283, "loss": 1.4153, "nll_loss": 1.393001914024353, "rewards/accuracies": 1.0, "rewards/chosen": -0.1391494870185852, "rewards/margins": 0.12674716114997864, "rewards/rejected": -0.26589664816856384, "step": 762 }, { "epoch": 1.2017336485421592, "grad_norm": 0.2243705540895462, "learning_rate": 1.6527036446661393e-06, "log_odds_chosen": 1.4079028367996216, "log_odds_ratio": -0.23095834255218506, "logits/chosen": -0.5770010948181152, "logits/rejected": -1.523849368095398, "logps/chosen": -1.4122446775436401, "logps/rejected": -2.6187679767608643, "loss": 1.4419, "nll_loss": 1.4188222885131836, "rewards/accuracies": 1.0, "rewards/chosen": -0.1412244737148285, "rewards/margins": 0.12065234035253525, "rewards/rejected": -0.26187682151794434, "step": 763 }, { "epoch": 1.2033096926713949, "grad_norm": 0.21240510046482086, "learning_rate": 1.6472913766060901e-06, "log_odds_chosen": 1.5549542903900146, "log_odds_ratio": -0.1982915848493576, "logits/chosen": -0.7087709903717041, "logits/rejected": -1.4476606845855713, "logps/chosen": -1.3446422815322876, "logps/rejected": -2.662215232849121, "loss": 1.3825, "nll_loss": 1.3626868724822998, "rewards/accuracies": 1.0, "rewards/chosen": -0.13446423411369324, "rewards/margins": 0.13175728917121887, "rewards/rejected": -0.2662215232849121, "step": 764 }, { "epoch": 1.2048857368006305, "grad_norm": 0.206766277551651, "learning_rate": 1.6418817730912252e-06, "log_odds_chosen": 1.28862726688385, "log_odds_ratio": -0.26395970582962036, "logits/chosen": -0.6465386152267456, "logits/rejected": -1.2131530046463013, "logps/chosen": -1.4487866163253784, "logps/rejected": -2.554746627807617, "loss": 1.4801, "nll_loss": 1.4536832571029663, "rewards/accuracies": 1.0, "rewards/chosen": -0.14487865567207336, "rewards/margins": 0.11059600114822388, "rewards/rejected": -0.25547468662261963, "step": 765 }, { "epoch": 1.2064617809298661, "grad_norm": 0.21988733112812042, "learning_rate": 1.6364748749885133e-06, "log_odds_chosen": 1.0742143392562866, "log_odds_ratio": -0.2990318238735199, "logits/chosen": -0.623568594455719, "logits/rejected": -1.2595133781433105, "logps/chosen": -1.4102067947387695, "logps/rejected": -2.311211347579956, "loss": 1.4432, "nll_loss": 1.4133214950561523, "rewards/accuracies": 1.0, "rewards/chosen": -0.14102068543434143, "rewards/margins": 0.09010044485330582, "rewards/rejected": -0.23112112283706665, "step": 766 }, { "epoch": 1.2080378250591017, "grad_norm": 0.23654837906360626, "learning_rate": 1.6310707231444883e-06, "log_odds_chosen": 1.3700404167175293, "log_odds_ratio": -0.2373034507036209, "logits/chosen": -0.6271353960037231, "logits/rejected": -1.3166310787200928, "logps/chosen": -1.4159679412841797, "logps/rejected": -2.5892958641052246, "loss": 1.4419, "nll_loss": 1.418202519416809, "rewards/accuracies": 1.0, "rewards/chosen": -0.14159680902957916, "rewards/margins": 0.11733277887105942, "rewards/rejected": -0.258929580450058, "step": 767 }, { "epoch": 1.2096138691883374, "grad_norm": 0.23533912003040314, "learning_rate": 1.625669358384936e-06, "log_odds_chosen": 1.1100581884384155, "log_odds_ratio": -0.3063144087791443, "logits/chosen": -0.5259105563163757, "logits/rejected": -1.1709703207015991, "logps/chosen": -1.4853869676589966, "logps/rejected": -2.4346115589141846, "loss": 1.5203, "nll_loss": 1.489625334739685, "rewards/accuracies": 1.0, "rewards/chosen": -0.14853869378566742, "rewards/margins": 0.09492245316505432, "rewards/rejected": -0.24346116185188293, "step": 768 }, { "epoch": 1.211189913317573, "grad_norm": 0.22458776831626892, "learning_rate": 1.620270821514587e-06, "log_odds_chosen": 1.5604948997497559, "log_odds_ratio": -0.21382805705070496, "logits/chosen": -0.49775218963623047, "logits/rejected": -1.4751949310302734, "logps/chosen": -1.500352382659912, "logps/rejected": -2.876207113265991, "loss": 1.5172, "nll_loss": 1.495776653289795, "rewards/accuracies": 1.0, "rewards/chosen": -0.15003523230552673, "rewards/margins": 0.13758549094200134, "rewards/rejected": -0.2876207232475281, "step": 769 }, { "epoch": 1.2127659574468086, "grad_norm": 0.24959568679332733, "learning_rate": 1.6148751533168104e-06, "log_odds_chosen": 1.7171392440795898, "log_odds_ratio": -0.18354278802871704, "logits/chosen": -0.6221411228179932, "logits/rejected": -1.692143440246582, "logps/chosen": -1.4705101251602173, "logps/rejected": -2.9811625480651855, "loss": 1.4828, "nll_loss": 1.4644263982772827, "rewards/accuracies": 1.0, "rewards/chosen": -0.14705102145671844, "rewards/margins": 0.15106526017189026, "rewards/rejected": -0.2981162667274475, "step": 770 }, { "epoch": 1.2143420015760442, "grad_norm": 0.221751868724823, "learning_rate": 1.6094823945532996e-06, "log_odds_chosen": 1.3184762001037598, "log_odds_ratio": -0.256094366312027, "logits/chosen": -0.6579760909080505, "logits/rejected": -1.5384408235549927, "logps/chosen": -1.424337387084961, "logps/rejected": -2.5556325912475586, "loss": 1.4483, "nll_loss": 1.4226460456848145, "rewards/accuracies": 1.0, "rewards/chosen": -0.1424337476491928, "rewards/margins": 0.11312951892614365, "rewards/rejected": -0.25556325912475586, "step": 771 }, { "epoch": 1.2159180457052798, "grad_norm": 0.24769911170005798, "learning_rate": 1.6040925859637728e-06, "log_odds_chosen": 1.3353594541549683, "log_odds_ratio": -0.24589507281780243, "logits/chosen": -0.6162819266319275, "logits/rejected": -1.3712636232376099, "logps/chosen": -1.5013405084609985, "logps/rejected": -2.6573030948638916, "loss": 1.5274, "nll_loss": 1.5027711391448975, "rewards/accuracies": 1.0, "rewards/chosen": -0.15013404190540314, "rewards/margins": 0.11559627950191498, "rewards/rejected": -0.2657303214073181, "step": 772 }, { "epoch": 1.2174940898345155, "grad_norm": 0.25154909491539, "learning_rate": 1.5987057682656596e-06, "log_odds_chosen": 1.273819088935852, "log_odds_ratio": -0.2507978081703186, "logits/chosen": -0.5333799123764038, "logits/rejected": -1.2989473342895508, "logps/chosen": -1.544511079788208, "logps/rejected": -2.6521706581115723, "loss": 1.5747, "nll_loss": 1.5496604442596436, "rewards/accuracies": 1.0, "rewards/chosen": -0.15445111691951752, "rewards/margins": 0.1107659637928009, "rewards/rejected": -0.2652170658111572, "step": 773 }, { "epoch": 1.219070133963751, "grad_norm": 0.24077482521533966, "learning_rate": 1.593321982153795e-06, "log_odds_chosen": 1.2832716703414917, "log_odds_ratio": -0.2492670714855194, "logits/chosen": -0.6236528158187866, "logits/rejected": -1.5098700523376465, "logps/chosen": -1.5666507482528687, "logps/rejected": -2.6859042644500732, "loss": 1.581, "nll_loss": 1.5561209917068481, "rewards/accuracies": 1.0, "rewards/chosen": -0.15666507184505463, "rewards/margins": 0.11192534118890762, "rewards/rejected": -0.26859042048454285, "step": 774 }, { "epoch": 1.2206461780929867, "grad_norm": 0.24258244037628174, "learning_rate": 1.5879412683001106e-06, "log_odds_chosen": 1.2435587644577026, "log_odds_ratio": -0.2599763870239258, "logits/chosen": -0.6440672874450684, "logits/rejected": -1.2146906852722168, "logps/chosen": -1.3805272579193115, "logps/rejected": -2.4296422004699707, "loss": 1.4255, "nll_loss": 1.3995163440704346, "rewards/accuracies": 1.0, "rewards/chosen": -0.13805273175239563, "rewards/margins": 0.10491149127483368, "rewards/rejected": -0.24296420812606812, "step": 775 }, { "epoch": 1.2222222222222223, "grad_norm": 0.2312171459197998, "learning_rate": 1.5825636673533298e-06, "log_odds_chosen": 1.7001415491104126, "log_odds_ratio": -0.17940793931484222, "logits/chosen": -0.6745871305465698, "logits/rejected": -1.7740318775177002, "logps/chosen": -1.4513566493988037, "logps/rejected": -2.9394679069519043, "loss": 1.457, "nll_loss": 1.4390698671340942, "rewards/accuracies": 1.0, "rewards/chosen": -0.14513565599918365, "rewards/margins": 0.14881114661693573, "rewards/rejected": -0.2939468026161194, "step": 776 }, { "epoch": 1.2237982663514577, "grad_norm": 0.22569940984249115, "learning_rate": 1.5771892199386598e-06, "log_odds_chosen": 1.2924063205718994, "log_odds_ratio": -0.25367188453674316, "logits/chosen": -0.5892406105995178, "logits/rejected": -1.359096884727478, "logps/chosen": -1.4653481245040894, "logps/rejected": -2.5802369117736816, "loss": 1.5082, "nll_loss": 1.4827901124954224, "rewards/accuracies": 1.0, "rewards/chosen": -0.1465347856283188, "rewards/margins": 0.11148889362812042, "rewards/rejected": -0.2580237090587616, "step": 777 }, { "epoch": 1.2253743104806936, "grad_norm": 0.20317105948925018, "learning_rate": 1.5718179666574834e-06, "log_odds_chosen": 1.671058177947998, "log_odds_ratio": -0.18040546774864197, "logits/chosen": -0.5229330658912659, "logits/rejected": -1.6276131868362427, "logps/chosen": -1.4343481063842773, "logps/rejected": -2.8913261890411377, "loss": 1.4658, "nll_loss": 1.4477105140686035, "rewards/accuracies": 1.0, "rewards/chosen": -0.1434348076581955, "rewards/margins": 0.14569780230522156, "rewards/rejected": -0.28913259506225586, "step": 778 }, { "epoch": 1.226950354609929, "grad_norm": 0.23067185282707214, "learning_rate": 1.5664499480870539e-06, "log_odds_chosen": 1.5963189601898193, "log_odds_ratio": -0.19177693128585815, "logits/chosen": -0.5456362366676331, "logits/rejected": -1.5545574426651, "logps/chosen": -1.4592418670654297, "logps/rejected": -2.8497893810272217, "loss": 1.4949, "nll_loss": 1.475722312927246, "rewards/accuracies": 1.0, "rewards/chosen": -0.14592419564723969, "rewards/margins": 0.13905476033687592, "rewards/rejected": -0.2849789559841156, "step": 779 }, { "epoch": 1.2285263987391648, "grad_norm": 0.2926139831542969, "learning_rate": 1.5610852047801875e-06, "log_odds_chosen": 1.197067141532898, "log_odds_ratio": -0.2708202302455902, "logits/chosen": -0.6311848759651184, "logits/rejected": -1.3294005393981934, "logps/chosen": -1.3937289714813232, "logps/rejected": -2.4043586254119873, "loss": 1.4177, "nll_loss": 1.39057195186615, "rewards/accuracies": 1.0, "rewards/chosen": -0.13937290012836456, "rewards/margins": 0.10106298327445984, "rewards/rejected": -0.2404358983039856, "step": 780 }, { "epoch": 1.2301024428684002, "grad_norm": 0.2347894310951233, "learning_rate": 1.5557237772649567e-06, "log_odds_chosen": 1.203992247581482, "log_odds_ratio": -0.2692109942436218, "logits/chosen": -0.5334935188293457, "logits/rejected": -1.4999908208847046, "logps/chosen": -1.4735887050628662, "logps/rejected": -2.5025579929351807, "loss": 1.5064, "nll_loss": 1.479496955871582, "rewards/accuracies": 1.0, "rewards/chosen": -0.14735886454582214, "rewards/margins": 0.10289692133665085, "rewards/rejected": -0.2502557933330536, "step": 781 }, { "epoch": 1.231678486997636, "grad_norm": 0.20368658006191254, "learning_rate": 1.5503657060443866e-06, "log_odds_chosen": 1.3019431829452515, "log_odds_ratio": -0.2502024471759796, "logits/chosen": -0.5618449449539185, "logits/rejected": -1.432936191558838, "logps/chosen": -1.395858883857727, "logps/rejected": -2.497097969055176, "loss": 1.4394, "nll_loss": 1.4143925905227661, "rewards/accuracies": 1.0, "rewards/chosen": -0.13958589732646942, "rewards/margins": 0.1101238951086998, "rewards/rejected": -0.24970978498458862, "step": 782 }, { "epoch": 1.2332545311268714, "grad_norm": 0.2099921554327011, "learning_rate": 1.5450110315961457e-06, "log_odds_chosen": 1.5822702646255493, "log_odds_ratio": -0.20346100628376007, "logits/chosen": -0.6009377241134644, "logits/rejected": -1.6171350479125977, "logps/chosen": -1.4313298463821411, "logps/rejected": -2.80722975730896, "loss": 1.4718, "nll_loss": 1.451432228088379, "rewards/accuracies": 1.0, "rewards/chosen": -0.1431329846382141, "rewards/margins": 0.13759002089500427, "rewards/rejected": -0.280722975730896, "step": 783 }, { "epoch": 1.2348305752561073, "grad_norm": 0.22688640654087067, "learning_rate": 1.539659794372243e-06, "log_odds_chosen": 1.5025097131729126, "log_odds_ratio": -0.2302016317844391, "logits/chosen": -0.6282855272293091, "logits/rejected": -1.626317024230957, "logps/chosen": -1.4437496662139893, "logps/rejected": -2.7493932247161865, "loss": 1.4742, "nll_loss": 1.4511845111846924, "rewards/accuracies": 1.0, "rewards/chosen": -0.14437496662139893, "rewards/margins": 0.1305643618106842, "rewards/rejected": -0.27493932843208313, "step": 784 }, { "epoch": 1.2364066193853427, "grad_norm": 0.21591053903102875, "learning_rate": 1.5343120347987172e-06, "log_odds_chosen": 1.5057079792022705, "log_odds_ratio": -0.20444804430007935, "logits/chosen": -0.588503360748291, "logits/rejected": -1.5970463752746582, "logps/chosen": -1.4286227226257324, "logps/rejected": -2.728306293487549, "loss": 1.4729, "nll_loss": 1.4524708986282349, "rewards/accuracies": 1.0, "rewards/chosen": -0.14286227524280548, "rewards/margins": 0.12996836006641388, "rewards/rejected": -0.27283063530921936, "step": 785 }, { "epoch": 1.2379826635145785, "grad_norm": 0.2277948409318924, "learning_rate": 1.5289677932753398e-06, "log_odds_chosen": 1.2474923133850098, "log_odds_ratio": -0.2646195888519287, "logits/chosen": -0.607757031917572, "logits/rejected": -1.372312307357788, "logps/chosen": -1.463487982749939, "logps/rejected": -2.5320003032684326, "loss": 1.4958, "nll_loss": 1.4693405628204346, "rewards/accuracies": 1.0, "rewards/chosen": -0.14634880423545837, "rewards/margins": 0.10685122013092041, "rewards/rejected": -0.2532000243663788, "step": 786 }, { "epoch": 1.239558707643814, "grad_norm": 0.21466998755931854, "learning_rate": 1.5236271101753017e-06, "log_odds_chosen": 1.440157175064087, "log_odds_ratio": -0.21480616927146912, "logits/chosen": -0.6203504800796509, "logits/rejected": -1.5559945106506348, "logps/chosen": -1.4606151580810547, "logps/rejected": -2.70353364944458, "loss": 1.4769, "nll_loss": 1.4553958177566528, "rewards/accuracies": 1.0, "rewards/chosen": -0.14606152474880219, "rewards/margins": 0.12429183721542358, "rewards/rejected": -0.27035337686538696, "step": 787 }, { "epoch": 1.2411347517730495, "grad_norm": 0.21912704408168793, "learning_rate": 1.5182900258449135e-06, "log_odds_chosen": 1.60334312915802, "log_odds_ratio": -0.1867779791355133, "logits/chosen": -0.5966047048568726, "logits/rejected": -1.5762933492660522, "logps/chosen": -1.44041907787323, "logps/rejected": -2.8344244956970215, "loss": 1.4694, "nll_loss": 1.4507081508636475, "rewards/accuracies": 1.0, "rewards/chosen": -0.14404189586639404, "rewards/margins": 0.13940054178237915, "rewards/rejected": -0.2834424674510956, "step": 788 }, { "epoch": 1.2427107959022852, "grad_norm": 0.22293169796466827, "learning_rate": 1.5129565806032986e-06, "log_odds_chosen": 1.5123443603515625, "log_odds_ratio": -0.21256795525550842, "logits/chosen": -0.588271975517273, "logits/rejected": -1.4032602310180664, "logps/chosen": -1.330634593963623, "logps/rejected": -2.614960193634033, "loss": 1.3776, "nll_loss": 1.3562999963760376, "rewards/accuracies": 1.0, "rewards/chosen": -0.1330634504556656, "rewards/margins": 0.12843254208564758, "rewards/rejected": -0.261495977640152, "step": 789 }, { "epoch": 1.2442868400315208, "grad_norm": 0.2574654221534729, "learning_rate": 1.507626814742087e-06, "log_odds_chosen": 1.3430910110473633, "log_odds_ratio": -0.23965327441692352, "logits/chosen": -0.634746253490448, "logits/rejected": -1.3998106718063354, "logps/chosen": -1.5132931470870972, "logps/rejected": -2.677356004714966, "loss": 1.5481, "nll_loss": 1.52411687374115, "rewards/accuracies": 1.0, "rewards/chosen": -0.15132930874824524, "rewards/margins": 0.11640629172325134, "rewards/rejected": -0.2677356004714966, "step": 790 }, { "epoch": 1.2458628841607564, "grad_norm": 0.21352773904800415, "learning_rate": 1.502300768525115e-06, "log_odds_chosen": 1.5173344612121582, "log_odds_ratio": -0.2205386757850647, "logits/chosen": -0.5914537310600281, "logits/rejected": -1.5284650325775146, "logps/chosen": -1.5250039100646973, "logps/rejected": -2.858093023300171, "loss": 1.5251, "nll_loss": 1.503089189529419, "rewards/accuracies": 1.0, "rewards/chosen": -0.15250039100646973, "rewards/margins": 0.13330891728401184, "rewards/rejected": -0.28580930829048157, "step": 791 }, { "epoch": 1.247438928289992, "grad_norm": 0.25724470615386963, "learning_rate": 1.4969784821881177e-06, "log_odds_chosen": 1.2892694473266602, "log_odds_ratio": -0.25421732664108276, "logits/chosen": -0.6022913455963135, "logits/rejected": -1.2409374713897705, "logps/chosen": -1.3959331512451172, "logps/rejected": -2.485353469848633, "loss": 1.448, "nll_loss": 1.4225430488586426, "rewards/accuracies": 1.0, "rewards/chosen": -0.13959333300590515, "rewards/margins": 0.10894200205802917, "rewards/rejected": -0.24853533506393433, "step": 792 }, { "epoch": 1.2490149724192277, "grad_norm": 0.2178412228822708, "learning_rate": 1.4916599959384262e-06, "log_odds_chosen": 1.2507131099700928, "log_odds_ratio": -0.2574303448200226, "logits/chosen": -0.6103950142860413, "logits/rejected": -1.4853626489639282, "logps/chosen": -1.475551962852478, "logps/rejected": -2.548299789428711, "loss": 1.4999, "nll_loss": 1.474159598350525, "rewards/accuracies": 1.0, "rewards/chosen": -0.14755521714687347, "rewards/margins": 0.10727477073669434, "rewards/rejected": -0.254830002784729, "step": 793 }, { "epoch": 1.2505910165484633, "grad_norm": 0.30536407232284546, "learning_rate": 1.4863453499546643e-06, "log_odds_chosen": 1.4956989288330078, "log_odds_ratio": -0.21353504061698914, "logits/chosen": -0.5590574741363525, "logits/rejected": -1.4953241348266602, "logps/chosen": -1.406250238418579, "logps/rejected": -2.6932482719421387, "loss": 1.4367, "nll_loss": 1.4153844118118286, "rewards/accuracies": 1.0, "rewards/chosen": -0.1406250298023224, "rewards/margins": 0.12869982421398163, "rewards/rejected": -0.2693248391151428, "step": 794 }, { "epoch": 1.252167060677699, "grad_norm": 0.26826876401901245, "learning_rate": 1.4810345843864427e-06, "log_odds_chosen": 1.3864645957946777, "log_odds_ratio": -0.2319655865430832, "logits/chosen": -0.6440175175666809, "logits/rejected": -1.3618885278701782, "logps/chosen": -1.4762009382247925, "logps/rejected": -2.6720967292785645, "loss": 1.5074, "nll_loss": 1.4842313528060913, "rewards/accuracies": 1.0, "rewards/chosen": -0.1476200968027115, "rewards/margins": 0.11958958208560944, "rewards/rejected": -0.2672096788883209, "step": 795 }, { "epoch": 1.2537431048069345, "grad_norm": 0.30943307280540466, "learning_rate": 1.4757277393540598e-06, "log_odds_chosen": 1.3894554376602173, "log_odds_ratio": -0.23852145671844482, "logits/chosen": -0.5769093632698059, "logits/rejected": -1.2110780477523804, "logps/chosen": -1.4527041912078857, "logps/rejected": -2.6519722938537598, "loss": 1.493, "nll_loss": 1.4691100120544434, "rewards/accuracies": 1.0, "rewards/chosen": -0.1452704221010208, "rewards/margins": 0.1199268251657486, "rewards/rejected": -0.2651972472667694, "step": 796 }, { "epoch": 1.2553191489361701, "grad_norm": 0.21650095283985138, "learning_rate": 1.4704248549481946e-06, "log_odds_chosen": 1.3199766874313354, "log_odds_ratio": -0.2460341453552246, "logits/chosen": -0.6074656248092651, "logits/rejected": -1.613268256187439, "logps/chosen": -1.5171059370040894, "logps/rejected": -2.6643447875976562, "loss": 1.5401, "nll_loss": 1.515483021736145, "rewards/accuracies": 1.0, "rewards/chosen": -0.1517105996608734, "rewards/margins": 0.11472390592098236, "rewards/rejected": -0.26643452048301697, "step": 797 }, { "epoch": 1.2568951930654058, "grad_norm": 0.21090464293956757, "learning_rate": 1.4651259712296063e-06, "log_odds_chosen": 1.5943585634231567, "log_odds_ratio": -0.19176922738552094, "logits/chosen": -0.517490029335022, "logits/rejected": -1.4596930742263794, "logps/chosen": -1.4921587705612183, "logps/rejected": -2.8879101276397705, "loss": 1.5124, "nll_loss": 1.493196725845337, "rewards/accuracies": 1.0, "rewards/chosen": -0.14921587705612183, "rewards/margins": 0.13957512378692627, "rewards/rejected": -0.2887910008430481, "step": 798 }, { "epoch": 1.2584712371946414, "grad_norm": 0.24253034591674805, "learning_rate": 1.45983112822883e-06, "log_odds_chosen": 1.3575317859649658, "log_odds_ratio": -0.23305505514144897, "logits/chosen": -0.49398794770240784, "logits/rejected": -1.3266679048538208, "logps/chosen": -1.4118586778640747, "logps/rejected": -2.570868730545044, "loss": 1.4468, "nll_loss": 1.4234497547149658, "rewards/accuracies": 1.0, "rewards/chosen": -0.14118586480617523, "rewards/margins": 0.11590103805065155, "rewards/rejected": -0.2570869028568268, "step": 799 }, { "epoch": 1.260047281323877, "grad_norm": 0.22545106709003448, "learning_rate": 1.4545403659458756e-06, "log_odds_chosen": 1.4175035953521729, "log_odds_ratio": -0.2259032428264618, "logits/chosen": -0.6052595973014832, "logits/rejected": -1.311805248260498, "logps/chosen": -1.4253277778625488, "logps/rejected": -2.6440634727478027, "loss": 1.4589, "nll_loss": 1.4363291263580322, "rewards/accuracies": 1.0, "rewards/chosen": -0.14253278076648712, "rewards/margins": 0.12187359482049942, "rewards/rejected": -0.26440635323524475, "step": 800 }, { "epoch": 1.2616233254531126, "grad_norm": 0.22660937905311584, "learning_rate": 1.4492537243499253e-06, "log_odds_chosen": 1.4647465944290161, "log_odds_ratio": -0.21327193081378937, "logits/chosen": -0.6022266149520874, "logits/rejected": -1.414176344871521, "logps/chosen": -1.4155055284500122, "logps/rejected": -2.6666698455810547, "loss": 1.4326, "nll_loss": 1.4112662076950073, "rewards/accuracies": 1.0, "rewards/chosen": -0.14155057072639465, "rewards/margins": 0.12511645257472992, "rewards/rejected": -0.2666670083999634, "step": 801 }, { "epoch": 1.2631993695823482, "grad_norm": 0.20875735580921173, "learning_rate": 1.443971243379031e-06, "log_odds_chosen": 1.481856346130371, "log_odds_ratio": -0.2232595980167389, "logits/chosen": -0.5322192907333374, "logits/rejected": -1.3940941095352173, "logps/chosen": -1.4499024152755737, "logps/rejected": -2.7311134338378906, "loss": 1.4739, "nll_loss": 1.451573133468628, "rewards/accuracies": 1.0, "rewards/chosen": -0.1449902355670929, "rewards/margins": 0.12812109291553497, "rewards/rejected": -0.27311137318611145, "step": 802 }, { "epoch": 1.2647754137115839, "grad_norm": 0.23457227647304535, "learning_rate": 1.4386929629398144e-06, "log_odds_chosen": 1.6659356355667114, "log_odds_ratio": -0.1785147786140442, "logits/chosen": -0.5916958451271057, "logits/rejected": -1.5709147453308105, "logps/chosen": -1.4835273027420044, "logps/rejected": -2.9457387924194336, "loss": 1.5032, "nll_loss": 1.4853252172470093, "rewards/accuracies": 1.0, "rewards/chosen": -0.1483527421951294, "rewards/margins": 0.14622116088867188, "rewards/rejected": -0.2945738732814789, "step": 803 }, { "epoch": 1.2663514578408195, "grad_norm": 0.22985726594924927, "learning_rate": 1.4334189229071614e-06, "log_odds_chosen": 1.3873188495635986, "log_odds_ratio": -0.23752443492412567, "logits/chosen": -0.5912019610404968, "logits/rejected": -1.3441188335418701, "logps/chosen": -1.381171703338623, "logps/rejected": -2.564377546310425, "loss": 1.411, "nll_loss": 1.3872419595718384, "rewards/accuracies": 1.0, "rewards/chosen": -0.13811716437339783, "rewards/margins": 0.11832059174776077, "rewards/rejected": -0.256437748670578, "step": 804 }, { "epoch": 1.267927501970055, "grad_norm": 0.24306786060333252, "learning_rate": 1.4281491631239263e-06, "log_odds_chosen": 1.1761853694915771, "log_odds_ratio": -0.27694106101989746, "logits/chosen": -0.6338315606117249, "logits/rejected": -1.3575412034988403, "logps/chosen": -1.5164538621902466, "logps/rejected": -2.5306150913238525, "loss": 1.532, "nll_loss": 1.504274606704712, "rewards/accuracies": 1.0, "rewards/chosen": -0.15164539217948914, "rewards/margins": 0.10141611844301224, "rewards/rejected": -0.25306153297424316, "step": 805 }, { "epoch": 1.2695035460992907, "grad_norm": 0.22970065474510193, "learning_rate": 1.4228837234006272e-06, "log_odds_chosen": 1.6077741384506226, "log_odds_ratio": -0.19113853573799133, "logits/chosen": -0.6385869979858398, "logits/rejected": -1.403347134590149, "logps/chosen": -1.3846330642700195, "logps/rejected": -2.773245334625244, "loss": 1.436, "nll_loss": 1.4168992042541504, "rewards/accuracies": 1.0, "rewards/chosen": -0.1384633183479309, "rewards/margins": 0.13886120915412903, "rewards/rejected": -0.27732449769973755, "step": 806 }, { "epoch": 1.2710795902285263, "grad_norm": 0.2602978050708771, "learning_rate": 1.4176226435151462e-06, "log_odds_chosen": 1.4350380897521973, "log_odds_ratio": -0.22818563878536224, "logits/chosen": -0.6012192964553833, "logits/rejected": -1.6245769262313843, "logps/chosen": -1.4842414855957031, "logps/rejected": -2.735145330429077, "loss": 1.4962, "nll_loss": 1.473372220993042, "rewards/accuracies": 1.0, "rewards/chosen": -0.1484241634607315, "rewards/margins": 0.12509039044380188, "rewards/rejected": -0.2735145390033722, "step": 807 }, { "epoch": 1.272655634357762, "grad_norm": 0.20345047116279602, "learning_rate": 1.4123659632124298e-06, "log_odds_chosen": 1.564432978630066, "log_odds_ratio": -0.20057973265647888, "logits/chosen": -0.6357970237731934, "logits/rejected": -1.5508131980895996, "logps/chosen": -1.4488463401794434, "logps/rejected": -2.8090689182281494, "loss": 1.4804, "nll_loss": 1.4603270292282104, "rewards/accuracies": 1.0, "rewards/chosen": -0.1448846310377121, "rewards/margins": 0.13602225482463837, "rewards/rejected": -0.28090691566467285, "step": 808 }, { "epoch": 1.2742316784869976, "grad_norm": 0.21275083720684052, "learning_rate": 1.4071137222041852e-06, "log_odds_chosen": 1.5876526832580566, "log_odds_ratio": -0.1935243159532547, "logits/chosen": -0.5202667713165283, "logits/rejected": -1.6951507329940796, "logps/chosen": -1.4972716569900513, "logps/rejected": -2.887930154800415, "loss": 1.5175, "nll_loss": 1.4981794357299805, "rewards/accuracies": 1.0, "rewards/chosen": -0.14972718060016632, "rewards/margins": 0.13906586170196533, "rewards/rejected": -0.28879302740097046, "step": 809 }, { "epoch": 1.2758077226162332, "grad_norm": 0.25385233759880066, "learning_rate": 1.4018659601685857e-06, "log_odds_chosen": 1.38628089427948, "log_odds_ratio": -0.23076358437538147, "logits/chosen": -0.5455945730209351, "logits/rejected": -1.5138773918151855, "logps/chosen": -1.4868183135986328, "logps/rejected": -2.6896021366119385, "loss": 1.5078, "nll_loss": 1.4847323894500732, "rewards/accuracies": 1.0, "rewards/chosen": -0.14868183434009552, "rewards/margins": 0.12027836591005325, "rewards/rejected": -0.26896020770072937, "step": 810 }, { "epoch": 1.2773837667454688, "grad_norm": 0.25446489453315735, "learning_rate": 1.3966227167499667e-06, "log_odds_chosen": 1.307693600654602, "log_odds_ratio": -0.24345320463180542, "logits/chosen": -0.6371269822120667, "logits/rejected": -1.3773181438446045, "logps/chosen": -1.527349829673767, "logps/rejected": -2.6613223552703857, "loss": 1.545, "nll_loss": 1.520634651184082, "rewards/accuracies": 1.0, "rewards/chosen": -0.15273499488830566, "rewards/margins": 0.1133972555398941, "rewards/rejected": -0.2661322355270386, "step": 811 }, { "epoch": 1.2789598108747045, "grad_norm": 0.2630695104598999, "learning_rate": 1.3913840315585277e-06, "log_odds_chosen": 1.3261871337890625, "log_odds_ratio": -0.2366020679473877, "logits/chosen": -0.5820093750953674, "logits/rejected": -1.5055376291275024, "logps/chosen": -1.4547984600067139, "logps/rejected": -2.5924019813537598, "loss": 1.4846, "nll_loss": 1.4609538316726685, "rewards/accuracies": 1.0, "rewards/chosen": -0.14547984302043915, "rewards/margins": 0.11376036703586578, "rewards/rejected": -0.25924021005630493, "step": 812 }, { "epoch": 1.28053585500394, "grad_norm": 0.22853800654411316, "learning_rate": 1.3861499441700337e-06, "log_odds_chosen": 1.6726137399673462, "log_odds_ratio": -0.18094965815544128, "logits/chosen": -0.72444087266922, "logits/rejected": -1.5712106227874756, "logps/chosen": -1.5373544692993164, "logps/rejected": -3.019139289855957, "loss": 1.5554, "nll_loss": 1.537285566329956, "rewards/accuracies": 1.0, "rewards/chosen": -0.1537354439496994, "rewards/margins": 0.14817848801612854, "rewards/rejected": -0.30191394686698914, "step": 813 }, { "epoch": 1.2821118991331757, "grad_norm": 0.25108060240745544, "learning_rate": 1.3809204941255144e-06, "log_odds_chosen": 1.6380887031555176, "log_odds_ratio": -0.18578845262527466, "logits/chosen": -0.6443689465522766, "logits/rejected": -1.5698541402816772, "logps/chosen": -1.41604745388031, "logps/rejected": -2.836592674255371, "loss": 1.4439, "nll_loss": 1.425299048423767, "rewards/accuracies": 1.0, "rewards/chosen": -0.1416047364473343, "rewards/margins": 0.1420545130968094, "rewards/rejected": -0.28365927934646606, "step": 814 }, { "epoch": 1.2836879432624113, "grad_norm": 0.23532946407794952, "learning_rate": 1.3756957209309667e-06, "log_odds_chosen": 1.7262568473815918, "log_odds_ratio": -0.17713306844234467, "logits/chosen": -0.6322847604751587, "logits/rejected": -1.7657585144042969, "logps/chosen": -1.4623332023620605, "logps/rejected": -2.976186752319336, "loss": 1.4912, "nll_loss": 1.4735324382781982, "rewards/accuracies": 1.0, "rewards/chosen": -0.14623333513736725, "rewards/margins": 0.1513853520154953, "rewards/rejected": -0.29761865735054016, "step": 815 }, { "epoch": 1.285263987391647, "grad_norm": 0.2441030591726303, "learning_rate": 1.3704756640570575e-06, "log_odds_chosen": 1.7832515239715576, "log_odds_ratio": -0.17854107916355133, "logits/chosen": -0.663288414478302, "logits/rejected": -1.5476438999176025, "logps/chosen": -1.42481529712677, "logps/rejected": -2.986271619796753, "loss": 1.4527, "nll_loss": 1.434854507446289, "rewards/accuracies": 1.0, "rewards/chosen": -0.14248153567314148, "rewards/margins": 0.1561456173658371, "rewards/rejected": -0.2986271381378174, "step": 816 }, { "epoch": 1.2868400315208826, "grad_norm": 0.3268474042415619, "learning_rate": 1.3652603629388224e-06, "log_odds_chosen": 1.5470060110092163, "log_odds_ratio": -0.21487179398536682, "logits/chosen": -0.596767008304596, "logits/rejected": -1.5159244537353516, "logps/chosen": -1.3974632024765015, "logps/rejected": -2.725958824157715, "loss": 1.4215, "nll_loss": 1.3999779224395752, "rewards/accuracies": 1.0, "rewards/chosen": -0.13974632322788239, "rewards/margins": 0.1328495293855667, "rewards/rejected": -0.2725958526134491, "step": 817 }, { "epoch": 1.2884160756501182, "grad_norm": 0.23389191925525665, "learning_rate": 1.3600498569753715e-06, "log_odds_chosen": 1.4132609367370605, "log_odds_ratio": -0.2203066498041153, "logits/chosen": -0.45069289207458496, "logits/rejected": -1.3142499923706055, "logps/chosen": -1.4453165531158447, "logps/rejected": -2.6591105461120605, "loss": 1.4794, "nll_loss": 1.4573687314987183, "rewards/accuracies": 1.0, "rewards/chosen": -0.14453165233135223, "rewards/margins": 0.12137939780950546, "rewards/rejected": -0.2659110426902771, "step": 818 }, { "epoch": 1.2899921197793538, "grad_norm": 0.39004889130592346, "learning_rate": 1.3548441855295872e-06, "log_odds_chosen": 1.4098448753356934, "log_odds_ratio": -0.22153350710868835, "logits/chosen": -0.5996136665344238, "logits/rejected": -1.6268856525421143, "logps/chosen": -1.523671269416809, "logps/rejected": -2.753201723098755, "loss": 1.5461, "nll_loss": 1.5239500999450684, "rewards/accuracies": 1.0, "rewards/chosen": -0.15236711502075195, "rewards/margins": 0.12295305728912354, "rewards/rejected": -0.2753202021121979, "step": 819 }, { "epoch": 1.2915681639085894, "grad_norm": 0.23572242259979248, "learning_rate": 1.3496433879278315e-06, "log_odds_chosen": 1.4967519044876099, "log_odds_ratio": -0.22356414794921875, "logits/chosen": -0.5787345767021179, "logits/rejected": -1.5085010528564453, "logps/chosen": -1.5622023344039917, "logps/rejected": -2.886157989501953, "loss": 1.5728, "nll_loss": 1.5504934787750244, "rewards/accuracies": 1.0, "rewards/chosen": -0.15622025728225708, "rewards/margins": 0.13239558041095734, "rewards/rejected": -0.2886158227920532, "step": 820 }, { "epoch": 1.293144208037825, "grad_norm": 0.5434833765029907, "learning_rate": 1.3444475034596463e-06, "log_odds_chosen": 1.7494049072265625, "log_odds_ratio": -0.1667623519897461, "logits/chosen": -0.6446298360824585, "logits/rejected": -1.5591685771942139, "logps/chosen": -1.4353530406951904, "logps/rejected": -2.967869758605957, "loss": 1.4562, "nll_loss": 1.439555287361145, "rewards/accuracies": 1.0, "rewards/chosen": -0.1435353010892868, "rewards/margins": 0.15325166285037994, "rewards/rejected": -0.29678699374198914, "step": 821 }, { "epoch": 1.2947202521670607, "grad_norm": 0.24766425788402557, "learning_rate": 1.3392565713774575e-06, "log_odds_chosen": 1.8081917762756348, "log_odds_ratio": -0.15730388462543488, "logits/chosen": -0.6573777198791504, "logits/rejected": -1.6533654928207397, "logps/chosen": -1.4503732919692993, "logps/rejected": -3.0400443077087402, "loss": 1.4829, "nll_loss": 1.4671614170074463, "rewards/accuracies": 1.0, "rewards/chosen": -0.14503732323646545, "rewards/margins": 0.15896710753440857, "rewards/rejected": -0.3040044605731964, "step": 822 }, { "epoch": 1.2962962962962963, "grad_norm": 0.275490939617157, "learning_rate": 1.3340706308962763e-06, "log_odds_chosen": 1.485947608947754, "log_odds_ratio": -0.21844175457954407, "logits/chosen": -0.6285545825958252, "logits/rejected": -1.2534183263778687, "logps/chosen": -1.4148553609848022, "logps/rejected": -2.692004442214966, "loss": 1.4459, "nll_loss": 1.4240984916687012, "rewards/accuracies": 1.0, "rewards/chosen": -0.1414855271577835, "rewards/margins": 0.12771493196487427, "rewards/rejected": -0.2692004442214966, "step": 823 }, { "epoch": 1.297872340425532, "grad_norm": 0.3741922378540039, "learning_rate": 1.3288897211934066e-06, "log_odds_chosen": 1.4314900636672974, "log_odds_ratio": -0.22909072041511536, "logits/chosen": -0.601670503616333, "logits/rejected": -1.247763752937317, "logps/chosen": -1.4129483699798584, "logps/rejected": -2.6406874656677246, "loss": 1.4388, "nll_loss": 1.415844440460205, "rewards/accuracies": 1.0, "rewards/chosen": -0.14129483699798584, "rewards/margins": 0.12277393043041229, "rewards/rejected": -0.26406875252723694, "step": 824 }, { "epoch": 1.2994483845547675, "grad_norm": 0.2457880675792694, "learning_rate": 1.323713881408147e-06, "log_odds_chosen": 1.3239305019378662, "log_odds_ratio": -0.2442399561405182, "logits/chosen": -0.5879520773887634, "logits/rejected": -1.5013256072998047, "logps/chosen": -1.465965747833252, "logps/rejected": -2.60659122467041, "loss": 1.4891, "nll_loss": 1.4646568298339844, "rewards/accuracies": 1.0, "rewards/chosen": -0.14659658074378967, "rewards/margins": 0.11406257003545761, "rewards/rejected": -0.2606591582298279, "step": 825 }, { "epoch": 1.3010244286840031, "grad_norm": 0.23192963004112244, "learning_rate": 1.3185431506414943e-06, "log_odds_chosen": 1.5322251319885254, "log_odds_ratio": -0.2040068507194519, "logits/chosen": -0.6286110877990723, "logits/rejected": -1.5630531311035156, "logps/chosen": -1.3600577116012573, "logps/rejected": -2.671229839324951, "loss": 1.3889, "nll_loss": 1.3684895038604736, "rewards/accuracies": 1.0, "rewards/chosen": -0.13600577414035797, "rewards/margins": 0.13111719489097595, "rewards/rejected": -0.26712295413017273, "step": 826 }, { "epoch": 1.3026004728132388, "grad_norm": 0.2781152129173279, "learning_rate": 1.313377567955851e-06, "log_odds_chosen": 1.226921796798706, "log_odds_ratio": -0.27446916699409485, "logits/chosen": -0.5895642638206482, "logits/rejected": -1.3028472661972046, "logps/chosen": -1.4906383752822876, "logps/rejected": -2.549145460128784, "loss": 1.5115, "nll_loss": 1.4840279817581177, "rewards/accuracies": 1.0, "rewards/chosen": -0.1490638256072998, "rewards/margins": 0.10585072636604309, "rewards/rejected": -0.2549145519733429, "step": 827 }, { "epoch": 1.3041765169424744, "grad_norm": 0.24023838341236115, "learning_rate": 1.3082171723747257e-06, "log_odds_chosen": 1.3791821002960205, "log_odds_ratio": -0.2413456290960312, "logits/chosen": -0.7238616943359375, "logits/rejected": -1.4020274877548218, "logps/chosen": -1.4439319372177124, "logps/rejected": -2.6299452781677246, "loss": 1.4706, "nll_loss": 1.4465088844299316, "rewards/accuracies": 1.0, "rewards/chosen": -0.1443932056427002, "rewards/margins": 0.11860135197639465, "rewards/rejected": -0.26299452781677246, "step": 828 }, { "epoch": 1.30575256107171, "grad_norm": 0.2640969753265381, "learning_rate": 1.3030620028824424e-06, "log_odds_chosen": 1.3337830305099487, "log_odds_ratio": -0.24221113324165344, "logits/chosen": -0.6235900521278381, "logits/rejected": -1.3800705671310425, "logps/chosen": -1.458464503288269, "logps/rejected": -2.602323055267334, "loss": 1.4903, "nll_loss": 1.4661128520965576, "rewards/accuracies": 1.0, "rewards/chosen": -0.14584645628929138, "rewards/margins": 0.11438586562871933, "rewards/rejected": -0.2602323293685913, "step": 829 }, { "epoch": 1.3073286052009456, "grad_norm": 0.22373828291893005, "learning_rate": 1.2979120984238449e-06, "log_odds_chosen": 1.3962290287017822, "log_odds_ratio": -0.2311771810054779, "logits/chosen": -0.6370054483413696, "logits/rejected": -1.3749843835830688, "logps/chosen": -1.5016121864318848, "logps/rejected": -2.71468448638916, "loss": 1.5249, "nll_loss": 1.5017694234848022, "rewards/accuracies": 1.0, "rewards/chosen": -0.15016120672225952, "rewards/margins": 0.12130723893642426, "rewards/rejected": -0.27146846055984497, "step": 830 }, { "epoch": 1.3089046493301812, "grad_norm": 0.259819358587265, "learning_rate": 1.2927674979040009e-06, "log_odds_chosen": 1.3944642543792725, "log_odds_ratio": -0.22724983096122742, "logits/chosen": -0.5305383205413818, "logits/rejected": -1.2928234338760376, "logps/chosen": -1.3926821947097778, "logps/rejected": -2.578974962234497, "loss": 1.4403, "nll_loss": 1.4176024198532104, "rewards/accuracies": 1.0, "rewards/chosen": -0.13926823437213898, "rewards/margins": 0.11862929165363312, "rewards/rejected": -0.2578974962234497, "step": 831 }, { "epoch": 1.3104806934594169, "grad_norm": 0.2155289202928543, "learning_rate": 1.2876282401879106e-06, "log_odds_chosen": 1.5762255191802979, "log_odds_ratio": -0.19331632554531097, "logits/chosen": -0.5675473213195801, "logits/rejected": -1.423137903213501, "logps/chosen": -1.4213179349899292, "logps/rejected": -2.7864999771118164, "loss": 1.4538, "nll_loss": 1.4345142841339111, "rewards/accuracies": 1.0, "rewards/chosen": -0.14213180541992188, "rewards/margins": 0.136518195271492, "rewards/rejected": -0.2786499857902527, "step": 832 }, { "epoch": 1.3120567375886525, "grad_norm": 0.24451856315135956, "learning_rate": 1.2824943641002115e-06, "log_odds_chosen": 1.6182150840759277, "log_odds_ratio": -0.18711890280246735, "logits/chosen": -0.4954943060874939, "logits/rejected": -1.6108092069625854, "logps/chosen": -1.4183343648910522, "logps/rejected": -2.8229732513427734, "loss": 1.4494, "nll_loss": 1.4307172298431396, "rewards/accuracies": 1.0, "rewards/chosen": -0.14183342456817627, "rewards/margins": 0.1404639184474945, "rewards/rejected": -0.2822973430156708, "step": 833 }, { "epoch": 1.313632781717888, "grad_norm": 0.25867733359336853, "learning_rate": 1.2773659084248845e-06, "log_odds_chosen": 1.852787733078003, "log_odds_ratio": -0.17198441922664642, "logits/chosen": -0.7245616316795349, "logits/rejected": -1.652019739151001, "logps/chosen": -1.370871663093567, "logps/rejected": -2.991672992706299, "loss": 1.3967, "nll_loss": 1.3794749975204468, "rewards/accuracies": 1.0, "rewards/chosen": -0.1370871663093567, "rewards/margins": 0.16208013892173767, "rewards/rejected": -0.29916730523109436, "step": 834 }, { "epoch": 1.3152088258471237, "grad_norm": 0.23653243482112885, "learning_rate": 1.2722429119049632e-06, "log_odds_chosen": 1.2768532037734985, "log_odds_ratio": -0.2633589506149292, "logits/chosen": -0.5338461995124817, "logits/rejected": -1.3585484027862549, "logps/chosen": -1.503070592880249, "logps/rejected": -2.6095571517944336, "loss": 1.536, "nll_loss": 1.5097079277038574, "rewards/accuracies": 1.0, "rewards/chosen": -0.1503070592880249, "rewards/margins": 0.11064866185188293, "rewards/rejected": -0.26095569133758545, "step": 835 }, { "epoch": 1.3167848699763594, "grad_norm": 0.22052763402462006, "learning_rate": 1.267125413242239e-06, "log_odds_chosen": 1.5005533695220947, "log_odds_ratio": -0.21490009129047394, "logits/chosen": -0.6354590654373169, "logits/rejected": -1.3583699464797974, "logps/chosen": -1.3766456842422485, "logps/rejected": -2.661776065826416, "loss": 1.4172, "nll_loss": 1.3957130908966064, "rewards/accuracies": 1.0, "rewards/chosen": -0.1376645565032959, "rewards/margins": 0.12851302325725555, "rewards/rejected": -0.26617759466171265, "step": 836 }, { "epoch": 1.318360914105595, "grad_norm": 0.4790278673171997, "learning_rate": 1.2620134510969719e-06, "log_odds_chosen": 1.4586212635040283, "log_odds_ratio": -0.224677175283432, "logits/chosen": -0.6979159116744995, "logits/rejected": -1.2649545669555664, "logps/chosen": -1.3683842420578003, "logps/rejected": -2.615034341812134, "loss": 1.4085, "nll_loss": 1.386067509651184, "rewards/accuracies": 1.0, "rewards/chosen": -0.1368384212255478, "rewards/margins": 0.1246650293469429, "rewards/rejected": -0.2615034580230713, "step": 837 }, { "epoch": 1.3199369582348306, "grad_norm": 0.2725180685520172, "learning_rate": 1.2569070640875912e-06, "log_odds_chosen": 1.6334424018859863, "log_odds_ratio": -0.20868034660816193, "logits/chosen": -0.5379756689071655, "logits/rejected": -1.443952202796936, "logps/chosen": -1.346409797668457, "logps/rejected": -2.7176432609558105, "loss": 1.3902, "nll_loss": 1.369292974472046, "rewards/accuracies": 1.0, "rewards/chosen": -0.13464096188545227, "rewards/margins": 0.13712337613105774, "rewards/rejected": -0.27176433801651, "step": 838 }, { "epoch": 1.3215130023640662, "grad_norm": 0.42804601788520813, "learning_rate": 1.2518062907904136e-06, "log_odds_chosen": 1.711381435394287, "log_odds_ratio": -0.18208424746990204, "logits/chosen": -0.6807981729507446, "logits/rejected": -1.630470871925354, "logps/chosen": -1.4140769243240356, "logps/rejected": -2.9069175720214844, "loss": 1.4442, "nll_loss": 1.4259682893753052, "rewards/accuracies": 1.0, "rewards/chosen": -0.14140769839286804, "rewards/margins": 0.14928403496742249, "rewards/rejected": -0.2906917333602905, "step": 839 }, { "epoch": 1.3230890464933018, "grad_norm": 0.24280807375907898, "learning_rate": 1.2467111697393446e-06, "log_odds_chosen": 1.4815362691879272, "log_odds_ratio": -0.21086756885051727, "logits/chosen": -0.6065419912338257, "logits/rejected": -1.6878957748413086, "logps/chosen": -1.5209952592849731, "logps/rejected": -2.8176212310791016, "loss": 1.5441, "nll_loss": 1.5230549573898315, "rewards/accuracies": 1.0, "rewards/chosen": -0.15209950506687164, "rewards/margins": 0.12966260313987732, "rewards/rejected": -0.28176212310791016, "step": 840 }, { "epoch": 1.3246650906225375, "grad_norm": 0.23113702237606049, "learning_rate": 1.2416217394255905e-06, "log_odds_chosen": 1.3655986785888672, "log_odds_ratio": -0.23284479975700378, "logits/chosen": -0.5418767929077148, "logits/rejected": -1.2516670227050781, "logps/chosen": -1.3566389083862305, "logps/rejected": -2.5103282928466797, "loss": 1.4175, "nll_loss": 1.394230842590332, "rewards/accuracies": 1.0, "rewards/chosen": -0.13566389679908752, "rewards/margins": 0.11536893248558044, "rewards/rejected": -0.25103282928466797, "step": 841 }, { "epoch": 1.326241134751773, "grad_norm": 0.27183687686920166, "learning_rate": 1.2365380382973669e-06, "log_odds_chosen": 1.2834677696228027, "log_odds_ratio": -0.2571195662021637, "logits/chosen": -0.6271907091140747, "logits/rejected": -1.198774814605713, "logps/chosen": -1.5170949697494507, "logps/rejected": -2.6317522525787354, "loss": 1.5319, "nll_loss": 1.5062181949615479, "rewards/accuracies": 1.0, "rewards/chosen": -0.15170949697494507, "rewards/margins": 0.11146571487188339, "rewards/rejected": -0.26317524909973145, "step": 842 }, { "epoch": 1.3278171788810087, "grad_norm": 0.24289065599441528, "learning_rate": 1.2314601047596061e-06, "log_odds_chosen": 1.746811032295227, "log_odds_ratio": -0.1659754067659378, "logits/chosen": -0.5585183501243591, "logits/rejected": -1.6068834066390991, "logps/chosen": -1.4367531538009644, "logps/rejected": -2.9650981426239014, "loss": 1.4522, "nll_loss": 1.4355918169021606, "rewards/accuracies": 1.0, "rewards/chosen": -0.1436753123998642, "rewards/margins": 0.15283450484275818, "rewards/rejected": -0.2965098023414612, "step": 843 }, { "epoch": 1.3293932230102443, "grad_norm": 0.42870867252349854, "learning_rate": 1.2263879771736713e-06, "log_odds_chosen": 1.3868666887283325, "log_odds_ratio": -0.23231098055839539, "logits/chosen": -0.6453801989555359, "logits/rejected": -1.5813323259353638, "logps/chosen": -1.4578145742416382, "logps/rejected": -2.6517040729522705, "loss": 1.4862, "nll_loss": 1.462958812713623, "rewards/accuracies": 1.0, "rewards/chosen": -0.145781472325325, "rewards/margins": 0.11938894540071487, "rewards/rejected": -0.2651704251766205, "step": 844 }, { "epoch": 1.33096926713948, "grad_norm": 0.24256369471549988, "learning_rate": 1.2213216938570642e-06, "log_odds_chosen": 1.5715909004211426, "log_odds_ratio": -0.21311572194099426, "logits/chosen": -0.6242514848709106, "logits/rejected": -1.4461240768432617, "logps/chosen": -1.3803417682647705, "logps/rejected": -2.7272305488586426, "loss": 1.4229, "nll_loss": 1.401560664176941, "rewards/accuracies": 1.0, "rewards/chosen": -0.1380341649055481, "rewards/margins": 0.13468888401985168, "rewards/rejected": -0.27272307872772217, "step": 845 }, { "epoch": 1.3325453112687156, "grad_norm": 0.23952677845954895, "learning_rate": 1.2162612930831354e-06, "log_odds_chosen": 1.8564397096633911, "log_odds_ratio": -0.16202743351459503, "logits/chosen": -0.6841865181922913, "logits/rejected": -1.628495454788208, "logps/chosen": -1.2810691595077515, "logps/rejected": -2.8716635704040527, "loss": 1.3222, "nll_loss": 1.306032419204712, "rewards/accuracies": 1.0, "rewards/chosen": -0.12810692191123962, "rewards/margins": 0.15905943512916565, "rewards/rejected": -0.2871663570404053, "step": 846 }, { "epoch": 1.3341213553979512, "grad_norm": 0.2226683795452118, "learning_rate": 1.2112068130807949e-06, "log_odds_chosen": 1.9628236293792725, "log_odds_ratio": -0.13985173404216766, "logits/chosen": -0.6319723725318909, "logits/rejected": -1.795454978942871, "logps/chosen": -1.4285566806793213, "logps/rejected": -3.163897752761841, "loss": 1.4383, "nll_loss": 1.4242912530899048, "rewards/accuracies": 1.0, "rewards/chosen": -0.1428556740283966, "rewards/margins": 0.173534095287323, "rewards/rejected": -0.3163897693157196, "step": 847 }, { "epoch": 1.3356973995271868, "grad_norm": 0.26131734251976013, "learning_rate": 1.206158292034226e-06, "log_odds_chosen": 1.6567684412002563, "log_odds_ratio": -0.18120090663433075, "logits/chosen": -0.6492166519165039, "logits/rejected": -1.5648795366287231, "logps/chosen": -1.4466544389724731, "logps/rejected": -2.8918793201446533, "loss": 1.4714, "nll_loss": 1.4533123970031738, "rewards/accuracies": 1.0, "rewards/chosen": -0.1446654498577118, "rewards/margins": 0.1445225179195404, "rewards/rejected": -0.2891879677772522, "step": 848 }, { "epoch": 1.3372734436564224, "grad_norm": 0.253273069858551, "learning_rate": 1.2011157680825928e-06, "log_odds_chosen": 1.6794590950012207, "log_odds_ratio": -0.18548327684402466, "logits/chosen": -0.5395171046257019, "logits/rejected": -1.617524266242981, "logps/chosen": -1.3760827779769897, "logps/rejected": -2.8302364349365234, "loss": 1.4092, "nll_loss": 1.3906164169311523, "rewards/accuracies": 1.0, "rewards/chosen": -0.13760828971862793, "rewards/margins": 0.14541538059711456, "rewards/rejected": -0.2830236554145813, "step": 849 }, { "epoch": 1.338849487785658, "grad_norm": 0.24384582042694092, "learning_rate": 1.1960792793197553e-06, "log_odds_chosen": 1.6050926446914673, "log_odds_ratio": -0.1991470605134964, "logits/chosen": -0.707046389579773, "logits/rejected": -1.533729076385498, "logps/chosen": -1.427878975868225, "logps/rejected": -2.820812225341797, "loss": 1.4542, "nll_loss": 1.43429696559906, "rewards/accuracies": 1.0, "rewards/chosen": -0.14278791844844818, "rewards/margins": 0.13929331302642822, "rewards/rejected": -0.2820812165737152, "step": 850 }, { "epoch": 1.3404255319148937, "grad_norm": 0.24895353615283966, "learning_rate": 1.1910488637939824e-06, "log_odds_chosen": 1.9199038743972778, "log_odds_ratio": -0.1412464827299118, "logits/chosen": -0.7576161623001099, "logits/rejected": -1.6525764465332031, "logps/chosen": -1.4692214727401733, "logps/rejected": -3.17101788520813, "loss": 1.4715, "nll_loss": 1.4573686122894287, "rewards/accuracies": 1.0, "rewards/chosen": -0.14692214131355286, "rewards/margins": 0.17017965018749237, "rewards/rejected": -0.31710177659988403, "step": 851 }, { "epoch": 1.3420015760441293, "grad_norm": 0.26812073588371277, "learning_rate": 1.1860245595076582e-06, "log_odds_chosen": 1.313482642173767, "log_odds_ratio": -0.24426017701625824, "logits/chosen": -0.52881920337677, "logits/rejected": -1.493924617767334, "logps/chosen": -1.5094387531280518, "logps/rejected": -2.6473355293273926, "loss": 1.514, "nll_loss": 1.489540696144104, "rewards/accuracies": 1.0, "rewards/chosen": -0.15094387531280518, "rewards/margins": 0.11378967761993408, "rewards/rejected": -0.26473355293273926, "step": 852 }, { "epoch": 1.343577620173365, "grad_norm": 0.2410273551940918, "learning_rate": 1.1810064044170027e-06, "log_odds_chosen": 1.636589765548706, "log_odds_ratio": -0.19249330461025238, "logits/chosen": -0.6974166035652161, "logits/rejected": -1.5706920623779297, "logps/chosen": -1.39455246925354, "logps/rejected": -2.8113789558410645, "loss": 1.4253, "nll_loss": 1.4060957431793213, "rewards/accuracies": 1.0, "rewards/chosen": -0.13945524394512177, "rewards/margins": 0.1416826695203781, "rewards/rejected": -0.2811379134654999, "step": 853 }, { "epoch": 1.3451536643026005, "grad_norm": 0.2574176490306854, "learning_rate": 1.1759944364317812e-06, "log_odds_chosen": 1.603165626525879, "log_odds_ratio": -0.19906370341777802, "logits/chosen": -0.6916913986206055, "logits/rejected": -1.5008288621902466, "logps/chosen": -1.4492762088775635, "logps/rejected": -2.8470959663391113, "loss": 1.4688, "nll_loss": 1.4488977193832397, "rewards/accuracies": 1.0, "rewards/chosen": -0.14492763578891754, "rewards/margins": 0.13978195190429688, "rewards/rejected": -0.2847095727920532, "step": 854 }, { "epoch": 1.3467297084318361, "grad_norm": 0.2300931215286255, "learning_rate": 1.1709886934150172e-06, "log_odds_chosen": 1.4698625802993774, "log_odds_ratio": -0.21434006094932556, "logits/chosen": -0.566104531288147, "logits/rejected": -1.3819411993026733, "logps/chosen": -1.471221923828125, "logps/rejected": -2.7428500652313232, "loss": 1.5042, "nll_loss": 1.4827244281768799, "rewards/accuracies": 1.0, "rewards/chosen": -0.14712218940258026, "rewards/margins": 0.12716282904148102, "rewards/rejected": -0.2742850184440613, "step": 855 }, { "epoch": 1.3483057525610718, "grad_norm": 0.25239959359169006, "learning_rate": 1.1659892131827097e-06, "log_odds_chosen": 1.6053402423858643, "log_odds_ratio": -0.1971941441297531, "logits/chosen": -0.6730937361717224, "logits/rejected": -1.7223103046417236, "logps/chosen": -1.4930871725082397, "logps/rejected": -2.903635025024414, "loss": 1.5094, "nll_loss": 1.489640474319458, "rewards/accuracies": 1.0, "rewards/chosen": -0.1493087261915207, "rewards/margins": 0.14105476438999176, "rewards/rejected": -0.29036349058151245, "step": 856 }, { "epoch": 1.3498817966903074, "grad_norm": 0.21717876195907593, "learning_rate": 1.1609960335035423e-06, "log_odds_chosen": 2.0900535583496094, "log_odds_ratio": -0.12933163344860077, "logits/chosen": -0.642082691192627, "logits/rejected": -1.740064263343811, "logps/chosen": -1.4793895483016968, "logps/rejected": -3.3437135219573975, "loss": 1.4977, "nll_loss": 1.4847984313964844, "rewards/accuracies": 1.0, "rewards/chosen": -0.14793896675109863, "rewards/margins": 0.1864323616027832, "rewards/rejected": -0.33437132835388184, "step": 857 }, { "epoch": 1.351457840819543, "grad_norm": 0.24051296710968018, "learning_rate": 1.1560091920986028e-06, "log_odds_chosen": 1.7658697366714478, "log_odds_ratio": -0.1659710705280304, "logits/chosen": -0.642181932926178, "logits/rejected": -1.6060584783554077, "logps/chosen": -1.3994590044021606, "logps/rejected": -2.937842607498169, "loss": 1.4321, "nll_loss": 1.4154534339904785, "rewards/accuracies": 1.0, "rewards/chosen": -0.13994590938091278, "rewards/margins": 0.1538383513689041, "rewards/rejected": -0.2937842607498169, "step": 858 }, { "epoch": 1.3530338849487786, "grad_norm": 0.23283112049102783, "learning_rate": 1.1510287266410967e-06, "log_odds_chosen": 1.4090423583984375, "log_odds_ratio": -0.2285042703151703, "logits/chosen": -0.6504819989204407, "logits/rejected": -1.4080336093902588, "logps/chosen": -1.5133273601531982, "logps/rejected": -2.743722915649414, "loss": 1.5328, "nll_loss": 1.5099424123764038, "rewards/accuracies": 1.0, "rewards/chosen": -0.15133275091648102, "rewards/margins": 0.12303955107927322, "rewards/rejected": -0.27437227964401245, "step": 859 }, { "epoch": 1.3546099290780143, "grad_norm": 0.2225169539451599, "learning_rate": 1.1460546747560616e-06, "log_odds_chosen": 1.8164125680923462, "log_odds_ratio": -0.15852658450603485, "logits/chosen": -0.6664764285087585, "logits/rejected": -1.714491367340088, "logps/chosen": -1.4432940483093262, "logps/rejected": -3.0411689281463623, "loss": 1.4546, "nll_loss": 1.438779592514038, "rewards/accuracies": 1.0, "rewards/chosen": -0.14432939887046814, "rewards/margins": 0.15978752076625824, "rewards/rejected": -0.3041169047355652, "step": 860 }, { "epoch": 1.3561859732072499, "grad_norm": 0.2316436767578125, "learning_rate": 1.1410870740200839e-06, "log_odds_chosen": 1.617644190788269, "log_odds_ratio": -0.20058351755142212, "logits/chosen": -0.6289576292037964, "logits/rejected": -1.5740225315093994, "logps/chosen": -1.493541955947876, "logps/rejected": -2.917174816131592, "loss": 1.5095, "nll_loss": 1.489485263824463, "rewards/accuracies": 1.0, "rewards/chosen": -0.14935418963432312, "rewards/margins": 0.1423633098602295, "rewards/rejected": -0.2917174994945526, "step": 861 }, { "epoch": 1.3577620173364855, "grad_norm": 0.24486534297466278, "learning_rate": 1.1361259619610138e-06, "log_odds_chosen": 1.665561318397522, "log_odds_ratio": -0.1871521770954132, "logits/chosen": -0.6418501734733582, "logits/rejected": -1.6335361003875732, "logps/chosen": -1.431355595588684, "logps/rejected": -2.87868332862854, "loss": 1.4629, "nll_loss": 1.4441808462142944, "rewards/accuracies": 1.0, "rewards/chosen": -0.14313554763793945, "rewards/margins": 0.14473280310630798, "rewards/rejected": -0.28786835074424744, "step": 862 }, { "epoch": 1.3593380614657211, "grad_norm": 0.7537965178489685, "learning_rate": 1.1311713760576834e-06, "log_odds_chosen": 1.709722638130188, "log_odds_ratio": -0.1851150393486023, "logits/chosen": -0.7632265686988831, "logits/rejected": -1.3779895305633545, "logps/chosen": -1.4145159721374512, "logps/rejected": -2.9023544788360596, "loss": 1.4244, "nll_loss": 1.4058459997177124, "rewards/accuracies": 1.0, "rewards/chosen": -0.14145159721374512, "rewards/margins": 0.1487838327884674, "rewards/rejected": -0.2902354598045349, "step": 863 }, { "epoch": 1.3609141055949567, "grad_norm": 0.24928341805934906, "learning_rate": 1.1262233537396228e-06, "log_odds_chosen": 1.9565744400024414, "log_odds_ratio": -0.1643376350402832, "logits/chosen": -0.5985521078109741, "logits/rejected": -1.5526808500289917, "logps/chosen": -1.390761375427246, "logps/rejected": -3.1119015216827393, "loss": 1.4031, "nll_loss": 1.3867006301879883, "rewards/accuracies": 1.0, "rewards/chosen": -0.1390761286020279, "rewards/margins": 0.1721140295267105, "rewards/rejected": -0.3111901879310608, "step": 864 }, { "epoch": 1.3624901497241924, "grad_norm": 0.22759051620960236, "learning_rate": 1.1212819323867778e-06, "log_odds_chosen": 1.6447187662124634, "log_odds_ratio": -0.19922199845314026, "logits/chosen": -0.6582179665565491, "logits/rejected": -1.5183279514312744, "logps/chosen": -1.3644665479660034, "logps/rejected": -2.7891225814819336, "loss": 1.4077, "nll_loss": 1.387730598449707, "rewards/accuracies": 1.0, "rewards/chosen": -0.13644665479660034, "rewards/margins": 0.14246559143066406, "rewards/rejected": -0.2789122462272644, "step": 865 }, { "epoch": 1.364066193853428, "grad_norm": 0.24368128180503845, "learning_rate": 1.1163471493292267e-06, "log_odds_chosen": 1.6180881261825562, "log_odds_ratio": -0.19148674607276917, "logits/chosen": -0.6676050424575806, "logits/rejected": -1.5187058448791504, "logps/chosen": -1.446605920791626, "logps/rejected": -2.8559176921844482, "loss": 1.4644, "nll_loss": 1.4452784061431885, "rewards/accuracies": 1.0, "rewards/chosen": -0.1446605622768402, "rewards/margins": 0.14093118906021118, "rewards/rejected": -0.2855917513370514, "step": 866 }, { "epoch": 1.3656422379826636, "grad_norm": 0.21349558234214783, "learning_rate": 1.1114190418468972e-06, "log_odds_chosen": 1.581590175628662, "log_odds_ratio": -0.20506787300109863, "logits/chosen": -0.5727652311325073, "logits/rejected": -1.5056498050689697, "logps/chosen": -1.466938853263855, "logps/rejected": -2.853933811187744, "loss": 1.5026, "nll_loss": 1.4820456504821777, "rewards/accuracies": 1.0, "rewards/chosen": -0.1466939002275467, "rewards/margins": 0.138699471950531, "rewards/rejected": -0.2853933870792389, "step": 867 }, { "epoch": 1.367218282111899, "grad_norm": 0.23098094761371613, "learning_rate": 1.106497647169288e-06, "log_odds_chosen": 1.7774879932403564, "log_odds_ratio": -0.1619657725095749, "logits/chosen": -0.5783388018608093, "logits/rejected": -1.6011881828308105, "logps/chosen": -1.392938494682312, "logps/rejected": -2.9401845932006836, "loss": 1.4095, "nll_loss": 1.3932610750198364, "rewards/accuracies": 1.0, "rewards/chosen": -0.1392938792705536, "rewards/margins": 0.15472456812858582, "rewards/rejected": -0.2940184473991394, "step": 868 }, { "epoch": 1.3687943262411348, "grad_norm": 0.22277240455150604, "learning_rate": 1.1015830024751854e-06, "log_odds_chosen": 1.5932520627975464, "log_odds_ratio": -0.19965462386608124, "logits/chosen": -0.7604570388793945, "logits/rejected": -1.3756550550460815, "logps/chosen": -1.3519713878631592, "logps/rejected": -2.720393180847168, "loss": 1.3854, "nll_loss": 1.3654124736785889, "rewards/accuracies": 1.0, "rewards/chosen": -0.13519714772701263, "rewards/margins": 0.13684219121932983, "rewards/rejected": -0.2720393240451813, "step": 869 }, { "epoch": 1.3703703703703702, "grad_norm": 0.22521238029003143, "learning_rate": 1.0966751448923834e-06, "log_odds_chosen": 2.0099873542785645, "log_odds_ratio": -0.1357879936695099, "logits/chosen": -0.5918890833854675, "logits/rejected": -1.5549322366714478, "logps/chosen": -1.3542336225509644, "logps/rejected": -3.1139070987701416, "loss": 1.3828, "nll_loss": 1.3691737651824951, "rewards/accuracies": 1.0, "rewards/chosen": -0.13542336225509644, "rewards/margins": 0.17596739530563354, "rewards/rejected": -0.31139075756073, "step": 870 }, { "epoch": 1.371946414499606, "grad_norm": 0.24158324301242828, "learning_rate": 1.0917741114974007e-06, "log_odds_chosen": 1.520578145980835, "log_odds_ratio": -0.2033848613500595, "logits/chosen": -0.5592161417007446, "logits/rejected": -1.544834017753601, "logps/chosen": -1.4935863018035889, "logps/rejected": -2.8185956478118896, "loss": 1.5161, "nll_loss": 1.4957417249679565, "rewards/accuracies": 1.0, "rewards/chosen": -0.1493586301803589, "rewards/margins": 0.13250091671943665, "rewards/rejected": -0.28185954689979553, "step": 871 }, { "epoch": 1.3735224586288415, "grad_norm": 0.2237880378961563, "learning_rate": 1.0868799393152035e-06, "log_odds_chosen": 1.8863072395324707, "log_odds_ratio": -0.1634805053472519, "logits/chosen": -0.7058467268943787, "logits/rejected": -1.620200276374817, "logps/chosen": -1.3770359754562378, "logps/rejected": -3.026153087615967, "loss": 1.4051, "nll_loss": 1.3887983560562134, "rewards/accuracies": 1.0, "rewards/chosen": -0.13770359754562378, "rewards/margins": 0.16491171717643738, "rewards/rejected": -0.30261531472206116, "step": 872 }, { "epoch": 1.3750985027580773, "grad_norm": 0.28563395142555237, "learning_rate": 1.0819926653189271e-06, "log_odds_chosen": 1.5606904029846191, "log_odds_ratio": -0.2044568657875061, "logits/chosen": -0.6395079493522644, "logits/rejected": -1.3664402961730957, "logps/chosen": -1.4640522003173828, "logps/rejected": -2.8246538639068604, "loss": 1.4915, "nll_loss": 1.471075177192688, "rewards/accuracies": 1.0, "rewards/chosen": -0.14640523493289948, "rewards/margins": 0.13606014847755432, "rewards/rejected": -0.282465398311615, "step": 873 }, { "epoch": 1.3766745468873127, "grad_norm": 0.21589653193950653, "learning_rate": 1.0771123264295895e-06, "log_odds_chosen": 1.5921666622161865, "log_odds_ratio": -0.19474714994430542, "logits/chosen": -0.6634510159492493, "logits/rejected": -1.6877684593200684, "logps/chosen": -1.3579119443893433, "logps/rejected": -2.720470905303955, "loss": 1.3885, "nll_loss": 1.3689954280853271, "rewards/accuracies": 1.0, "rewards/chosen": -0.13579118251800537, "rewards/margins": 0.1362559199333191, "rewards/rejected": -0.27204710245132446, "step": 874 }, { "epoch": 1.3782505910165486, "grad_norm": 0.22386837005615234, "learning_rate": 1.0722389595158215e-06, "log_odds_chosen": 1.8254528045654297, "log_odds_ratio": -0.16639356315135956, "logits/chosen": -0.5494032502174377, "logits/rejected": -1.5416202545166016, "logps/chosen": -1.3461651802062988, "logps/rejected": -2.931638717651367, "loss": 1.382, "nll_loss": 1.3653497695922852, "rewards/accuracies": 1.0, "rewards/chosen": -0.13461652398109436, "rewards/margins": 0.15854734182357788, "rewards/rejected": -0.29316386580467224, "step": 875 }, { "epoch": 1.379826635145784, "grad_norm": 0.2681967318058014, "learning_rate": 1.0673726013935827e-06, "log_odds_chosen": 1.7635384798049927, "log_odds_ratio": -0.19242730736732483, "logits/chosen": -0.5731449723243713, "logits/rejected": -1.4597039222717285, "logps/chosen": -1.3603630065917969, "logps/rejected": -2.890061140060425, "loss": 1.3968, "nll_loss": 1.3775389194488525, "rewards/accuracies": 1.0, "rewards/chosen": -0.13603630661964417, "rewards/margins": 0.1529698371887207, "rewards/rejected": -0.28900614380836487, "step": 876 }, { "epoch": 1.3814026792750198, "grad_norm": 0.22812511026859283, "learning_rate": 1.0625132888258833e-06, "log_odds_chosen": 1.6586129665374756, "log_odds_ratio": -0.19311630725860596, "logits/chosen": -0.701583206653595, "logits/rejected": -1.421006679534912, "logps/chosen": -1.4265589714050293, "logps/rejected": -2.8771860599517822, "loss": 1.4502, "nll_loss": 1.4308946132659912, "rewards/accuracies": 1.0, "rewards/chosen": -0.1426558941602707, "rewards/margins": 0.14506272971630096, "rewards/rejected": -0.28771862387657166, "step": 877 }, { "epoch": 1.3829787234042552, "grad_norm": 0.2343727946281433, "learning_rate": 1.057661058522509e-06, "log_odds_chosen": 1.8158915042877197, "log_odds_ratio": -0.15713225305080414, "logits/chosen": -0.6596369743347168, "logits/rejected": -1.681816816329956, "logps/chosen": -1.4494779109954834, "logps/rejected": -3.045167922973633, "loss": 1.4767, "nll_loss": 1.4609739780426025, "rewards/accuracies": 1.0, "rewards/chosen": -0.144947811961174, "rewards/margins": 0.15956899523735046, "rewards/rejected": -0.3045167922973633, "step": 878 }, { "epoch": 1.384554767533491, "grad_norm": 0.5640384554862976, "learning_rate": 1.0528159471397425e-06, "log_odds_chosen": 1.5712882280349731, "log_odds_ratio": -0.20260006189346313, "logits/chosen": -0.626277506351471, "logits/rejected": -1.5943480730056763, "logps/chosen": -1.3989372253417969, "logps/rejected": -2.756653070449829, "loss": 1.4339, "nll_loss": 1.4136770963668823, "rewards/accuracies": 1.0, "rewards/chosen": -0.13989374041557312, "rewards/margins": 0.13577157258987427, "rewards/rejected": -0.275665283203125, "step": 879 }, { "epoch": 1.3861308116627264, "grad_norm": 0.2508867383003235, "learning_rate": 1.0479779912800868e-06, "log_odds_chosen": 1.6103177070617676, "log_odds_ratio": -0.19111379981040955, "logits/chosen": -0.758451521396637, "logits/rejected": -1.5467413663864136, "logps/chosen": -1.3675017356872559, "logps/rejected": -2.7500529289245605, "loss": 1.4019, "nll_loss": 1.382838249206543, "rewards/accuracies": 1.0, "rewards/chosen": -0.13675017654895782, "rewards/margins": 0.13825511932373047, "rewards/rejected": -0.2750052809715271, "step": 880 }, { "epoch": 1.3877068557919623, "grad_norm": 0.22995373606681824, "learning_rate": 1.0431472274919863e-06, "log_odds_chosen": 1.8982397317886353, "log_odds_ratio": -0.1501951813697815, "logits/chosen": -0.547713041305542, "logits/rejected": -1.669274926185608, "logps/chosen": -1.3867900371551514, "logps/rejected": -3.0491251945495605, "loss": 1.4107, "nll_loss": 1.395638108253479, "rewards/accuracies": 1.0, "rewards/chosen": -0.13867899775505066, "rewards/margins": 0.16623355448246002, "rewards/rejected": -0.3049125671386719, "step": 881 }, { "epoch": 1.3892828999211977, "grad_norm": 0.24104370176792145, "learning_rate": 1.0383236922695543e-06, "log_odds_chosen": 1.910417914390564, "log_odds_ratio": -0.1467703878879547, "logits/chosen": -0.4995293915271759, "logits/rejected": -1.5275884866714478, "logps/chosen": -1.2323276996612549, "logps/rejected": -2.8423500061035156, "loss": 1.2803, "nll_loss": 1.2656131982803345, "rewards/accuracies": 1.0, "rewards/chosen": -0.12323278933763504, "rewards/margins": 0.16100221872329712, "rewards/rejected": -0.28423500061035156, "step": 882 }, { "epoch": 1.3908589440504335, "grad_norm": 0.25414207577705383, "learning_rate": 1.0335074220522962e-06, "log_odds_chosen": 1.8656892776489258, "log_odds_ratio": -0.16491052508354187, "logits/chosen": -0.6115697026252747, "logits/rejected": -1.4091947078704834, "logps/chosen": -1.4603948593139648, "logps/rejected": -3.113933563232422, "loss": 1.4856, "nll_loss": 1.4691309928894043, "rewards/accuracies": 1.0, "rewards/chosen": -0.14603950083255768, "rewards/margins": 0.16535384953022003, "rewards/rejected": -0.3113933503627777, "step": 883 }, { "epoch": 1.392434988179669, "grad_norm": 0.26081377267837524, "learning_rate": 1.0286984532248326e-06, "log_odds_chosen": 1.367045521736145, "log_odds_ratio": -0.24221490323543549, "logits/chosen": -0.5516694188117981, "logits/rejected": -1.562090277671814, "logps/chosen": -1.4935163259506226, "logps/rejected": -2.6765871047973633, "loss": 1.5151, "nll_loss": 1.4908676147460938, "rewards/accuracies": 1.0, "rewards/chosen": -0.14935162663459778, "rewards/margins": 0.11830709874629974, "rewards/rejected": -0.2676587402820587, "step": 884 }, { "epoch": 1.3940110323089048, "grad_norm": 0.2518186867237091, "learning_rate": 1.0238968221166269e-06, "log_odds_chosen": 1.606108546257019, "log_odds_ratio": -0.20955884456634521, "logits/chosen": -0.6107924580574036, "logits/rejected": -1.5653332471847534, "logps/chosen": -1.451974868774414, "logps/rejected": -2.859508991241455, "loss": 1.4846, "nll_loss": 1.4636868238449097, "rewards/accuracies": 1.0, "rewards/chosen": -0.14519749581813812, "rewards/margins": 0.1407533884048462, "rewards/rejected": -0.2859508693218231, "step": 885 }, { "epoch": 1.3955870764381402, "grad_norm": 0.23638294637203217, "learning_rate": 1.019102565001707e-06, "log_odds_chosen": 1.4686362743377686, "log_odds_ratio": -0.22250008583068848, "logits/chosen": -0.585566520690918, "logits/rejected": -1.3059039115905762, "logps/chosen": -1.4826006889343262, "logps/rejected": -2.76189923286438, "loss": 1.507, "nll_loss": 1.484725832939148, "rewards/accuracies": 1.0, "rewards/chosen": -0.14826007187366486, "rewards/margins": 0.12792986631393433, "rewards/rejected": -0.276189923286438, "step": 886 }, { "epoch": 1.397163120567376, "grad_norm": 0.2579563856124878, "learning_rate": 1.0143157180983965e-06, "log_odds_chosen": 1.5000890493392944, "log_odds_ratio": -0.2140539139509201, "logits/chosen": -0.6484724283218384, "logits/rejected": -1.4389346837997437, "logps/chosen": -1.4671251773834229, "logps/rejected": -2.7712459564208984, "loss": 1.4666, "nll_loss": 1.4452418088912964, "rewards/accuracies": 1.0, "rewards/chosen": -0.1467125117778778, "rewards/margins": 0.13041208684444427, "rewards/rejected": -0.2771245837211609, "step": 887 }, { "epoch": 1.3987391646966114, "grad_norm": 0.3018769919872284, "learning_rate": 1.0095363175690375e-06, "log_odds_chosen": 1.5418834686279297, "log_odds_ratio": -0.20728465914726257, "logits/chosen": -0.6682940125465393, "logits/rejected": -1.2616838216781616, "logps/chosen": -1.4611940383911133, "logps/rejected": -2.8027215003967285, "loss": 1.4726, "nll_loss": 1.4518331289291382, "rewards/accuracies": 1.0, "rewards/chosen": -0.1461194008588791, "rewards/margins": 0.13415274024009705, "rewards/rejected": -0.28027215600013733, "step": 888 }, { "epoch": 1.4003152088258473, "grad_norm": 0.2607669234275818, "learning_rate": 1.0047643995197178e-06, "log_odds_chosen": 1.623878836631775, "log_odds_ratio": -0.1956598311662674, "logits/chosen": -0.5829888582229614, "logits/rejected": -1.5397287607192993, "logps/chosen": -1.3773345947265625, "logps/rejected": -2.7731235027313232, "loss": 1.403, "nll_loss": 1.383469581604004, "rewards/accuracies": 1.0, "rewards/chosen": -0.13773347437381744, "rewards/margins": 0.1395788937807083, "rewards/rejected": -0.27731236815452576, "step": 889 }, { "epoch": 1.4018912529550827, "grad_norm": 0.25077128410339355, "learning_rate": 1.0000000000000004e-06, "log_odds_chosen": 1.5483626127243042, "log_odds_ratio": -0.20669405162334442, "logits/chosen": -0.5734342336654663, "logits/rejected": -1.4856351613998413, "logps/chosen": -1.424758791923523, "logps/rejected": -2.760406255722046, "loss": 1.4639, "nll_loss": 1.4432692527770996, "rewards/accuracies": 1.0, "rewards/chosen": -0.142475888133049, "rewards/margins": 0.13356474041938782, "rewards/rejected": -0.27604061365127563, "step": 890 }, { "epoch": 1.4034672970843185, "grad_norm": 0.22610604763031006, "learning_rate": 9.952431550026459e-07, "log_odds_chosen": 1.6889674663543701, "log_odds_ratio": -0.18497657775878906, "logits/chosen": -0.7005455493927002, "logits/rejected": -1.4289085865020752, "logps/chosen": -1.4461010694503784, "logps/rejected": -2.9273979663848877, "loss": 1.4737, "nll_loss": 1.455183744430542, "rewards/accuracies": 1.0, "rewards/chosen": -0.14461010694503784, "rewards/margins": 0.14812970161437988, "rewards/rejected": -0.2927398085594177, "step": 891 }, { "epoch": 1.405043341213554, "grad_norm": 0.24834203720092773, "learning_rate": 9.904939004633471e-07, "log_odds_chosen": 1.790747046470642, "log_odds_ratio": -0.1575406938791275, "logits/chosen": -0.7561191916465759, "logits/rejected": -1.595555305480957, "logps/chosen": -1.5042771100997925, "logps/rejected": -3.087547540664673, "loss": 1.5081, "nll_loss": 1.4923908710479736, "rewards/accuracies": 1.0, "rewards/chosen": -0.1504276990890503, "rewards/margins": 0.15832704305648804, "rewards/rejected": -0.30875474214553833, "step": 892 }, { "epoch": 1.4066193853427895, "grad_norm": 0.2530110776424408, "learning_rate": 9.857522722604536e-07, "log_odds_chosen": 1.5290027856826782, "log_odds_ratio": -0.22384284436702728, "logits/chosen": -0.6061379909515381, "logits/rejected": -1.227806806564331, "logps/chosen": -1.3785853385925293, "logps/rejected": -2.6948788166046143, "loss": 1.424, "nll_loss": 1.4015873670578003, "rewards/accuracies": 1.0, "rewards/chosen": -0.1378585398197174, "rewards/margins": 0.1316293627023697, "rewards/rejected": -0.2694878876209259, "step": 893 }, { "epoch": 1.4081954294720251, "grad_norm": 0.2474088817834854, "learning_rate": 9.81018306214702e-07, "log_odds_chosen": 1.502267599105835, "log_odds_ratio": -0.2099406123161316, "logits/chosen": -0.6570034027099609, "logits/rejected": -1.3393932580947876, "logps/chosen": -1.3909960985183716, "logps/rejected": -2.6762640476226807, "loss": 1.4395, "nll_loss": 1.4185012578964233, "rewards/accuracies": 1.0, "rewards/chosen": -0.1390996128320694, "rewards/margins": 0.12852680683135986, "rewards/rejected": -0.26762640476226807, "step": 894 }, { "epoch": 1.4097714736012608, "grad_norm": 0.33020880818367004, "learning_rate": 9.76292038088945e-07, "log_odds_chosen": 1.5085315704345703, "log_odds_ratio": -0.20608478784561157, "logits/chosen": -0.5883653163909912, "logits/rejected": -1.3349723815917969, "logps/chosen": -1.4683583974838257, "logps/rejected": -2.7802469730377197, "loss": 1.4866, "nll_loss": 1.4659663438796997, "rewards/accuracies": 1.0, "rewards/chosen": -0.14683584868907928, "rewards/margins": 0.13118883967399597, "rewards/rejected": -0.27802467346191406, "step": 895 }, { "epoch": 1.4113475177304964, "grad_norm": 0.44886964559555054, "learning_rate": 9.715735035878799e-07, "log_odds_chosen": 1.7419822216033936, "log_odds_ratio": -0.17037026584148407, "logits/chosen": -0.6762113571166992, "logits/rejected": -1.5708619356155396, "logps/chosen": -1.3584445714950562, "logps/rejected": -2.8568472862243652, "loss": 1.3838, "nll_loss": 1.3667407035827637, "rewards/accuracies": 1.0, "rewards/chosen": -0.13584445416927338, "rewards/margins": 0.14984026551246643, "rewards/rejected": -0.285684734582901, "step": 896 }, { "epoch": 1.412923561859732, "grad_norm": 0.23925159871578217, "learning_rate": 9.668627383577812e-07, "log_odds_chosen": 1.632826805114746, "log_odds_ratio": -0.21078652143478394, "logits/chosen": -0.6347646117210388, "logits/rejected": -1.6543445587158203, "logps/chosen": -1.4457504749298096, "logps/rejected": -2.8781142234802246, "loss": 1.4757, "nll_loss": 1.454664945602417, "rewards/accuracies": 1.0, "rewards/chosen": -0.1445750594139099, "rewards/margins": 0.14323639869689941, "rewards/rejected": -0.2878114581108093, "step": 897 }, { "epoch": 1.4144996059889676, "grad_norm": 0.26766297221183777, "learning_rate": 9.621597779862307e-07, "log_odds_chosen": 1.3921657800674438, "log_odds_ratio": -0.23004251718521118, "logits/chosen": -0.708504319190979, "logits/rejected": -1.4522309303283691, "logps/chosen": -1.4511884450912476, "logps/rejected": -2.647890329360962, "loss": 1.4749, "nll_loss": 1.4518711566925049, "rewards/accuracies": 1.0, "rewards/chosen": -0.1451188623905182, "rewards/margins": 0.11967018246650696, "rewards/rejected": -0.26478904485702515, "step": 898 }, { "epoch": 1.4160756501182032, "grad_norm": 0.2688276171684265, "learning_rate": 9.57464658001848e-07, "log_odds_chosen": 1.5197861194610596, "log_odds_ratio": -0.20404323935508728, "logits/chosen": -0.6154606938362122, "logits/rejected": -1.4863282442092896, "logps/chosen": -1.457461953163147, "logps/rejected": -2.777696371078491, "loss": 1.4822, "nll_loss": 1.4618126153945923, "rewards/accuracies": 1.0, "rewards/chosen": -0.14574620127677917, "rewards/margins": 0.13202345371246338, "rewards/rejected": -0.27776965498924255, "step": 899 }, { "epoch": 1.4176516942474389, "grad_norm": 0.2558063268661499, "learning_rate": 9.527774138740212e-07, "log_odds_chosen": 1.5778982639312744, "log_odds_ratio": -0.19274799525737762, "logits/chosen": -0.7722602486610413, "logits/rejected": -1.5500413179397583, "logps/chosen": -1.417495608329773, "logps/rejected": -2.7712903022766113, "loss": 1.4513, "nll_loss": 1.4320013523101807, "rewards/accuracies": 1.0, "rewards/chosen": -0.1417495608329773, "rewards/margins": 0.13537943363189697, "rewards/rejected": -0.27712899446487427, "step": 900 }, { "epoch": 1.4192277383766745, "grad_norm": 0.28994813561439514, "learning_rate": 9.480980810126411e-07, "log_odds_chosen": 1.7850899696350098, "log_odds_ratio": -0.19235913455486298, "logits/chosen": -0.7060902118682861, "logits/rejected": -1.4891314506530762, "logps/chosen": -1.3143898248672485, "logps/rejected": -2.8652384281158447, "loss": 1.3578, "nll_loss": 1.338534951210022, "rewards/accuracies": 1.0, "rewards/chosen": -0.1314389854669571, "rewards/margins": 0.15508489310741425, "rewards/rejected": -0.28652387857437134, "step": 901 }, { "epoch": 1.42080378250591, "grad_norm": 0.2505437433719635, "learning_rate": 9.434266947678324e-07, "log_odds_chosen": 1.6816164255142212, "log_odds_ratio": -0.19430740177631378, "logits/chosen": -0.6904462575912476, "logits/rejected": -1.5032645463943481, "logps/chosen": -1.4055613279342651, "logps/rejected": -2.8726818561553955, "loss": 1.4249, "nll_loss": 1.4054228067398071, "rewards/accuracies": 1.0, "rewards/chosen": -0.14055614173412323, "rewards/margins": 0.1467120200395584, "rewards/rejected": -0.28726816177368164, "step": 902 }, { "epoch": 1.4223798266351457, "grad_norm": 0.2679803669452667, "learning_rate": 9.387632904296872e-07, "log_odds_chosen": 2.116358518600464, "log_odds_ratio": -0.1409800499677658, "logits/chosen": -0.6455526351928711, "logits/rejected": -1.6516026258468628, "logps/chosen": -1.4351006746292114, "logps/rejected": -3.3209948539733887, "loss": 1.4499, "nll_loss": 1.4358484745025635, "rewards/accuracies": 1.0, "rewards/chosen": -0.14351005852222443, "rewards/margins": 0.188589408993721, "rewards/rejected": -0.3320994973182678, "step": 903 }, { "epoch": 1.4239558707643813, "grad_norm": 0.37360966205596924, "learning_rate": 9.341079032279986e-07, "log_odds_chosen": 1.7366825342178345, "log_odds_ratio": -0.1803555190563202, "logits/chosen": -0.589972972869873, "logits/rejected": -1.415221929550171, "logps/chosen": -1.393925428390503, "logps/rejected": -2.9021759033203125, "loss": 1.4264, "nll_loss": 1.408334732055664, "rewards/accuracies": 1.0, "rewards/chosen": -0.13939253985881805, "rewards/margins": 0.15082502365112305, "rewards/rejected": -0.2902175784111023, "step": 904 }, { "epoch": 1.425531914893617, "grad_norm": 0.2487158626317978, "learning_rate": 9.294605683319919e-07, "log_odds_chosen": 1.7900364398956299, "log_odds_ratio": -0.17004141211509705, "logits/chosen": -0.6846928596496582, "logits/rejected": -1.5274299383163452, "logps/chosen": -1.2867729663848877, "logps/rejected": -2.8194665908813477, "loss": 1.318, "nll_loss": 1.3010035753250122, "rewards/accuracies": 1.0, "rewards/chosen": -0.12867729365825653, "rewards/margins": 0.15326935052871704, "rewards/rejected": -0.28194665908813477, "step": 905 }, { "epoch": 1.4271079590228526, "grad_norm": 0.24308228492736816, "learning_rate": 9.248213208500629e-07, "log_odds_chosen": 1.6273454427719116, "log_odds_ratio": -0.19989073276519775, "logits/chosen": -0.7513257265090942, "logits/rejected": -1.5772173404693604, "logps/chosen": -1.467084527015686, "logps/rejected": -2.891197443008423, "loss": 1.4786, "nll_loss": 1.4586395025253296, "rewards/accuracies": 1.0, "rewards/chosen": -0.14670845866203308, "rewards/margins": 0.14241132140159607, "rewards/rejected": -0.28911978006362915, "step": 906 }, { "epoch": 1.4286840031520882, "grad_norm": 0.24337664246559143, "learning_rate": 9.201901958295115e-07, "log_odds_chosen": 1.9387871026992798, "log_odds_ratio": -0.15630395710468292, "logits/chosen": -0.6508644819259644, "logits/rejected": -1.4973303079605103, "logps/chosen": -1.4235007762908936, "logps/rejected": -3.137270450592041, "loss": 1.4616, "nll_loss": 1.4459247589111328, "rewards/accuracies": 1.0, "rewards/chosen": -0.14235009253025055, "rewards/margins": 0.17137697339057922, "rewards/rejected": -0.3137270510196686, "step": 907 }, { "epoch": 1.4302600472813238, "grad_norm": 0.2549786865711212, "learning_rate": 9.155672282562736e-07, "log_odds_chosen": 1.659327745437622, "log_odds_ratio": -0.18966315686702728, "logits/chosen": -0.7559006810188293, "logits/rejected": -1.5569121837615967, "logps/chosen": -1.4450880289077759, "logps/rejected": -2.8973772525787354, "loss": 1.4747, "nll_loss": 1.455707311630249, "rewards/accuracies": 1.0, "rewards/chosen": -0.14450879395008087, "rewards/margins": 0.14522895216941833, "rewards/rejected": -0.289737731218338, "step": 908 }, { "epoch": 1.4318360914105595, "grad_norm": 0.25416597723960876, "learning_rate": 9.109524530546622e-07, "log_odds_chosen": 1.5111150741577148, "log_odds_ratio": -0.20951895415782928, "logits/chosen": -0.7180823683738708, "logits/rejected": -1.4877595901489258, "logps/chosen": -1.4433443546295166, "logps/rejected": -2.7477176189422607, "loss": 1.4756, "nll_loss": 1.4546022415161133, "rewards/accuracies": 1.0, "rewards/chosen": -0.14433442056179047, "rewards/margins": 0.13043732941150665, "rewards/rejected": -0.2747717499732971, "step": 909 }, { "epoch": 1.433412135539795, "grad_norm": 0.29453134536743164, "learning_rate": 9.063459050871001e-07, "log_odds_chosen": 1.5925884246826172, "log_odds_ratio": -0.1985904574394226, "logits/chosen": -0.7714993953704834, "logits/rejected": -1.2546613216400146, "logps/chosen": -1.410155177116394, "logps/rejected": -2.788203001022339, "loss": 1.428, "nll_loss": 1.408141016960144, "rewards/accuracies": 1.0, "rewards/chosen": -0.14101552963256836, "rewards/margins": 0.13780477643013, "rewards/rejected": -0.27882030606269836, "step": 910 }, { "epoch": 1.4349881796690307, "grad_norm": 0.24730490148067474, "learning_rate": 9.017476191538555e-07, "log_odds_chosen": 1.8357172012329102, "log_odds_ratio": -0.17157533764839172, "logits/chosen": -0.7459608912467957, "logits/rejected": -1.3676772117614746, "logps/chosen": -1.3629581928253174, "logps/rejected": -2.9562366008758545, "loss": 1.4015, "nll_loss": 1.38435697555542, "rewards/accuracies": 1.0, "rewards/chosen": -0.13629582524299622, "rewards/margins": 0.15932784974575043, "rewards/rejected": -0.29562368988990784, "step": 911 }, { "epoch": 1.4365642237982663, "grad_norm": 0.22700850665569305, "learning_rate": 8.971576299927832e-07, "log_odds_chosen": 1.86017906665802, "log_odds_ratio": -0.15699787437915802, "logits/chosen": -0.6873756647109985, "logits/rejected": -1.701200246810913, "logps/chosen": -1.449873447418213, "logps/rejected": -3.0903689861297607, "loss": 1.4724, "nll_loss": 1.4567129611968994, "rewards/accuracies": 1.0, "rewards/chosen": -0.1449873447418213, "rewards/margins": 0.16404958069324493, "rewards/rejected": -0.3090369403362274, "step": 912 }, { "epoch": 1.438140267927502, "grad_norm": 0.28582850098609924, "learning_rate": 8.925759722790591e-07, "log_odds_chosen": 1.6427593231201172, "log_odds_ratio": -0.18476685881614685, "logits/chosen": -0.6677998304367065, "logits/rejected": -1.5819510221481323, "logps/chosen": -1.421278953552246, "logps/rejected": -2.847679615020752, "loss": 1.449, "nll_loss": 1.4305495023727417, "rewards/accuracies": 1.0, "rewards/chosen": -0.1421278864145279, "rewards/margins": 0.14264006912708282, "rewards/rejected": -0.2847679853439331, "step": 913 }, { "epoch": 1.4397163120567376, "grad_norm": 0.2541874051094055, "learning_rate": 8.880026806249192e-07, "log_odds_chosen": 1.9597915410995483, "log_odds_ratio": -0.13926208019256592, "logits/chosen": -0.5797353982925415, "logits/rejected": -1.6747076511383057, "logps/chosen": -1.4759434461593628, "logps/rejected": -3.2159194946289062, "loss": 1.4919, "nll_loss": 1.4780035018920898, "rewards/accuracies": 1.0, "rewards/chosen": -0.14759434759616852, "rewards/margins": 0.17399759590625763, "rewards/rejected": -0.32159194350242615, "step": 914 }, { "epoch": 1.4412923561859732, "grad_norm": 0.2716968357563019, "learning_rate": 8.834377895793965e-07, "log_odds_chosen": 1.6315258741378784, "log_odds_ratio": -0.2065974473953247, "logits/chosen": -0.6029942631721497, "logits/rejected": -1.4257211685180664, "logps/chosen": -1.5510449409484863, "logps/rejected": -2.998202323913574, "loss": 1.5693, "nll_loss": 1.5486116409301758, "rewards/accuracies": 1.0, "rewards/chosen": -0.15510451793670654, "rewards/margins": 0.14471575617790222, "rewards/rejected": -0.29982027411460876, "step": 915 }, { "epoch": 1.4428684003152088, "grad_norm": 0.23609581589698792, "learning_rate": 8.78881333628063e-07, "log_odds_chosen": 1.6295254230499268, "log_odds_ratio": -0.1823047250509262, "logits/chosen": -0.674763023853302, "logits/rejected": -1.675118088722229, "logps/chosen": -1.4699325561523438, "logps/rejected": -2.8940014839172363, "loss": 1.4839, "nll_loss": 1.4656920433044434, "rewards/accuracies": 1.0, "rewards/chosen": -0.1469932496547699, "rewards/margins": 0.1424068808555603, "rewards/rejected": -0.2894001603126526, "step": 916 }, { "epoch": 1.4444444444444444, "grad_norm": 0.24852561950683594, "learning_rate": 8.743333471927672e-07, "log_odds_chosen": 1.8598127365112305, "log_odds_ratio": -0.15133577585220337, "logits/chosen": -0.7297662496566772, "logits/rejected": -1.6425448656082153, "logps/chosen": -1.4743287563323975, "logps/rejected": -3.1196818351745605, "loss": 1.4979, "nll_loss": 1.4827524423599243, "rewards/accuracies": 1.0, "rewards/chosen": -0.14743287861347198, "rewards/margins": 0.1645352989435196, "rewards/rejected": -0.31196820735931396, "step": 917 }, { "epoch": 1.44602048857368, "grad_norm": 0.3501012623310089, "learning_rate": 8.697938646313742e-07, "log_odds_chosen": 1.7917332649230957, "log_odds_ratio": -0.1569167822599411, "logits/chosen": -0.733870267868042, "logits/rejected": -1.4836645126342773, "logps/chosen": -1.3811607360839844, "logps/rejected": -2.9354400634765625, "loss": 1.4034, "nll_loss": 1.3876993656158447, "rewards/accuracies": 1.0, "rewards/chosen": -0.13811607658863068, "rewards/margins": 0.1554279625415802, "rewards/rejected": -0.2935440242290497, "step": 918 }, { "epoch": 1.4475965327029157, "grad_norm": 0.29661837220191956, "learning_rate": 8.652629202375075e-07, "log_odds_chosen": 1.5683361291885376, "log_odds_ratio": -0.20053435862064362, "logits/chosen": -0.5060651898384094, "logits/rejected": -1.541445016860962, "logps/chosen": -1.397658348083496, "logps/rejected": -2.7494089603424072, "loss": 1.4367, "nll_loss": 1.4166853427886963, "rewards/accuracies": 1.0, "rewards/chosen": -0.13976582884788513, "rewards/margins": 0.13517507910728455, "rewards/rejected": -0.2749409079551697, "step": 919 }, { "epoch": 1.4491725768321513, "grad_norm": 0.2720679044723511, "learning_rate": 8.607405482402861e-07, "log_odds_chosen": 1.9913804531097412, "log_odds_ratio": -0.1532764434814453, "logits/chosen": -0.7506877779960632, "logits/rejected": -1.6161788702011108, "logps/chosen": -1.2819592952728271, "logps/rejected": -3.0097174644470215, "loss": 1.3169, "nll_loss": 1.3015979528427124, "rewards/accuracies": 1.0, "rewards/chosen": -0.12819592654705048, "rewards/margins": 0.17277583479881287, "rewards/rejected": -0.30097177624702454, "step": 920 }, { "epoch": 1.450748620961387, "grad_norm": 0.22886787354946136, "learning_rate": 8.562267828040712e-07, "log_odds_chosen": 1.8573884963989258, "log_odds_ratio": -0.15454642474651337, "logits/chosen": -0.7215290665626526, "logits/rejected": -1.881230354309082, "logps/chosen": -1.4047327041625977, "logps/rejected": -3.031428337097168, "loss": 1.4239, "nll_loss": 1.408409595489502, "rewards/accuracies": 1.0, "rewards/chosen": -0.14047329127788544, "rewards/margins": 0.1626695692539215, "rewards/rejected": -0.30314287543296814, "step": 921 }, { "epoch": 1.4523246650906225, "grad_norm": 0.2560023069381714, "learning_rate": 8.517216580282048e-07, "log_odds_chosen": 1.6058309078216553, "log_odds_ratio": -0.19469963014125824, "logits/chosen": -0.5811789631843567, "logits/rejected": -1.6048799753189087, "logps/chosen": -1.4780242443084717, "logps/rejected": -2.8814961910247803, "loss": 1.5187, "nll_loss": 1.4992363452911377, "rewards/accuracies": 1.0, "rewards/chosen": -0.1478024423122406, "rewards/margins": 0.1403472125530243, "rewards/rejected": -0.2881496250629425, "step": 922 }, { "epoch": 1.4539007092198581, "grad_norm": 0.2533600330352783, "learning_rate": 8.472252079467535e-07, "log_odds_chosen": 1.6077029705047607, "log_odds_ratio": -0.18846000730991364, "logits/chosen": -0.6689071655273438, "logits/rejected": -1.556738257408142, "logps/chosen": -1.4537690877914429, "logps/rejected": -2.8543403148651123, "loss": 1.4782, "nll_loss": 1.4593662023544312, "rewards/accuracies": 1.0, "rewards/chosen": -0.14537690579891205, "rewards/margins": 0.14005713164806366, "rewards/rejected": -0.2854340374469757, "step": 923 }, { "epoch": 1.4554767533490938, "grad_norm": 0.32297271490097046, "learning_rate": 8.427374665282488e-07, "log_odds_chosen": 1.466186761856079, "log_odds_ratio": -0.21466611325740814, "logits/chosen": -0.5589500069618225, "logits/rejected": -1.2779663801193237, "logps/chosen": -1.3692665100097656, "logps/rejected": -2.6176676750183105, "loss": 1.4064, "nll_loss": 1.3848856687545776, "rewards/accuracies": 1.0, "rewards/chosen": -0.13692665100097656, "rewards/margins": 0.1248401328921318, "rewards/rejected": -0.2617667615413666, "step": 924 }, { "epoch": 1.4570527974783294, "grad_norm": 0.4079399108886719, "learning_rate": 8.382584676754336e-07, "log_odds_chosen": 1.702414870262146, "log_odds_ratio": -0.1787688285112381, "logits/chosen": -0.7131695747375488, "logits/rejected": -1.474623203277588, "logps/chosen": -1.4565544128417969, "logps/rejected": -2.950603723526001, "loss": 1.4802, "nll_loss": 1.4623353481292725, "rewards/accuracies": 1.0, "rewards/chosen": -0.14565543830394745, "rewards/margins": 0.14940495789051056, "rewards/rejected": -0.295060396194458, "step": 925 }, { "epoch": 1.458628841607565, "grad_norm": 0.2525225877761841, "learning_rate": 8.337882452250058e-07, "log_odds_chosen": 2.172259569168091, "log_odds_ratio": -0.13398639857769012, "logits/chosen": -0.7223413586616516, "logits/rejected": -1.7857404947280884, "logps/chosen": -1.4725472927093506, "logps/rejected": -3.4278881549835205, "loss": 1.4867, "nll_loss": 1.4733326435089111, "rewards/accuracies": 1.0, "rewards/chosen": -0.14725472033023834, "rewards/margins": 0.1955341100692749, "rewards/rejected": -0.34278884530067444, "step": 926 }, { "epoch": 1.4602048857368006, "grad_norm": 0.2317046821117401, "learning_rate": 8.293268329473602e-07, "log_odds_chosen": 2.142016887664795, "log_odds_ratio": -0.12687087059020996, "logits/chosen": -0.7637448310852051, "logits/rejected": -1.6723371744155884, "logps/chosen": -1.4088551998138428, "logps/rejected": -3.313737392425537, "loss": 1.4323, "nll_loss": 1.419594407081604, "rewards/accuracies": 1.0, "rewards/chosen": -0.14088551700115204, "rewards/margins": 0.19048823416233063, "rewards/rejected": -0.33137375116348267, "step": 927 }, { "epoch": 1.4617809298660362, "grad_norm": 0.286592036485672, "learning_rate": 8.248742645463367e-07, "log_odds_chosen": 1.9152957201004028, "log_odds_ratio": -0.15829113125801086, "logits/chosen": -0.6550620794296265, "logits/rejected": -1.551281452178955, "logps/chosen": -1.4756174087524414, "logps/rejected": -3.178788900375366, "loss": 1.4828, "nll_loss": 1.4670010805130005, "rewards/accuracies": 1.0, "rewards/chosen": -0.14756174385547638, "rewards/margins": 0.1703171581029892, "rewards/rejected": -0.3178789019584656, "step": 928 }, { "epoch": 1.4633569739952719, "grad_norm": 0.23163653910160065, "learning_rate": 8.204305736589612e-07, "log_odds_chosen": 1.832330584526062, "log_odds_ratio": -0.16355212032794952, "logits/chosen": -0.6462885141372681, "logits/rejected": -1.4336743354797363, "logps/chosen": -1.4303230047225952, "logps/rejected": -3.0414857864379883, "loss": 1.4481, "nll_loss": 1.4317905902862549, "rewards/accuracies": 1.0, "rewards/chosen": -0.14303229749202728, "rewards/margins": 0.16111627221107483, "rewards/rejected": -0.3041485846042633, "step": 929 }, { "epoch": 1.4649330181245075, "grad_norm": 0.23074650764465332, "learning_rate": 8.159957938551966e-07, "log_odds_chosen": 1.7751812934875488, "log_odds_ratio": -0.17015007138252258, "logits/chosen": -0.6098726391792297, "logits/rejected": -1.5128607749938965, "logps/chosen": -1.4367517232894897, "logps/rejected": -2.985832929611206, "loss": 1.464, "nll_loss": 1.4469714164733887, "rewards/accuracies": 1.0, "rewards/chosen": -0.14367519319057465, "rewards/margins": 0.15490810573101044, "rewards/rejected": -0.2985832691192627, "step": 930 }, { "epoch": 1.4665090622537431, "grad_norm": 0.23624230921268463, "learning_rate": 8.115699586376865e-07, "log_odds_chosen": 1.5871126651763916, "log_odds_ratio": -0.1996283382177353, "logits/chosen": -0.7303210496902466, "logits/rejected": -1.4325451850891113, "logps/chosen": -1.480905532836914, "logps/rejected": -2.8737592697143555, "loss": 1.5001, "nll_loss": 1.480181336402893, "rewards/accuracies": 1.0, "rewards/chosen": -0.14809054136276245, "rewards/margins": 0.13928541541099548, "rewards/rejected": -0.28737592697143555, "step": 931 }, { "epoch": 1.4680851063829787, "grad_norm": 0.25147050619125366, "learning_rate": 8.071531014415018e-07, "log_odds_chosen": 2.015183448791504, "log_odds_ratio": -0.14438295364379883, "logits/chosen": -0.683853805065155, "logits/rejected": -1.5350584983825684, "logps/chosen": -1.4378182888031006, "logps/rejected": -3.22896146774292, "loss": 1.4412, "nll_loss": 1.4267596006393433, "rewards/accuracies": 1.0, "rewards/chosen": -0.14378182590007782, "rewards/margins": 0.17911432683467865, "rewards/rejected": -0.32289615273475647, "step": 932 }, { "epoch": 1.4696611505122144, "grad_norm": 0.21603430807590485, "learning_rate": 8.027452556338894e-07, "log_odds_chosen": 1.8661415576934814, "log_odds_ratio": -0.14683236181735992, "logits/chosen": -0.6550284624099731, "logits/rejected": -1.7531710863113403, "logps/chosen": -1.4416742324829102, "logps/rejected": -3.0784685611724854, "loss": 1.4616, "nll_loss": 1.4468977451324463, "rewards/accuracies": 1.0, "rewards/chosen": -0.14416742324829102, "rewards/margins": 0.16367945075035095, "rewards/rejected": -0.30784687399864197, "step": 933 }, { "epoch": 1.47123719464145, "grad_norm": 0.26335662603378296, "learning_rate": 7.983464545140179e-07, "log_odds_chosen": 1.9164276123046875, "log_odds_ratio": -0.1412404477596283, "logits/chosen": -0.7396938800811768, "logits/rejected": -1.5333837270736694, "logps/chosen": -1.3823633193969727, "logps/rejected": -3.0580759048461914, "loss": 1.3968, "nll_loss": 1.3826264142990112, "rewards/accuracies": 1.0, "rewards/chosen": -0.13823631405830383, "rewards/margins": 0.1675712913274765, "rewards/rejected": -0.30580762028694153, "step": 934 }, { "epoch": 1.4728132387706856, "grad_norm": 0.25700458884239197, "learning_rate": 7.939567313127295e-07, "log_odds_chosen": 1.8454346656799316, "log_odds_ratio": -0.15232446789741516, "logits/chosen": -0.6836897730827332, "logits/rejected": -1.4595261812210083, "logps/chosen": -1.446830153465271, "logps/rejected": -3.071077346801758, "loss": 1.4705, "nll_loss": 1.4552760124206543, "rewards/accuracies": 1.0, "rewards/chosen": -0.14468303322792053, "rewards/margins": 0.1624247133731842, "rewards/rejected": -0.30710774660110474, "step": 935 }, { "epoch": 1.4743892828999212, "grad_norm": 0.24883194267749786, "learning_rate": 7.89576119192286e-07, "log_odds_chosen": 2.0168988704681396, "log_odds_ratio": -0.13523153960704803, "logits/chosen": -0.6411904692649841, "logits/rejected": -1.6028637886047363, "logps/chosen": -1.3860918283462524, "logps/rejected": -3.1596243381500244, "loss": 1.3974, "nll_loss": 1.3838660717010498, "rewards/accuracies": 1.0, "rewards/chosen": -0.13860918581485748, "rewards/margins": 0.17735326290130615, "rewards/rejected": -0.31596243381500244, "step": 936 }, { "epoch": 1.4759653270291568, "grad_norm": 0.23368825018405914, "learning_rate": 7.852046512461201e-07, "log_odds_chosen": 1.8750478029251099, "log_odds_ratio": -0.1503620147705078, "logits/chosen": -0.6881729960441589, "logits/rejected": -1.6647746562957764, "logps/chosen": -1.4039406776428223, "logps/rejected": -3.042243003845215, "loss": 1.4253, "nll_loss": 1.410217046737671, "rewards/accuracies": 1.0, "rewards/chosen": -0.14039407670497894, "rewards/margins": 0.1638302356004715, "rewards/rejected": -0.30422431230545044, "step": 937 }, { "epoch": 1.4775413711583925, "grad_norm": 0.23154808580875397, "learning_rate": 7.808423604985843e-07, "log_odds_chosen": 1.8925951719284058, "log_odds_ratio": -0.15831367671489716, "logits/chosen": -0.6020694375038147, "logits/rejected": -1.4571864604949951, "logps/chosen": -1.4015604257583618, "logps/rejected": -3.0591988563537598, "loss": 1.4503, "nll_loss": 1.4344905614852905, "rewards/accuracies": 1.0, "rewards/chosen": -0.14015603065490723, "rewards/margins": 0.16576388478279114, "rewards/rejected": -0.30591991543769836, "step": 938 }, { "epoch": 1.479117415287628, "grad_norm": 0.2233363389968872, "learning_rate": 7.764892799047005e-07, "log_odds_chosen": 1.9696848392486572, "log_odds_ratio": -0.1418209969997406, "logits/chosen": -0.6627472043037415, "logits/rejected": -1.6807780265808105, "logps/chosen": -1.4396893978118896, "logps/rejected": -3.1809778213500977, "loss": 1.4603, "nll_loss": 1.4460759162902832, "rewards/accuracies": 1.0, "rewards/chosen": -0.14396893978118896, "rewards/margins": 0.17412887513637543, "rewards/rejected": -0.3180978000164032, "step": 939 }, { "epoch": 1.4806934594168637, "grad_norm": 0.24284207820892334, "learning_rate": 7.721454423499143e-07, "log_odds_chosen": 2.046962261199951, "log_odds_ratio": -0.13433170318603516, "logits/chosen": -0.7078008055686951, "logits/rejected": -1.7467715740203857, "logps/chosen": -1.4217815399169922, "logps/rejected": -3.2299644947052, "loss": 1.4417, "nll_loss": 1.428259015083313, "rewards/accuracies": 1.0, "rewards/chosen": -0.14217817783355713, "rewards/margins": 0.18081827461719513, "rewards/rejected": -0.32299643754959106, "step": 940 }, { "epoch": 1.4822695035460993, "grad_norm": 0.2638827860355377, "learning_rate": 7.678108806498441e-07, "log_odds_chosen": 2.0251903533935547, "log_odds_ratio": -0.14184138178825378, "logits/chosen": -0.636859655380249, "logits/rejected": -1.5217381715774536, "logps/chosen": -1.3980457782745361, "logps/rejected": -3.1872143745422363, "loss": 1.4285, "nll_loss": 1.414327621459961, "rewards/accuracies": 1.0, "rewards/chosen": -0.13980457186698914, "rewards/margins": 0.17891687154769897, "rewards/rejected": -0.3187214434146881, "step": 941 }, { "epoch": 1.483845547675335, "grad_norm": 0.270585298538208, "learning_rate": 7.634856275500315e-07, "log_odds_chosen": 1.4698821306228638, "log_odds_ratio": -0.22061733901500702, "logits/chosen": -0.650209367275238, "logits/rejected": -1.3704414367675781, "logps/chosen": -1.5313105583190918, "logps/rejected": -2.823129892349243, "loss": 1.554, "nll_loss": 1.5318888425827026, "rewards/accuracies": 1.0, "rewards/chosen": -0.15313105285167694, "rewards/margins": 0.12918195128440857, "rewards/rejected": -0.2823129892349243, "step": 942 }, { "epoch": 1.4854215918045706, "grad_norm": 0.21558013558387756, "learning_rate": 7.591697157256991e-07, "log_odds_chosen": 1.9673588275909424, "log_odds_ratio": -0.14971627295017242, "logits/chosen": -0.6312054991722107, "logits/rejected": -1.532444715499878, "logps/chosen": -1.3634440898895264, "logps/rejected": -3.0835490226745605, "loss": 1.3868, "nll_loss": 1.3718693256378174, "rewards/accuracies": 1.0, "rewards/chosen": -0.13634440302848816, "rewards/margins": 0.17201051115989685, "rewards/rejected": -0.308354914188385, "step": 943 }, { "epoch": 1.4869976359338062, "grad_norm": 0.304609090089798, "learning_rate": 7.548631777814995e-07, "log_odds_chosen": 2.017549753189087, "log_odds_ratio": -0.14845839142799377, "logits/chosen": -0.652974009513855, "logits/rejected": -1.509749174118042, "logps/chosen": -1.4153010845184326, "logps/rejected": -3.203185558319092, "loss": 1.4445, "nll_loss": 1.4296934604644775, "rewards/accuracies": 1.0, "rewards/chosen": -0.1415301263332367, "rewards/margins": 0.1787884533405304, "rewards/rejected": -0.3203185796737671, "step": 944 }, { "epoch": 1.4885736800630418, "grad_norm": 0.24522024393081665, "learning_rate": 7.505660462512689e-07, "log_odds_chosen": 1.6701476573944092, "log_odds_ratio": -0.18431419134140015, "logits/chosen": -0.7164368629455566, "logits/rejected": -1.4171056747436523, "logps/chosen": -1.33930504322052, "logps/rejected": -2.765798330307007, "loss": 1.3831, "nll_loss": 1.3646361827850342, "rewards/accuracies": 1.0, "rewards/chosen": -0.133930504322052, "rewards/margins": 0.1426493227481842, "rewards/rejected": -0.2765798568725586, "step": 945 }, { "epoch": 1.4901497241922774, "grad_norm": 0.2594417333602905, "learning_rate": 7.462783535977842e-07, "log_odds_chosen": 1.9742743968963623, "log_odds_ratio": -0.13439792394638062, "logits/chosen": -0.691044807434082, "logits/rejected": -1.5234267711639404, "logps/chosen": -1.3967187404632568, "logps/rejected": -3.130528211593628, "loss": 1.4195, "nll_loss": 1.4061055183410645, "rewards/accuracies": 1.0, "rewards/chosen": -0.13967187702655792, "rewards/margins": 0.17338094115257263, "rewards/rejected": -0.31305280327796936, "step": 946 }, { "epoch": 1.491725768321513, "grad_norm": 0.2217145413160324, "learning_rate": 7.420001322125156e-07, "log_odds_chosen": 1.2451107501983643, "log_odds_ratio": -0.2715088427066803, "logits/chosen": -0.7807783484458923, "logits/rejected": -1.3123338222503662, "logps/chosen": -1.3806712627410889, "logps/rejected": -2.438361644744873, "loss": 1.414, "nll_loss": 1.3868746757507324, "rewards/accuracies": 1.0, "rewards/chosen": -0.1380671262741089, "rewards/margins": 0.10576906055212021, "rewards/rejected": -0.2438361942768097, "step": 947 }, { "epoch": 1.4933018124507487, "grad_norm": 0.23894211649894714, "learning_rate": 7.377314144153814e-07, "log_odds_chosen": 1.5071243047714233, "log_odds_ratio": -0.20632603764533997, "logits/chosen": -0.7174784541130066, "logits/rejected": -1.4949740171432495, "logps/chosen": -1.4536449909210205, "logps/rejected": -2.7604901790618896, "loss": 1.4843, "nll_loss": 1.4636691808700562, "rewards/accuracies": 1.0, "rewards/chosen": -0.14536450803279877, "rewards/margins": 0.130684494972229, "rewards/rejected": -0.27604901790618896, "step": 948 }, { "epoch": 1.4948778565799843, "grad_norm": 0.23438452184200287, "learning_rate": 7.334722324545064e-07, "log_odds_chosen": 1.9249755144119263, "log_odds_ratio": -0.14309164881706238, "logits/chosen": -0.6152241230010986, "logits/rejected": -1.7680805921554565, "logps/chosen": -1.4081079959869385, "logps/rejected": -3.0983264446258545, "loss": 1.4138, "nll_loss": 1.3994516134262085, "rewards/accuracies": 1.0, "rewards/chosen": -0.1408107876777649, "rewards/margins": 0.169021874666214, "rewards/rejected": -0.30983269214630127, "step": 949 }, { "epoch": 1.49645390070922, "grad_norm": 0.24368099868297577, "learning_rate": 7.292226185059756e-07, "log_odds_chosen": 1.7671029567718506, "log_odds_ratio": -0.17132027447223663, "logits/chosen": -0.6248744130134583, "logits/rejected": -1.4099971055984497, "logps/chosen": -1.4691294431686401, "logps/rejected": -3.029083251953125, "loss": 1.5011, "nll_loss": 1.483997106552124, "rewards/accuracies": 1.0, "rewards/chosen": -0.14691296219825745, "rewards/margins": 0.15599539875984192, "rewards/rejected": -0.30290836095809937, "step": 950 }, { "epoch": 1.4980299448384555, "grad_norm": 0.21915243566036224, "learning_rate": 7.249826046735927e-07, "log_odds_chosen": 1.8139275312423706, "log_odds_ratio": -0.1609688103199005, "logits/chosen": -0.6943358778953552, "logits/rejected": -1.5638314485549927, "logps/chosen": -1.4063531160354614, "logps/rejected": -2.99365496635437, "loss": 1.4284, "nll_loss": 1.412264347076416, "rewards/accuracies": 1.0, "rewards/chosen": -0.14063531160354614, "rewards/margins": 0.1587301790714264, "rewards/rejected": -0.2993655204772949, "step": 951 }, { "epoch": 1.4996059889676912, "grad_norm": 0.2773244082927704, "learning_rate": 7.207522229886379e-07, "log_odds_chosen": 1.9699525833129883, "log_odds_ratio": -0.14488418400287628, "logits/chosen": -0.713561475276947, "logits/rejected": -1.392665982246399, "logps/chosen": -1.332568645477295, "logps/rejected": -3.0436148643493652, "loss": 1.362, "nll_loss": 1.3474806547164917, "rewards/accuracies": 1.0, "rewards/chosen": -0.13325685262680054, "rewards/margins": 0.17110465466976166, "rewards/rejected": -0.3043615221977234, "step": 952 }, { "epoch": 1.5011820330969265, "grad_norm": 0.2424214631319046, "learning_rate": 7.165315054096228e-07, "log_odds_chosen": 1.8157105445861816, "log_odds_ratio": -0.17059148848056793, "logits/chosen": -0.6951842904090881, "logits/rejected": -1.5444356203079224, "logps/chosen": -1.4868419170379639, "logps/rejected": -3.096999168395996, "loss": 1.4947, "nll_loss": 1.4776148796081543, "rewards/accuracies": 1.0, "rewards/chosen": -0.14868420362472534, "rewards/margins": 0.16101573407649994, "rewards/rejected": -0.3096999228000641, "step": 953 }, { "epoch": 1.5027580772261624, "grad_norm": 0.21515390276908875, "learning_rate": 7.123204838220533e-07, "log_odds_chosen": 2.155853033065796, "log_odds_ratio": -0.13583272695541382, "logits/chosen": -0.7590830326080322, "logits/rejected": -1.6461933851242065, "logps/chosen": -1.365823745727539, "logps/rejected": -3.2752747535705566, "loss": 1.3884, "nll_loss": 1.3747961521148682, "rewards/accuracies": 1.0, "rewards/chosen": -0.1365823745727539, "rewards/margins": 0.1909450888633728, "rewards/rejected": -0.3275274634361267, "step": 954 }, { "epoch": 1.5043341213553978, "grad_norm": 0.2391800582408905, "learning_rate": 7.081191900381862e-07, "log_odds_chosen": 1.9468826055526733, "log_odds_ratio": -0.1482704132795334, "logits/chosen": -0.6943378448486328, "logits/rejected": -1.5789308547973633, "logps/chosen": -1.4783952236175537, "logps/rejected": -3.210561990737915, "loss": 1.5009, "nll_loss": 1.4860384464263916, "rewards/accuracies": 1.0, "rewards/chosen": -0.1478395164012909, "rewards/margins": 0.17321667075157166, "rewards/rejected": -0.32105618715286255, "step": 955 }, { "epoch": 1.5059101654846336, "grad_norm": 0.22466237843036652, "learning_rate": 7.039276557967895e-07, "log_odds_chosen": 2.2401113510131836, "log_odds_ratio": -0.11504784226417542, "logits/chosen": -0.6450394988059998, "logits/rejected": -1.8218344449996948, "logps/chosen": -1.361754298210144, "logps/rejected": -3.338789701461792, "loss": 1.3929, "nll_loss": 1.3813717365264893, "rewards/accuracies": 1.0, "rewards/chosen": -0.13617542386054993, "rewards/margins": 0.1977035403251648, "rewards/rejected": -0.3338789641857147, "step": 956 }, { "epoch": 1.507486209613869, "grad_norm": 0.22904148697853088, "learning_rate": 6.997459127629028e-07, "log_odds_chosen": 1.5114691257476807, "log_odds_ratio": -0.21334531903266907, "logits/chosen": -0.6755697727203369, "logits/rejected": -1.4697928428649902, "logps/chosen": -1.4383251667022705, "logps/rejected": -2.7475852966308594, "loss": 1.4786, "nll_loss": 1.457273244857788, "rewards/accuracies": 1.0, "rewards/chosen": -0.14383253455162048, "rewards/margins": 0.13092602789402008, "rewards/rejected": -0.27475854754447937, "step": 957 }, { "epoch": 1.5090622537431049, "grad_norm": 0.23240822553634644, "learning_rate": 6.955739925275963e-07, "log_odds_chosen": 2.2500858306884766, "log_odds_ratio": -0.11607085168361664, "logits/chosen": -0.7263058423995972, "logits/rejected": -1.6341439485549927, "logps/chosen": -1.357983946800232, "logps/rejected": -3.345592737197876, "loss": 1.3866, "nll_loss": 1.3749525547027588, "rewards/accuracies": 1.0, "rewards/chosen": -0.1357983946800232, "rewards/margins": 0.19876088201999664, "rewards/rejected": -0.33455926179885864, "step": 958 }, { "epoch": 1.5106382978723403, "grad_norm": 0.2334897220134735, "learning_rate": 6.914119266077354e-07, "log_odds_chosen": 2.0655581951141357, "log_odds_ratio": -0.12655213475227356, "logits/chosen": -0.5946290493011475, "logits/rejected": -1.7702455520629883, "logps/chosen": -1.485993504524231, "logps/rejected": -3.332597017288208, "loss": 1.5112, "nll_loss": 1.498542070388794, "rewards/accuracies": 1.0, "rewards/chosen": -0.14859935641288757, "rewards/margins": 0.18466034531593323, "rewards/rejected": -0.3332597017288208, "step": 959 }, { "epoch": 1.5122143420015761, "grad_norm": 0.3587821125984192, "learning_rate": 6.872597464457397e-07, "log_odds_chosen": 1.7171913385391235, "log_odds_ratio": -0.18248029053211212, "logits/chosen": -0.6445052027702332, "logits/rejected": -1.4844458103179932, "logps/chosen": -1.3720635175704956, "logps/rejected": -2.8570876121520996, "loss": 1.3911, "nll_loss": 1.3728132247924805, "rewards/accuracies": 1.0, "rewards/chosen": -0.13720636069774628, "rewards/margins": 0.1485024243593216, "rewards/rejected": -0.28570878505706787, "step": 960 }, { "epoch": 1.5137903861308115, "grad_norm": 0.4160895347595215, "learning_rate": 6.831174834093476e-07, "log_odds_chosen": 1.7886178493499756, "log_odds_ratio": -0.16238519549369812, "logits/chosen": -0.6922329068183899, "logits/rejected": -1.4965581893920898, "logps/chosen": -1.3912993669509888, "logps/rejected": -2.948406934738159, "loss": 1.4105, "nll_loss": 1.3942997455596924, "rewards/accuracies": 1.0, "rewards/chosen": -0.13912993669509888, "rewards/margins": 0.15571075677871704, "rewards/rejected": -0.2948406934738159, "step": 961 }, { "epoch": 1.5153664302600474, "grad_norm": 0.23221762478351593, "learning_rate": 6.789851687913784e-07, "log_odds_chosen": 1.5177117586135864, "log_odds_ratio": -0.22095687687397003, "logits/chosen": -0.7221294045448303, "logits/rejected": -1.587545394897461, "logps/chosen": -1.3432116508483887, "logps/rejected": -2.6421313285827637, "loss": 1.3738, "nll_loss": 1.3516777753829956, "rewards/accuracies": 1.0, "rewards/chosen": -0.1343211829662323, "rewards/margins": 0.12989196181297302, "rewards/rejected": -0.26421311497688293, "step": 962 }, { "epoch": 1.5169424743892828, "grad_norm": 0.22573482990264893, "learning_rate": 6.748628338094937e-07, "log_odds_chosen": 1.9619501829147339, "log_odds_ratio": -0.1419064700603485, "logits/chosen": -0.6415755152702332, "logits/rejected": -1.533616304397583, "logps/chosen": -1.3573863506317139, "logps/rejected": -3.067230463027954, "loss": 1.3843, "nll_loss": 1.3701223134994507, "rewards/accuracies": 1.0, "rewards/chosen": -0.13573864102363586, "rewards/margins": 0.1709844172000885, "rewards/rejected": -0.30672305822372437, "step": 963 }, { "epoch": 1.5185185185185186, "grad_norm": 0.2693047523498535, "learning_rate": 6.707505096059662e-07, "log_odds_chosen": 1.9928638935089111, "log_odds_ratio": -0.14110992848873138, "logits/chosen": -0.6593167781829834, "logits/rejected": -1.5224437713623047, "logps/chosen": -1.368179440498352, "logps/rejected": -3.118009090423584, "loss": 1.4031, "nll_loss": 1.3889775276184082, "rewards/accuracies": 1.0, "rewards/chosen": -0.13681794703006744, "rewards/margins": 0.1749829649925232, "rewards/rejected": -0.31180089712142944, "step": 964 }, { "epoch": 1.520094562647754, "grad_norm": 0.23469862341880798, "learning_rate": 6.666482272474412e-07, "log_odds_chosen": 2.061997413635254, "log_odds_ratio": -0.12642227113246918, "logits/chosen": -0.6676424145698547, "logits/rejected": -1.6210864782333374, "logps/chosen": -1.4189386367797852, "logps/rejected": -3.244745969772339, "loss": 1.4398, "nll_loss": 1.427161455154419, "rewards/accuracies": 1.0, "rewards/chosen": -0.1418938785791397, "rewards/margins": 0.18258069455623627, "rewards/rejected": -0.324474573135376, "step": 965 }, { "epoch": 1.5216706067769898, "grad_norm": 0.23726844787597656, "learning_rate": 6.625560177247023e-07, "log_odds_chosen": 1.8917025327682495, "log_odds_ratio": -0.1467244178056717, "logits/chosen": -0.6768726110458374, "logits/rejected": -1.567640781402588, "logps/chosen": -1.5120936632156372, "logps/rejected": -3.1917314529418945, "loss": 1.526, "nll_loss": 1.511344075202942, "rewards/accuracies": 1.0, "rewards/chosen": -0.15120935440063477, "rewards/margins": 0.16796378791332245, "rewards/rejected": -0.319173127412796, "step": 966 }, { "epoch": 1.5232466509062252, "grad_norm": 0.2998630106449127, "learning_rate": 6.584739119524383e-07, "log_odds_chosen": 2.130988836288452, "log_odds_ratio": -0.13314604759216309, "logits/chosen": -0.6467706561088562, "logits/rejected": -1.6562049388885498, "logps/chosen": -1.3789918422698975, "logps/rejected": -3.2636961936950684, "loss": 1.387, "nll_loss": 1.373658537864685, "rewards/accuracies": 1.0, "rewards/chosen": -0.13789919018745422, "rewards/margins": 0.18847045302391052, "rewards/rejected": -0.32636961340904236, "step": 967 }, { "epoch": 1.524822695035461, "grad_norm": 0.2360096573829651, "learning_rate": 6.544019407690077e-07, "log_odds_chosen": 1.5989983081817627, "log_odds_ratio": -0.20044191181659698, "logits/chosen": -0.6707049608230591, "logits/rejected": -1.4034690856933594, "logps/chosen": -1.4732283353805542, "logps/rejected": -2.875408172607422, "loss": 1.4888, "nll_loss": 1.4687124490737915, "rewards/accuracies": 1.0, "rewards/chosen": -0.1473228484392166, "rewards/margins": 0.14021798968315125, "rewards/rejected": -0.28754085302352905, "step": 968 }, { "epoch": 1.5263987391646965, "grad_norm": 0.23426444828510284, "learning_rate": 6.503401349362083e-07, "log_odds_chosen": 1.7697782516479492, "log_odds_ratio": -0.16758820414543152, "logits/chosen": -0.7855375409126282, "logits/rejected": -1.5976003408432007, "logps/chosen": -1.5018411874771118, "logps/rejected": -3.0670080184936523, "loss": 1.5198, "nll_loss": 1.5030412673950195, "rewards/accuracies": 1.0, "rewards/chosen": -0.15018412470817566, "rewards/margins": 0.1565166711807251, "rewards/rejected": -0.30670079588890076, "step": 969 }, { "epoch": 1.5279747832939323, "grad_norm": 0.2340129315853119, "learning_rate": 6.462885251390433e-07, "log_odds_chosen": 1.9121594429016113, "log_odds_ratio": -0.14837515354156494, "logits/chosen": -0.729539692401886, "logits/rejected": -1.4843120574951172, "logps/chosen": -1.4921919107437134, "logps/rejected": -3.192976236343384, "loss": 1.4954, "nll_loss": 1.4805225133895874, "rewards/accuracies": 1.0, "rewards/chosen": -0.14921918511390686, "rewards/margins": 0.17007845640182495, "rewards/rejected": -0.3192976415157318, "step": 970 }, { "epoch": 1.5295508274231677, "grad_norm": 0.21162472665309906, "learning_rate": 6.422471419854898e-07, "log_odds_chosen": 1.9907008409500122, "log_odds_ratio": -0.13672001659870148, "logits/chosen": -0.6383552551269531, "logits/rejected": -1.5342824459075928, "logps/chosen": -1.3227609395980835, "logps/rejected": -3.050245761871338, "loss": 1.3496, "nll_loss": 1.3359603881835938, "rewards/accuracies": 1.0, "rewards/chosen": -0.13227610290050507, "rewards/margins": 0.17274849116802216, "rewards/rejected": -0.3050246238708496, "step": 971 }, { "epoch": 1.5311268715524036, "grad_norm": 0.2712085247039795, "learning_rate": 6.382160160062662e-07, "log_odds_chosen": 2.169814348220825, "log_odds_ratio": -0.13167349994182587, "logits/chosen": -0.6404174566268921, "logits/rejected": -1.5612825155258179, "logps/chosen": -1.4152562618255615, "logps/rejected": -3.348329782485962, "loss": 1.4393, "nll_loss": 1.4260960817337036, "rewards/accuracies": 1.0, "rewards/chosen": -0.14152562618255615, "rewards/margins": 0.19330734014511108, "rewards/rejected": -0.33483296632766724, "step": 972 }, { "epoch": 1.532702915681639, "grad_norm": 0.2747749090194702, "learning_rate": 6.341951776546044e-07, "log_odds_chosen": 1.8643766641616821, "log_odds_ratio": -0.15171566605567932, "logits/chosen": -0.6093170046806335, "logits/rejected": -1.5533154010772705, "logps/chosen": -1.4239542484283447, "logps/rejected": -3.06095552444458, "loss": 1.4426, "nll_loss": 1.4274111986160278, "rewards/accuracies": 1.0, "rewards/chosen": -0.14239542186260223, "rewards/margins": 0.163700133562088, "rewards/rejected": -0.30609557032585144, "step": 973 }, { "epoch": 1.5342789598108748, "grad_norm": 0.2471790760755539, "learning_rate": 6.301846573060177e-07, "log_odds_chosen": 1.6332510709762573, "log_odds_ratio": -0.18624919652938843, "logits/chosen": -0.7112443447113037, "logits/rejected": -1.3776127099990845, "logps/chosen": -1.3730487823486328, "logps/rejected": -2.780224561691284, "loss": 1.4074, "nll_loss": 1.3887758255004883, "rewards/accuracies": 1.0, "rewards/chosen": -0.1373048722743988, "rewards/margins": 0.14071762561798096, "rewards/rejected": -0.2780224680900574, "step": 974 }, { "epoch": 1.5358550039401102, "grad_norm": 0.235744908452034, "learning_rate": 6.261844852580722e-07, "log_odds_chosen": 1.8206223249435425, "log_odds_ratio": -0.17329441010951996, "logits/chosen": -0.7152392268180847, "logits/rejected": -1.278751015663147, "logps/chosen": -1.3252991437911987, "logps/rejected": -2.9039957523345947, "loss": 1.3583, "nll_loss": 1.3409415483474731, "rewards/accuracies": 1.0, "rewards/chosen": -0.13252991437911987, "rewards/margins": 0.15786969661712646, "rewards/rejected": -0.29039961099624634, "step": 975 }, { "epoch": 1.537431048069346, "grad_norm": 0.2193654179573059, "learning_rate": 6.221946917301561e-07, "log_odds_chosen": 1.9541443586349487, "log_odds_ratio": -0.14933165907859802, "logits/chosen": -0.6110002994537354, "logits/rejected": -1.5597983598709106, "logps/chosen": -1.4144717454910278, "logps/rejected": -3.135641098022461, "loss": 1.4392, "nll_loss": 1.4242618083953857, "rewards/accuracies": 1.0, "rewards/chosen": -0.14144718647003174, "rewards/margins": 0.1721169352531433, "rewards/rejected": -0.31356412172317505, "step": 976 }, { "epoch": 1.5390070921985815, "grad_norm": 0.23790855705738068, "learning_rate": 6.182153068632545e-07, "log_odds_chosen": 1.9404877424240112, "log_odds_ratio": -0.15256814658641815, "logits/chosen": -0.6408179998397827, "logits/rejected": -1.6562281847000122, "logps/chosen": -1.4317872524261475, "logps/rejected": -3.1495001316070557, "loss": 1.467, "nll_loss": 1.4517197608947754, "rewards/accuracies": 1.0, "rewards/chosen": -0.14317873120307922, "rewards/margins": 0.1717713326215744, "rewards/rejected": -0.31495004892349243, "step": 977 }, { "epoch": 1.5405831363278173, "grad_norm": 0.26740771532058716, "learning_rate": 6.142463607197197e-07, "log_odds_chosen": 1.9451872110366821, "log_odds_ratio": -0.15472793579101562, "logits/chosen": -0.708315908908844, "logits/rejected": -1.6773669719696045, "logps/chosen": -1.4440866708755493, "logps/rejected": -3.1698157787323, "loss": 1.4514, "nll_loss": 1.4359383583068848, "rewards/accuracies": 1.0, "rewards/chosen": -0.1444086730480194, "rewards/margins": 0.17257292568683624, "rewards/rejected": -0.31698161363601685, "step": 978 }, { "epoch": 1.5421591804570527, "grad_norm": 0.22817708551883698, "learning_rate": 6.102878832830431e-07, "log_odds_chosen": 1.8373013734817505, "log_odds_ratio": -0.15536919236183167, "logits/chosen": -0.7249546051025391, "logits/rejected": -1.6927638053894043, "logps/chosen": -1.4438010454177856, "logps/rejected": -3.0623109340667725, "loss": 1.4726, "nll_loss": 1.4570345878601074, "rewards/accuracies": 1.0, "rewards/chosen": -0.1443801075220108, "rewards/margins": 0.16185101866722107, "rewards/rejected": -0.30623114109039307, "step": 979 }, { "epoch": 1.5437352245862885, "grad_norm": 0.22524091601371765, "learning_rate": 6.063399044576316e-07, "log_odds_chosen": 1.8837974071502686, "log_odds_ratio": -0.15109311044216156, "logits/chosen": -0.6819709539413452, "logits/rejected": -1.521268606185913, "logps/chosen": -1.3848626613616943, "logps/rejected": -3.028841733932495, "loss": 1.4119, "nll_loss": 1.39683198928833, "rewards/accuracies": 1.0, "rewards/chosen": -0.13848626613616943, "rewards/margins": 0.1643979251384735, "rewards/rejected": -0.30288416147232056, "step": 980 }, { "epoch": 1.545311268715524, "grad_norm": 0.2502342462539673, "learning_rate": 6.024024540685802e-07, "log_odds_chosen": 1.9373964071273804, "log_odds_ratio": -0.15239368379116058, "logits/chosen": -0.62217116355896, "logits/rejected": -1.4089964628219604, "logps/chosen": -1.3904428482055664, "logps/rejected": -3.0897319316864014, "loss": 1.4194, "nll_loss": 1.4041366577148438, "rewards/accuracies": 1.0, "rewards/chosen": -0.13904426991939545, "rewards/margins": 0.1699289232492447, "rewards/rejected": -0.30897319316864014, "step": 981 }, { "epoch": 1.5468873128447598, "grad_norm": 0.23337644338607788, "learning_rate": 5.984755618614443e-07, "log_odds_chosen": 1.7698785066604614, "log_odds_ratio": -0.168540820479393, "logits/chosen": -0.7300572991371155, "logits/rejected": -1.625712275505066, "logps/chosen": -1.4284956455230713, "logps/rejected": -2.9774229526519775, "loss": 1.4519, "nll_loss": 1.4350183010101318, "rewards/accuracies": 1.0, "rewards/chosen": -0.14284957945346832, "rewards/margins": 0.1548927277326584, "rewards/rejected": -0.2977423071861267, "step": 982 }, { "epoch": 1.5484633569739952, "grad_norm": 0.2333681285381317, "learning_rate": 5.945592575020199e-07, "log_odds_chosen": 1.8586804866790771, "log_odds_ratio": -0.1674220710992813, "logits/chosen": -0.5435483455657959, "logits/rejected": -1.4977864027023315, "logps/chosen": -1.3972073793411255, "logps/rejected": -3.027332067489624, "loss": 1.433, "nll_loss": 1.416232943534851, "rewards/accuracies": 1.0, "rewards/chosen": -0.13972075283527374, "rewards/margins": 0.16301245987415314, "rewards/rejected": -0.3027332127094269, "step": 983 }, { "epoch": 1.550039401103231, "grad_norm": 0.26622799038887024, "learning_rate": 5.90653570576116e-07, "log_odds_chosen": 2.3747029304504395, "log_odds_ratio": -0.10542309284210205, "logits/chosen": -0.7676352858543396, "logits/rejected": -1.6198368072509766, "logps/chosen": -1.4438918828964233, "logps/rejected": -3.5821423530578613, "loss": 1.4601, "nll_loss": 1.4495563507080078, "rewards/accuracies": 1.0, "rewards/chosen": -0.14438918232917786, "rewards/margins": 0.2138250321149826, "rewards/rejected": -0.35821419954299927, "step": 984 }, { "epoch": 1.5516154452324664, "grad_norm": 0.2317635416984558, "learning_rate": 5.867585305893315e-07, "log_odds_chosen": 1.8688759803771973, "log_odds_ratio": -0.16067832708358765, "logits/chosen": -0.7286397218704224, "logits/rejected": -1.4049956798553467, "logps/chosen": -1.366334080696106, "logps/rejected": -2.998530387878418, "loss": 1.4046, "nll_loss": 1.3885746002197266, "rewards/accuracies": 1.0, "rewards/chosen": -0.13663341104984283, "rewards/margins": 0.1632196605205536, "rewards/rejected": -0.29985305666923523, "step": 985 }, { "epoch": 1.5531914893617023, "grad_norm": 0.21900911629199982, "learning_rate": 5.828741669668337e-07, "log_odds_chosen": 2.1781530380249023, "log_odds_ratio": -0.11259491741657257, "logits/chosen": -0.6381757259368896, "logits/rejected": -1.6998456716537476, "logps/chosen": -1.4175000190734863, "logps/rejected": -3.354124069213867, "loss": 1.4296, "nll_loss": 1.4183080196380615, "rewards/accuracies": 1.0, "rewards/chosen": -0.1417500078678131, "rewards/margins": 0.19366240501403809, "rewards/rejected": -0.3354123830795288, "step": 986 }, { "epoch": 1.5547675334909377, "grad_norm": 0.217463880777359, "learning_rate": 5.790005090531333e-07, "log_odds_chosen": 2.2625317573547363, "log_odds_ratio": -0.12319906800985336, "logits/chosen": -0.6966750621795654, "logits/rejected": -1.684138536453247, "logps/chosen": -1.4314756393432617, "logps/rejected": -3.453754425048828, "loss": 1.4485, "nll_loss": 1.4361519813537598, "rewards/accuracies": 1.0, "rewards/chosen": -0.1431475728750229, "rewards/margins": 0.2022278755903244, "rewards/rejected": -0.3453754782676697, "step": 987 }, { "epoch": 1.5563435776201735, "grad_norm": 0.22708973288536072, "learning_rate": 5.751375861118666e-07, "log_odds_chosen": 2.0565907955169678, "log_odds_ratio": -0.15445038676261902, "logits/chosen": -0.6868225932121277, "logits/rejected": -1.5475994348526, "logps/chosen": -1.444994330406189, "logps/rejected": -3.2818796634674072, "loss": 1.4526, "nll_loss": 1.437137246131897, "rewards/accuracies": 1.0, "rewards/chosen": -0.14449943602085114, "rewards/margins": 0.18368852138519287, "rewards/rejected": -0.3281879127025604, "step": 988 }, { "epoch": 1.557919621749409, "grad_norm": 0.2407715618610382, "learning_rate": 5.712854273255707e-07, "log_odds_chosen": 1.7788302898406982, "log_odds_ratio": -0.1637771874666214, "logits/chosen": -0.5679078102111816, "logits/rejected": -1.6175519227981567, "logps/chosen": -1.4289250373840332, "logps/rejected": -2.9876627922058105, "loss": 1.4662, "nll_loss": 1.4498696327209473, "rewards/accuracies": 1.0, "rewards/chosen": -0.1428924947977066, "rewards/margins": 0.15587377548217773, "rewards/rejected": -0.29876625537872314, "step": 989 }, { "epoch": 1.5594956658786447, "grad_norm": 0.21809129416942596, "learning_rate": 5.674440617954659e-07, "log_odds_chosen": 1.9170739650726318, "log_odds_ratio": -0.14835508167743683, "logits/chosen": -0.7263450026512146, "logits/rejected": -1.3957607746124268, "logps/chosen": -1.3353750705718994, "logps/rejected": -2.998387336730957, "loss": 1.3553, "nll_loss": 1.3404512405395508, "rewards/accuracies": 1.0, "rewards/chosen": -0.13353751599788666, "rewards/margins": 0.16630125045776367, "rewards/rejected": -0.29983875155448914, "step": 990 }, { "epoch": 1.5610717100078801, "grad_norm": 0.24128296971321106, "learning_rate": 5.63613518541234e-07, "log_odds_chosen": 1.9754855632781982, "log_odds_ratio": -0.15521501004695892, "logits/chosen": -0.6127564311027527, "logits/rejected": -1.4858635663986206, "logps/chosen": -1.3786113262176514, "logps/rejected": -3.1153194904327393, "loss": 1.4152, "nll_loss": 1.3997262716293335, "rewards/accuracies": 1.0, "rewards/chosen": -0.13786114752292633, "rewards/margins": 0.17367081344127655, "rewards/rejected": -0.3115319609642029, "step": 991 }, { "epoch": 1.562647754137116, "grad_norm": 0.47279778122901917, "learning_rate": 5.597938265007993e-07, "log_odds_chosen": 2.017469882965088, "log_odds_ratio": -0.1291920691728592, "logits/chosen": -0.6799546480178833, "logits/rejected": -1.6301541328430176, "logps/chosen": -1.3704148530960083, "logps/rejected": -3.135061502456665, "loss": 1.3918, "nll_loss": 1.3788890838623047, "rewards/accuracies": 1.0, "rewards/chosen": -0.13704147934913635, "rewards/margins": 0.17646467685699463, "rewards/rejected": -0.313506156206131, "step": 992 }, { "epoch": 1.5642237982663514, "grad_norm": 0.2880786061286926, "learning_rate": 5.559850145301106e-07, "log_odds_chosen": 1.670323133468628, "log_odds_ratio": -0.18176257610321045, "logits/chosen": -0.6458690166473389, "logits/rejected": -1.3920232057571411, "logps/chosen": -1.407934546470642, "logps/rejected": -2.857358694076538, "loss": 1.4419, "nll_loss": 1.4237000942230225, "rewards/accuracies": 1.0, "rewards/chosen": -0.14079347252845764, "rewards/margins": 0.14494240283966064, "rewards/rejected": -0.2857358753681183, "step": 993 }, { "epoch": 1.5657998423955872, "grad_norm": 0.5181437730789185, "learning_rate": 5.521871114029233e-07, "log_odds_chosen": 1.8897773027420044, "log_odds_ratio": -0.1437710076570511, "logits/chosen": -0.7042770385742188, "logits/rejected": -1.5562762022018433, "logps/chosen": -1.3297470808029175, "logps/rejected": -2.9654946327209473, "loss": 1.3725, "nll_loss": 1.3580961227416992, "rewards/accuracies": 1.0, "rewards/chosen": -0.13297469913959503, "rewards/margins": 0.16357475519180298, "rewards/rejected": -0.2965494394302368, "step": 994 }, { "epoch": 1.5673758865248226, "grad_norm": 0.33190351724624634, "learning_rate": 5.484001458105823e-07, "log_odds_chosen": 1.7056673765182495, "log_odds_ratio": -0.18585379421710968, "logits/chosen": -0.7297174334526062, "logits/rejected": -1.2175813913345337, "logps/chosen": -1.4123536348342896, "logps/rejected": -2.898897171020508, "loss": 1.4367, "nll_loss": 1.4181629419326782, "rewards/accuracies": 1.0, "rewards/chosen": -0.1412353813648224, "rewards/margins": 0.14865434169769287, "rewards/rejected": -0.28988972306251526, "step": 995 }, { "epoch": 1.5689519306540585, "grad_norm": 0.2508215308189392, "learning_rate": 5.446241463618027e-07, "log_odds_chosen": 2.3727543354034424, "log_odds_ratio": -0.1468883752822876, "logits/chosen": -0.6941895484924316, "logits/rejected": -1.445560336112976, "logps/chosen": -1.4314374923706055, "logps/rejected": -3.574099540710449, "loss": 1.455, "nll_loss": 1.4403475522994995, "rewards/accuracies": 1.0, "rewards/chosen": -0.14314374327659607, "rewards/margins": 0.21426618099212646, "rewards/rejected": -0.3574099540710449, "step": 996 }, { "epoch": 1.5705279747832939, "grad_norm": 0.2683565020561218, "learning_rate": 5.40859141582457e-07, "log_odds_chosen": 2.3095686435699463, "log_odds_ratio": -0.10337819159030914, "logits/chosen": -0.7148799896240234, "logits/rejected": -1.6118800640106201, "logps/chosen": -1.2982076406478882, "logps/rejected": -3.3256571292877197, "loss": 1.32, "nll_loss": 1.3096983432769775, "rewards/accuracies": 1.0, "rewards/chosen": -0.12982076406478882, "rewards/margins": 0.2027449607849121, "rewards/rejected": -0.3325657248497009, "step": 997 }, { "epoch": 1.5721040189125297, "grad_norm": 0.260953813791275, "learning_rate": 5.371051599153582e-07, "log_odds_chosen": 1.7071082592010498, "log_odds_ratio": -0.2015705555677414, "logits/chosen": -0.6238905191421509, "logits/rejected": -1.2173104286193848, "logps/chosen": -1.3559688329696655, "logps/rejected": -2.8317506313323975, "loss": 1.3902, "nll_loss": 1.3700270652770996, "rewards/accuracies": 1.0, "rewards/chosen": -0.1355968713760376, "rewards/margins": 0.1475781798362732, "rewards/rejected": -0.2831750512123108, "step": 998 }, { "epoch": 1.573680063041765, "grad_norm": 0.24443098902702332, "learning_rate": 5.333622297200449e-07, "log_odds_chosen": 1.772143006324768, "log_odds_ratio": -0.16182053089141846, "logits/chosen": -0.6557694673538208, "logits/rejected": -1.3393594026565552, "logps/chosen": -1.3531529903411865, "logps/rejected": -2.8749842643737793, "loss": 1.3963, "nll_loss": 1.3801213502883911, "rewards/accuracies": 1.0, "rewards/chosen": -0.13531529903411865, "rewards/margins": 0.1521831452846527, "rewards/rejected": -0.287498414516449, "step": 999 }, { "epoch": 1.575256107171001, "grad_norm": 0.2207130491733551, "learning_rate": 5.296303792725676e-07, "log_odds_chosen": 2.081167221069336, "log_odds_ratio": -0.12388971447944641, "logits/chosen": -0.6979268789291382, "logits/rejected": -1.7110087871551514, "logps/chosen": -1.4649267196655273, "logps/rejected": -3.3177027702331543, "loss": 1.4761, "nll_loss": 1.463676929473877, "rewards/accuracies": 1.0, "rewards/chosen": -0.14649267494678497, "rewards/margins": 0.18527761101722717, "rewards/rejected": -0.33177027106285095, "step": 1000 }, { "epoch": 1.5768321513002364, "grad_norm": 0.39313483238220215, "learning_rate": 5.259096367652725e-07, "log_odds_chosen": 2.0995781421661377, "log_odds_ratio": -0.12762674689292908, "logits/chosen": -0.6608773469924927, "logits/rejected": -1.5571776628494263, "logps/chosen": -1.4135432243347168, "logps/rejected": -3.274109125137329, "loss": 1.4362, "nll_loss": 1.423448085784912, "rewards/accuracies": 1.0, "rewards/chosen": -0.14135432243347168, "rewards/margins": 0.18605661392211914, "rewards/rejected": -0.3274109363555908, "step": 1001 }, { "epoch": 1.578408195429472, "grad_norm": 0.23428000509738922, "learning_rate": 5.222000303065927e-07, "log_odds_chosen": 2.065018653869629, "log_odds_ratio": -0.1342705339193344, "logits/chosen": -0.6169339418411255, "logits/rejected": -1.7493171691894531, "logps/chosen": -1.3996392488479614, "logps/rejected": -3.2256298065185547, "loss": 1.425, "nll_loss": 1.411563515663147, "rewards/accuracies": 1.0, "rewards/chosen": -0.13996395468711853, "rewards/margins": 0.1825990527868271, "rewards/rejected": -0.3225629925727844, "step": 1002 }, { "epoch": 1.5799842395587076, "grad_norm": 0.2564602494239807, "learning_rate": 5.185015879208335e-07, "log_odds_chosen": 2.018220901489258, "log_odds_ratio": -0.12643006443977356, "logits/chosen": -0.6790438294410706, "logits/rejected": -1.4937458038330078, "logps/chosen": -1.398182988166809, "logps/rejected": -3.1689088344573975, "loss": 1.4286, "nll_loss": 1.4159754514694214, "rewards/accuracies": 1.0, "rewards/chosen": -0.13981831073760986, "rewards/margins": 0.17707255482673645, "rewards/rejected": -0.3168908953666687, "step": 1003 }, { "epoch": 1.5815602836879432, "grad_norm": 0.24016346037387848, "learning_rate": 5.148143375479601e-07, "log_odds_chosen": 2.0931503772735596, "log_odds_ratio": -0.1198614165186882, "logits/chosen": -0.6796484589576721, "logits/rejected": -1.5749139785766602, "logps/chosen": -1.4522900581359863, "logps/rejected": -3.3148303031921387, "loss": 1.4583, "nll_loss": 1.4463437795639038, "rewards/accuracies": 1.0, "rewards/chosen": -0.1452290117740631, "rewards/margins": 0.18625403940677643, "rewards/rejected": -0.33148306608200073, "step": 1004 }, { "epoch": 1.5831363278171788, "grad_norm": 0.24916806817054749, "learning_rate": 5.111383070433887e-07, "log_odds_chosen": 2.1631312370300293, "log_odds_ratio": -0.12071561813354492, "logits/chosen": -0.6392558813095093, "logits/rejected": -1.6787564754486084, "logps/chosen": -1.4543805122375488, "logps/rejected": -3.38873291015625, "loss": 1.4703, "nll_loss": 1.4582314491271973, "rewards/accuracies": 1.0, "rewards/chosen": -0.1454380452632904, "rewards/margins": 0.19343525171279907, "rewards/rejected": -0.33887332677841187, "step": 1005 }, { "epoch": 1.5847123719464145, "grad_norm": 0.2747148871421814, "learning_rate": 5.074735241777733e-07, "log_odds_chosen": 1.5903794765472412, "log_odds_ratio": -0.20101934671401978, "logits/chosen": -0.6054337024688721, "logits/rejected": -1.3932902812957764, "logps/chosen": -1.5557260513305664, "logps/rejected": -2.9665348529815674, "loss": 1.5532, "nll_loss": 1.533131718635559, "rewards/accuracies": 1.0, "rewards/chosen": -0.15557260811328888, "rewards/margins": 0.14108088612556458, "rewards/rejected": -0.29665350914001465, "step": 1006 }, { "epoch": 1.58628841607565, "grad_norm": 0.34648656845092773, "learning_rate": 5.038200166367978e-07, "log_odds_chosen": 2.1133413314819336, "log_odds_ratio": -0.1319977194070816, "logits/chosen": -0.751988410949707, "logits/rejected": -1.4760771989822388, "logps/chosen": -1.3870490789413452, "logps/rejected": -3.2544689178466797, "loss": 1.4117, "nll_loss": 1.3985283374786377, "rewards/accuracies": 1.0, "rewards/chosen": -0.13870491087436676, "rewards/margins": 0.18674199283123016, "rewards/rejected": -0.3254469037055969, "step": 1007 }, { "epoch": 1.5878644602048857, "grad_norm": 0.2319309562444687, "learning_rate": 5.00177812020967e-07, "log_odds_chosen": 1.837390661239624, "log_odds_ratio": -0.15366661548614502, "logits/chosen": -0.6675772666931152, "logits/rejected": -1.5887476205825806, "logps/chosen": -1.4810361862182617, "logps/rejected": -3.1065688133239746, "loss": 1.5103, "nll_loss": 1.4948968887329102, "rewards/accuracies": 1.0, "rewards/chosen": -0.14810360968112946, "rewards/margins": 0.16255329549312592, "rewards/rejected": -0.31065690517425537, "step": 1008 }, { "epoch": 1.5894405043341213, "grad_norm": 0.30177879333496094, "learning_rate": 4.965469378453979e-07, "log_odds_chosen": 1.8645656108856201, "log_odds_ratio": -0.17713713645935059, "logits/chosen": -0.7347486615180969, "logits/rejected": -1.2333391904830933, "logps/chosen": -1.3859689235687256, "logps/rejected": -3.0136122703552246, "loss": 1.4171, "nll_loss": 1.399362325668335, "rewards/accuracies": 1.0, "rewards/chosen": -0.13859690725803375, "rewards/margins": 0.16276434063911438, "rewards/rejected": -0.3013612627983093, "step": 1009 }, { "epoch": 1.591016548463357, "grad_norm": 0.231363907456398, "learning_rate": 4.929274215396091e-07, "log_odds_chosen": 1.9178415536880493, "log_odds_ratio": -0.1448175609111786, "logits/chosen": -0.750930666923523, "logits/rejected": -1.5096575021743774, "logps/chosen": -1.439091444015503, "logps/rejected": -3.1250619888305664, "loss": 1.4525, "nll_loss": 1.438064455986023, "rewards/accuracies": 1.0, "rewards/chosen": -0.14390915632247925, "rewards/margins": 0.16859707236289978, "rewards/rejected": -0.31250619888305664, "step": 1010 }, { "epoch": 1.5925925925925926, "grad_norm": 0.25067129731178284, "learning_rate": 4.893192904473182e-07, "log_odds_chosen": 1.4285374879837036, "log_odds_ratio": -0.2365470826625824, "logits/chosen": -0.7194561958312988, "logits/rejected": -1.085870385169983, "logps/chosen": -1.4595608711242676, "logps/rejected": -2.6990315914154053, "loss": 1.495, "nll_loss": 1.4713022708892822, "rewards/accuracies": 1.0, "rewards/chosen": -0.1459560990333557, "rewards/margins": 0.12394708395004272, "rewards/rejected": -0.26990318298339844, "step": 1011 }, { "epoch": 1.5941686367218282, "grad_norm": 0.22900527715682983, "learning_rate": 4.85722571826233e-07, "log_odds_chosen": 1.8804036378860474, "log_odds_ratio": -0.17272751033306122, "logits/chosen": -0.717240571975708, "logits/rejected": -1.461229681968689, "logps/chosen": -1.366174340248108, "logps/rejected": -3.014453411102295, "loss": 1.399, "nll_loss": 1.381683111190796, "rewards/accuracies": 1.0, "rewards/chosen": -0.13661745190620422, "rewards/margins": 0.16482789814472198, "rewards/rejected": -0.301445335149765, "step": 1012 }, { "epoch": 1.5957446808510638, "grad_norm": 0.2882484495639801, "learning_rate": 4.821372928478433e-07, "log_odds_chosen": 1.7660059928894043, "log_odds_ratio": -0.17163825035095215, "logits/chosen": -0.6306114792823792, "logits/rejected": -1.4862912893295288, "logps/chosen": -1.479105830192566, "logps/rejected": -3.0342788696289062, "loss": 1.4973, "nll_loss": 1.480169653892517, "rewards/accuracies": 1.0, "rewards/chosen": -0.14791058003902435, "rewards/margins": 0.1555173099040985, "rewards/rejected": -0.30342790484428406, "step": 1013 }, { "epoch": 1.5973207249802994, "grad_norm": 0.22692321240901947, "learning_rate": 4.785634805972201e-07, "log_odds_chosen": 1.7013983726501465, "log_odds_ratio": -0.19057759642601013, "logits/chosen": -0.6479536294937134, "logits/rejected": -1.39177668094635, "logps/chosen": -1.4196569919586182, "logps/rejected": -2.9050095081329346, "loss": 1.4456, "nll_loss": 1.4265507459640503, "rewards/accuracies": 1.0, "rewards/chosen": -0.14196571707725525, "rewards/margins": 0.14853526651859283, "rewards/rejected": -0.2905009686946869, "step": 1014 }, { "epoch": 1.598896769109535, "grad_norm": 0.3236638605594635, "learning_rate": 4.750011620728085e-07, "log_odds_chosen": 1.9005149602890015, "log_odds_ratio": -0.14637871086597443, "logits/chosen": -0.6936113834381104, "logits/rejected": -1.4206082820892334, "logps/chosen": -1.4421392679214478, "logps/rejected": -3.118384599685669, "loss": 1.4597, "nll_loss": 1.4451022148132324, "rewards/accuracies": 1.0, "rewards/chosen": -0.14421392977237701, "rewards/margins": 0.1676245629787445, "rewards/rejected": -0.3118385374546051, "step": 1015 }, { "epoch": 1.6004728132387707, "grad_norm": 0.2701707184314728, "learning_rate": 4.714503641862224e-07, "log_odds_chosen": 1.915150761604309, "log_odds_ratio": -0.14649231731891632, "logits/chosen": -0.639166533946991, "logits/rejected": -1.5035792589187622, "logps/chosen": -1.461795687675476, "logps/rejected": -3.1535773277282715, "loss": 1.4785, "nll_loss": 1.4638880491256714, "rewards/accuracies": 1.0, "rewards/chosen": -0.14617958664894104, "rewards/margins": 0.16917812824249268, "rewards/rejected": -0.3153577148914337, "step": 1016 }, { "epoch": 1.6020488573680063, "grad_norm": 0.24471162259578705, "learning_rate": 4.679111137620442e-07, "log_odds_chosen": 2.521536350250244, "log_odds_ratio": -0.12394658476114273, "logits/chosen": -0.779167890548706, "logits/rejected": -1.5542045831680298, "logps/chosen": -1.4179344177246094, "logps/rejected": -3.6974282264709473, "loss": 1.4248, "nll_loss": 1.4124279022216797, "rewards/accuracies": 1.0, "rewards/chosen": -0.14179344475269318, "rewards/margins": 0.2279493808746338, "rewards/rejected": -0.36974281072616577, "step": 1017 }, { "epoch": 1.603624901497242, "grad_norm": 0.39962512254714966, "learning_rate": 4.6438343753762055e-07, "log_odds_chosen": 1.6810894012451172, "log_odds_ratio": -0.1925288289785385, "logits/chosen": -0.6954044103622437, "logits/rejected": -1.4158189296722412, "logps/chosen": -1.415076732635498, "logps/rejected": -2.875946283340454, "loss": 1.4215, "nll_loss": 1.402219295501709, "rewards/accuracies": 1.0, "rewards/chosen": -0.14150768518447876, "rewards/margins": 0.1460869461297989, "rewards/rejected": -0.28759461641311646, "step": 1018 }, { "epoch": 1.6052009456264775, "grad_norm": 0.2883186638355255, "learning_rate": 4.608673621628609e-07, "log_odds_chosen": 1.91929030418396, "log_odds_ratio": -0.15084290504455566, "logits/chosen": -0.6413398385047913, "logits/rejected": -1.4859340190887451, "logps/chosen": -1.43373703956604, "logps/rejected": -3.126699447631836, "loss": 1.442, "nll_loss": 1.4268749952316284, "rewards/accuracies": 1.0, "rewards/chosen": -0.14337369799613953, "rewards/margins": 0.16929621994495392, "rewards/rejected": -0.31266993284225464, "step": 1019 }, { "epoch": 1.6067769897557131, "grad_norm": 0.2561066150665283, "learning_rate": 4.5736291420003413e-07, "log_odds_chosen": 1.9549800157546997, "log_odds_ratio": -0.1541568785905838, "logits/chosen": -0.6252659559249878, "logits/rejected": -1.399927020072937, "logps/chosen": -1.4241833686828613, "logps/rejected": -3.154966354370117, "loss": 1.4501, "nll_loss": 1.43466055393219, "rewards/accuracies": 1.0, "rewards/chosen": -0.14241832494735718, "rewards/margins": 0.17307832837104797, "rewards/rejected": -0.31549668312072754, "step": 1020 }, { "epoch": 1.6083530338849488, "grad_norm": 0.2521533966064453, "learning_rate": 4.538701201235713e-07, "log_odds_chosen": 2.0349037647247314, "log_odds_ratio": -0.14215528964996338, "logits/chosen": -0.5966821908950806, "logits/rejected": -1.5088404417037964, "logps/chosen": -1.3814523220062256, "logps/rejected": -3.1771457195281982, "loss": 1.4056, "nll_loss": 1.3913933038711548, "rewards/accuracies": 1.0, "rewards/chosen": -0.13814525306224823, "rewards/margins": 0.1795693188905716, "rewards/rejected": -0.3177145719528198, "step": 1021 }, { "epoch": 1.6099290780141844, "grad_norm": 0.27942129969596863, "learning_rate": 4.5038900631986367e-07, "log_odds_chosen": 1.9295190572738647, "log_odds_ratio": -0.15211039781570435, "logits/chosen": -0.6040986180305481, "logits/rejected": -1.4176690578460693, "logps/chosen": -1.3633060455322266, "logps/rejected": -3.048539400100708, "loss": 1.4086, "nll_loss": 1.3934367895126343, "rewards/accuracies": 1.0, "rewards/chosen": -0.13633058965206146, "rewards/margins": 0.16852334141731262, "rewards/rejected": -0.3048539459705353, "step": 1022 }, { "epoch": 1.61150512214342, "grad_norm": 0.25446707010269165, "learning_rate": 4.4691959908706287e-07, "log_odds_chosen": 1.6681817770004272, "log_odds_ratio": -0.20043697953224182, "logits/chosen": -0.6374213695526123, "logits/rejected": -1.4144397974014282, "logps/chosen": -1.4745643138885498, "logps/rejected": -2.947075366973877, "loss": 1.4891, "nll_loss": 1.4690940380096436, "rewards/accuracies": 1.0, "rewards/chosen": -0.14745643734931946, "rewards/margins": 0.14725112915039062, "rewards/rejected": -0.2947075664997101, "step": 1023 }, { "epoch": 1.6130811662726556, "grad_norm": 0.21517746150493622, "learning_rate": 4.434619246348843e-07, "log_odds_chosen": 2.1088056564331055, "log_odds_ratio": -0.14379973709583282, "logits/chosen": -0.6172202229499817, "logits/rejected": -1.6195627450942993, "logps/chosen": -1.3742038011550903, "logps/rejected": -3.24086856842041, "loss": 1.4078, "nll_loss": 1.3934203386306763, "rewards/accuracies": 1.0, "rewards/chosen": -0.1374203860759735, "rewards/margins": 0.18666648864746094, "rewards/rejected": -0.32408684492111206, "step": 1024 }, { "epoch": 1.6146572104018913, "grad_norm": 0.518103837966919, "learning_rate": 4.4001600908440564e-07, "log_odds_chosen": 1.872849941253662, "log_odds_ratio": -0.1549178808927536, "logits/chosen": -0.553978443145752, "logits/rejected": -1.4359403848648071, "logps/chosen": -1.4705908298492432, "logps/rejected": -3.1292688846588135, "loss": 1.4797, "nll_loss": 1.464216947555542, "rewards/accuracies": 1.0, "rewards/chosen": -0.14705908298492432, "rewards/margins": 0.16586779057979584, "rewards/rejected": -0.31292688846588135, "step": 1025 }, { "epoch": 1.6162332545311269, "grad_norm": 0.26604968309402466, "learning_rate": 4.365818784678736e-07, "log_odds_chosen": 1.957102656364441, "log_odds_ratio": -0.1338094174861908, "logits/chosen": -0.6306694149971008, "logits/rejected": -1.4266163110733032, "logps/chosen": -1.3158413171768188, "logps/rejected": -3.008941173553467, "loss": 1.3594, "nll_loss": 1.3460139036178589, "rewards/accuracies": 1.0, "rewards/chosen": -0.13158413767814636, "rewards/margins": 0.16931000351905823, "rewards/rejected": -0.3008941411972046, "step": 1026 }, { "epoch": 1.6178092986603625, "grad_norm": 0.22580751776695251, "learning_rate": 4.3315955872850464e-07, "log_odds_chosen": 1.8560806512832642, "log_odds_ratio": -0.16056184470653534, "logits/chosen": -0.6955811381340027, "logits/rejected": -1.446662187576294, "logps/chosen": -1.4349472522735596, "logps/rejected": -3.068366527557373, "loss": 1.4575, "nll_loss": 1.441493034362793, "rewards/accuracies": 1.0, "rewards/chosen": -0.14349474012851715, "rewards/margins": 0.1633419394493103, "rewards/rejected": -0.30683669447898865, "step": 1027 }, { "epoch": 1.6193853427895981, "grad_norm": 0.23215770721435547, "learning_rate": 4.2974907572029016e-07, "log_odds_chosen": 1.9560168981552124, "log_odds_ratio": -0.15227068960666656, "logits/chosen": -0.6669718623161316, "logits/rejected": -1.6200635433197021, "logps/chosen": -1.4750416278839111, "logps/rejected": -3.2141664028167725, "loss": 1.4804, "nll_loss": 1.465183973312378, "rewards/accuracies": 1.0, "rewards/chosen": -0.14750416576862335, "rewards/margins": 0.17391246557235718, "rewards/rejected": -0.3214166462421417, "step": 1028 }, { "epoch": 1.6209613869188337, "grad_norm": 0.2567824125289917, "learning_rate": 4.2635045520780033e-07, "log_odds_chosen": 2.006277322769165, "log_odds_ratio": -0.14409126341342926, "logits/chosen": -0.6700658798217773, "logits/rejected": -1.4934234619140625, "logps/chosen": -1.4181594848632812, "logps/rejected": -3.1922314167022705, "loss": 1.4424, "nll_loss": 1.4280370473861694, "rewards/accuracies": 1.0, "rewards/chosen": -0.1418159455060959, "rewards/margins": 0.1774071902036667, "rewards/rejected": -0.3192231357097626, "step": 1029 }, { "epoch": 1.6225374310480694, "grad_norm": 0.3259269595146179, "learning_rate": 4.2296372286598904e-07, "log_odds_chosen": 2.3392977714538574, "log_odds_ratio": -0.11563313752412796, "logits/chosen": -0.6986525058746338, "logits/rejected": -1.7043306827545166, "logps/chosen": -1.3956339359283447, "logps/rejected": -3.487534284591675, "loss": 1.4042, "nll_loss": 1.392653226852417, "rewards/accuracies": 1.0, "rewards/chosen": -0.1395633965730667, "rewards/margins": 0.20919004082679749, "rewards/rejected": -0.3487534523010254, "step": 1030 }, { "epoch": 1.624113475177305, "grad_norm": 1.041456699371338, "learning_rate": 4.1958890428000205e-07, "log_odds_chosen": 1.7850561141967773, "log_odds_ratio": -0.17696474492549896, "logits/chosen": -0.6682475805282593, "logits/rejected": -1.4341821670532227, "logps/chosen": -1.4246793985366821, "logps/rejected": -2.9944074153900146, "loss": 1.452, "nll_loss": 1.4343198537826538, "rewards/accuracies": 1.0, "rewards/chosen": -0.1424679458141327, "rewards/margins": 0.15697279572486877, "rewards/rejected": -0.2994407117366791, "step": 1031 }, { "epoch": 1.6256895193065406, "grad_norm": 0.5741440057754517, "learning_rate": 4.162260249449816e-07, "log_odds_chosen": 1.912475824356079, "log_odds_ratio": -0.15212813019752502, "logits/chosen": -0.6471393704414368, "logits/rejected": -1.3360244035720825, "logps/chosen": -1.3677096366882324, "logps/rejected": -3.0345304012298584, "loss": 1.4042, "nll_loss": 1.3889737129211426, "rewards/accuracies": 1.0, "rewards/chosen": -0.13677094876766205, "rewards/margins": 0.16668206453323364, "rewards/rejected": -0.3034530282020569, "step": 1032 }, { "epoch": 1.6272655634357762, "grad_norm": 0.5658828616142273, "learning_rate": 4.128751102658752e-07, "log_odds_chosen": 1.7893495559692383, "log_odds_ratio": -0.1694086194038391, "logits/chosen": -0.6386746168136597, "logits/rejected": -1.3874554634094238, "logps/chosen": -1.414762258529663, "logps/rejected": -2.9808478355407715, "loss": 1.4286, "nll_loss": 1.411672592163086, "rewards/accuracies": 1.0, "rewards/chosen": -0.14147622883319855, "rewards/margins": 0.15660858154296875, "rewards/rejected": -0.2980847954750061, "step": 1033 }, { "epoch": 1.6288416075650118, "grad_norm": 0.23342697322368622, "learning_rate": 4.09536185557243e-07, "log_odds_chosen": 2.472292423248291, "log_odds_ratio": -0.0996335819363594, "logits/chosen": -0.5850135087966919, "logits/rejected": -1.6649936437606812, "logps/chosen": -1.4796191453933716, "logps/rejected": -3.721003532409668, "loss": 1.4837, "nll_loss": 1.4736905097961426, "rewards/accuracies": 1.0, "rewards/chosen": -0.14796192944049835, "rewards/margins": 0.22413842380046844, "rewards/rejected": -0.3721003532409668, "step": 1034 }, { "epoch": 1.6304176516942475, "grad_norm": 0.2462213635444641, "learning_rate": 4.0620927604306533e-07, "log_odds_chosen": 2.0345096588134766, "log_odds_ratio": -0.13304403424263, "logits/chosen": -0.6642025709152222, "logits/rejected": -1.7101750373840332, "logps/chosen": -1.436891794204712, "logps/rejected": -3.2375454902648926, "loss": 1.4457, "nll_loss": 1.4323927164077759, "rewards/accuracies": 1.0, "rewards/chosen": -0.14368918538093567, "rewards/margins": 0.1800653636455536, "rewards/rejected": -0.32375454902648926, "step": 1035 }, { "epoch": 1.631993695823483, "grad_norm": 0.4905497133731842, "learning_rate": 4.0289440685655516e-07, "log_odds_chosen": 1.8620474338531494, "log_odds_ratio": -0.15810561180114746, "logits/chosen": -0.6914947628974915, "logits/rejected": -1.6083216667175293, "logps/chosen": -1.4480628967285156, "logps/rejected": -3.0912280082702637, "loss": 1.4526, "nll_loss": 1.4368361234664917, "rewards/accuracies": 1.0, "rewards/chosen": -0.14480629563331604, "rewards/margins": 0.16431652009487152, "rewards/rejected": -0.30912280082702637, "step": 1036 }, { "epoch": 1.6335697399527187, "grad_norm": 0.2735949754714966, "learning_rate": 3.9959160303996575e-07, "log_odds_chosen": 1.9010448455810547, "log_odds_ratio": -0.18693888187408447, "logits/chosen": -0.5869218707084656, "logits/rejected": -1.2474870681762695, "logps/chosen": -1.4227206707000732, "logps/rejected": -3.102074146270752, "loss": 1.4466, "nll_loss": 1.4279528856277466, "rewards/accuracies": 1.0, "rewards/chosen": -0.14227205514907837, "rewards/margins": 0.1679353564977646, "rewards/rejected": -0.31020739674568176, "step": 1037 }, { "epoch": 1.6351457840819543, "grad_norm": 0.33594954013824463, "learning_rate": 3.9630088954440244e-07, "log_odds_chosen": 1.9064152240753174, "log_odds_ratio": -0.14728373289108276, "logits/chosen": -0.629096269607544, "logits/rejected": -1.553096055984497, "logps/chosen": -1.4570976495742798, "logps/rejected": -3.1430306434631348, "loss": 1.4814, "nll_loss": 1.466673731803894, "rewards/accuracies": 1.0, "rewards/chosen": -0.1457097828388214, "rewards/margins": 0.16859327256679535, "rewards/rejected": -0.31430307030677795, "step": 1038 }, { "epoch": 1.63672182821119, "grad_norm": 0.2318764328956604, "learning_rate": 3.930222912296346e-07, "log_odds_chosen": 2.5880258083343506, "log_odds_ratio": -0.10694815218448639, "logits/chosen": -0.6465334892272949, "logits/rejected": -1.5640720129013062, "logps/chosen": -1.3708205223083496, "logps/rejected": -3.7050888538360596, "loss": 1.3927, "nll_loss": 1.3819899559020996, "rewards/accuracies": 1.0, "rewards/chosen": -0.137082040309906, "rewards/margins": 0.23342685401439667, "rewards/rejected": -0.37050890922546387, "step": 1039 }, { "epoch": 1.6382978723404256, "grad_norm": 0.25644075870513916, "learning_rate": 3.8975583286390546e-07, "log_odds_chosen": 1.7307509183883667, "log_odds_ratio": -0.16870315372943878, "logits/chosen": -0.6507830619812012, "logits/rejected": -1.4632662534713745, "logps/chosen": -1.4601856470108032, "logps/rejected": -2.979065179824829, "loss": 1.4719, "nll_loss": 1.455047369003296, "rewards/accuracies": 1.0, "rewards/chosen": -0.14601854979991913, "rewards/margins": 0.1518879532814026, "rewards/rejected": -0.2979065477848053, "step": 1040 }, { "epoch": 1.6398739164696612, "grad_norm": 0.23217107355594635, "learning_rate": 3.8650153912374807e-07, "log_odds_chosen": 2.426393508911133, "log_odds_ratio": -0.09872916340827942, "logits/chosen": -0.6378209590911865, "logits/rejected": -1.6130553483963013, "logps/chosen": -1.4362802505493164, "logps/rejected": -3.615845203399658, "loss": 1.4418, "nll_loss": 1.4319356679916382, "rewards/accuracies": 1.0, "rewards/chosen": -0.14362803101539612, "rewards/margins": 0.21795648336410522, "rewards/rejected": -0.36158454418182373, "step": 1041 }, { "epoch": 1.6414499605988968, "grad_norm": 0.3092094659805298, "learning_rate": 3.832594345937974e-07, "log_odds_chosen": 1.8331087827682495, "log_odds_ratio": -0.16149643063545227, "logits/chosen": -0.7161263227462769, "logits/rejected": -1.232496976852417, "logps/chosen": -1.3488432168960571, "logps/rejected": -2.934601068496704, "loss": 1.3973, "nll_loss": 1.3811614513397217, "rewards/accuracies": 1.0, "rewards/chosen": -0.1348843276500702, "rewards/margins": 0.15857578814029694, "rewards/rejected": -0.2934601306915283, "step": 1042 }, { "epoch": 1.6430260047281324, "grad_norm": 0.25587576627731323, "learning_rate": 3.8002954376660524e-07, "log_odds_chosen": 1.8506078720092773, "log_odds_ratio": -0.15256193280220032, "logits/chosen": -0.5979391932487488, "logits/rejected": -1.6453276872634888, "logps/chosen": -1.4968429803848267, "logps/rejected": -3.1393797397613525, "loss": 1.5085, "nll_loss": 1.4932600259780884, "rewards/accuracies": 1.0, "rewards/chosen": -0.14968430995941162, "rewards/margins": 0.16425368189811707, "rewards/rejected": -0.3139379918575287, "step": 1043 }, { "epoch": 1.644602048857368, "grad_norm": 0.24996288120746613, "learning_rate": 3.7681189104245316e-07, "log_odds_chosen": 2.0608649253845215, "log_odds_ratio": -0.12748679518699646, "logits/chosen": -0.7787600755691528, "logits/rejected": -1.3809958696365356, "logps/chosen": -1.3140685558319092, "logps/rejected": -3.1053359508514404, "loss": 1.3519, "nll_loss": 1.3391987085342407, "rewards/accuracies": 1.0, "rewards/chosen": -0.13140687346458435, "rewards/margins": 0.17912673950195312, "rewards/rejected": -0.3105336129665375, "step": 1044 }, { "epoch": 1.6461780929866037, "grad_norm": 0.2735198438167572, "learning_rate": 3.7360650072917067e-07, "log_odds_chosen": 1.8327586650848389, "log_odds_ratio": -0.16486838459968567, "logits/chosen": -0.5174856781959534, "logits/rejected": -1.3682420253753662, "logps/chosen": -1.3867355585098267, "logps/rejected": -2.9823925495147705, "loss": 1.4039, "nll_loss": 1.3874154090881348, "rewards/accuracies": 1.0, "rewards/chosen": -0.1386735737323761, "rewards/margins": 0.15956568717956543, "rewards/rejected": -0.29823926091194153, "step": 1045 }, { "epoch": 1.6477541371158393, "grad_norm": 0.2698763310909271, "learning_rate": 3.704133970419514e-07, "log_odds_chosen": 1.9990997314453125, "log_odds_ratio": -0.14625856280326843, "logits/chosen": -0.722204864025116, "logits/rejected": -1.4830739498138428, "logps/chosen": -1.3892056941986084, "logps/rejected": -3.1507811546325684, "loss": 1.4243, "nll_loss": 1.409660816192627, "rewards/accuracies": 1.0, "rewards/chosen": -0.13892057538032532, "rewards/margins": 0.17615757882595062, "rewards/rejected": -0.31507813930511475, "step": 1046 }, { "epoch": 1.649330181245075, "grad_norm": 0.29149720072746277, "learning_rate": 3.672326041031677e-07, "log_odds_chosen": 1.6402442455291748, "log_odds_ratio": -0.1870439499616623, "logits/chosen": -0.5508179068565369, "logits/rejected": -1.4112125635147095, "logps/chosen": -1.4331697225570679, "logps/rejected": -2.8617639541625977, "loss": 1.4515, "nll_loss": 1.4328315258026123, "rewards/accuracies": 1.0, "rewards/chosen": -0.14331698417663574, "rewards/margins": 0.14285942912101746, "rewards/rejected": -0.2861764132976532, "step": 1047 }, { "epoch": 1.6509062253743105, "grad_norm": 0.2291058897972107, "learning_rate": 3.6406414594219116e-07, "log_odds_chosen": 2.2508485317230225, "log_odds_ratio": -0.11147428303956985, "logits/chosen": -0.654755175113678, "logits/rejected": -1.6908966302871704, "logps/chosen": -1.447685718536377, "logps/rejected": -3.461883068084717, "loss": 1.457, "nll_loss": 1.4458431005477905, "rewards/accuracies": 1.0, "rewards/chosen": -0.14476856589317322, "rewards/margins": 0.20141975581645966, "rewards/rejected": -0.3461883068084717, "step": 1048 }, { "epoch": 1.6524822695035462, "grad_norm": 0.2279290109872818, "learning_rate": 3.609080464952103e-07, "log_odds_chosen": 2.3559720516204834, "log_odds_ratio": -0.1205439567565918, "logits/chosen": -0.7401589751243591, "logits/rejected": -1.5572707653045654, "logps/chosen": -1.4388152360916138, "logps/rejected": -3.554387331008911, "loss": 1.4521, "nll_loss": 1.440041184425354, "rewards/accuracies": 1.0, "rewards/chosen": -0.14388154447078705, "rewards/margins": 0.21155720949172974, "rewards/rejected": -0.3554387390613556, "step": 1049 }, { "epoch": 1.6540583136327816, "grad_norm": 0.258706271648407, "learning_rate": 3.577643296050483e-07, "log_odds_chosen": 2.389432191848755, "log_odds_ratio": -0.12666946649551392, "logits/chosen": -0.6404685974121094, "logits/rejected": -1.6794850826263428, "logps/chosen": -1.5508801937103271, "logps/rejected": -3.731644868850708, "loss": 1.5406, "nll_loss": 1.5279145240783691, "rewards/accuracies": 1.0, "rewards/chosen": -0.15508800745010376, "rewards/margins": 0.21807646751403809, "rewards/rejected": -0.37316447496414185, "step": 1050 }, { "epoch": 1.6556343577620174, "grad_norm": 0.2831759750843048, "learning_rate": 3.5463301902098496e-07, "log_odds_chosen": 2.036219596862793, "log_odds_ratio": -0.1471668779850006, "logits/chosen": -0.6349585652351379, "logits/rejected": -1.314481496810913, "logps/chosen": -1.3349698781967163, "logps/rejected": -3.1221985816955566, "loss": 1.3642, "nll_loss": 1.3494820594787598, "rewards/accuracies": 1.0, "rewards/chosen": -0.1334969848394394, "rewards/margins": 0.17872288823127747, "rewards/rejected": -0.31221988797187805, "step": 1051 }, { "epoch": 1.6572104018912528, "grad_norm": 0.27971935272216797, "learning_rate": 3.5151413839857625e-07, "log_odds_chosen": 1.9171792268753052, "log_odds_ratio": -0.1542944610118866, "logits/chosen": -0.5966495871543884, "logits/rejected": -1.4366765022277832, "logps/chosen": -1.4591158628463745, "logps/rejected": -3.1601924896240234, "loss": 1.469, "nll_loss": 1.4535677433013916, "rewards/accuracies": 1.0, "rewards/chosen": -0.1459115892648697, "rewards/margins": 0.1701076626777649, "rewards/rejected": -0.3160192668437958, "step": 1052 }, { "epoch": 1.6587864460204886, "grad_norm": 0.26634103059768677, "learning_rate": 3.484077112994759e-07, "log_odds_chosen": 2.3147478103637695, "log_odds_ratio": -0.1054999828338623, "logits/chosen": -0.7570221424102783, "logits/rejected": -1.6615108251571655, "logps/chosen": -1.4650148153305054, "logps/rejected": -3.547597646713257, "loss": 1.4761, "nll_loss": 1.465590000152588, "rewards/accuracies": 1.0, "rewards/chosen": -0.14650146663188934, "rewards/margins": 0.20825831592082977, "rewards/rejected": -0.35475975275039673, "step": 1053 }, { "epoch": 1.660362490149724, "grad_norm": 0.3826141655445099, "learning_rate": 3.45313761191256e-07, "log_odds_chosen": 1.6092479228973389, "log_odds_ratio": -0.20789264142513275, "logits/chosen": -0.7056980133056641, "logits/rejected": -1.3116384744644165, "logps/chosen": -1.3948101997375488, "logps/rejected": -2.791938543319702, "loss": 1.4265, "nll_loss": 1.4056901931762695, "rewards/accuracies": 1.0, "rewards/chosen": -0.13948102295398712, "rewards/margins": 0.13971279561519623, "rewards/rejected": -0.27919378876686096, "step": 1054 }, { "epoch": 1.6619385342789599, "grad_norm": 0.2470618188381195, "learning_rate": 3.4223231144723253e-07, "log_odds_chosen": 1.921688199043274, "log_odds_ratio": -0.1623573750257492, "logits/chosen": -0.6591117978096008, "logits/rejected": -1.5017824172973633, "logps/chosen": -1.4917609691619873, "logps/rejected": -3.205254554748535, "loss": 1.5248, "nll_loss": 1.5085912942886353, "rewards/accuracies": 1.0, "rewards/chosen": -0.14917610585689545, "rewards/margins": 0.17134931683540344, "rewards/rejected": -0.3205254077911377, "step": 1055 }, { "epoch": 1.6635145784081953, "grad_norm": 0.46097901463508606, "learning_rate": 3.391633853462861e-07, "log_odds_chosen": 2.319159507751465, "log_odds_ratio": -0.10287192463874817, "logits/chosen": -0.6756011247634888, "logits/rejected": -1.5233911275863647, "logps/chosen": -1.4324244260787964, "logps/rejected": -3.509413242340088, "loss": 1.4657, "nll_loss": 1.4554542303085327, "rewards/accuracies": 1.0, "rewards/chosen": -0.14324243366718292, "rewards/margins": 0.20769889652729034, "rewards/rejected": -0.3509413003921509, "step": 1056 }, { "epoch": 1.6650906225374311, "grad_norm": 0.2570216655731201, "learning_rate": 3.3610700607268803e-07, "log_odds_chosen": 1.712475061416626, "log_odds_ratio": -0.17620322108268738, "logits/chosen": -0.5821226239204407, "logits/rejected": -1.4439191818237305, "logps/chosen": -1.4155452251434326, "logps/rejected": -2.9097533226013184, "loss": 1.4479, "nll_loss": 1.4302473068237305, "rewards/accuracies": 1.0, "rewards/chosen": -0.14155453443527222, "rewards/margins": 0.149420827627182, "rewards/rejected": -0.2909753620624542, "step": 1057 }, { "epoch": 1.6666666666666665, "grad_norm": 0.5621395707130432, "learning_rate": 3.330631967159239e-07, "log_odds_chosen": 1.8507052659988403, "log_odds_ratio": -0.15102142095565796, "logits/chosen": -0.6082533597946167, "logits/rejected": -1.6500067710876465, "logps/chosen": -1.415788173675537, "logps/rejected": -3.0387611389160156, "loss": 1.4368, "nll_loss": 1.4216570854187012, "rewards/accuracies": 1.0, "rewards/chosen": -0.1415788233280182, "rewards/margins": 0.1622973084449768, "rewards/rejected": -0.303876131772995, "step": 1058 }, { "epoch": 1.6682427107959024, "grad_norm": 0.2411140501499176, "learning_rate": 3.300319802705189e-07, "log_odds_chosen": 2.3378853797912598, "log_odds_ratio": -0.1229456439614296, "logits/chosen": -0.551277220249176, "logits/rejected": -1.6752125024795532, "logps/chosen": -1.4683719873428345, "logps/rejected": -3.580745220184326, "loss": 1.4945, "nll_loss": 1.4822287559509277, "rewards/accuracies": 1.0, "rewards/chosen": -0.14683718979358673, "rewards/margins": 0.2112373411655426, "rewards/rejected": -0.35807451605796814, "step": 1059 }, { "epoch": 1.6698187549251378, "grad_norm": 0.2434995472431183, "learning_rate": 3.2701337963586604e-07, "log_odds_chosen": 1.894723653793335, "log_odds_ratio": -0.1545543521642685, "logits/chosen": -0.6253165006637573, "logits/rejected": -1.4164886474609375, "logps/chosen": -1.2830901145935059, "logps/rejected": -2.9124062061309814, "loss": 1.3241, "nll_loss": 1.3086225986480713, "rewards/accuracies": 1.0, "rewards/chosen": -0.12830901145935059, "rewards/margins": 0.1629316359758377, "rewards/rejected": -0.2912406623363495, "step": 1060 }, { "epoch": 1.6713947990543736, "grad_norm": 0.2464275062084198, "learning_rate": 3.24007417616051e-07, "log_odds_chosen": 2.1155428886413574, "log_odds_ratio": -0.13322824239730835, "logits/chosen": -0.7371612787246704, "logits/rejected": -1.5071771144866943, "logps/chosen": -1.368263602256775, "logps/rejected": -3.2391088008880615, "loss": 1.3821, "nll_loss": 1.368811011314392, "rewards/accuracies": 1.0, "rewards/chosen": -0.13682636618614197, "rewards/margins": 0.18708455562591553, "rewards/rejected": -0.3239108920097351, "step": 1061 }, { "epoch": 1.672970843183609, "grad_norm": 0.3384556174278259, "learning_rate": 3.2101411691968115e-07, "log_odds_chosen": 1.9623682498931885, "log_odds_ratio": -0.1425580233335495, "logits/chosen": -0.6461026668548584, "logits/rejected": -1.6105395555496216, "logps/chosen": -1.3682403564453125, "logps/rejected": -3.0824975967407227, "loss": 1.4001, "nll_loss": 1.3858314752578735, "rewards/accuracies": 1.0, "rewards/chosen": -0.13682405650615692, "rewards/margins": 0.1714257299900055, "rewards/rejected": -0.3082498013973236, "step": 1062 }, { "epoch": 1.6745468873128448, "grad_norm": 0.27544698119163513, "learning_rate": 3.180335001597143e-07, "log_odds_chosen": 2.2079830169677734, "log_odds_ratio": -0.121024489402771, "logits/chosen": -0.7384201884269714, "logits/rejected": -1.491677165031433, "logps/chosen": -1.4222251176834106, "logps/rejected": -3.3915762901306152, "loss": 1.4369, "nll_loss": 1.4247801303863525, "rewards/accuracies": 1.0, "rewards/chosen": -0.14222249388694763, "rewards/margins": 0.19693514704704285, "rewards/rejected": -0.3391576409339905, "step": 1063 }, { "epoch": 1.6761229314420802, "grad_norm": 0.34008336067199707, "learning_rate": 3.1506558985328523e-07, "log_odds_chosen": 2.212200880050659, "log_odds_ratio": -0.11723358929157257, "logits/chosen": -0.6338338255882263, "logits/rejected": -1.7514162063598633, "logps/chosen": -1.3736299276351929, "logps/rejected": -3.330686092376709, "loss": 1.3907, "nll_loss": 1.3789763450622559, "rewards/accuracies": 1.0, "rewards/chosen": -0.137363001704216, "rewards/margins": 0.1957056075334549, "rewards/rejected": -0.3330686092376709, "step": 1064 }, { "epoch": 1.677698975571316, "grad_norm": 0.2933269739151001, "learning_rate": 3.1211040842153934e-07, "log_odds_chosen": 1.8466805219650269, "log_odds_ratio": -0.15941856801509857, "logits/chosen": -0.6002825498580933, "logits/rejected": -1.6248646974563599, "logps/chosen": -1.5664018392562866, "logps/rejected": -3.221959352493286, "loss": 1.5651, "nll_loss": 1.5491851568222046, "rewards/accuracies": 1.0, "rewards/chosen": -0.1566401869058609, "rewards/margins": 0.16555579006671906, "rewards/rejected": -0.32219594717025757, "step": 1065 }, { "epoch": 1.6792750197005515, "grad_norm": 0.2671855688095093, "learning_rate": 3.091679781894605e-07, "log_odds_chosen": 2.0545029640197754, "log_odds_ratio": -0.1349347084760666, "logits/chosen": -0.6421516537666321, "logits/rejected": -1.4645674228668213, "logps/chosen": -1.4151796102523804, "logps/rejected": -3.2328884601593018, "loss": 1.4266, "nll_loss": 1.4130616188049316, "rewards/accuracies": 1.0, "rewards/chosen": -0.14151796698570251, "rewards/margins": 0.18177086114883423, "rewards/rejected": -0.32328882813453674, "step": 1066 }, { "epoch": 1.6808510638297873, "grad_norm": 0.2210875153541565, "learning_rate": 3.0623832138570473e-07, "log_odds_chosen": 2.250509738922119, "log_odds_ratio": -0.10501547157764435, "logits/chosen": -0.6483950614929199, "logits/rejected": -1.6013164520263672, "logps/chosen": -1.311753273010254, "logps/rejected": -3.281221389770508, "loss": 1.3303, "nll_loss": 1.3197828531265259, "rewards/accuracies": 1.0, "rewards/chosen": -0.13117532432079315, "rewards/margins": 0.19694679975509644, "rewards/rejected": -0.3281221091747284, "step": 1067 }, { "epoch": 1.6824271079590227, "grad_norm": 0.2498590350151062, "learning_rate": 3.033214601424283e-07, "log_odds_chosen": 1.9003256559371948, "log_odds_ratio": -0.15495559573173523, "logits/chosen": -0.6042993068695068, "logits/rejected": -1.3577349185943604, "logps/chosen": -1.4393795728683472, "logps/rejected": -3.1196069717407227, "loss": 1.457, "nll_loss": 1.4415332078933716, "rewards/accuracies": 1.0, "rewards/chosen": -0.14393796026706696, "rewards/margins": 0.16802272200584412, "rewards/rejected": -0.31196069717407227, "step": 1068 }, { "epoch": 1.6840031520882586, "grad_norm": 0.26158010959625244, "learning_rate": 3.0041741649512584e-07, "log_odds_chosen": 2.386746644973755, "log_odds_ratio": -0.09496892988681793, "logits/chosen": -0.576759934425354, "logits/rejected": -1.6918394565582275, "logps/chosen": -1.438929796218872, "logps/rejected": -3.5825114250183105, "loss": 1.4484, "nll_loss": 1.4388930797576904, "rewards/accuracies": 1.0, "rewards/chosen": -0.14389298856258392, "rewards/margins": 0.2143581658601761, "rewards/rejected": -0.35825115442276, "step": 1069 }, { "epoch": 1.685579196217494, "grad_norm": 0.24421310424804688, "learning_rate": 2.975262123824596e-07, "log_odds_chosen": 2.337888240814209, "log_odds_ratio": -0.11296647787094116, "logits/chosen": -0.7190134525299072, "logits/rejected": -1.5647231340408325, "logps/chosen": -1.3976733684539795, "logps/rejected": -3.4879870414733887, "loss": 1.4182, "nll_loss": 1.406915545463562, "rewards/accuracies": 1.0, "rewards/chosen": -0.1397673487663269, "rewards/margins": 0.2090313732624054, "rewards/rejected": -0.3487986922264099, "step": 1070 }, { "epoch": 1.6871552403467298, "grad_norm": 0.24184368550777435, "learning_rate": 2.9464786964609567e-07, "log_odds_chosen": 1.975092887878418, "log_odds_ratio": -0.14900679886341095, "logits/chosen": -0.6653262972831726, "logits/rejected": -1.4447376728057861, "logps/chosen": -1.4583994150161743, "logps/rejected": -3.2148609161376953, "loss": 1.4678, "nll_loss": 1.4528733491897583, "rewards/accuracies": 1.0, "rewards/chosen": -0.14583992958068848, "rewards/margins": 0.17564615607261658, "rewards/rejected": -0.32148608565330505, "step": 1071 }, { "epoch": 1.6887312844759652, "grad_norm": 0.23380732536315918, "learning_rate": 2.917824100305393e-07, "log_odds_chosen": 2.1292784214019775, "log_odds_ratio": -0.12977997958660126, "logits/chosen": -0.5873972177505493, "logits/rejected": -1.7371050119400024, "logps/chosen": -1.4406449794769287, "logps/rejected": -3.3417372703552246, "loss": 1.4597, "nll_loss": 1.4467285871505737, "rewards/accuracies": 1.0, "rewards/chosen": -0.14406448602676392, "rewards/margins": 0.1901092231273651, "rewards/rejected": -0.33417370915412903, "step": 1072 }, { "epoch": 1.690307328605201, "grad_norm": 0.2468893975019455, "learning_rate": 2.889298551829682e-07, "log_odds_chosen": 1.9497877359390259, "log_odds_ratio": -0.1531985104084015, "logits/chosen": -0.6388151049613953, "logits/rejected": -1.3839824199676514, "logps/chosen": -1.3506443500518799, "logps/rejected": -3.049588441848755, "loss": 1.3771, "nll_loss": 1.3618255853652954, "rewards/accuracies": 1.0, "rewards/chosen": -0.13506442308425903, "rewards/margins": 0.16989439725875854, "rewards/rejected": -0.30495885014533997, "step": 1073 }, { "epoch": 1.6918833727344365, "grad_norm": 0.23258164525032043, "learning_rate": 2.8609022665307226e-07, "log_odds_chosen": 1.866339921951294, "log_odds_ratio": -0.1535007804632187, "logits/chosen": -0.6827463507652283, "logits/rejected": -1.4575880765914917, "logps/chosen": -1.3848471641540527, "logps/rejected": -3.0147712230682373, "loss": 1.4152, "nll_loss": 1.399857759475708, "rewards/accuracies": 1.0, "rewards/chosen": -0.13848471641540527, "rewards/margins": 0.1629924476146698, "rewards/rejected": -0.3014771342277527, "step": 1074 }, { "epoch": 1.6934594168636723, "grad_norm": 0.23435211181640625, "learning_rate": 2.832635458928887e-07, "log_odds_chosen": 2.0296125411987305, "log_odds_ratio": -0.13293902575969696, "logits/chosen": -0.6012503504753113, "logits/rejected": -1.4636021852493286, "logps/chosen": -1.3424843549728394, "logps/rejected": -3.1106626987457275, "loss": 1.3825, "nll_loss": 1.3692493438720703, "rewards/accuracies": 1.0, "rewards/chosen": -0.13424843549728394, "rewards/margins": 0.1768178641796112, "rewards/rejected": -0.31106629967689514, "step": 1075 }, { "epoch": 1.6950354609929077, "grad_norm": 0.22622351348400116, "learning_rate": 2.804498342566406e-07, "log_odds_chosen": 2.303015947341919, "log_odds_ratio": -0.11672209203243256, "logits/chosen": -0.6639047861099243, "logits/rejected": -1.4146664142608643, "logps/chosen": -1.303202509880066, "logps/rejected": -3.3288261890411377, "loss": 1.3457, "nll_loss": 1.3340637683868408, "rewards/accuracies": 1.0, "rewards/chosen": -0.13032026588916779, "rewards/margins": 0.2025623768568039, "rewards/rejected": -0.3328826427459717, "step": 1076 }, { "epoch": 1.6966115051221435, "grad_norm": 0.24525657296180725, "learning_rate": 2.776491130005758e-07, "log_odds_chosen": 1.8605024814605713, "log_odds_ratio": -0.1628900021314621, "logits/chosen": -0.5873436331748962, "logits/rejected": -1.4213883876800537, "logps/chosen": -1.3839972019195557, "logps/rejected": -3.002988576889038, "loss": 1.4316, "nll_loss": 1.41530179977417, "rewards/accuracies": 1.0, "rewards/chosen": -0.13839972019195557, "rewards/margins": 0.16189917922019958, "rewards/rejected": -0.30029889941215515, "step": 1077 }, { "epoch": 1.698187549251379, "grad_norm": 0.3239259123802185, "learning_rate": 2.7486140328280473e-07, "log_odds_chosen": 1.9781323671340942, "log_odds_ratio": -0.13351967930793762, "logits/chosen": -0.7240140438079834, "logits/rejected": -1.6889302730560303, "logps/chosen": -1.4589776992797852, "logps/rejected": -3.210196018218994, "loss": 1.47, "nll_loss": 1.4566019773483276, "rewards/accuracies": 1.0, "rewards/chosen": -0.145897775888443, "rewards/margins": 0.17512184381484985, "rewards/rejected": -0.32101958990097046, "step": 1078 }, { "epoch": 1.6997635933806148, "grad_norm": 0.25348594784736633, "learning_rate": 2.7208672616314345e-07, "log_odds_chosen": 2.1705195903778076, "log_odds_ratio": -0.12241361290216446, "logits/chosen": -0.6877184510231018, "logits/rejected": -1.563254475593567, "logps/chosen": -1.4490975141525269, "logps/rejected": -3.3880767822265625, "loss": 1.4662, "nll_loss": 1.4539207220077515, "rewards/accuracies": 1.0, "rewards/chosen": -0.14490973949432373, "rewards/margins": 0.19389791786670685, "rewards/rejected": -0.3388076722621918, "step": 1079 }, { "epoch": 1.7013396375098502, "grad_norm": 0.24495218694210052, "learning_rate": 2.6932510260295237e-07, "log_odds_chosen": 1.974399209022522, "log_odds_ratio": -0.1501547396183014, "logits/chosen": -0.6338702440261841, "logits/rejected": -1.3268791437149048, "logps/chosen": -1.3336008787155151, "logps/rejected": -3.0517287254333496, "loss": 1.3613, "nll_loss": 1.3462855815887451, "rewards/accuracies": 1.0, "rewards/chosen": -0.1333601176738739, "rewards/margins": 0.1718127578496933, "rewards/rejected": -0.305172860622406, "step": 1080 }, { "epoch": 1.702915681639086, "grad_norm": 0.249800443649292, "learning_rate": 2.66576553464978e-07, "log_odds_chosen": 1.900071144104004, "log_odds_ratio": -0.1495131254196167, "logits/chosen": -0.6644768118858337, "logits/rejected": -1.6013550758361816, "logps/chosen": -1.5296528339385986, "logps/rejected": -3.2280750274658203, "loss": 1.5369, "nll_loss": 1.5219498872756958, "rewards/accuracies": 1.0, "rewards/chosen": -0.15296527743339539, "rewards/margins": 0.1698422133922577, "rewards/rejected": -0.32280752062797546, "step": 1081 }, { "epoch": 1.7044917257683214, "grad_norm": 0.265701025724411, "learning_rate": 2.6384109951319654e-07, "log_odds_chosen": 1.903924822807312, "log_odds_ratio": -0.15446186065673828, "logits/chosen": -0.6235219836235046, "logits/rejected": -1.4822415113449097, "logps/chosen": -1.4393709897994995, "logps/rejected": -3.1210076808929443, "loss": 1.4567, "nll_loss": 1.441243052482605, "rewards/accuracies": 1.0, "rewards/chosen": -0.1439371109008789, "rewards/margins": 0.16816368699073792, "rewards/rejected": -0.31210076808929443, "step": 1082 }, { "epoch": 1.7060677698975573, "grad_norm": 0.26271939277648926, "learning_rate": 2.6111876141265646e-07, "log_odds_chosen": 1.8542128801345825, "log_odds_ratio": -0.16191092133522034, "logits/chosen": -0.6210159659385681, "logits/rejected": -1.5786596536636353, "logps/chosen": -1.4026427268981934, "logps/rejected": -3.032045841217041, "loss": 1.4265, "nll_loss": 1.4103474617004395, "rewards/accuracies": 1.0, "rewards/chosen": -0.14026427268981934, "rewards/margins": 0.16294030845165253, "rewards/rejected": -0.30320456624031067, "step": 1083 }, { "epoch": 1.7076438140267927, "grad_norm": 0.23789772391319275, "learning_rate": 2.58409559729321e-07, "log_odds_chosen": 1.9967732429504395, "log_odds_ratio": -0.1492270976305008, "logits/chosen": -0.6899981498718262, "logits/rejected": -1.515777587890625, "logps/chosen": -1.370311975479126, "logps/rejected": -3.128286361694336, "loss": 1.3992, "nll_loss": 1.384304165840149, "rewards/accuracies": 1.0, "rewards/chosen": -0.1370311826467514, "rewards/margins": 0.1757974475622177, "rewards/rejected": -0.3128286600112915, "step": 1084 }, { "epoch": 1.7092198581560285, "grad_norm": 0.2272200733423233, "learning_rate": 2.55713514929915e-07, "log_odds_chosen": 2.123439073562622, "log_odds_ratio": -0.14401251077651978, "logits/chosen": -0.7221021056175232, "logits/rejected": -1.596219539642334, "logps/chosen": -1.3702445030212402, "logps/rejected": -3.2474677562713623, "loss": 1.3988, "nll_loss": 1.384364366531372, "rewards/accuracies": 1.0, "rewards/chosen": -0.13702446222305298, "rewards/margins": 0.187722310423851, "rewards/rejected": -0.3247467875480652, "step": 1085 }, { "epoch": 1.710795902285264, "grad_norm": 0.26321184635162354, "learning_rate": 2.530306473817696e-07, "log_odds_chosen": 1.914361834526062, "log_odds_ratio": -0.14020074903964996, "logits/chosen": -0.6009557843208313, "logits/rejected": -1.6683201789855957, "logps/chosen": -1.492491364479065, "logps/rejected": -3.1936748027801514, "loss": 1.5031, "nll_loss": 1.4891226291656494, "rewards/accuracies": 1.0, "rewards/chosen": -0.1492491513490677, "rewards/margins": 0.1701183319091797, "rewards/rejected": -0.31936752796173096, "step": 1086 }, { "epoch": 1.7123719464144997, "grad_norm": 0.2567632496356964, "learning_rate": 2.50360977352668e-07, "log_odds_chosen": 2.1013269424438477, "log_odds_ratio": -0.12944424152374268, "logits/chosen": -0.6887335181236267, "logits/rejected": -1.4517964124679565, "logps/chosen": -1.450050711631775, "logps/rejected": -3.3202757835388184, "loss": 1.4725, "nll_loss": 1.4595344066619873, "rewards/accuracies": 1.0, "rewards/chosen": -0.14500507712364197, "rewards/margins": 0.18702247738838196, "rewards/rejected": -0.3320275843143463, "step": 1087 }, { "epoch": 1.7139479905437351, "grad_norm": 0.5189839005470276, "learning_rate": 2.477045250106913e-07, "log_odds_chosen": 1.9664976596832275, "log_odds_ratio": -0.15829426050186157, "logits/chosen": -0.5747576951980591, "logits/rejected": -1.5548908710479736, "logps/chosen": -1.5300590991973877, "logps/rejected": -3.29317569732666, "loss": 1.5349, "nll_loss": 1.5190227031707764, "rewards/accuracies": 1.0, "rewards/chosen": -0.153005912899971, "rewards/margins": 0.1763116717338562, "rewards/rejected": -0.3293175995349884, "step": 1088 }, { "epoch": 1.715524034672971, "grad_norm": 0.22519537806510925, "learning_rate": 2.450613104240684e-07, "log_odds_chosen": 2.4600558280944824, "log_odds_ratio": -0.09389787912368774, "logits/chosen": -0.6630902886390686, "logits/rejected": -1.7952775955200195, "logps/chosen": -1.3907973766326904, "logps/rejected": -3.585139513015747, "loss": 1.3987, "nll_loss": 1.3893147706985474, "rewards/accuracies": 1.0, "rewards/chosen": -0.1390797346830368, "rewards/margins": 0.2194342315196991, "rewards/rejected": -0.3585139513015747, "step": 1089 }, { "epoch": 1.7171000788022064, "grad_norm": 0.32947736978530884, "learning_rate": 2.42431353561023e-07, "log_odds_chosen": 2.033921241760254, "log_odds_ratio": -0.1345897912979126, "logits/chosen": -0.6356483697891235, "logits/rejected": -1.4998348951339722, "logps/chosen": -1.4687926769256592, "logps/rejected": -3.2811930179595947, "loss": 1.4918, "nll_loss": 1.478349208831787, "rewards/accuracies": 1.0, "rewards/chosen": -0.14687927067279816, "rewards/margins": 0.18124006688594818, "rewards/rejected": -0.32811933755874634, "step": 1090 }, { "epoch": 1.7186761229314422, "grad_norm": 0.24320213496685028, "learning_rate": 2.3981467428962364e-07, "log_odds_chosen": 2.3015081882476807, "log_odds_ratio": -0.14784200489521027, "logits/chosen": -0.6028955578804016, "logits/rejected": -1.5697957277297974, "logps/chosen": -1.3827611207962036, "logps/rejected": -3.4461801052093506, "loss": 1.4064, "nll_loss": 1.391618251800537, "rewards/accuracies": 1.0, "rewards/chosen": -0.1382761001586914, "rewards/margins": 0.2063419222831726, "rewards/rejected": -0.344618022441864, "step": 1091 }, { "epoch": 1.7202521670606776, "grad_norm": 0.2560943067073822, "learning_rate": 2.372112923776315e-07, "log_odds_chosen": 2.066913604736328, "log_odds_ratio": -0.1363620012998581, "logits/chosen": -0.6002408266067505, "logits/rejected": -1.4200880527496338, "logps/chosen": -1.4744027853012085, "logps/rejected": -3.320857286453247, "loss": 1.4934, "nll_loss": 1.479757308959961, "rewards/accuracies": 1.0, "rewards/chosen": -0.14744028449058533, "rewards/margins": 0.18464544415473938, "rewards/rejected": -0.3320857286453247, "step": 1092 }, { "epoch": 1.7218282111899135, "grad_norm": 0.24706479907035828, "learning_rate": 2.346212274923538e-07, "log_odds_chosen": 1.7718126773834229, "log_odds_ratio": -0.18659138679504395, "logits/chosen": -0.7044482827186584, "logits/rejected": -1.3833290338516235, "logps/chosen": -1.3924442529678345, "logps/rejected": -2.944779396057129, "loss": 1.4259, "nll_loss": 1.4071950912475586, "rewards/accuracies": 1.0, "rewards/chosen": -0.1392444223165512, "rewards/margins": 0.1552334874868393, "rewards/rejected": -0.2944779098033905, "step": 1093 }, { "epoch": 1.7234042553191489, "grad_norm": 0.25667908787727356, "learning_rate": 2.3204449920049372e-07, "log_odds_chosen": 1.9647536277770996, "log_odds_ratio": -0.15607748925685883, "logits/chosen": -0.6860688924789429, "logits/rejected": -1.5573704242706299, "logps/chosen": -1.5424020290374756, "logps/rejected": -3.3073556423187256, "loss": 1.5401, "nll_loss": 1.5245261192321777, "rewards/accuracies": 1.0, "rewards/chosen": -0.1542401909828186, "rewards/margins": 0.17649537324905396, "rewards/rejected": -0.33073556423187256, "step": 1094 }, { "epoch": 1.7249802994483847, "grad_norm": 0.2486557811498642, "learning_rate": 2.2948112696800283e-07, "log_odds_chosen": 2.027191400527954, "log_odds_ratio": -0.13047511875629425, "logits/chosen": -0.6170079708099365, "logits/rejected": -1.625444769859314, "logps/chosen": -1.4990754127502441, "logps/rejected": -3.3113362789154053, "loss": 1.5082, "nll_loss": 1.495174527168274, "rewards/accuracies": 1.0, "rewards/chosen": -0.14990752935409546, "rewards/margins": 0.18122610449790955, "rewards/rejected": -0.331133633852005, "step": 1095 }, { "epoch": 1.7265563435776201, "grad_norm": 0.2441863864660263, "learning_rate": 2.2693113015993438e-07, "log_odds_chosen": 1.7822884321212769, "log_odds_ratio": -0.1852095127105713, "logits/chosen": -0.6803075671195984, "logits/rejected": -1.367129921913147, "logps/chosen": -1.5059711933135986, "logps/rejected": -3.092799425125122, "loss": 1.5194, "nll_loss": 1.5008918046951294, "rewards/accuracies": 1.0, "rewards/chosen": -0.15059712529182434, "rewards/margins": 0.15868283808231354, "rewards/rejected": -0.3092799484729767, "step": 1096 }, { "epoch": 1.728132387706856, "grad_norm": 0.31022536754608154, "learning_rate": 2.2439452804029524e-07, "log_odds_chosen": 2.1740808486938477, "log_odds_ratio": -0.12739938497543335, "logits/chosen": -0.5243293046951294, "logits/rejected": -1.513900876045227, "logps/chosen": -1.484216332435608, "logps/rejected": -3.43829345703125, "loss": 1.4905, "nll_loss": 1.4777462482452393, "rewards/accuracies": 1.0, "rewards/chosen": -0.14842164516448975, "rewards/margins": 0.1954077035188675, "rewards/rejected": -0.34382933378219604, "step": 1097 }, { "epoch": 1.7297084318360914, "grad_norm": 0.24572019279003143, "learning_rate": 2.2187133977190343e-07, "log_odds_chosen": 2.0476155281066895, "log_odds_ratio": -0.14748935401439667, "logits/chosen": -0.6546213030815125, "logits/rejected": -1.412070393562317, "logps/chosen": -1.4199929237365723, "logps/rejected": -3.236576795578003, "loss": 1.427, "nll_loss": 1.4122127294540405, "rewards/accuracies": 1.0, "rewards/chosen": -0.14199930429458618, "rewards/margins": 0.18165840208530426, "rewards/rejected": -0.32365769147872925, "step": 1098 }, { "epoch": 1.731284475965327, "grad_norm": 0.49531906843185425, "learning_rate": 2.193615844162411e-07, "log_odds_chosen": 1.914674997329712, "log_odds_ratio": -0.15190444886684418, "logits/chosen": -0.6591681241989136, "logits/rejected": -1.5725276470184326, "logps/chosen": -1.437995195388794, "logps/rejected": -3.127368927001953, "loss": 1.4496, "nll_loss": 1.4343852996826172, "rewards/accuracies": 1.0, "rewards/chosen": -0.1437995433807373, "rewards/margins": 0.16893735527992249, "rewards/rejected": -0.3127369284629822, "step": 1099 }, { "epoch": 1.7328605200945626, "grad_norm": 0.23055700957775116, "learning_rate": 2.1686528093331113e-07, "log_odds_chosen": 2.4284610748291016, "log_odds_ratio": -0.10337803512811661, "logits/chosen": -0.7616891860961914, "logits/rejected": -1.621726155281067, "logps/chosen": -1.349797248840332, "logps/rejected": -3.510150194168091, "loss": 1.3619, "nll_loss": 1.351601243019104, "rewards/accuracies": 1.0, "rewards/chosen": -0.134979709982872, "rewards/margins": 0.21603529155254364, "rewards/rejected": -0.35101503133773804, "step": 1100 }, { "epoch": 1.7344365642237982, "grad_norm": 0.2571258842945099, "learning_rate": 2.1438244818149465e-07, "log_odds_chosen": 2.228386402130127, "log_odds_ratio": -0.10804924368858337, "logits/chosen": -0.7427704930305481, "logits/rejected": -1.616530179977417, "logps/chosen": -1.4591196775436401, "logps/rejected": -3.4546592235565186, "loss": 1.4684, "nll_loss": 1.4576059579849243, "rewards/accuracies": 1.0, "rewards/chosen": -0.14591196179389954, "rewards/margins": 0.1995539665222168, "rewards/rejected": -0.34546592831611633, "step": 1101 }, { "epoch": 1.7360126083530338, "grad_norm": 0.3605799674987793, "learning_rate": 2.1191310491740633e-07, "log_odds_chosen": 1.5582538843154907, "log_odds_ratio": -0.2031971514225006, "logits/chosen": -0.5986801981925964, "logits/rejected": -1.3423411846160889, "logps/chosen": -1.573513388633728, "logps/rejected": -2.9563851356506348, "loss": 1.5867, "nll_loss": 1.5664149522781372, "rewards/accuracies": 1.0, "rewards/chosen": -0.15735134482383728, "rewards/margins": 0.13828718662261963, "rewards/rejected": -0.2956385314464569, "step": 1102 }, { "epoch": 1.7375886524822695, "grad_norm": 0.4370041787624359, "learning_rate": 2.0945726979575528e-07, "log_odds_chosen": 2.5015506744384766, "log_odds_ratio": -0.10361425578594208, "logits/chosen": -0.6363489627838135, "logits/rejected": -1.6581504344940186, "logps/chosen": -1.4529412984848022, "logps/rejected": -3.7187447547912598, "loss": 1.4636, "nll_loss": 1.4532363414764404, "rewards/accuracies": 1.0, "rewards/chosen": -0.14529412984848022, "rewards/margins": 0.22658033668994904, "rewards/rejected": -0.37187445163726807, "step": 1103 }, { "epoch": 1.739164696611505, "grad_norm": 0.3916145861148834, "learning_rate": 2.0701496136920315e-07, "log_odds_chosen": 2.2169103622436523, "log_odds_ratio": -0.10681455582380295, "logits/chosen": -0.641915500164032, "logits/rejected": -1.5060219764709473, "logps/chosen": -1.4113705158233643, "logps/rejected": -3.3788974285125732, "loss": 1.4206, "nll_loss": 1.4098851680755615, "rewards/accuracies": 1.0, "rewards/chosen": -0.1411370486021042, "rewards/margins": 0.19675271213054657, "rewards/rejected": -0.33788976073265076, "step": 1104 }, { "epoch": 1.7407407407407407, "grad_norm": 0.21819165349006653, "learning_rate": 2.0458619808822352e-07, "log_odds_chosen": 2.1508023738861084, "log_odds_ratio": -0.11963239312171936, "logits/chosen": -0.6447961926460266, "logits/rejected": -1.4860488176345825, "logps/chosen": -1.3627233505249023, "logps/rejected": -3.256545066833496, "loss": 1.3919, "nll_loss": 1.3799759149551392, "rewards/accuracies": 1.0, "rewards/chosen": -0.1362723559141159, "rewards/margins": 0.1893821656703949, "rewards/rejected": -0.3256545066833496, "step": 1105 }, { "epoch": 1.7423167848699763, "grad_norm": 0.4059889018535614, "learning_rate": 2.0217099830096297e-07, "log_odds_chosen": 2.2109732627868652, "log_odds_ratio": -0.10734489560127258, "logits/chosen": -0.66972416639328, "logits/rejected": -1.509680151939392, "logps/chosen": -1.3343968391418457, "logps/rejected": -3.2730214595794678, "loss": 1.3581, "nll_loss": 1.347339153289795, "rewards/accuracies": 1.0, "rewards/chosen": -0.13343967497348785, "rewards/margins": 0.19386246800422668, "rewards/rejected": -0.32730215787887573, "step": 1106 }, { "epoch": 1.743892828999212, "grad_norm": 0.24140317738056183, "learning_rate": 1.9976938025310196e-07, "log_odds_chosen": 2.183379650115967, "log_odds_ratio": -0.12593252956867218, "logits/chosen": -0.6986231207847595, "logits/rejected": -1.5171825885772705, "logps/chosen": -1.4350005388259888, "logps/rejected": -3.383676290512085, "loss": 1.4463, "nll_loss": 1.4337059259414673, "rewards/accuracies": 1.0, "rewards/chosen": -0.14350005984306335, "rewards/margins": 0.1948675662279129, "rewards/rejected": -0.33836761116981506, "step": 1107 }, { "epoch": 1.7454688731284476, "grad_norm": 0.24856986105442047, "learning_rate": 1.9738136208771783e-07, "log_odds_chosen": 2.036256790161133, "log_odds_ratio": -0.12912632524967194, "logits/chosen": -0.6727962493896484, "logits/rejected": -1.660509467124939, "logps/chosen": -1.4424879550933838, "logps/rejected": -3.2480452060699463, "loss": 1.4477, "nll_loss": 1.4348175525665283, "rewards/accuracies": 1.0, "rewards/chosen": -0.1442488133907318, "rewards/margins": 0.18055573105812073, "rewards/rejected": -0.32480451464653015, "step": 1108 }, { "epoch": 1.7470449172576832, "grad_norm": 0.24933208525180817, "learning_rate": 1.950069618451473e-07, "log_odds_chosen": 1.7854642868041992, "log_odds_ratio": -0.16483379900455475, "logits/chosen": -0.640495777130127, "logits/rejected": -1.3607367277145386, "logps/chosen": -1.3499724864959717, "logps/rejected": -2.8924694061279297, "loss": 1.3852, "nll_loss": 1.3687273263931274, "rewards/accuracies": 1.0, "rewards/chosen": -0.13499726355075836, "rewards/margins": 0.15424968302249908, "rewards/rejected": -0.28924694657325745, "step": 1109 }, { "epoch": 1.7486209613869188, "grad_norm": 0.24948106706142426, "learning_rate": 1.9264619746285016e-07, "log_odds_chosen": 1.704642653465271, "log_odds_ratio": -0.18744713068008423, "logits/chosen": -0.7268269658088684, "logits/rejected": -1.3452857732772827, "logps/chosen": -1.4029277563095093, "logps/rejected": -2.8868279457092285, "loss": 1.4236, "nll_loss": 1.404903769493103, "rewards/accuracies": 1.0, "rewards/chosen": -0.14029277861118317, "rewards/margins": 0.1483900249004364, "rewards/rejected": -0.28868281841278076, "step": 1110 }, { "epoch": 1.7501970055161544, "grad_norm": 0.24130779504776, "learning_rate": 1.9029908677527406e-07, "log_odds_chosen": 2.2048871517181396, "log_odds_ratio": -0.1130182147026062, "logits/chosen": -0.6811715960502625, "logits/rejected": -1.479119062423706, "logps/chosen": -1.4163403511047363, "logps/rejected": -3.3763585090637207, "loss": 1.4273, "nll_loss": 1.4159905910491943, "rewards/accuracies": 1.0, "rewards/chosen": -0.1416340470314026, "rewards/margins": 0.1960017830133438, "rewards/rejected": -0.3376358151435852, "step": 1111 }, { "epoch": 1.75177304964539, "grad_norm": 0.2604852616786957, "learning_rate": 1.8796564751371836e-07, "log_odds_chosen": 1.8557606935501099, "log_odds_ratio": -0.17069022357463837, "logits/chosen": -0.5974665880203247, "logits/rejected": -1.3730294704437256, "logps/chosen": -1.4477473497390747, "logps/rejected": -3.091548442840576, "loss": 1.4828, "nll_loss": 1.4656922817230225, "rewards/accuracies": 1.0, "rewards/chosen": -0.14477472007274628, "rewards/margins": 0.16438013315200806, "rewards/rejected": -0.30915483832359314, "step": 1112 }, { "epoch": 1.7533490937746257, "grad_norm": 0.25888845324516296, "learning_rate": 1.8564589730620272e-07, "log_odds_chosen": 1.9610655307769775, "log_odds_ratio": -0.14154444634914398, "logits/chosen": -0.6732085943222046, "logits/rejected": -1.4803707599639893, "logps/chosen": -1.453474998474121, "logps/rejected": -3.1923747062683105, "loss": 1.4816, "nll_loss": 1.4674268960952759, "rewards/accuracies": 1.0, "rewards/chosen": -0.1453474909067154, "rewards/margins": 0.17388996481895447, "rewards/rejected": -0.31923750042915344, "step": 1113 }, { "epoch": 1.7549251379038613, "grad_norm": 0.25359055399894714, "learning_rate": 1.8333985367733206e-07, "log_odds_chosen": 2.259960651397705, "log_odds_ratio": -0.10203836858272552, "logits/chosen": -0.6098958253860474, "logits/rejected": -1.7328476905822754, "logps/chosen": -1.4891208410263062, "logps/rejected": -3.5240650177001953, "loss": 1.4979, "nll_loss": 1.4877079725265503, "rewards/accuracies": 1.0, "rewards/chosen": -0.14891208708286285, "rewards/margins": 0.20349442958831787, "rewards/rejected": -0.3524065315723419, "step": 1114 }, { "epoch": 1.756501182033097, "grad_norm": 0.24423667788505554, "learning_rate": 1.810475340481641e-07, "log_odds_chosen": 1.9932787418365479, "log_odds_ratio": -0.16966797411441803, "logits/chosen": -0.6583192944526672, "logits/rejected": -1.291022539138794, "logps/chosen": -1.3989951610565186, "logps/rejected": -3.1615238189697266, "loss": 1.4111, "nll_loss": 1.3940962553024292, "rewards/accuracies": 1.0, "rewards/chosen": -0.13989950716495514, "rewards/margins": 0.17625288665294647, "rewards/rejected": -0.3161523938179016, "step": 1115 }, { "epoch": 1.7580772261623325, "grad_norm": 0.23357945680618286, "learning_rate": 1.7876895573607896e-07, "log_odds_chosen": 1.8362153768539429, "log_odds_ratio": -0.1529145985841751, "logits/chosen": -0.6491990685462952, "logits/rejected": -1.5472196340560913, "logps/chosen": -1.4456733465194702, "logps/rejected": -3.0598621368408203, "loss": 1.4594, "nll_loss": 1.4441100358963013, "rewards/accuracies": 1.0, "rewards/chosen": -0.1445673555135727, "rewards/margins": 0.1614188849925995, "rewards/rejected": -0.3059862554073334, "step": 1116 }, { "epoch": 1.7596532702915682, "grad_norm": 0.523276686668396, "learning_rate": 1.7650413595464776e-07, "log_odds_chosen": 2.1355326175689697, "log_odds_ratio": -0.12463532388210297, "logits/chosen": -0.7610653638839722, "logits/rejected": -1.6068873405456543, "logps/chosen": -1.4335659742355347, "logps/rejected": -3.3339483737945557, "loss": 1.443, "nll_loss": 1.430572271347046, "rewards/accuracies": 1.0, "rewards/chosen": -0.143356591463089, "rewards/margins": 0.19003824889659882, "rewards/rejected": -0.3333948254585266, "step": 1117 }, { "epoch": 1.7612293144208038, "grad_norm": 0.24096181988716125, "learning_rate": 1.7425309181350167e-07, "log_odds_chosen": 1.8404210805892944, "log_odds_ratio": -0.15650488436222076, "logits/chosen": -0.6339390873908997, "logits/rejected": -1.4825618267059326, "logps/chosen": -1.3963136672973633, "logps/rejected": -3.0029304027557373, "loss": 1.4159, "nll_loss": 1.400277853012085, "rewards/accuracies": 1.0, "rewards/chosen": -0.13963137567043304, "rewards/margins": 0.16066166758537292, "rewards/rejected": -0.30029305815696716, "step": 1118 }, { "epoch": 1.7628053585500394, "grad_norm": 0.2304840385913849, "learning_rate": 1.7201584031820415e-07, "log_odds_chosen": 1.8438260555267334, "log_odds_ratio": -0.1598886102437973, "logits/chosen": -0.5556644797325134, "logits/rejected": -1.511696696281433, "logps/chosen": -1.304341435432434, "logps/rejected": -2.880110502243042, "loss": 1.3435, "nll_loss": 1.3275017738342285, "rewards/accuracies": 1.0, "rewards/chosen": -0.13043415546417236, "rewards/margins": 0.15757688879966736, "rewards/rejected": -0.2880110442638397, "step": 1119 }, { "epoch": 1.764381402679275, "grad_norm": 0.34933432936668396, "learning_rate": 1.6979239837012193e-07, "log_odds_chosen": 2.1846253871917725, "log_odds_ratio": -0.12376932054758072, "logits/chosen": -0.5972345471382141, "logits/rejected": -1.4883105754852295, "logps/chosen": -1.3852473497390747, "logps/rejected": -3.3172552585601807, "loss": 1.4017, "nll_loss": 1.3893691301345825, "rewards/accuracies": 1.0, "rewards/chosen": -0.13852475583553314, "rewards/margins": 0.1932007372379303, "rewards/rejected": -0.33172550797462463, "step": 1120 }, { "epoch": 1.7659574468085106, "grad_norm": 0.23291108012199402, "learning_rate": 1.6758278276629655e-07, "log_odds_chosen": 2.1401126384735107, "log_odds_ratio": -0.11547724902629852, "logits/chosen": -0.5188056826591492, "logits/rejected": -1.6312652826309204, "logps/chosen": -1.4703902006149292, "logps/rejected": -3.384904384613037, "loss": 1.4893, "nll_loss": 1.4777474403381348, "rewards/accuracies": 1.0, "rewards/chosen": -0.1470390260219574, "rewards/margins": 0.19145141541957855, "rewards/rejected": -0.33849042654037476, "step": 1121 }, { "epoch": 1.7675334909377463, "grad_norm": 0.2521669268608093, "learning_rate": 1.6538701019931867e-07, "log_odds_chosen": 2.0365591049194336, "log_odds_ratio": -0.13342799246311188, "logits/chosen": -0.7561998963356018, "logits/rejected": -1.6114742755889893, "logps/chosen": -1.3323646783828735, "logps/rejected": -3.104421615600586, "loss": 1.3613, "nll_loss": 1.347915768623352, "rewards/accuracies": 1.0, "rewards/chosen": -0.13323648273944855, "rewards/margins": 0.17720571160316467, "rewards/rejected": -0.310442179441452, "step": 1122 }, { "epoch": 1.7691095350669819, "grad_norm": 0.2512739896774292, "learning_rate": 1.6320509725720123e-07, "log_odds_chosen": 2.169313669204712, "log_odds_ratio": -0.12478064000606537, "logits/chosen": -0.6798474192619324, "logits/rejected": -1.594992995262146, "logps/chosen": -1.4319853782653809, "logps/rejected": -3.367257833480835, "loss": 1.4586, "nll_loss": 1.4461684226989746, "rewards/accuracies": 1.0, "rewards/chosen": -0.14319853484630585, "rewards/margins": 0.1935272216796875, "rewards/rejected": -0.33672577142715454, "step": 1123 }, { "epoch": 1.7706855791962175, "grad_norm": 0.2450278401374817, "learning_rate": 1.6103706042325426e-07, "log_odds_chosen": 2.3881607055664062, "log_odds_ratio": -0.1081864982843399, "logits/chosen": -0.7193140983581543, "logits/rejected": -1.4998961687088013, "logps/chosen": -1.4644615650177002, "logps/rejected": -3.6197800636291504, "loss": 1.4773, "nll_loss": 1.4664843082427979, "rewards/accuracies": 1.0, "rewards/chosen": -0.14644618332386017, "rewards/margins": 0.2155318558216095, "rewards/rejected": -0.36197802424430847, "step": 1124 }, { "epoch": 1.7722616233254531, "grad_norm": 0.2577517628669739, "learning_rate": 1.5888291607596127e-07, "log_odds_chosen": 2.2435426712036133, "log_odds_ratio": -0.11581197381019592, "logits/chosen": -0.7666789293289185, "logits/rejected": -1.547073245048523, "logps/chosen": -1.36039137840271, "logps/rejected": -3.3482577800750732, "loss": 1.3805, "nll_loss": 1.3689347505569458, "rewards/accuracies": 1.0, "rewards/chosen": -0.136039137840271, "rewards/margins": 0.1987866461277008, "rewards/rejected": -0.3348257839679718, "step": 1125 }, { "epoch": 1.7738376674546887, "grad_norm": 0.2725983262062073, "learning_rate": 1.5674268048885275e-07, "log_odds_chosen": 2.10737681388855, "log_odds_ratio": -0.13232018053531647, "logits/chosen": -0.6193439364433289, "logits/rejected": -1.5012483596801758, "logps/chosen": -1.4422088861465454, "logps/rejected": -3.322279453277588, "loss": 1.4716, "nll_loss": 1.4583821296691895, "rewards/accuracies": 1.0, "rewards/chosen": -0.14422088861465454, "rewards/margins": 0.18800705671310425, "rewards/rejected": -0.3322279453277588, "step": 1126 }, { "epoch": 1.7754137115839244, "grad_norm": 0.2608501613140106, "learning_rate": 1.5461636983038685e-07, "log_odds_chosen": 1.9501361846923828, "log_odds_ratio": -0.14728882908821106, "logits/chosen": -0.5869881510734558, "logits/rejected": -1.605644941329956, "logps/chosen": -1.5326752662658691, "logps/rejected": -3.274317741394043, "loss": 1.5212, "nll_loss": 1.5064988136291504, "rewards/accuracies": 1.0, "rewards/chosen": -0.15326754748821259, "rewards/margins": 0.174164280295372, "rewards/rejected": -0.3274317979812622, "step": 1127 }, { "epoch": 1.77698975571316, "grad_norm": 0.35679367184638977, "learning_rate": 1.525040001638249e-07, "log_odds_chosen": 2.0434563159942627, "log_odds_ratio": -0.13504567742347717, "logits/chosen": -0.6465141177177429, "logits/rejected": -1.5215098857879639, "logps/chosen": -1.4231538772583008, "logps/rejected": -3.230894088745117, "loss": 1.4355, "nll_loss": 1.4219523668289185, "rewards/accuracies": 1.0, "rewards/chosen": -0.14231538772583008, "rewards/margins": 0.1807740330696106, "rewards/rejected": -0.3230894207954407, "step": 1128 }, { "epoch": 1.7785657998423956, "grad_norm": 0.4376201331615448, "learning_rate": 1.5040558744711086e-07, "log_odds_chosen": 2.3003416061401367, "log_odds_ratio": -0.10935534536838531, "logits/chosen": -0.6945511698722839, "logits/rejected": -1.6167174577713013, "logps/chosen": -1.4084886312484741, "logps/rejected": -3.462228298187256, "loss": 1.4304, "nll_loss": 1.4194775819778442, "rewards/accuracies": 1.0, "rewards/chosen": -0.14084887504577637, "rewards/margins": 0.20537398755550385, "rewards/rejected": -0.34622281789779663, "step": 1129 }, { "epoch": 1.7801418439716312, "grad_norm": 0.2434682548046112, "learning_rate": 1.4832114753275082e-07, "log_odds_chosen": 1.882206678390503, "log_odds_ratio": -0.16520872712135315, "logits/chosen": -0.7562097907066345, "logits/rejected": -1.43143630027771, "logps/chosen": -1.3748055696487427, "logps/rejected": -3.0245025157928467, "loss": 1.3987, "nll_loss": 1.3821355104446411, "rewards/accuracies": 1.0, "rewards/chosen": -0.13748057186603546, "rewards/margins": 0.16496965289115906, "rewards/rejected": -0.3024502396583557, "step": 1130 }, { "epoch": 1.7817178881008668, "grad_norm": 0.5179225206375122, "learning_rate": 1.4625069616769215e-07, "log_odds_chosen": 2.2699007987976074, "log_odds_ratio": -0.10227684676647186, "logits/chosen": -0.7386022806167603, "logits/rejected": -1.4927175045013428, "logps/chosen": -1.421711802482605, "logps/rejected": -3.4430088996887207, "loss": 1.4346, "nll_loss": 1.4243972301483154, "rewards/accuracies": 1.0, "rewards/chosen": -0.14217117428779602, "rewards/margins": 0.20212972164154053, "rewards/rejected": -0.34430092573165894, "step": 1131 }, { "epoch": 1.7832939322301025, "grad_norm": 0.2690419852733612, "learning_rate": 1.4419424899320643e-07, "log_odds_chosen": 2.1966230869293213, "log_odds_ratio": -0.11302442848682404, "logits/chosen": -0.676225483417511, "logits/rejected": -1.6214932203292847, "logps/chosen": -1.4941112995147705, "logps/rejected": -3.4627432823181152, "loss": 1.4941, "nll_loss": 1.4827520847320557, "rewards/accuracies": 1.0, "rewards/chosen": -0.1494111269712448, "rewards/margins": 0.19686321914196014, "rewards/rejected": -0.34627431631088257, "step": 1132 }, { "epoch": 1.784869976359338, "grad_norm": 0.42933356761932373, "learning_rate": 1.4215182154476945e-07, "log_odds_chosen": 2.348839044570923, "log_odds_ratio": -0.1039995402097702, "logits/chosen": -0.633678138256073, "logits/rejected": -1.6150999069213867, "logps/chosen": -1.4288480281829834, "logps/rejected": -3.5355703830718994, "loss": 1.449, "nll_loss": 1.4386348724365234, "rewards/accuracies": 1.0, "rewards/chosen": -0.14288479089736938, "rewards/margins": 0.21067222952842712, "rewards/rejected": -0.3535570204257965, "step": 1133 }, { "epoch": 1.7864460204885737, "grad_norm": 0.22818252444267273, "learning_rate": 1.401234292519453e-07, "log_odds_chosen": 2.5082528591156006, "log_odds_ratio": -0.09248188138008118, "logits/chosen": -0.6822519302368164, "logits/rejected": -1.7531650066375732, "logps/chosen": -1.389281988143921, "logps/rejected": -3.6389782428741455, "loss": 1.41, "nll_loss": 1.4007184505462646, "rewards/accuracies": 1.0, "rewards/chosen": -0.13892820477485657, "rewards/margins": 0.22496961057186127, "rewards/rejected": -0.36389783024787903, "step": 1134 }, { "epoch": 1.7880220646178093, "grad_norm": 0.24648959934711456, "learning_rate": 1.3810908743826933e-07, "log_odds_chosen": 1.9950299263000488, "log_odds_ratio": -0.13119778037071228, "logits/chosen": -0.7389218211174011, "logits/rejected": -1.4215130805969238, "logps/chosen": -1.4065076112747192, "logps/rejected": -3.1587483882904053, "loss": 1.4293, "nll_loss": 1.4161561727523804, "rewards/accuracies": 1.0, "rewards/chosen": -0.14065076410770416, "rewards/margins": 0.17522408068180084, "rewards/rejected": -0.315874844789505, "step": 1135 }, { "epoch": 1.789598108747045, "grad_norm": 0.24536120891571045, "learning_rate": 1.3610881132113107e-07, "log_odds_chosen": 1.9627854824066162, "log_odds_ratio": -0.1368819773197174, "logits/chosen": -0.7054685354232788, "logits/rejected": -1.345278024673462, "logps/chosen": -1.3292237520217896, "logps/rejected": -3.03375506401062, "loss": 1.3578, "nll_loss": 1.3441579341888428, "rewards/accuracies": 1.0, "rewards/chosen": -0.13292236626148224, "rewards/margins": 0.17045316100120544, "rewards/rejected": -0.3033755123615265, "step": 1136 }, { "epoch": 1.7911741528762806, "grad_norm": 0.23358774185180664, "learning_rate": 1.3412261601166131e-07, "log_odds_chosen": 1.906179428100586, "log_odds_ratio": -0.14227718114852905, "logits/chosen": -0.4925750195980072, "logits/rejected": -1.4600443840026855, "logps/chosen": -1.3168917894363403, "logps/rejected": -2.964538335800171, "loss": 1.3466, "nll_loss": 1.3323993682861328, "rewards/accuracies": 1.0, "rewards/chosen": -0.1316891759634018, "rewards/margins": 0.16476468741893768, "rewards/rejected": -0.2964538335800171, "step": 1137 }, { "epoch": 1.7927501970055162, "grad_norm": 0.24905234575271606, "learning_rate": 1.3215051651461706e-07, "log_odds_chosen": 2.3748044967651367, "log_odds_ratio": -0.10581173747777939, "logits/chosen": -0.616445779800415, "logits/rejected": -1.59285306930542, "logps/chosen": -1.344122052192688, "logps/rejected": -3.4546701908111572, "loss": 1.3652, "nll_loss": 1.3545997142791748, "rewards/accuracies": 1.0, "rewards/chosen": -0.13441219925880432, "rewards/margins": 0.21105480194091797, "rewards/rejected": -0.3454670310020447, "step": 1138 }, { "epoch": 1.7943262411347518, "grad_norm": 0.2738676369190216, "learning_rate": 1.3019252772826872e-07, "log_odds_chosen": 2.2702691555023193, "log_odds_ratio": -0.10017295181751251, "logits/chosen": -0.6799413561820984, "logits/rejected": -1.5725997686386108, "logps/chosen": -1.4216991662979126, "logps/rejected": -3.44551682472229, "loss": 1.4442, "nll_loss": 1.434189796447754, "rewards/accuracies": 1.0, "rewards/chosen": -0.14216992259025574, "rewards/margins": 0.20238174498081207, "rewards/rejected": -0.344551682472229, "step": 1139 }, { "epoch": 1.7959022852639874, "grad_norm": 0.244595006108284, "learning_rate": 1.2824866444428527e-07, "log_odds_chosen": 2.1712191104888916, "log_odds_ratio": -0.15022753179073334, "logits/chosen": -0.6030053496360779, "logits/rejected": -1.5127454996109009, "logps/chosen": -1.5361794233322144, "logps/rejected": -3.5062575340270996, "loss": 1.5338, "nll_loss": 1.5187795162200928, "rewards/accuracies": 1.0, "rewards/chosen": -0.1536179482936859, "rewards/margins": 0.19700783491134644, "rewards/rejected": -0.35062578320503235, "step": 1140 }, { "epoch": 1.797478329393223, "grad_norm": 0.27089551091194153, "learning_rate": 1.2631894134762577e-07, "log_odds_chosen": 2.482318162918091, "log_odds_ratio": -0.09072237461805344, "logits/chosen": -0.6588243842124939, "logits/rejected": -1.6727843284606934, "logps/chosen": -1.4437631368637085, "logps/rejected": -3.684215784072876, "loss": 1.4434, "nll_loss": 1.4343516826629639, "rewards/accuracies": 1.0, "rewards/chosen": -0.14437630772590637, "rewards/margins": 0.2240452766418457, "rewards/rejected": -0.36842161417007446, "step": 1141 }, { "epoch": 1.7990543735224587, "grad_norm": 0.22666656970977783, "learning_rate": 1.2440337301642668e-07, "log_odds_chosen": 2.369511604309082, "log_odds_ratio": -0.10529670864343643, "logits/chosen": -0.709622859954834, "logits/rejected": -1.5612415075302124, "logps/chosen": -1.3527034521102905, "logps/rejected": -3.459676742553711, "loss": 1.3912, "nll_loss": 1.3807017803192139, "rewards/accuracies": 1.0, "rewards/chosen": -0.13527034223079681, "rewards/margins": 0.21069732308387756, "rewards/rejected": -0.34596768021583557, "step": 1142 }, { "epoch": 1.8006304176516943, "grad_norm": 0.2159799337387085, "learning_rate": 1.2250197392189176e-07, "log_odds_chosen": 2.4189276695251465, "log_odds_ratio": -0.11405191570520401, "logits/chosen": -0.5731110572814941, "logits/rejected": -1.623842716217041, "logps/chosen": -1.3452143669128418, "logps/rejected": -3.496781349182129, "loss": 1.3821, "nll_loss": 1.3706560134887695, "rewards/accuracies": 1.0, "rewards/chosen": -0.1345214545726776, "rewards/margins": 0.2151567041873932, "rewards/rejected": -0.3496781289577484, "step": 1143 }, { "epoch": 1.80220646178093, "grad_norm": 0.6267852187156677, "learning_rate": 1.2061475842818335e-07, "log_odds_chosen": 2.2231574058532715, "log_odds_ratio": -0.11455876380205154, "logits/chosen": -0.6324924826622009, "logits/rejected": -1.3046411275863647, "logps/chosen": -1.3756605386734009, "logps/rejected": -3.3419313430786133, "loss": 1.389, "nll_loss": 1.3775625228881836, "rewards/accuracies": 1.0, "rewards/chosen": -0.13756605982780457, "rewards/margins": 0.19662708044052124, "rewards/rejected": -0.3341931104660034, "step": 1144 }, { "epoch": 1.8037825059101655, "grad_norm": 0.2445874959230423, "learning_rate": 1.1874174079231236e-07, "log_odds_chosen": 2.343193769454956, "log_odds_ratio": -0.1098344698548317, "logits/chosen": -0.6977952718734741, "logits/rejected": -1.5583124160766602, "logps/chosen": -1.4212026596069336, "logps/rejected": -3.5237021446228027, "loss": 1.4515, "nll_loss": 1.4405288696289062, "rewards/accuracies": 1.0, "rewards/chosen": -0.14212024211883545, "rewards/margins": 0.21024993062019348, "rewards/rejected": -0.3523702025413513, "step": 1145 }, { "epoch": 1.8053585500394012, "grad_norm": 0.6313738226890564, "learning_rate": 1.1688293516403258e-07, "log_odds_chosen": 2.1939072608947754, "log_odds_ratio": -0.1227797418832779, "logits/chosen": -0.5628396272659302, "logits/rejected": -1.4532064199447632, "logps/chosen": -1.3771461248397827, "logps/rejected": -3.317903757095337, "loss": 1.4149, "nll_loss": 1.4026033878326416, "rewards/accuracies": 1.0, "rewards/chosen": -0.13771462440490723, "rewards/margins": 0.19407573342323303, "rewards/rejected": -0.33179035782814026, "step": 1146 }, { "epoch": 1.8069345941686368, "grad_norm": 0.2494928389787674, "learning_rate": 1.1503835558573306e-07, "log_odds_chosen": 2.0587120056152344, "log_odds_ratio": -0.1430104374885559, "logits/chosen": -0.7038282155990601, "logits/rejected": -1.3284424543380737, "logps/chosen": -1.3867857456207275, "logps/rejected": -3.2071471214294434, "loss": 1.4101, "nll_loss": 1.3957760334014893, "rewards/accuracies": 1.0, "rewards/chosen": -0.13867858052253723, "rewards/margins": 0.1820361465215683, "rewards/rejected": -0.32071471214294434, "step": 1147 }, { "epoch": 1.8085106382978724, "grad_norm": 0.26826173067092896, "learning_rate": 1.1320801599233144e-07, "log_odds_chosen": 2.108661651611328, "log_odds_ratio": -0.12091196328401566, "logits/chosen": -0.8008406162261963, "logits/rejected": -1.5808724164962769, "logps/chosen": -1.4034719467163086, "logps/rejected": -3.2703423500061035, "loss": 1.4437, "nll_loss": 1.4315781593322754, "rewards/accuracies": 1.0, "rewards/chosen": -0.1403471976518631, "rewards/margins": 0.18668705224990845, "rewards/rejected": -0.32703423500061035, "step": 1148 }, { "epoch": 1.8100866824271078, "grad_norm": 0.23494303226470947, "learning_rate": 1.1139193021116877e-07, "log_odds_chosen": 2.355726480484009, "log_odds_ratio": -0.10944391787052155, "logits/chosen": -0.6316502094268799, "logits/rejected": -1.466476321220398, "logps/chosen": -1.373643159866333, "logps/rejected": -3.4736504554748535, "loss": 1.4119, "nll_loss": 1.4009490013122559, "rewards/accuracies": 1.0, "rewards/chosen": -0.13736431300640106, "rewards/margins": 0.21000073850154877, "rewards/rejected": -0.34736505150794983, "step": 1149 }, { "epoch": 1.8116627265563436, "grad_norm": 0.3008306622505188, "learning_rate": 1.0959011196190582e-07, "log_odds_chosen": 1.7228707075119019, "log_odds_ratio": -0.1723945587873459, "logits/chosen": -0.6498463749885559, "logits/rejected": -1.2774968147277832, "logps/chosen": -1.4522461891174316, "logps/rejected": -2.9631600379943848, "loss": 1.4714, "nll_loss": 1.4541656970977783, "rewards/accuracies": 1.0, "rewards/chosen": -0.14522461593151093, "rewards/margins": 0.15109138190746307, "rewards/rejected": -0.296315997838974, "step": 1150 }, { "epoch": 1.813238770685579, "grad_norm": 0.24512970447540283, "learning_rate": 1.0780257485641908e-07, "log_odds_chosen": 2.1910598278045654, "log_odds_ratio": -0.11393696069717407, "logits/chosen": -0.6546553373336792, "logits/rejected": -1.5877926349639893, "logps/chosen": -1.4432175159454346, "logps/rejected": -3.4004392623901367, "loss": 1.4583, "nll_loss": 1.446913242340088, "rewards/accuracies": 1.0, "rewards/chosen": -0.1443217694759369, "rewards/margins": 0.19572214782238007, "rewards/rejected": -0.34004390239715576, "step": 1151 }, { "epoch": 1.8148148148148149, "grad_norm": 0.25401076674461365, "learning_rate": 1.0602933239869738e-07, "log_odds_chosen": 2.1370644569396973, "log_odds_ratio": -0.12820219993591309, "logits/chosen": -0.6410478949546814, "logits/rejected": -1.6421220302581787, "logps/chosen": -1.3627318143844604, "logps/rejected": -3.2484068870544434, "loss": 1.3811, "nll_loss": 1.3682374954223633, "rewards/accuracies": 1.0, "rewards/chosen": -0.13627319037914276, "rewards/margins": 0.18856751918792725, "rewards/rejected": -0.3248407244682312, "step": 1152 }, { "epoch": 1.8163908589440503, "grad_norm": 0.2409854382276535, "learning_rate": 1.0427039798474035e-07, "log_odds_chosen": 2.2288715839385986, "log_odds_ratio": -0.10972033441066742, "logits/chosen": -0.6386184692382812, "logits/rejected": -1.607033371925354, "logps/chosen": -1.3847670555114746, "logps/rejected": -3.36104416847229, "loss": 1.402, "nll_loss": 1.3910095691680908, "rewards/accuracies": 1.0, "rewards/chosen": -0.13847669959068298, "rewards/margins": 0.19762775301933289, "rewards/rejected": -0.33610445261001587, "step": 1153 }, { "epoch": 1.8179669030732861, "grad_norm": 0.2570226788520813, "learning_rate": 1.0252578490245811e-07, "log_odds_chosen": 2.2703559398651123, "log_odds_ratio": -0.1110154315829277, "logits/chosen": -0.7696962952613831, "logits/rejected": -1.5770198106765747, "logps/chosen": -1.4609251022338867, "logps/rejected": -3.5004453659057617, "loss": 1.4728, "nll_loss": 1.461649775505066, "rewards/accuracies": 1.0, "rewards/chosen": -0.1460925042629242, "rewards/margins": 0.20395205914974213, "rewards/rejected": -0.3500445783138275, "step": 1154 }, { "epoch": 1.8195429472025215, "grad_norm": 0.3262077867984772, "learning_rate": 1.007955063315682e-07, "log_odds_chosen": 1.8618896007537842, "log_odds_ratio": -0.15386460721492767, "logits/chosen": -0.66713947057724, "logits/rejected": -1.5333601236343384, "logps/chosen": -1.46468186378479, "logps/rejected": -3.1116678714752197, "loss": 1.4764, "nll_loss": 1.4610321521759033, "rewards/accuracies": 1.0, "rewards/chosen": -0.14646819233894348, "rewards/margins": 0.16469863057136536, "rewards/rejected": -0.3111668527126312, "step": 1155 }, { "epoch": 1.8211189913317574, "grad_norm": 0.2542937099933624, "learning_rate": 9.90795753434992e-08, "log_odds_chosen": 2.065402030944824, "log_odds_ratio": -0.13953696191310883, "logits/chosen": -0.5612714886665344, "logits/rejected": -1.5611573457717896, "logps/chosen": -1.4885002374649048, "logps/rejected": -3.3366222381591797, "loss": 1.5144, "nll_loss": 1.5004711151123047, "rewards/accuracies": 1.0, "rewards/chosen": -0.14885000884532928, "rewards/margins": 0.18481215834617615, "rewards/rejected": -0.3336621820926666, "step": 1156 }, { "epoch": 1.8226950354609928, "grad_norm": 0.24414725601673126, "learning_rate": 9.737800490129023e-08, "log_odds_chosen": 2.3346362113952637, "log_odds_ratio": -0.09844735264778137, "logits/chosen": -0.7340440154075623, "logits/rejected": -1.5652546882629395, "logps/chosen": -1.431807518005371, "logps/rejected": -3.5199954509735107, "loss": 1.4371, "nll_loss": 1.4272624254226685, "rewards/accuracies": 1.0, "rewards/chosen": -0.1431807577610016, "rewards/margins": 0.20881877839565277, "rewards/rejected": -0.35199952125549316, "step": 1157 }, { "epoch": 1.8242710795902286, "grad_norm": 0.5417945384979248, "learning_rate": 9.569080785949335e-08, "log_odds_chosen": 2.034602165222168, "log_odds_ratio": -0.1383841186761856, "logits/chosen": -0.6918726563453674, "logits/rejected": -1.5117642879486084, "logps/chosen": -1.3313159942626953, "logps/rejected": -3.110893964767456, "loss": 1.3745, "nll_loss": 1.3606191873550415, "rewards/accuracies": 1.0, "rewards/chosen": -0.13313160836696625, "rewards/margins": 0.17795780301094055, "rewards/rejected": -0.311089426279068, "step": 1158 }, { "epoch": 1.825847123719464, "grad_norm": 0.2598019540309906, "learning_rate": 9.401799696407642e-08, "log_odds_chosen": 1.8350038528442383, "log_odds_ratio": -0.16442470252513885, "logits/chosen": -0.662948727607727, "logits/rejected": -1.5339016914367676, "logps/chosen": -1.4630563259124756, "logps/rejected": -3.077997922897339, "loss": 1.4734, "nll_loss": 1.4569679498672485, "rewards/accuracies": 1.0, "rewards/chosen": -0.146305650472641, "rewards/margins": 0.161494180560112, "rewards/rejected": -0.3077997863292694, "step": 1159 }, { "epoch": 1.8274231678486998, "grad_norm": 0.24015839397907257, "learning_rate": 9.23595848523262e-08, "log_odds_chosen": 1.9413986206054688, "log_odds_ratio": -0.14081013202667236, "logits/chosen": -0.7353992462158203, "logits/rejected": -1.5711203813552856, "logps/chosen": -1.411322832107544, "logps/rejected": -3.116825819015503, "loss": 1.4154, "nll_loss": 1.401334285736084, "rewards/accuracies": 1.0, "rewards/chosen": -0.14113228023052216, "rewards/margins": 0.17055031657218933, "rewards/rejected": -0.3116826117038727, "step": 1160 }, { "epoch": 1.8289992119779352, "grad_norm": 0.23824945092201233, "learning_rate": 9.071558405275426e-08, "log_odds_chosen": 2.109463930130005, "log_odds_ratio": -0.13128668069839478, "logits/chosen": -0.6813396215438843, "logits/rejected": -1.553991675376892, "logps/chosen": -1.428255319595337, "logps/rejected": -3.3068318367004395, "loss": 1.4435, "nll_loss": 1.4304184913635254, "rewards/accuracies": 1.0, "rewards/chosen": -0.14282551407814026, "rewards/margins": 0.18785765767097473, "rewards/rejected": -0.3306832015514374, "step": 1161 }, { "epoch": 1.830575256107171, "grad_norm": 0.23513326048851013, "learning_rate": 8.90860069850019e-08, "log_odds_chosen": 2.4860615730285645, "log_odds_ratio": -0.08801858872175217, "logits/chosen": -0.6555715799331665, "logits/rejected": -1.6251070499420166, "logps/chosen": -1.3625683784484863, "logps/rejected": -3.579991579055786, "loss": 1.3677, "nll_loss": 1.3588677644729614, "rewards/accuracies": 1.0, "rewards/chosen": -0.1362568438053131, "rewards/margins": 0.22174230217933655, "rewards/rejected": -0.35799914598464966, "step": 1162 }, { "epoch": 1.8321513002364065, "grad_norm": 0.23478761315345764, "learning_rate": 8.747086595974518e-08, "log_odds_chosen": 2.253387689590454, "log_odds_ratio": -0.1065862700343132, "logits/chosen": -0.6542295813560486, "logits/rejected": -1.74689781665802, "logps/chosen": -1.5093393325805664, "logps/rejected": -3.5413918495178223, "loss": 1.5051, "nll_loss": 1.4944535493850708, "rewards/accuracies": 1.0, "rewards/chosen": -0.15093393623828888, "rewards/margins": 0.20320525765419006, "rewards/rejected": -0.35413920879364014, "step": 1163 }, { "epoch": 1.8337273443656423, "grad_norm": 0.2529953122138977, "learning_rate": 8.58701731786029e-08, "log_odds_chosen": 2.086918592453003, "log_odds_ratio": -0.12676191329956055, "logits/chosen": -0.6541511416435242, "logits/rejected": -1.4439666271209717, "logps/chosen": -1.4406934976577759, "logps/rejected": -3.2953906059265137, "loss": 1.46, "nll_loss": 1.4472757577896118, "rewards/accuracies": 1.0, "rewards/chosen": -0.1440693438053131, "rewards/margins": 0.18546970188617706, "rewards/rejected": -0.329539030790329, "step": 1164 }, { "epoch": 1.8353033884948777, "grad_norm": 0.2491406947374344, "learning_rate": 8.428394073404477e-08, "log_odds_chosen": 2.1580991744995117, "log_odds_ratio": -0.12193150073289871, "logits/chosen": -0.6819738149642944, "logits/rejected": -1.6447879076004028, "logps/chosen": -1.472852110862732, "logps/rejected": -3.4042153358459473, "loss": 1.4955, "nll_loss": 1.4833046197891235, "rewards/accuracies": 1.0, "rewards/chosen": -0.14728519320487976, "rewards/margins": 0.1931363195180893, "rewards/rejected": -0.34042155742645264, "step": 1165 }, { "epoch": 1.8368794326241136, "grad_norm": 0.23723764717578888, "learning_rate": 8.271218060929919e-08, "log_odds_chosen": 1.9971917867660522, "log_odds_ratio": -0.13452281057834625, "logits/chosen": -0.626416802406311, "logits/rejected": -1.5081605911254883, "logps/chosen": -1.367218255996704, "logps/rejected": -3.11238956451416, "loss": 1.3945, "nll_loss": 1.3810198307037354, "rewards/accuracies": 1.0, "rewards/chosen": -0.13672181963920593, "rewards/margins": 0.17451712489128113, "rewards/rejected": -0.31123894453048706, "step": 1166 }, { "epoch": 1.838455476753349, "grad_norm": 0.2513558268547058, "learning_rate": 8.115490467826291e-08, "log_odds_chosen": 2.4530985355377197, "log_odds_ratio": -0.09103604406118393, "logits/chosen": -0.714648425579071, "logits/rejected": -1.6450350284576416, "logps/chosen": -1.3734160661697388, "logps/rejected": -3.56103515625, "loss": 1.3768, "nll_loss": 1.3676546812057495, "rewards/accuracies": 1.0, "rewards/chosen": -0.13734160363674164, "rewards/margins": 0.21876195073127747, "rewards/rejected": -0.3561035692691803, "step": 1167 }, { "epoch": 1.8400315208825848, "grad_norm": 0.3669838309288025, "learning_rate": 7.961212470541245e-08, "log_odds_chosen": 2.133593797683716, "log_odds_ratio": -0.11845318973064423, "logits/chosen": -0.5416200160980225, "logits/rejected": -1.4701862335205078, "logps/chosen": -1.387611985206604, "logps/rejected": -3.2740254402160645, "loss": 1.4036, "nll_loss": 1.3917574882507324, "rewards/accuracies": 1.0, "rewards/chosen": -0.13876119256019592, "rewards/margins": 0.18864135444164276, "rewards/rejected": -0.3274025619029999, "step": 1168 }, { "epoch": 1.8416075650118202, "grad_norm": 0.25524067878723145, "learning_rate": 7.808385234571302e-08, "log_odds_chosen": 1.7881041765213013, "log_odds_ratio": -0.17589043080806732, "logits/chosen": -0.7094375491142273, "logits/rejected": -1.4388020038604736, "logps/chosen": -1.5025449991226196, "logps/rejected": -3.0905256271362305, "loss": 1.5048, "nll_loss": 1.4872537851333618, "rewards/accuracies": 1.0, "rewards/chosen": -0.150254487991333, "rewards/margins": 0.15879808366298676, "rewards/rejected": -0.30905258655548096, "step": 1169 }, { "epoch": 1.843183609141056, "grad_norm": 0.24344611167907715, "learning_rate": 7.657009914453239e-08, "log_odds_chosen": 2.267740488052368, "log_odds_ratio": -0.10849397629499435, "logits/chosen": -0.632416307926178, "logits/rejected": -1.6099388599395752, "logps/chosen": -1.3430719375610352, "logps/rejected": -3.343228578567505, "loss": 1.3737, "nll_loss": 1.3628438711166382, "rewards/accuracies": 1.0, "rewards/chosen": -0.13430720567703247, "rewards/margins": 0.20001563429832458, "rewards/rejected": -0.33432286977767944, "step": 1170 }, { "epoch": 1.8447596532702915, "grad_norm": 0.24538294970989227, "learning_rate": 7.507087653755318e-08, "log_odds_chosen": 2.7046775817871094, "log_odds_ratio": -0.0802297443151474, "logits/chosen": -0.7009217739105225, "logits/rejected": -1.6930599212646484, "logps/chosen": -1.387384057044983, "logps/rejected": -3.8260409832000732, "loss": 1.4092, "nll_loss": 1.4012088775634766, "rewards/accuracies": 1.0, "rewards/chosen": -0.13873840868473053, "rewards/margins": 0.2438656985759735, "rewards/rejected": -0.38260412216186523, "step": 1171 }, { "epoch": 1.8463356973995273, "grad_norm": 0.24805966019630432, "learning_rate": 7.358619585068582e-08, "log_odds_chosen": 2.0566694736480713, "log_odds_ratio": -0.13199502229690552, "logits/chosen": -0.6529271602630615, "logits/rejected": -1.385631799697876, "logps/chosen": -1.2771646976470947, "logps/rejected": -3.046116828918457, "loss": 1.3361, "nll_loss": 1.3228695392608643, "rewards/accuracies": 1.0, "rewards/chosen": -0.12771648168563843, "rewards/margins": 0.17689520120620728, "rewards/rejected": -0.3046116828918457, "step": 1172 }, { "epoch": 1.8479117415287627, "grad_norm": 0.44653815031051636, "learning_rate": 7.21160682999844e-08, "log_odds_chosen": 1.7622298002243042, "log_odds_ratio": -0.16657209396362305, "logits/chosen": -0.6410238742828369, "logits/rejected": -1.3407227993011475, "logps/chosen": -1.4757630825042725, "logps/rejected": -3.0298616886138916, "loss": 1.4804, "nll_loss": 1.4637924432754517, "rewards/accuracies": 1.0, "rewards/chosen": -0.14757630228996277, "rewards/margins": 0.15540987253189087, "rewards/rejected": -0.30298617482185364, "step": 1173 }, { "epoch": 1.8494877856579985, "grad_norm": 0.25746652483940125, "learning_rate": 7.06605049915594e-08, "log_odds_chosen": 2.0682730674743652, "log_odds_ratio": -0.12645700573921204, "logits/chosen": -0.7054044008255005, "logits/rejected": -1.4034022092819214, "logps/chosen": -1.3940240144729614, "logps/rejected": -3.220393180847168, "loss": 1.4227, "nll_loss": 1.4100606441497803, "rewards/accuracies": 1.0, "rewards/chosen": -0.13940240442752838, "rewards/margins": 0.18263693153858185, "rewards/rejected": -0.32203933596611023, "step": 1174 }, { "epoch": 1.851063829787234, "grad_norm": 0.2371496856212616, "learning_rate": 6.92195169214973e-08, "log_odds_chosen": 2.1464877128601074, "log_odds_ratio": -0.11482679843902588, "logits/chosen": -0.7046447992324829, "logits/rejected": -1.497187852859497, "logps/chosen": -1.3892666101455688, "logps/rejected": -3.284165382385254, "loss": 1.4084, "nll_loss": 1.3968679904937744, "rewards/accuracies": 1.0, "rewards/chosen": -0.1389266699552536, "rewards/margins": 0.18948988616466522, "rewards/rejected": -0.32841652631759644, "step": 1175 }, { "epoch": 1.8526398739164698, "grad_norm": 0.23818323016166687, "learning_rate": 6.779311497577401e-08, "log_odds_chosen": 1.8529016971588135, "log_odds_ratio": -0.16464988887310028, "logits/chosen": -0.7261921167373657, "logits/rejected": -1.4539761543273926, "logps/chosen": -1.4263057708740234, "logps/rejected": -3.050253391265869, "loss": 1.4474, "nll_loss": 1.4308972358703613, "rewards/accuracies": 1.0, "rewards/chosen": -0.14263057708740234, "rewards/margins": 0.16239476203918457, "rewards/rejected": -0.3050253689289093, "step": 1176 }, { "epoch": 1.8542159180457052, "grad_norm": 0.7260890603065491, "learning_rate": 6.638130993017532e-08, "log_odds_chosen": 2.3057737350463867, "log_odds_ratio": -0.09690903127193451, "logits/chosen": -0.7174903750419617, "logits/rejected": -1.5342347621917725, "logps/chosen": -1.4076248407363892, "logps/rejected": -3.4639885425567627, "loss": 1.4216, "nll_loss": 1.4119166135787964, "rewards/accuracies": 1.0, "rewards/chosen": -0.14076247811317444, "rewards/margins": 0.20563636720180511, "rewards/rejected": -0.34639883041381836, "step": 1177 }, { "epoch": 1.855791962174941, "grad_norm": 0.2622769773006439, "learning_rate": 6.49841124502144e-08, "log_odds_chosen": 1.912718415260315, "log_odds_ratio": -0.16173206269741058, "logits/chosen": -0.6873923540115356, "logits/rejected": -1.4290012121200562, "logps/chosen": -1.431050419807434, "logps/rejected": -3.1225016117095947, "loss": 1.4519, "nll_loss": 1.435699701309204, "rewards/accuracies": 1.0, "rewards/chosen": -0.14310503005981445, "rewards/margins": 0.1691451221704483, "rewards/rejected": -0.31225019693374634, "step": 1178 }, { "epoch": 1.8573680063041764, "grad_norm": 0.24211548268795013, "learning_rate": 6.360153309104998e-08, "log_odds_chosen": 2.4104275703430176, "log_odds_ratio": -0.11408613622188568, "logits/chosen": -0.7975573539733887, "logits/rejected": -1.598517656326294, "logps/chosen": -1.4582797288894653, "logps/rejected": -3.6385772228240967, "loss": 1.4745, "nll_loss": 1.4631030559539795, "rewards/accuracies": 1.0, "rewards/chosen": -0.145827978849411, "rewards/margins": 0.21802973747253418, "rewards/rejected": -0.3638577163219452, "step": 1179 }, { "epoch": 1.8589440504334123, "grad_norm": 0.35385000705718994, "learning_rate": 6.223358229740916e-08, "log_odds_chosen": 2.237973213195801, "log_odds_ratio": -0.11903023719787598, "logits/chosen": -0.6644881367683411, "logits/rejected": -1.5292662382125854, "logps/chosen": -1.4748717546463013, "logps/rejected": -3.486985206604004, "loss": 1.4722, "nll_loss": 1.4603151082992554, "rewards/accuracies": 1.0, "rewards/chosen": -0.14748717844486237, "rewards/margins": 0.20121131837368011, "rewards/rejected": -0.3486984968185425, "step": 1180 }, { "epoch": 1.8605200945626477, "grad_norm": 0.9476044178009033, "learning_rate": 6.088027040350674e-08, "log_odds_chosen": 2.0895965099334717, "log_odds_ratio": -0.13650426268577576, "logits/chosen": -0.6684251427650452, "logits/rejected": -1.5993695259094238, "logps/chosen": -1.4665027856826782, "logps/rejected": -3.3343422412872314, "loss": 1.4815, "nll_loss": 1.4678804874420166, "rewards/accuracies": 1.0, "rewards/chosen": -0.1466502696275711, "rewards/margins": 0.18678393959999084, "rewards/rejected": -0.33343422412872314, "step": 1181 }, { "epoch": 1.8620961386918835, "grad_norm": 0.37048766016960144, "learning_rate": 5.9541607632968003e-08, "log_odds_chosen": 2.0390188694000244, "log_odds_ratio": -0.1283208131790161, "logits/chosen": -0.5664397478103638, "logits/rejected": -1.5264338254928589, "logps/chosen": -1.4557548761367798, "logps/rejected": -3.2629106044769287, "loss": 1.46, "nll_loss": 1.447214961051941, "rewards/accuracies": 1.0, "rewards/chosen": -0.14557547867298126, "rewards/margins": 0.18071557581424713, "rewards/rejected": -0.3262910842895508, "step": 1182 }, { "epoch": 1.863672182821119, "grad_norm": 0.2544536292552948, "learning_rate": 5.821760409875054e-08, "log_odds_chosen": 2.022491693496704, "log_odds_ratio": -0.14355377852916718, "logits/chosen": -0.685704231262207, "logits/rejected": -1.508038878440857, "logps/chosen": -1.4372001886367798, "logps/rejected": -3.232409715652466, "loss": 1.4678, "nll_loss": 1.4534475803375244, "rewards/accuracies": 1.0, "rewards/chosen": -0.14372001588344574, "rewards/margins": 0.17952096462249756, "rewards/rejected": -0.3232409656047821, "step": 1183 }, { "epoch": 1.8652482269503547, "grad_norm": 0.25288259983062744, "learning_rate": 5.690826980306851e-08, "log_odds_chosen": 2.189436435699463, "log_odds_ratio": -0.12546050548553467, "logits/chosen": -0.747494101524353, "logits/rejected": -1.452622890472412, "logps/chosen": -1.3829396963119507, "logps/rejected": -3.324129581451416, "loss": 1.4127, "nll_loss": 1.4001619815826416, "rewards/accuracies": 1.0, "rewards/chosen": -0.13829398155212402, "rewards/margins": 0.19411897659301758, "rewards/rejected": -0.3324129581451416, "step": 1184 }, { "epoch": 1.8668242710795901, "grad_norm": 0.2534860372543335, "learning_rate": 5.56136146373174e-08, "log_odds_chosen": 2.2510364055633545, "log_odds_ratio": -0.11698711663484573, "logits/chosen": -0.7637779116630554, "logits/rejected": -1.7619669437408447, "logps/chosen": -1.4710334539413452, "logps/rejected": -3.4935970306396484, "loss": 1.4831, "nll_loss": 1.4713811874389648, "rewards/accuracies": 1.0, "rewards/chosen": -0.14710333943367004, "rewards/margins": 0.20225638151168823, "rewards/rejected": -0.3493597209453583, "step": 1185 }, { "epoch": 1.868400315208826, "grad_norm": 0.2790590226650238, "learning_rate": 5.4333648381998276e-08, "log_odds_chosen": 2.049938678741455, "log_odds_ratio": -0.125311940908432, "logits/chosen": -0.6624439358711243, "logits/rejected": -1.5354634523391724, "logps/chosen": -1.4959936141967773, "logps/rejected": -3.3266313076019287, "loss": 1.5076, "nll_loss": 1.4950931072235107, "rewards/accuracies": 1.0, "rewards/chosen": -0.1495993733406067, "rewards/margins": 0.18306376039981842, "rewards/rejected": -0.3326631188392639, "step": 1186 }, { "epoch": 1.8699763593380614, "grad_norm": 0.2477812021970749, "learning_rate": 5.306838070664521e-08, "log_odds_chosen": 1.7351773977279663, "log_odds_ratio": -0.17425094544887543, "logits/chosen": -0.76722252368927, "logits/rejected": -1.4085005521774292, "logps/chosen": -1.4180433750152588, "logps/rejected": -2.9326539039611816, "loss": 1.4537, "nll_loss": 1.4362313747406006, "rewards/accuracies": 1.0, "rewards/chosen": -0.14180433750152588, "rewards/margins": 0.15146106481552124, "rewards/rejected": -0.2932654023170471, "step": 1187 }, { "epoch": 1.8715524034672972, "grad_norm": 0.23774941265583038, "learning_rate": 5.181782116975109e-08, "log_odds_chosen": 2.207965850830078, "log_odds_ratio": -0.1150437742471695, "logits/chosen": -0.6965830326080322, "logits/rejected": -1.7350342273712158, "logps/chosen": -1.391701579093933, "logps/rejected": -3.3527185916900635, "loss": 1.4061, "nll_loss": 1.3946412801742554, "rewards/accuracies": 1.0, "rewards/chosen": -0.13917015492916107, "rewards/margins": 0.19610172510147095, "rewards/rejected": -0.3352718949317932, "step": 1188 }, { "epoch": 1.8731284475965326, "grad_norm": 0.2269074022769928, "learning_rate": 5.058197921869567e-08, "log_odds_chosen": 1.886786937713623, "log_odds_ratio": -0.14843595027923584, "logits/chosen": -0.5642238855361938, "logits/rejected": -1.5065851211547852, "logps/chosen": -1.3409210443496704, "logps/rejected": -2.977937698364258, "loss": 1.3742, "nll_loss": 1.3593504428863525, "rewards/accuracies": 1.0, "rewards/chosen": -0.13409209251403809, "rewards/margins": 0.16370168328285217, "rewards/rejected": -0.29779374599456787, "step": 1189 }, { "epoch": 1.8747044917257685, "grad_norm": 0.5069538950920105, "learning_rate": 4.9360864189674775e-08, "log_odds_chosen": 2.192622423171997, "log_odds_ratio": -0.12118689715862274, "logits/chosen": -0.6835799217224121, "logits/rejected": -1.5589611530303955, "logps/chosen": -1.4092533588409424, "logps/rejected": -3.357715606689453, "loss": 1.4173, "nll_loss": 1.405143141746521, "rewards/accuracies": 1.0, "rewards/chosen": -0.140925332903862, "rewards/margins": 0.1948462277650833, "rewards/rejected": -0.3357715904712677, "step": 1190 }, { "epoch": 1.8762805358550039, "grad_norm": 0.23494669795036316, "learning_rate": 4.8154485307629226e-08, "log_odds_chosen": 2.3870654106140137, "log_odds_ratio": -0.09889359772205353, "logits/chosen": -0.7394210696220398, "logits/rejected": -1.5734256505966187, "logps/chosen": -1.333500623703003, "logps/rejected": -3.44507098197937, "loss": 1.3725, "nll_loss": 1.3625988960266113, "rewards/accuracies": 1.0, "rewards/chosen": -0.13335007429122925, "rewards/margins": 0.21115702390670776, "rewards/rejected": -0.3445070683956146, "step": 1191 }, { "epoch": 1.8778565799842397, "grad_norm": 0.24402429163455963, "learning_rate": 4.6962851686175306e-08, "log_odds_chosen": 1.7538031339645386, "log_odds_ratio": -0.2052013874053955, "logits/chosen": -0.5513991117477417, "logits/rejected": -1.214594841003418, "logps/chosen": -1.3362634181976318, "logps/rejected": -2.8638017177581787, "loss": 1.375, "nll_loss": 1.3545267581939697, "rewards/accuracies": 1.0, "rewards/chosen": -0.13362634181976318, "rewards/margins": 0.15275387465953827, "rewards/rejected": -0.28638023138046265, "step": 1192 }, { "epoch": 1.8794326241134751, "grad_norm": 0.26863783597946167, "learning_rate": 4.578597232753556e-08, "log_odds_chosen": 2.5201854705810547, "log_odds_ratio": -0.08224759995937347, "logits/chosen": -0.649528980255127, "logits/rejected": -1.5857754945755005, "logps/chosen": -1.45985746383667, "logps/rejected": -3.738684892654419, "loss": 1.4595, "nll_loss": 1.451249361038208, "rewards/accuracies": 1.0, "rewards/chosen": -0.14598575234413147, "rewards/margins": 0.2278827577829361, "rewards/rejected": -0.37386849522590637, "step": 1193 }, { "epoch": 1.881008668242711, "grad_norm": 0.24516943097114563, "learning_rate": 4.462385612247166e-08, "log_odds_chosen": 2.2726352214813232, "log_odds_ratio": -0.10374291986227036, "logits/chosen": -0.6809590458869934, "logits/rejected": -1.7194184064865112, "logps/chosen": -1.5159534215927124, "logps/rejected": -3.5662026405334473, "loss": 1.5131, "nll_loss": 1.502724289894104, "rewards/accuracies": 1.0, "rewards/chosen": -0.1515953540802002, "rewards/margins": 0.20502494275569916, "rewards/rejected": -0.35662028193473816, "step": 1194 }, { "epoch": 1.8825847123719464, "grad_norm": 0.2504276931285858, "learning_rate": 4.34765118502165e-08, "log_odds_chosen": 2.2006754875183105, "log_odds_ratio": -0.11386527121067047, "logits/chosen": -0.637266218662262, "logits/rejected": -1.6743327379226685, "logps/chosen": -1.362856388092041, "logps/rejected": -3.3067688941955566, "loss": 1.3939, "nll_loss": 1.3825478553771973, "rewards/accuracies": 1.0, "rewards/chosen": -0.13628563284873962, "rewards/margins": 0.19439128041267395, "rewards/rejected": -0.3306769132614136, "step": 1195 }, { "epoch": 1.8841607565011822, "grad_norm": 0.22425080835819244, "learning_rate": 4.234394817840803e-08, "log_odds_chosen": 2.1331887245178223, "log_odds_ratio": -0.12817887961864471, "logits/chosen": -0.6771361231803894, "logits/rejected": -1.3767496347427368, "logps/chosen": -1.3792603015899658, "logps/rejected": -3.263613224029541, "loss": 1.4155, "nll_loss": 1.4026436805725098, "rewards/accuracies": 1.0, "rewards/chosen": -0.13792604207992554, "rewards/margins": 0.18843528628349304, "rewards/rejected": -0.3263613283634186, "step": 1196 }, { "epoch": 1.8857368006304176, "grad_norm": 0.27941346168518066, "learning_rate": 4.122617366302395e-08, "log_odds_chosen": 2.075756311416626, "log_odds_ratio": -0.1254720687866211, "logits/chosen": -0.6811285018920898, "logits/rejected": -1.5680382251739502, "logps/chosen": -1.4613127708435059, "logps/rejected": -3.3105244636535645, "loss": 1.4765, "nll_loss": 1.4639277458190918, "rewards/accuracies": 1.0, "rewards/chosen": -0.14613129198551178, "rewards/margins": 0.18492117524147034, "rewards/rejected": -0.3310524523258209, "step": 1197 }, { "epoch": 1.8873128447596532, "grad_norm": 0.2338075339794159, "learning_rate": 4.0123196748316436e-08, "log_odds_chosen": 1.8068225383758545, "log_odds_ratio": -0.16622896492481232, "logits/chosen": -0.6690441966056824, "logits/rejected": -1.5568608045578003, "logps/chosen": -1.3357353210449219, "logps/rejected": -2.898650884628296, "loss": 1.3649, "nll_loss": 1.3482465744018555, "rewards/accuracies": 1.0, "rewards/chosen": -0.1335735321044922, "rewards/margins": 0.15629157423973083, "rewards/rejected": -0.289865106344223, "step": 1198 }, { "epoch": 1.8888888888888888, "grad_norm": 0.24249647557735443, "learning_rate": 3.903502576674933e-08, "log_odds_chosen": 2.113074779510498, "log_odds_ratio": -0.13862456381320953, "logits/chosen": -0.6959307193756104, "logits/rejected": -1.5667226314544678, "logps/chosen": -1.4631407260894775, "logps/rejected": -3.3541290760040283, "loss": 1.4792, "nll_loss": 1.4653596878051758, "rewards/accuracies": 1.0, "rewards/chosen": -0.14631406962871552, "rewards/margins": 0.18909886479377747, "rewards/rejected": -0.3354129195213318, "step": 1199 }, { "epoch": 1.8904649330181245, "grad_norm": 0.2561950385570526, "learning_rate": 3.796166893893504e-08, "log_odds_chosen": 1.8952630758285522, "log_odds_ratio": -0.14859546720981598, "logits/chosen": -0.5913959741592407, "logits/rejected": -1.5494587421417236, "logps/chosen": -1.5318377017974854, "logps/rejected": -3.2236576080322266, "loss": 1.5348, "nll_loss": 1.5199891328811646, "rewards/accuracies": 1.0, "rewards/chosen": -0.15318375825881958, "rewards/margins": 0.16918198764324188, "rewards/rejected": -0.32236576080322266, "step": 1200 }, { "epoch": 1.89204097714736, "grad_norm": 0.24014967679977417, "learning_rate": 3.690313437357151e-08, "log_odds_chosen": 2.002248525619507, "log_odds_ratio": -0.13294467329978943, "logits/chosen": -0.6289924383163452, "logits/rejected": -1.4315810203552246, "logps/chosen": -1.3551324605941772, "logps/rejected": -3.106968879699707, "loss": 1.3833, "nll_loss": 1.3700439929962158, "rewards/accuracies": 1.0, "rewards/chosen": -0.13551326096057892, "rewards/margins": 0.17518360912799835, "rewards/rejected": -0.31069689989089966, "step": 1201 }, { "epoch": 1.8936170212765957, "grad_norm": 0.29212963581085205, "learning_rate": 3.585943006738179e-08, "log_odds_chosen": 1.9980361461639404, "log_odds_ratio": -0.13997800648212433, "logits/chosen": -0.6282858848571777, "logits/rejected": -1.3599438667297363, "logps/chosen": -1.451352834701538, "logps/rejected": -3.225801467895508, "loss": 1.4793, "nll_loss": 1.4653072357177734, "rewards/accuracies": 1.0, "rewards/chosen": -0.1451352834701538, "rewards/margins": 0.17744486033916473, "rewards/rejected": -0.32258015871047974, "step": 1202 }, { "epoch": 1.8951930654058313, "grad_norm": 0.32998043298721313, "learning_rate": 3.483056390505346e-08, "log_odds_chosen": 2.1989166736602783, "log_odds_ratio": -0.11480152606964111, "logits/chosen": -0.7246573567390442, "logits/rejected": -1.5989744663238525, "logps/chosen": -1.4098200798034668, "logps/rejected": -3.3650972843170166, "loss": 1.4177, "nll_loss": 1.406238079071045, "rewards/accuracies": 1.0, "rewards/chosen": -0.1409820169210434, "rewards/margins": 0.19552773237228394, "rewards/rejected": -0.33650973439216614, "step": 1203 }, { "epoch": 1.896769109535067, "grad_norm": 0.4712628424167633, "learning_rate": 3.381654365917863e-08, "log_odds_chosen": 2.5266122817993164, "log_odds_ratio": -0.08523000776767731, "logits/chosen": -0.6924614906311035, "logits/rejected": -1.8721075057983398, "logps/chosen": -1.5226670503616333, "logps/rejected": -3.8240017890930176, "loss": 1.523, "nll_loss": 1.5144481658935547, "rewards/accuracies": 1.0, "rewards/chosen": -0.1522666960954666, "rewards/margins": 0.23013348877429962, "rewards/rejected": -0.38240015506744385, "step": 1204 }, { "epoch": 1.8983451536643026, "grad_norm": 0.24280346930027008, "learning_rate": 3.281737699019627e-08, "log_odds_chosen": 2.0230071544647217, "log_odds_ratio": -0.13023246824741364, "logits/chosen": -0.6101768016815186, "logits/rejected": -1.4206677675247192, "logps/chosen": -1.3426989316940308, "logps/rejected": -3.1112377643585205, "loss": 1.386, "nll_loss": 1.3729867935180664, "rewards/accuracies": 1.0, "rewards/chosen": -0.13426990807056427, "rewards/margins": 0.17685385048389435, "rewards/rejected": -0.3111237585544586, "step": 1205 }, { "epoch": 1.8999211977935382, "grad_norm": 0.2447904795408249, "learning_rate": 3.1833071446333295e-08, "log_odds_chosen": 1.923195719718933, "log_odds_ratio": -0.15211057662963867, "logits/chosen": -0.6550759077072144, "logits/rejected": -1.3420542478561401, "logps/chosen": -1.4393178224563599, "logps/rejected": -3.1400959491729736, "loss": 1.4535, "nll_loss": 1.438315749168396, "rewards/accuracies": 1.0, "rewards/chosen": -0.1439317911863327, "rewards/margins": 0.17007781565189362, "rewards/rejected": -0.31400957703590393, "step": 1206 }, { "epoch": 1.9014972419227738, "grad_norm": 0.24708132445812225, "learning_rate": 3.0863634463548005e-08, "log_odds_chosen": 2.307788372039795, "log_odds_ratio": -0.11089952290058136, "logits/chosen": -0.5829729437828064, "logits/rejected": -1.5681138038635254, "logps/chosen": -1.3673369884490967, "logps/rejected": -3.4152069091796875, "loss": 1.3965, "nll_loss": 1.385420799255371, "rewards/accuracies": 1.0, "rewards/chosen": -0.13673371076583862, "rewards/margins": 0.2047869861125946, "rewards/rejected": -0.3415206968784332, "step": 1207 }, { "epoch": 1.9030732860520094, "grad_norm": 0.24370893836021423, "learning_rate": 2.9909073365473435e-08, "log_odds_chosen": 2.5115935802459717, "log_odds_ratio": -0.0866653248667717, "logits/chosen": -0.6416139006614685, "logits/rejected": -1.647744059562683, "logps/chosen": -1.4151132106781006, "logps/rejected": -3.6741461753845215, "loss": 1.4288, "nll_loss": 1.420121192932129, "rewards/accuracies": 1.0, "rewards/chosen": -0.14151132106781006, "rewards/margins": 0.22590331733226776, "rewards/rejected": -0.3674146234989166, "step": 1208 }, { "epoch": 1.904649330181245, "grad_norm": 0.27717146277427673, "learning_rate": 2.896939536336296e-08, "log_odds_chosen": 1.9501713514328003, "log_odds_ratio": -0.1399572491645813, "logits/chosen": -0.6383427381515503, "logits/rejected": -1.303566336631775, "logps/chosen": -1.4352314472198486, "logps/rejected": -3.1542906761169434, "loss": 1.4525, "nll_loss": 1.438524842262268, "rewards/accuracies": 1.0, "rewards/chosen": -0.143523171544075, "rewards/margins": 0.17190590500831604, "rewards/rejected": -0.31542906165122986, "step": 1209 }, { "epoch": 1.9062253743104807, "grad_norm": 0.3229542076587677, "learning_rate": 2.8044607556035216e-08, "log_odds_chosen": 2.480558395385742, "log_odds_ratio": -0.11464011669158936, "logits/chosen": -0.7718715667724609, "logits/rejected": -1.6302223205566406, "logps/chosen": -1.4541082382202148, "logps/rejected": -3.7059502601623535, "loss": 1.4581, "nll_loss": 1.4466472864151, "rewards/accuracies": 1.0, "rewards/chosen": -0.14541082084178925, "rewards/margins": 0.22518420219421387, "rewards/rejected": -0.3705950081348419, "step": 1210 }, { "epoch": 1.9078014184397163, "grad_norm": 0.23870877921581268, "learning_rate": 2.7134716929820167e-08, "log_odds_chosen": 2.086005449295044, "log_odds_ratio": -0.1227080374956131, "logits/chosen": -0.5562236905097961, "logits/rejected": -1.573972225189209, "logps/chosen": -1.4692164659500122, "logps/rejected": -3.3303918838500977, "loss": 1.4818, "nll_loss": 1.4695733785629272, "rewards/accuracies": 1.0, "rewards/chosen": -0.14692161977291107, "rewards/margins": 0.18611754477024078, "rewards/rejected": -0.33303919434547424, "step": 1211 }, { "epoch": 1.909377462568952, "grad_norm": 0.23754094541072845, "learning_rate": 2.6239730358506905e-08, "log_odds_chosen": 1.9623842239379883, "log_odds_ratio": -0.13883398473262787, "logits/chosen": -0.6283769011497498, "logits/rejected": -1.6670176982879639, "logps/chosen": -1.4184577465057373, "logps/rejected": -3.148740530014038, "loss": 1.4417, "nll_loss": 1.4277933835983276, "rewards/accuracies": 1.0, "rewards/chosen": -0.14184579253196716, "rewards/margins": 0.17302829027175903, "rewards/rejected": -0.3148740828037262, "step": 1212 }, { "epoch": 1.9109535066981875, "grad_norm": 0.5034695863723755, "learning_rate": 2.535965460329148e-08, "log_odds_chosen": 2.591174602508545, "log_odds_ratio": -0.08406220376491547, "logits/chosen": -0.7297650575637817, "logits/rejected": -1.6605829000473022, "logps/chosen": -1.3507049083709717, "logps/rejected": -3.6722910404205322, "loss": 1.3691, "nll_loss": 1.3607325553894043, "rewards/accuracies": 1.0, "rewards/chosen": -0.13507048785686493, "rewards/margins": 0.23215864598751068, "rewards/rejected": -0.3672291040420532, "step": 1213 }, { "epoch": 1.9125295508274232, "grad_norm": 0.35041630268096924, "learning_rate": 2.4494496312726043e-08, "log_odds_chosen": 2.5737247467041016, "log_odds_ratio": -0.07771475613117218, "logits/chosen": -0.5698294639587402, "logits/rejected": -1.8292227983474731, "logps/chosen": -1.3499077558517456, "logps/rejected": -3.649833917617798, "loss": 1.3696, "nll_loss": 1.361798882484436, "rewards/accuracies": 1.0, "rewards/chosen": -0.13499079644680023, "rewards/margins": 0.2299925982952118, "rewards/rejected": -0.36498335003852844, "step": 1214 }, { "epoch": 1.9141055949566588, "grad_norm": 0.22618412971496582, "learning_rate": 2.3644262022668005e-08, "log_odds_chosen": 2.289720058441162, "log_odds_ratio": -0.0999235212802887, "logits/chosen": -0.6697892546653748, "logits/rejected": -1.6467437744140625, "logps/chosen": -1.3670634031295776, "logps/rejected": -3.3944926261901855, "loss": 1.3851, "nll_loss": 1.3751273155212402, "rewards/accuracies": 1.0, "rewards/chosen": -0.13670635223388672, "rewards/margins": 0.20274288952350616, "rewards/rejected": -0.3394492268562317, "step": 1215 }, { "epoch": 1.9156816390858944, "grad_norm": 0.26918596029281616, "learning_rate": 2.280895815623185e-08, "log_odds_chosen": 2.0213756561279297, "log_odds_ratio": -0.14538761973381042, "logits/chosen": -0.6079831123352051, "logits/rejected": -1.3852109909057617, "logps/chosen": -1.413007140159607, "logps/rejected": -3.202913999557495, "loss": 1.4496, "nll_loss": 1.435054898262024, "rewards/accuracies": 1.0, "rewards/chosen": -0.1413007229566574, "rewards/margins": 0.1789906769990921, "rewards/rejected": -0.3202913999557495, "step": 1216 }, { "epoch": 1.91725768321513, "grad_norm": 0.2670883238315582, "learning_rate": 2.1988591023738513e-08, "log_odds_chosen": 1.9611138105392456, "log_odds_ratio": -0.14200064539909363, "logits/chosen": -0.6325291395187378, "logits/rejected": -1.3312360048294067, "logps/chosen": -1.501002311706543, "logps/rejected": -3.2525320053100586, "loss": 1.5165, "nll_loss": 1.5023219585418701, "rewards/accuracies": 1.0, "rewards/chosen": -0.1501002162694931, "rewards/margins": 0.175152987241745, "rewards/rejected": -0.3252532184123993, "step": 1217 }, { "epoch": 1.9188337273443656, "grad_norm": 0.23781776428222656, "learning_rate": 2.1183166822670518e-08, "log_odds_chosen": 2.4302423000335693, "log_odds_ratio": -0.09699646383523941, "logits/chosen": -0.6007567644119263, "logits/rejected": -1.6736525297164917, "logps/chosen": -1.4307944774627686, "logps/rejected": -3.6152303218841553, "loss": 1.4484, "nll_loss": 1.438686728477478, "rewards/accuracies": 1.0, "rewards/chosen": -0.14307942986488342, "rewards/margins": 0.2184436023235321, "rewards/rejected": -0.3615230321884155, "step": 1218 }, { "epoch": 1.9204097714736013, "grad_norm": 0.25127261877059937, "learning_rate": 2.0392691637622696e-08, "log_odds_chosen": 2.0024428367614746, "log_odds_ratio": -0.1323987990617752, "logits/chosen": -0.5868186950683594, "logits/rejected": -1.6262056827545166, "logps/chosen": -1.4351619482040405, "logps/rejected": -3.202341079711914, "loss": 1.466, "nll_loss": 1.4527740478515625, "rewards/accuracies": 1.0, "rewards/chosen": -0.1435161978006363, "rewards/margins": 0.17671793699264526, "rewards/rejected": -0.32023414969444275, "step": 1219 }, { "epoch": 1.9219858156028369, "grad_norm": 0.26230388879776, "learning_rate": 1.961717144025732e-08, "log_odds_chosen": 2.007323741912842, "log_odds_ratio": -0.13296662271022797, "logits/chosen": -0.5941151976585388, "logits/rejected": -1.4452332258224487, "logps/chosen": -1.3886336088180542, "logps/rejected": -3.1494336128234863, "loss": 1.4276, "nll_loss": 1.4143180847167969, "rewards/accuracies": 1.0, "rewards/chosen": -0.13886335492134094, "rewards/margins": 0.17608001828193665, "rewards/rejected": -0.3149433732032776, "step": 1220 }, { "epoch": 1.9235618597320725, "grad_norm": 0.24751979112625122, "learning_rate": 1.8856612089259482e-08, "log_odds_chosen": 2.4617958068847656, "log_odds_ratio": -0.10518522560596466, "logits/chosen": -0.6848769187927246, "logits/rejected": -1.5902752876281738, "logps/chosen": -1.4523845911026, "logps/rejected": -3.6777474880218506, "loss": 1.4711, "nll_loss": 1.4605536460876465, "rewards/accuracies": 1.0, "rewards/chosen": -0.14523845911026, "rewards/margins": 0.22253631055355072, "rewards/rejected": -0.36777472496032715, "step": 1221 }, { "epoch": 1.9251379038613081, "grad_norm": 0.3200394809246063, "learning_rate": 1.8111019330291798e-08, "log_odds_chosen": 2.1075453758239746, "log_odds_ratio": -0.12425115704536438, "logits/chosen": -0.6616092324256897, "logits/rejected": -1.4234426021575928, "logps/chosen": -1.4398384094238281, "logps/rejected": -3.3168535232543945, "loss": 1.4611, "nll_loss": 1.4486485719680786, "rewards/accuracies": 1.0, "rewards/chosen": -0.1439838409423828, "rewards/margins": 0.1877015084028244, "rewards/rejected": -0.33168530464172363, "step": 1222 }, { "epoch": 1.9267139479905437, "grad_norm": 0.22790563106536865, "learning_rate": 1.738039879595088e-08, "log_odds_chosen": 2.506091356277466, "log_odds_ratio": -0.11787018924951553, "logits/chosen": -0.655095636844635, "logits/rejected": -1.5520126819610596, "logps/chosen": -1.3904443979263306, "logps/rejected": -3.649449586868286, "loss": 1.4038, "nll_loss": 1.3920207023620605, "rewards/accuracies": 1.0, "rewards/chosen": -0.13904446363449097, "rewards/margins": 0.22590050101280212, "rewards/rejected": -0.3649449348449707, "step": 1223 }, { "epoch": 1.9282899921197794, "grad_norm": 0.24332349002361298, "learning_rate": 1.6664756005726478e-08, "log_odds_chosen": 1.9983129501342773, "log_odds_ratio": -0.17521749436855316, "logits/chosen": -0.740903377532959, "logits/rejected": -1.5467405319213867, "logps/chosen": -1.4532381296157837, "logps/rejected": -3.2283029556274414, "loss": 1.4635, "nll_loss": 1.4460121393203735, "rewards/accuracies": 1.0, "rewards/chosen": -0.14532381296157837, "rewards/margins": 0.1775064766407013, "rewards/rejected": -0.32283028960227966, "step": 1224 }, { "epoch": 1.929866036249015, "grad_norm": 0.2376096099615097, "learning_rate": 1.596409636595797e-08, "log_odds_chosen": 2.1978161334991455, "log_odds_ratio": -0.11055047065019608, "logits/chosen": -0.6718356609344482, "logits/rejected": -1.4906890392303467, "logps/chosen": -1.3868401050567627, "logps/rejected": -3.325064182281494, "loss": 1.3863, "nll_loss": 1.3752362728118896, "rewards/accuracies": 1.0, "rewards/chosen": -0.1386840045452118, "rewards/margins": 0.19382244348526, "rewards/rejected": -0.3325064480304718, "step": 1225 }, { "epoch": 1.9314420803782506, "grad_norm": 0.26095396280288696, "learning_rate": 1.5278425169794163e-08, "log_odds_chosen": 2.300527572631836, "log_odds_ratio": -0.10959479212760925, "logits/chosen": -0.6601259112358093, "logits/rejected": -1.6594523191452026, "logps/chosen": -1.4308840036392212, "logps/rejected": -3.4921178817749023, "loss": 1.4486, "nll_loss": 1.4376723766326904, "rewards/accuracies": 1.0, "rewards/chosen": -0.14308838546276093, "rewards/margins": 0.20612342655658722, "rewards/rejected": -0.34921181201934814, "step": 1226 }, { "epoch": 1.9330181245074862, "grad_norm": 0.23057760298252106, "learning_rate": 1.4607747597153109e-08, "log_odds_chosen": 2.2451868057250977, "log_odds_ratio": -0.1309502124786377, "logits/chosen": -0.63688063621521, "logits/rejected": -1.5696496963500977, "logps/chosen": -1.3802334070205688, "logps/rejected": -3.3806991577148438, "loss": 1.4005, "nll_loss": 1.387450933456421, "rewards/accuracies": 1.0, "rewards/chosen": -0.13802333176136017, "rewards/margins": 0.2000465989112854, "rewards/rejected": -0.33806994557380676, "step": 1227 }, { "epoch": 1.9345941686367218, "grad_norm": 0.26310327649116516, "learning_rate": 1.3952068714684129e-08, "log_odds_chosen": 2.3147616386413574, "log_odds_ratio": -0.12972179055213928, "logits/chosen": -0.7092827558517456, "logits/rejected": -1.442671298980713, "logps/chosen": -1.4455046653747559, "logps/rejected": -3.529954671859741, "loss": 1.4531, "nll_loss": 1.4401403665542603, "rewards/accuracies": 1.0, "rewards/chosen": -0.14455047249794006, "rewards/margins": 0.2084449976682663, "rewards/rejected": -0.35299545526504517, "step": 1228 }, { "epoch": 1.9361702127659575, "grad_norm": 0.25792396068573, "learning_rate": 1.3311393475727628e-08, "log_odds_chosen": 2.2611234188079834, "log_odds_ratio": -0.11067617684602737, "logits/chosen": -0.6880636215209961, "logits/rejected": -1.679255485534668, "logps/chosen": -1.442643165588379, "logps/rejected": -3.4605536460876465, "loss": 1.4579, "nll_loss": 1.4468597173690796, "rewards/accuracies": 1.0, "rewards/chosen": -0.1442643254995346, "rewards/margins": 0.20179106295108795, "rewards/rejected": -0.34605538845062256, "step": 1229 }, { "epoch": 1.937746256895193, "grad_norm": 0.2279665172100067, "learning_rate": 1.2685726720279344e-08, "log_odds_chosen": 2.0154387950897217, "log_odds_ratio": -0.1300310641527176, "logits/chosen": -0.6852981448173523, "logits/rejected": -1.5457828044891357, "logps/chosen": -1.53213632106781, "logps/rejected": -3.340744972229004, "loss": 1.5445, "nll_loss": 1.5314971208572388, "rewards/accuracies": 1.0, "rewards/chosen": -0.15321363508701324, "rewards/margins": 0.18086089193820953, "rewards/rejected": -0.3340745270252228, "step": 1230 }, { "epoch": 1.9393223010244287, "grad_norm": 0.26356250047683716, "learning_rate": 1.2075073174952378e-08, "log_odds_chosen": 2.287156343460083, "log_odds_ratio": -0.11871811002492905, "logits/chosen": -0.6400120258331299, "logits/rejected": -1.4665995836257935, "logps/chosen": -1.3877084255218506, "logps/rejected": -3.4254071712493896, "loss": 1.4038, "nll_loss": 1.3919708728790283, "rewards/accuracies": 1.0, "rewards/chosen": -0.13877084851264954, "rewards/margins": 0.20376989245414734, "rewards/rejected": -0.3425407409667969, "step": 1231 }, { "epoch": 1.9408983451536643, "grad_norm": 0.26231732964515686, "learning_rate": 1.1479437452942997e-08, "log_odds_chosen": 2.0814905166625977, "log_odds_ratio": -0.12102769315242767, "logits/chosen": -0.5995890498161316, "logits/rejected": -1.59853196144104, "logps/chosen": -1.4064700603485107, "logps/rejected": -3.2474911212921143, "loss": 1.4118, "nll_loss": 1.3996764421463013, "rewards/accuracies": 1.0, "rewards/chosen": -0.1406470090150833, "rewards/margins": 0.1841021031141281, "rewards/rejected": -0.3247491419315338, "step": 1232 }, { "epoch": 1.9424743892829, "grad_norm": 0.32246655225753784, "learning_rate": 1.0898824053994227e-08, "log_odds_chosen": 2.4490556716918945, "log_odds_ratio": -0.10355079919099808, "logits/chosen": -0.6387543678283691, "logits/rejected": -1.6380618810653687, "logps/chosen": -1.492643117904663, "logps/rejected": -3.717590808868408, "loss": 1.4865, "nll_loss": 1.4761688709259033, "rewards/accuracies": 1.0, "rewards/chosen": -0.14926430583000183, "rewards/margins": 0.22249475121498108, "rewards/rejected": -0.3717590868473053, "step": 1233 }, { "epoch": 1.9440504334121356, "grad_norm": 0.6377884149551392, "learning_rate": 1.0333237364362978e-08, "log_odds_chosen": 2.1542282104492188, "log_odds_ratio": -0.14359650015830994, "logits/chosen": -0.6075577735900879, "logits/rejected": -1.4595712423324585, "logps/chosen": -1.4243805408477783, "logps/rejected": -3.346683979034424, "loss": 1.4413, "nll_loss": 1.426926612854004, "rewards/accuracies": 1.0, "rewards/chosen": -0.14243805408477783, "rewards/margins": 0.19223038852214813, "rewards/rejected": -0.33466842770576477, "step": 1234 }, { "epoch": 1.9456264775413712, "grad_norm": 0.26030442118644714, "learning_rate": 9.782681656786973e-09, "log_odds_chosen": 2.0325188636779785, "log_odds_ratio": -0.12405700981616974, "logits/chosen": -0.6555399298667908, "logits/rejected": -1.5242066383361816, "logps/chosen": -1.385606288909912, "logps/rejected": -3.1708059310913086, "loss": 1.4182, "nll_loss": 1.4058310985565186, "rewards/accuracies": 1.0, "rewards/chosen": -0.13856063783168793, "rewards/margins": 0.17851996421813965, "rewards/rejected": -0.3170805871486664, "step": 1235 }, { "epoch": 1.9472025216706068, "grad_norm": 0.29331284761428833, "learning_rate": 9.247161090451206e-09, "log_odds_chosen": 2.0438590049743652, "log_odds_ratio": -0.12507112324237823, "logits/chosen": -0.64220130443573, "logits/rejected": -1.371909737586975, "logps/chosen": -1.5063974857330322, "logps/rejected": -3.3356056213378906, "loss": 1.4943, "nll_loss": 1.4817700386047363, "rewards/accuracies": 1.0, "rewards/chosen": -0.15063975751399994, "rewards/margins": 0.18292082846164703, "rewards/rejected": -0.333560585975647, "step": 1236 }, { "epoch": 1.9487785657998424, "grad_norm": 0.25294172763824463, "learning_rate": 8.726679710957752e-09, "log_odds_chosen": 2.022395610809326, "log_odds_ratio": -0.13524988293647766, "logits/chosen": -0.6590473651885986, "logits/rejected": -1.4711931943893433, "logps/chosen": -1.4393179416656494, "logps/rejected": -3.2323098182678223, "loss": 1.4504, "nll_loss": 1.4368774890899658, "rewards/accuracies": 1.0, "rewards/chosen": -0.1439317911863327, "rewards/margins": 0.17929919064044952, "rewards/rejected": -0.3232309818267822, "step": 1237 }, { "epoch": 1.950354609929078, "grad_norm": 0.2449255883693695, "learning_rate": 8.221241450295124e-09, "log_odds_chosen": 2.2828528881073, "log_odds_ratio": -0.11480219662189484, "logits/chosen": -0.6655214428901672, "logits/rejected": -1.5491619110107422, "logps/chosen": -1.4490928649902344, "logps/rejected": -3.495936155319214, "loss": 1.4511, "nll_loss": 1.4396617412567139, "rewards/accuracies": 1.0, "rewards/chosen": -0.14490929245948792, "rewards/margins": 0.2046843022108078, "rewards/rejected": -0.3495936095714569, "step": 1238 }, { "epoch": 1.9519306540583137, "grad_norm": 0.25627341866493225, "learning_rate": 7.730850126807853e-09, "log_odds_chosen": 2.446561336517334, "log_odds_ratio": -0.08879183232784271, "logits/chosen": -0.6481437683105469, "logits/rejected": -1.6602849960327148, "logps/chosen": -1.3394733667373657, "logps/rejected": -3.510270357131958, "loss": 1.3751, "nll_loss": 1.3662571907043457, "rewards/accuracies": 1.0, "rewards/chosen": -0.13394732773303986, "rewards/margins": 0.21707971394062042, "rewards/rejected": -0.3510270416736603, "step": 1239 }, { "epoch": 1.9535066981875493, "grad_norm": 0.257373571395874, "learning_rate": 7.255509445168062e-09, "log_odds_chosen": 2.061161756515503, "log_odds_ratio": -0.1593131572008133, "logits/chosen": -0.5462805032730103, "logits/rejected": -1.522867202758789, "logps/chosen": -1.375345230102539, "logps/rejected": -3.1991288661956787, "loss": 1.3888, "nll_loss": 1.37291419506073, "rewards/accuracies": 1.0, "rewards/chosen": -0.13753452897071838, "rewards/margins": 0.1823783665895462, "rewards/rejected": -0.3199129104614258, "step": 1240 }, { "epoch": 1.955082742316785, "grad_norm": 0.2276846319437027, "learning_rate": 6.795222996347494e-09, "log_odds_chosen": 2.1780552864074707, "log_odds_ratio": -0.11423398554325104, "logits/chosen": -0.6086721420288086, "logits/rejected": -1.3943264484405518, "logps/chosen": -1.332671880722046, "logps/rejected": -3.24025297164917, "loss": 1.3591, "nll_loss": 1.3477051258087158, "rewards/accuracies": 1.0, "rewards/chosen": -0.13326719403266907, "rewards/margins": 0.1907581239938736, "rewards/rejected": -0.32402530312538147, "step": 1241 }, { "epoch": 1.9566587864460205, "grad_norm": 0.4101671576499939, "learning_rate": 6.349994257590862e-09, "log_odds_chosen": 2.2584280967712402, "log_odds_ratio": -0.12476156651973724, "logits/chosen": -0.7021857500076294, "logits/rejected": -1.5693060159683228, "logps/chosen": -1.5312988758087158, "logps/rejected": -3.5824179649353027, "loss": 1.5362, "nll_loss": 1.5237008333206177, "rewards/accuracies": 1.0, "rewards/chosen": -0.15312987565994263, "rewards/margins": 0.20511192083358765, "rewards/rejected": -0.3582417964935303, "step": 1242 }, { "epoch": 1.9582348305752562, "grad_norm": 0.24424447119235992, "learning_rate": 5.9198265923881e-09, "log_odds_chosen": 2.382755994796753, "log_odds_ratio": -0.10605061799287796, "logits/chosen": -0.6909282207489014, "logits/rejected": -1.7780449390411377, "logps/chosen": -1.4364418983459473, "logps/rejected": -3.5821585655212402, "loss": 1.4466, "nll_loss": 1.4360185861587524, "rewards/accuracies": 1.0, "rewards/chosen": -0.14364419877529144, "rewards/margins": 0.21457163989543915, "rewards/rejected": -0.358215868473053, "step": 1243 }, { "epoch": 1.9598108747044918, "grad_norm": 0.257253497838974, "learning_rate": 5.504723250450593e-09, "log_odds_chosen": 2.0889639854431152, "log_odds_ratio": -0.12769535183906555, "logits/chosen": -0.6693202257156372, "logits/rejected": -1.6274899244308472, "logps/chosen": -1.4366968870162964, "logps/rejected": -3.291912317276001, "loss": 1.4528, "nll_loss": 1.43999445438385, "rewards/accuracies": 1.0, "rewards/chosen": -0.14366969466209412, "rewards/margins": 0.18552155792713165, "rewards/rejected": -0.3291912376880646, "step": 1244 }, { "epoch": 1.9613869188337274, "grad_norm": 0.6021096110343933, "learning_rate": 5.1046873676861005e-09, "log_odds_chosen": 2.220310688018799, "log_odds_ratio": -0.11021006107330322, "logits/chosen": -0.5942336916923523, "logits/rejected": -1.7292745113372803, "logps/chosen": -1.4646830558776855, "logps/rejected": -3.4539220333099365, "loss": 1.4796, "nll_loss": 1.4685307741165161, "rewards/accuracies": 1.0, "rewards/chosen": -0.14646829664707184, "rewards/margins": 0.19892390072345734, "rewards/rejected": -0.3453921973705292, "step": 1245 }, { "epoch": 1.9629629629629628, "grad_norm": 0.5387910604476929, "learning_rate": 4.719721966174317e-09, "log_odds_chosen": 2.3060860633850098, "log_odds_ratio": -0.10647837817668915, "logits/chosen": -0.7000632286071777, "logits/rejected": -1.4415466785430908, "logps/chosen": -1.3973464965820312, "logps/rejected": -3.4535892009735107, "loss": 1.4128, "nll_loss": 1.4021847248077393, "rewards/accuracies": 1.0, "rewards/chosen": -0.1397346407175064, "rewards/margins": 0.2056242823600769, "rewards/rejected": -0.3453589379787445, "step": 1246 }, { "epoch": 1.9645390070921986, "grad_norm": 0.26819875836372375, "learning_rate": 4.349829954145123e-09, "log_odds_chosen": 1.8778691291809082, "log_odds_ratio": -0.16752654314041138, "logits/chosen": -0.7533020973205566, "logits/rejected": -1.2455486059188843, "logps/chosen": -1.3759647607803345, "logps/rejected": -3.0184898376464844, "loss": 1.4171, "nll_loss": 1.4003074169158936, "rewards/accuracies": 1.0, "rewards/chosen": -0.1375964879989624, "rewards/margins": 0.16425247490406036, "rewards/rejected": -0.30184897780418396, "step": 1247 }, { "epoch": 1.966115051221434, "grad_norm": 0.27044862508773804, "learning_rate": 3.995014125956153e-09, "log_odds_chosen": 1.9101828336715698, "log_odds_ratio": -0.151413694024086, "logits/chosen": -0.7194880247116089, "logits/rejected": -1.5909761190414429, "logps/chosen": -1.3965990543365479, "logps/rejected": -3.0711615085601807, "loss": 1.4172, "nll_loss": 1.402054786682129, "rewards/accuracies": 1.0, "rewards/chosen": -0.13965991139411926, "rewards/margins": 0.1674562692642212, "rewards/rejected": -0.30711618065834045, "step": 1248 }, { "epoch": 1.9676910953506699, "grad_norm": 0.2304389923810959, "learning_rate": 3.6552771620712573e-09, "log_odds_chosen": 2.586142063140869, "log_odds_ratio": -0.08293360471725464, "logits/chosen": -0.7207388877868652, "logits/rejected": -1.8254717588424683, "logps/chosen": -1.3900524377822876, "logps/rejected": -3.7082414627075195, "loss": 1.3902, "nll_loss": 1.3819549083709717, "rewards/accuracies": 1.0, "rewards/chosen": -0.13900524377822876, "rewards/margins": 0.23181888461112976, "rewards/rejected": -0.37082409858703613, "step": 1249 }, { "epoch": 1.9692671394799053, "grad_norm": 0.23947615921497345, "learning_rate": 3.3306216290409637e-09, "log_odds_chosen": 2.053736686706543, "log_odds_ratio": -0.12715302407741547, "logits/chosen": -0.6751238703727722, "logits/rejected": -1.479292631149292, "logps/chosen": -1.294533610343933, "logps/rejected": -3.0750367641448975, "loss": 1.337, "nll_loss": 1.324278473854065, "rewards/accuracies": 1.0, "rewards/chosen": -0.1294533610343933, "rewards/margins": 0.17805030941963196, "rewards/rejected": -0.30750367045402527, "step": 1250 }, { "epoch": 1.9708431836091411, "grad_norm": 0.2823444902896881, "learning_rate": 3.021049979482715e-09, "log_odds_chosen": 2.042980432510376, "log_odds_ratio": -0.1330372840166092, "logits/chosen": -0.5546174049377441, "logits/rejected": -1.4248008728027344, "logps/chosen": -1.3938136100769043, "logps/rejected": -3.19840931892395, "loss": 1.4161, "nll_loss": 1.4027931690216064, "rewards/accuracies": 1.0, "rewards/chosen": -0.13938137888908386, "rewards/margins": 0.1804596185684204, "rewards/rejected": -0.3198409676551819, "step": 1251 }, { "epoch": 1.9724192277383765, "grad_norm": 0.24833545088768005, "learning_rate": 2.72656455206266e-09, "log_odds_chosen": 1.8563863039016724, "log_odds_ratio": -0.16137632727622986, "logits/chosen": -0.6011066436767578, "logits/rejected": -1.389678716659546, "logps/chosen": -1.4618130922317505, "logps/rejected": -3.1044974327087402, "loss": 1.4898, "nll_loss": 1.4736762046813965, "rewards/accuracies": 1.0, "rewards/chosen": -0.14618133008480072, "rewards/margins": 0.16426843404769897, "rewards/rejected": -0.3104497492313385, "step": 1252 }, { "epoch": 1.9739952718676124, "grad_norm": 0.2277497500181198, "learning_rate": 2.447167571477449e-09, "log_odds_chosen": 2.254269599914551, "log_odds_ratio": -0.10515553504228592, "logits/chosen": -0.5626559853553772, "logits/rejected": -1.4993703365325928, "logps/chosen": -1.4048823118209839, "logps/rejected": -3.4048352241516113, "loss": 1.4262, "nll_loss": 1.4156763553619385, "rewards/accuracies": 1.0, "rewards/chosen": -0.14048823714256287, "rewards/margins": 0.1999952644109726, "rewards/rejected": -0.34048348665237427, "step": 1253 }, { "epoch": 1.9755713159968478, "grad_norm": 0.2508019506931305, "learning_rate": 2.182861148437798e-09, "log_odds_chosen": 2.104759931564331, "log_odds_ratio": -0.13345009088516235, "logits/chosen": -0.6912946701049805, "logits/rejected": -1.526583194732666, "logps/chosen": -1.4985864162445068, "logps/rejected": -3.3857264518737793, "loss": 1.4997, "nll_loss": 1.4863353967666626, "rewards/accuracies": 1.0, "rewards/chosen": -0.14985865354537964, "rewards/margins": 0.18871398270130157, "rewards/rejected": -0.33857262134552, "step": 1254 }, { "epoch": 1.9771473601260836, "grad_norm": 0.2378934919834137, "learning_rate": 1.933647279652506e-09, "log_odds_chosen": 2.4360783100128174, "log_odds_ratio": -0.1355494111776352, "logits/chosen": -0.6797410249710083, "logits/rejected": -1.5201764106750488, "logps/chosen": -1.3966715335845947, "logps/rejected": -3.5907230377197266, "loss": 1.4375, "nll_loss": 1.4239916801452637, "rewards/accuracies": 1.0, "rewards/chosen": -0.13966715335845947, "rewards/margins": 0.2194051444530487, "rewards/rejected": -0.3590722978115082, "step": 1255 }, { "epoch": 1.978723404255319, "grad_norm": 0.2774008810520172, "learning_rate": 1.6995278478133534e-09, "log_odds_chosen": 1.871781349182129, "log_odds_ratio": -0.15151040256023407, "logits/chosen": -0.5865615606307983, "logits/rejected": -1.3873411417007446, "logps/chosen": -1.4085748195648193, "logps/rejected": -3.0480005741119385, "loss": 1.4562, "nll_loss": 1.440998911857605, "rewards/accuracies": 1.0, "rewards/chosen": -0.1408574879169464, "rewards/margins": 0.16394257545471191, "rewards/rejected": -0.3048000633716583, "step": 1256 }, { "epoch": 1.9802994483845549, "grad_norm": 0.23505550622940063, "learning_rate": 1.4805046215806693e-09, "log_odds_chosen": 2.1263012886047363, "log_odds_ratio": -0.11954326182603836, "logits/chosen": -0.6618563532829285, "logits/rejected": -1.484593152999878, "logps/chosen": -1.3787755966186523, "logps/rejected": -3.2545793056488037, "loss": 1.4004, "nll_loss": 1.388494610786438, "rewards/accuracies": 1.0, "rewards/chosen": -0.13787758350372314, "rewards/margins": 0.18758037686347961, "rewards/rejected": -0.32545796036720276, "step": 1257 }, { "epoch": 1.9818754925137902, "grad_norm": 0.22150032222270966, "learning_rate": 1.2765792555704536e-09, "log_odds_chosen": 2.062730550765991, "log_odds_ratio": -0.1269073188304901, "logits/chosen": -0.6945916414260864, "logits/rejected": -1.6374279260635376, "logps/chosen": -1.4374325275421143, "logps/rejected": -3.2649648189544678, "loss": 1.4464, "nll_loss": 1.433678388595581, "rewards/accuracies": 1.0, "rewards/chosen": -0.14374326169490814, "rewards/margins": 0.18275325000286102, "rewards/rejected": -0.32649654150009155, "step": 1258 }, { "epoch": 1.983451536643026, "grad_norm": 0.22993101179599762, "learning_rate": 1.0877532903414977e-09, "log_odds_chosen": 2.382324457168579, "log_odds_ratio": -0.09669384360313416, "logits/chosen": -0.6250649094581604, "logits/rejected": -1.4983128309249878, "logps/chosen": -1.3412678241729736, "logps/rejected": -3.4556403160095215, "loss": 1.3701, "nll_loss": 1.3604559898376465, "rewards/accuracies": 1.0, "rewards/chosen": -0.13412679731845856, "rewards/margins": 0.21143727004528046, "rewards/rejected": -0.3455640971660614, "step": 1259 }, { "epoch": 1.9850275807722615, "grad_norm": 0.2812540829181671, "learning_rate": 9.140281523836168e-10, "log_odds_chosen": 2.2740962505340576, "log_odds_ratio": -0.10593446344137192, "logits/chosen": -0.7241764664649963, "logits/rejected": -1.4203543663024902, "logps/chosen": -1.4348554611206055, "logps/rejected": -3.464871883392334, "loss": 1.4553, "nll_loss": 1.4447091817855835, "rewards/accuracies": 1.0, "rewards/chosen": -0.14348556101322174, "rewards/margins": 0.20300163328647614, "rewards/rejected": -0.3464871644973755, "step": 1260 }, { "epoch": 1.9866036249014973, "grad_norm": 0.2406286597251892, "learning_rate": 7.554051541074357e-10, "log_odds_chosen": 2.264719009399414, "log_odds_ratio": -0.11313973367214203, "logits/chosen": -0.647687554359436, "logits/rejected": -1.6995664834976196, "logps/chosen": -1.3738747835159302, "logps/rejected": -3.3787131309509277, "loss": 1.3986, "nll_loss": 1.3872549533843994, "rewards/accuracies": 1.0, "rewards/chosen": -0.1373874843120575, "rewards/margins": 0.20048385858535767, "rewards/rejected": -0.33787134289741516, "step": 1261 }, { "epoch": 1.9881796690307327, "grad_norm": 0.25356847047805786, "learning_rate": 6.118854938337304e-10, "log_odds_chosen": 2.0269861221313477, "log_odds_ratio": -0.14002223312854767, "logits/chosen": -0.6952039003372192, "logits/rejected": -1.4595836400985718, "logps/chosen": -1.4062353372573853, "logps/rejected": -3.2002649307250977, "loss": 1.4314, "nll_loss": 1.417415976524353, "rewards/accuracies": 1.0, "rewards/chosen": -0.1406235247850418, "rewards/margins": 0.17940297722816467, "rewards/rejected": -0.3200264871120453, "step": 1262 }, { "epoch": 1.9897557131599686, "grad_norm": 0.23797620832920074, "learning_rate": 4.834702557852121e-10, "log_odds_chosen": 2.2497360706329346, "log_odds_ratio": -0.11052749305963516, "logits/chosen": -0.683299720287323, "logits/rejected": -1.6744623184204102, "logps/chosen": -1.4912843704223633, "logps/rejected": -3.5169126987457275, "loss": 1.4966, "nll_loss": 1.4855808019638062, "rewards/accuracies": 1.0, "rewards/chosen": -0.14912845194339752, "rewards/margins": 0.2025628387928009, "rewards/rejected": -0.3516913056373596, "step": 1263 }, { "epoch": 1.991331757289204, "grad_norm": 0.2590091824531555, "learning_rate": 3.7016041007742473e-10, "log_odds_chosen": 2.3769733905792236, "log_odds_ratio": -0.09474535286426544, "logits/chosen": -0.6282199621200562, "logits/rejected": -1.6947509050369263, "logps/chosen": -1.4270833730697632, "logps/rejected": -3.5565853118896484, "loss": 1.4451, "nll_loss": 1.4355968236923218, "rewards/accuracies": 1.0, "rewards/chosen": -0.14270833134651184, "rewards/margins": 0.2129502296447754, "rewards/rejected": -0.35565853118896484, "step": 1264 }, { "epoch": 1.9929078014184398, "grad_norm": 0.26440393924713135, "learning_rate": 2.71956812712304e-10, "log_odds_chosen": 2.5334551334381104, "log_odds_ratio": -0.08019878715276718, "logits/chosen": -0.6907272338867188, "logits/rejected": -1.8771357536315918, "logps/chosen": -1.439613699913025, "logps/rejected": -3.7236075401306152, "loss": 1.4646, "nll_loss": 1.456545114517212, "rewards/accuracies": 1.0, "rewards/chosen": -0.1439613699913025, "rewards/margins": 0.2283993810415268, "rewards/rejected": -0.3723607361316681, "step": 1265 }, { "epoch": 1.9944838455476752, "grad_norm": 0.43225669860839844, "learning_rate": 1.8886020557107308e-10, "log_odds_chosen": 2.0030245780944824, "log_odds_ratio": -0.1366153359413147, "logits/chosen": -0.6199713349342346, "logits/rejected": -1.647199273109436, "logps/chosen": -1.4355506896972656, "logps/rejected": -3.206270456314087, "loss": 1.4606, "nll_loss": 1.4469619989395142, "rewards/accuracies": 1.0, "rewards/chosen": -0.14355507493019104, "rewards/margins": 0.1770719587802887, "rewards/rejected": -0.32062703371047974, "step": 1266 }, { "epoch": 1.996059889676911, "grad_norm": 0.2633354663848877, "learning_rate": 1.208712164091352e-10, "log_odds_chosen": 2.133509874343872, "log_odds_ratio": -0.12052997946739197, "logits/chosen": -0.6985264420509338, "logits/rejected": -1.4875023365020752, "logps/chosen": -1.4392731189727783, "logps/rejected": -3.3356130123138428, "loss": 1.452, "nll_loss": 1.439988374710083, "rewards/accuracies": 1.0, "rewards/chosen": -0.14392732083797455, "rewards/margins": 0.18963398039340973, "rewards/rejected": -0.3335613012313843, "step": 1267 }, { "epoch": 1.9976359338061465, "grad_norm": 0.24715520441532135, "learning_rate": 6.799035885030057e-11, "log_odds_chosen": 2.514960765838623, "log_odds_ratio": -0.08915268629789352, "logits/chosen": -0.6880565881729126, "logits/rejected": -1.5162231922149658, "logps/chosen": -1.4475561380386353, "logps/rejected": -3.716582775115967, "loss": 1.4555, "nll_loss": 1.4465482234954834, "rewards/accuracies": 1.0, "rewards/chosen": -0.14475558698177338, "rewards/margins": 0.22690266370773315, "rewards/rejected": -0.3716582655906677, "step": 1268 }, { "epoch": 1.9992119779353823, "grad_norm": 0.2657155990600586, "learning_rate": 3.0218032384565774e-11, "log_odds_chosen": 2.253774642944336, "log_odds_ratio": -0.10505152493715286, "logits/chosen": -0.5824138522148132, "logits/rejected": -1.4273254871368408, "logps/chosen": -1.3337661027908325, "logps/rejected": -3.313795328140259, "loss": 1.3742, "nll_loss": 1.3637198209762573, "rewards/accuracies": 1.0, "rewards/chosen": -0.1333766132593155, "rewards/margins": 0.19800293445587158, "rewards/rejected": -0.3313795328140259, "step": 1269 }, { "epoch": 2.0, "grad_norm": 0.3346489369869232, "learning_rate": 7.554522363895089e-12, "log_odds_chosen": 2.1451759338378906, "log_odds_ratio": -0.12695689499378204, "logits/chosen": -0.6439350247383118, "logits/rejected": -1.7152599096298218, "logps/chosen": -1.3750877380371094, "logps/rejected": -3.2709157466888428, "loss": 1.3954, "nll_loss": 1.3826689720153809, "rewards/accuracies": 1.0, "rewards/chosen": -0.13750876486301422, "rewards/margins": 0.18958280980587006, "rewards/rejected": -0.32709160447120667, "step": 1270 } ], "logging_steps": 1, "max_steps": 1270, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }