{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.7880220646178093, "eval_steps": 500, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0015760441292356187, "grad_norm": 0.711133599281311, "learning_rate": 0.0, "log_odds_chosen": 0.38395223021507263, "log_odds_ratio": -0.5362357497215271, "logits/chosen": -0.6395432353019714, "logits/rejected": -0.1508907973766327, "logps/chosen": -1.973930835723877, "logps/rejected": -2.3123726844787598, "loss": 2.2406, "nll_loss": 2.1870110034942627, "rewards/accuracies": 0.875, "rewards/chosen": -0.19739308953285217, "rewards/margins": 0.03384416550397873, "rewards/rejected": -0.2312372624874115, "step": 1 }, { "epoch": 0.0031520882584712374, "grad_norm": 0.6591355204582214, "learning_rate": 3.1496062992125985e-08, "log_odds_chosen": 0.39076143503189087, "log_odds_ratio": -0.5237792730331421, "logits/chosen": -0.6357220411300659, "logits/rejected": -0.09711451828479767, "logps/chosen": -1.8780229091644287, "logps/rejected": -2.2161149978637695, "loss": 2.1349, "nll_loss": 2.082494020462036, "rewards/accuracies": 1.0, "rewards/chosen": -0.1878022849559784, "rewards/margins": 0.03380918130278587, "rewards/rejected": -0.22161146998405457, "step": 2 }, { "epoch": 0.004728132387706856, "grad_norm": 0.74098140001297, "learning_rate": 6.299212598425197e-08, "log_odds_chosen": 0.5195883512496948, "log_odds_ratio": -0.4742986857891083, "logits/chosen": -0.7715582251548767, "logits/rejected": -0.26645177602767944, "logps/chosen": -1.9943277835845947, "logps/rejected": -2.4578633308410645, "loss": 2.2471, "nll_loss": 2.199704647064209, "rewards/accuracies": 1.0, "rewards/chosen": -0.19943277537822723, "rewards/margins": 0.046353571116924286, "rewards/rejected": -0.24578633904457092, "step": 3 }, { "epoch": 0.006304176516942475, "grad_norm": 0.6662443280220032, "learning_rate": 9.448818897637795e-08, "log_odds_chosen": 0.55958491563797, "log_odds_ratio": -0.46615538001060486, "logits/chosen": -0.5838385820388794, "logits/rejected": -0.1573001742362976, "logps/chosen": -2.007845163345337, "logps/rejected": -2.5115368366241455, "loss": 2.2568, "nll_loss": 2.210216760635376, "rewards/accuracies": 0.875, "rewards/chosen": -0.20078451931476593, "rewards/margins": 0.05036917328834534, "rewards/rejected": -0.2511536777019501, "step": 4 }, { "epoch": 0.007880220646178092, "grad_norm": 0.7787235379219055, "learning_rate": 1.2598425196850394e-07, "log_odds_chosen": 0.705410361289978, "log_odds_ratio": -0.42067474126815796, "logits/chosen": -0.7229734063148499, "logits/rejected": -0.27978262305259705, "logps/chosen": -1.9303616285324097, "logps/rejected": -2.555299758911133, "loss": 2.1734, "nll_loss": 2.1313021183013916, "rewards/accuracies": 1.0, "rewards/chosen": -0.19303615391254425, "rewards/margins": 0.06249381601810455, "rewards/rejected": -0.2555299699306488, "step": 5 }, { "epoch": 0.009456264775413711, "grad_norm": 0.6482278108596802, "learning_rate": 1.5748031496062992e-07, "log_odds_chosen": 0.40409255027770996, "log_odds_ratio": -0.5151315331459045, "logits/chosen": -0.5276838541030884, "logits/rejected": -0.05200649052858353, "logps/chosen": -1.8561073541641235, "logps/rejected": -2.208005905151367, "loss": 2.1449, "nll_loss": 2.0933837890625, "rewards/accuracies": 1.0, "rewards/chosen": -0.18561072647571564, "rewards/margins": 0.03518984466791153, "rewards/rejected": -0.22080056369304657, "step": 6 }, { "epoch": 0.01103230890464933, "grad_norm": 0.6663646697998047, "learning_rate": 1.889763779527559e-07, "log_odds_chosen": 0.546273410320282, "log_odds_ratio": -0.46661460399627686, "logits/chosen": -0.5808312296867371, "logits/rejected": -0.19844059646129608, "logps/chosen": -1.9260658025741577, "logps/rejected": -2.4119839668273926, "loss": 2.1774, "nll_loss": 2.1307828426361084, "rewards/accuracies": 1.0, "rewards/chosen": -0.1926065981388092, "rewards/margins": 0.04859180375933647, "rewards/rejected": -0.24119840562343597, "step": 7 }, { "epoch": 0.01260835303388495, "grad_norm": 0.782015323638916, "learning_rate": 2.2047244094488187e-07, "log_odds_chosen": 0.8110038638114929, "log_odds_ratio": -0.3770079016685486, "logits/chosen": -0.6198790669441223, "logits/rejected": -0.24129487574100494, "logps/chosen": -2.014923572540283, "logps/rejected": -2.744842290878296, "loss": 2.2715, "nll_loss": 2.2338366508483887, "rewards/accuracies": 1.0, "rewards/chosen": -0.20149235427379608, "rewards/margins": 0.07299190014600754, "rewards/rejected": -0.274484246969223, "step": 8 }, { "epoch": 0.014184397163120567, "grad_norm": 0.6285175085067749, "learning_rate": 2.519685039370079e-07, "log_odds_chosen": 0.4325932562351227, "log_odds_ratio": -0.5066515207290649, "logits/chosen": -0.5963254570960999, "logits/rejected": -0.07179142534732819, "logps/chosen": -1.9378471374511719, "logps/rejected": -2.31904673576355, "loss": 2.1726, "nll_loss": 2.1219170093536377, "rewards/accuracies": 1.0, "rewards/chosen": -0.1937847137451172, "rewards/margins": 0.038119956851005554, "rewards/rejected": -0.23190467059612274, "step": 9 }, { "epoch": 0.015760441292356184, "grad_norm": 0.7205284833908081, "learning_rate": 2.8346456692913386e-07, "log_odds_chosen": 0.4166225790977478, "log_odds_ratio": -0.5110099911689758, "logits/chosen": -0.6733875274658203, "logits/rejected": -0.2730729877948761, "logps/chosen": -1.96082603931427, "logps/rejected": -2.3293237686157227, "loss": 2.21, "nll_loss": 2.158905267715454, "rewards/accuracies": 1.0, "rewards/chosen": -0.19608259201049805, "rewards/margins": 0.03684981167316437, "rewards/rejected": -0.2329324334859848, "step": 10 }, { "epoch": 0.017336485421591805, "grad_norm": 0.7679106593132019, "learning_rate": 3.1496062992125984e-07, "log_odds_chosen": 0.48609045147895813, "log_odds_ratio": -0.48330000042915344, "logits/chosen": -0.5869827270507812, "logits/rejected": -0.24110490083694458, "logps/chosen": -2.035675048828125, "logps/rejected": -2.4683971405029297, "loss": 2.2835, "nll_loss": 2.2351536750793457, "rewards/accuracies": 1.0, "rewards/chosen": -0.2035675048828125, "rewards/margins": 0.043272241950035095, "rewards/rejected": -0.2468397617340088, "step": 11 }, { "epoch": 0.018912529550827423, "grad_norm": 0.8531121015548706, "learning_rate": 3.464566929133858e-07, "log_odds_chosen": 0.5449747443199158, "log_odds_ratio": -0.461398184299469, "logits/chosen": -0.7053269147872925, "logits/rejected": -0.12370388209819794, "logps/chosen": -2.076099157333374, "logps/rejected": -2.565109968185425, "loss": 2.3324, "nll_loss": 2.286276340484619, "rewards/accuracies": 1.0, "rewards/chosen": -0.20760990679264069, "rewards/margins": 0.04890113323926926, "rewards/rejected": -0.25651103258132935, "step": 12 }, { "epoch": 0.02048857368006304, "grad_norm": 0.8578523397445679, "learning_rate": 3.779527559055118e-07, "log_odds_chosen": 0.5090824365615845, "log_odds_ratio": -0.47858256101608276, "logits/chosen": -0.7868019342422485, "logits/rejected": -0.07148027420043945, "logps/chosen": -1.9688328504562378, "logps/rejected": -2.4192914962768555, "loss": 2.2258, "nll_loss": 2.1779398918151855, "rewards/accuracies": 1.0, "rewards/chosen": -0.19688329100608826, "rewards/margins": 0.045045845210552216, "rewards/rejected": -0.24192912876605988, "step": 13 }, { "epoch": 0.02206461780929866, "grad_norm": 0.675309956073761, "learning_rate": 4.0944881889763777e-07, "log_odds_chosen": 0.3657957911491394, "log_odds_ratio": -0.5386834144592285, "logits/chosen": -0.6320536136627197, "logits/rejected": -0.3780551254749298, "logps/chosen": -1.9759397506713867, "logps/rejected": -2.300447463989258, "loss": 2.2257, "nll_loss": 2.171861171722412, "rewards/accuracies": 1.0, "rewards/chosen": -0.19759398698806763, "rewards/margins": 0.03245077282190323, "rewards/rejected": -0.23004476726055145, "step": 14 }, { "epoch": 0.02364066193853428, "grad_norm": 0.7733155488967896, "learning_rate": 4.4094488188976375e-07, "log_odds_chosen": 0.41122347116470337, "log_odds_ratio": -0.5111778378486633, "logits/chosen": -0.7448755502700806, "logits/rejected": -0.08966228365898132, "logps/chosen": -1.9629881381988525, "logps/rejected": -2.32719087600708, "loss": 2.2273, "nll_loss": 2.176145076751709, "rewards/accuracies": 1.0, "rewards/chosen": -0.19629880785942078, "rewards/margins": 0.03642028942704201, "rewards/rejected": -0.23271909356117249, "step": 15 }, { "epoch": 0.0252167060677699, "grad_norm": 0.7707588076591492, "learning_rate": 4.7244094488188973e-07, "log_odds_chosen": 0.4248766303062439, "log_odds_ratio": -0.5078074336051941, "logits/chosen": -0.6721003651618958, "logits/rejected": -0.19612114131450653, "logps/chosen": -1.9606484174728394, "logps/rejected": -2.336439609527588, "loss": 2.22, "nll_loss": 2.1692113876342773, "rewards/accuracies": 1.0, "rewards/chosen": -0.19606485962867737, "rewards/margins": 0.03757911175489426, "rewards/rejected": -0.23364394903182983, "step": 16 }, { "epoch": 0.026792750197005517, "grad_norm": 0.6388130784034729, "learning_rate": 5.039370078740158e-07, "log_odds_chosen": 0.6333746314048767, "log_odds_ratio": -0.43810510635375977, "logits/chosen": -0.5735284686088562, "logits/rejected": -0.31118011474609375, "logps/chosen": -1.9192208051681519, "logps/rejected": -2.4831156730651855, "loss": 2.1769, "nll_loss": 2.133085012435913, "rewards/accuracies": 1.0, "rewards/chosen": -0.19192209839820862, "rewards/margins": 0.056389469653367996, "rewards/rejected": -0.24831153452396393, "step": 17 }, { "epoch": 0.028368794326241134, "grad_norm": 0.7463440299034119, "learning_rate": 5.354330708661418e-07, "log_odds_chosen": 0.3546241819858551, "log_odds_ratio": -0.544340193271637, "logits/chosen": -0.6276163458824158, "logits/rejected": 0.05960750952363014, "logps/chosen": -1.9554523229599, "logps/rejected": -2.2666845321655273, "loss": 2.2226, "nll_loss": 2.168125629425049, "rewards/accuracies": 0.875, "rewards/chosen": -0.1955452263355255, "rewards/margins": 0.031123224645853043, "rewards/rejected": -0.22666846215724945, "step": 18 }, { "epoch": 0.029944838455476755, "grad_norm": 0.6933729648590088, "learning_rate": 5.669291338582677e-07, "log_odds_chosen": 0.6329823732376099, "log_odds_ratio": -0.4341467022895813, "logits/chosen": -0.753471851348877, "logits/rejected": -0.27794983983039856, "logps/chosen": -1.8416026830673218, "logps/rejected": -2.3966176509857178, "loss": 2.115, "nll_loss": 2.07161283493042, "rewards/accuracies": 1.0, "rewards/chosen": -0.1841602772474289, "rewards/margins": 0.05550149455666542, "rewards/rejected": -0.23966176807880402, "step": 19 }, { "epoch": 0.03152088258471237, "grad_norm": 0.7374937534332275, "learning_rate": 5.984251968503937e-07, "log_odds_chosen": 0.5668491125106812, "log_odds_ratio": -0.46694958209991455, "logits/chosen": -0.562913179397583, "logits/rejected": -0.25034084916114807, "logps/chosen": -1.995664119720459, "logps/rejected": -2.5026793479919434, "loss": 2.262, "nll_loss": 2.215284824371338, "rewards/accuracies": 0.875, "rewards/chosen": -0.19956642389297485, "rewards/margins": 0.05070152133703232, "rewards/rejected": -0.25026795268058777, "step": 20 }, { "epoch": 0.03309692671394799, "grad_norm": 0.75541090965271, "learning_rate": 6.299212598425197e-07, "log_odds_chosen": 0.35937565565109253, "log_odds_ratio": -0.5362535715103149, "logits/chosen": -0.65813809633255, "logits/rejected": -0.1279284954071045, "logps/chosen": -2.04636812210083, "logps/rejected": -2.3680672645568848, "loss": 2.3058, "nll_loss": 2.2522225379943848, "rewards/accuracies": 0.75, "rewards/chosen": -0.20463679730892181, "rewards/margins": 0.032169945538043976, "rewards/rejected": -0.2368067502975464, "step": 21 }, { "epoch": 0.03467297084318361, "grad_norm": 0.7607430219650269, "learning_rate": 6.614173228346457e-07, "log_odds_chosen": 0.36671358346939087, "log_odds_ratio": -0.5375123620033264, "logits/chosen": -0.7311565279960632, "logits/rejected": -0.08119023591279984, "logps/chosen": -1.9978885650634766, "logps/rejected": -2.3232674598693848, "loss": 2.2612, "nll_loss": 2.207477331161499, "rewards/accuracies": 0.875, "rewards/chosen": -0.1997888684272766, "rewards/margins": 0.03253789618611336, "rewards/rejected": -0.23232676088809967, "step": 22 }, { "epoch": 0.036249014972419225, "grad_norm": 0.7234435081481934, "learning_rate": 6.929133858267716e-07, "log_odds_chosen": 0.39767685532569885, "log_odds_ratio": -0.5170127153396606, "logits/chosen": -0.7005606293678284, "logits/rejected": -0.19814857840538025, "logps/chosen": -2.0349388122558594, "logps/rejected": -2.389338254928589, "loss": 2.2728, "nll_loss": 2.221050977706909, "rewards/accuracies": 1.0, "rewards/chosen": -0.2034938931465149, "rewards/margins": 0.03543993830680847, "rewards/rejected": -0.23893383145332336, "step": 23 }, { "epoch": 0.037825059101654845, "grad_norm": 0.7151917815208435, "learning_rate": 7.244094488188977e-07, "log_odds_chosen": 0.4232009947299957, "log_odds_ratio": -0.5118768215179443, "logits/chosen": -0.42520439624786377, "logits/rejected": -0.13911336660385132, "logps/chosen": -1.974929690361023, "logps/rejected": -2.349339723587036, "loss": 2.2336, "nll_loss": 2.1824424266815186, "rewards/accuracies": 1.0, "rewards/chosen": -0.19749295711517334, "rewards/margins": 0.03744099289178848, "rewards/rejected": -0.23493395745754242, "step": 24 }, { "epoch": 0.039401103230890466, "grad_norm": 0.7771602272987366, "learning_rate": 7.559055118110236e-07, "log_odds_chosen": 0.3602335751056671, "log_odds_ratio": -0.5373751521110535, "logits/chosen": -0.7194356918334961, "logits/rejected": -0.2223992645740509, "logps/chosen": -2.008389711380005, "logps/rejected": -2.332648277282715, "loss": 2.2619, "nll_loss": 2.208117961883545, "rewards/accuracies": 0.875, "rewards/chosen": -0.20083898305892944, "rewards/margins": 0.03242585435509682, "rewards/rejected": -0.23326483368873596, "step": 25 }, { "epoch": 0.04097714736012608, "grad_norm": 0.7429983615875244, "learning_rate": 7.874015748031496e-07, "log_odds_chosen": 0.46550512313842773, "log_odds_ratio": -0.4939187467098236, "logits/chosen": -0.8255457282066345, "logits/rejected": -0.3358853757381439, "logps/chosen": -1.954911708831787, "logps/rejected": -2.3684439659118652, "loss": 2.2146, "nll_loss": 2.1652259826660156, "rewards/accuracies": 1.0, "rewards/chosen": -0.19549117982387543, "rewards/margins": 0.041353195905685425, "rewards/rejected": -0.23684436082839966, "step": 26 }, { "epoch": 0.0425531914893617, "grad_norm": 0.7892085313796997, "learning_rate": 8.188976377952755e-07, "log_odds_chosen": 0.7098885774612427, "log_odds_ratio": -0.41376522183418274, "logits/chosen": -0.6670718789100647, "logits/rejected": -0.18330247700214386, "logps/chosen": -2.0041563510894775, "logps/rejected": -2.642397165298462, "loss": 2.2622, "nll_loss": 2.2208266258239746, "rewards/accuracies": 1.0, "rewards/chosen": -0.20041564106941223, "rewards/margins": 0.06382407993078232, "rewards/rejected": -0.26423972845077515, "step": 27 }, { "epoch": 0.04412923561859732, "grad_norm": 0.7302277684211731, "learning_rate": 8.503937007874016e-07, "log_odds_chosen": 0.4039226770401001, "log_odds_ratio": -0.5200110673904419, "logits/chosen": -0.6893348097801208, "logits/rejected": -0.09423862397670746, "logps/chosen": -1.9387571811676025, "logps/rejected": -2.295835494995117, "loss": 2.2167, "nll_loss": 2.1646535396575928, "rewards/accuracies": 0.875, "rewards/chosen": -0.19387571513652802, "rewards/margins": 0.03570783883333206, "rewards/rejected": -0.22958354651927948, "step": 28 }, { "epoch": 0.045705279747832936, "grad_norm": 0.6707211136817932, "learning_rate": 8.818897637795275e-07, "log_odds_chosen": 0.35679179430007935, "log_odds_ratio": -0.5352319478988647, "logits/chosen": -0.5461763739585876, "logits/rejected": -0.14010290801525116, "logps/chosen": -2.050537109375, "logps/rejected": -2.372122287750244, "loss": 2.3054, "nll_loss": 2.2518763542175293, "rewards/accuracies": 1.0, "rewards/chosen": -0.20505373179912567, "rewards/margins": 0.032158493995666504, "rewards/rejected": -0.23721222579479218, "step": 29 }, { "epoch": 0.04728132387706856, "grad_norm": 0.8279486894607544, "learning_rate": 9.133858267716535e-07, "log_odds_chosen": 0.5706252455711365, "log_odds_ratio": -0.45796477794647217, "logits/chosen": -0.6770057678222656, "logits/rejected": -0.2806178331375122, "logps/chosen": -2.0110249519348145, "logps/rejected": -2.521954298019409, "loss": 2.3011, "nll_loss": 2.2553153038024902, "rewards/accuracies": 0.875, "rewards/chosen": -0.20110251009464264, "rewards/margins": 0.05109292268753052, "rewards/rejected": -0.25219541788101196, "step": 30 }, { "epoch": 0.04885736800630418, "grad_norm": 0.7405450344085693, "learning_rate": 9.448818897637795e-07, "log_odds_chosen": 0.6274739503860474, "log_odds_ratio": -0.4411022663116455, "logits/chosen": -0.6367984414100647, "logits/rejected": 0.004103410989046097, "logps/chosen": -2.0546810626983643, "logps/rejected": -2.6223583221435547, "loss": 2.3002, "nll_loss": 2.2560579776763916, "rewards/accuracies": 1.0, "rewards/chosen": -0.20546811819076538, "rewards/margins": 0.056767746806144714, "rewards/rejected": -0.2622358202934265, "step": 31 }, { "epoch": 0.0504334121355398, "grad_norm": 0.638395369052887, "learning_rate": 9.763779527559055e-07, "log_odds_chosen": 0.6788234114646912, "log_odds_ratio": -0.42549028992652893, "logits/chosen": -0.570993185043335, "logits/rejected": -0.3356379568576813, "logps/chosen": -1.9266096353530884, "logps/rejected": -2.533726692199707, "loss": 2.1793, "nll_loss": 2.136709690093994, "rewards/accuracies": 1.0, "rewards/chosen": -0.19266097247600555, "rewards/margins": 0.060711681842803955, "rewards/rejected": -0.2533726394176483, "step": 32 }, { "epoch": 0.05200945626477541, "grad_norm": 0.6255950331687927, "learning_rate": 1.0078740157480315e-06, "log_odds_chosen": 0.5599108338356018, "log_odds_ratio": -0.4542999565601349, "logits/chosen": -0.5288444757461548, "logits/rejected": -0.19333161413669586, "logps/chosen": -1.9585925340652466, "logps/rejected": -2.4559030532836914, "loss": 2.2007, "nll_loss": 2.155308961868286, "rewards/accuracies": 1.0, "rewards/chosen": -0.19585925340652466, "rewards/margins": 0.04973103851079941, "rewards/rejected": -0.24559029936790466, "step": 33 }, { "epoch": 0.05358550039401103, "grad_norm": 0.6921458840370178, "learning_rate": 1.0393700787401573e-06, "log_odds_chosen": 0.3519511818885803, "log_odds_ratio": -0.5428643226623535, "logits/chosen": -0.536325216293335, "logits/rejected": -0.2403833270072937, "logps/chosen": -1.9013547897338867, "logps/rejected": -2.205166816711426, "loss": 2.1703, "nll_loss": 2.116063117980957, "rewards/accuracies": 1.0, "rewards/chosen": -0.19013547897338867, "rewards/margins": 0.030381204560399055, "rewards/rejected": -0.22051668167114258, "step": 34 }, { "epoch": 0.055161544523246654, "grad_norm": 0.8423165082931519, "learning_rate": 1.0708661417322836e-06, "log_odds_chosen": 0.42774710059165955, "log_odds_ratio": -0.5038249492645264, "logits/chosen": -0.7048341631889343, "logits/rejected": 0.0643918514251709, "logps/chosen": -2.080242872238159, "logps/rejected": -2.46270489692688, "loss": 2.3413, "nll_loss": 2.2909011840820312, "rewards/accuracies": 1.0, "rewards/chosen": -0.2080242931842804, "rewards/margins": 0.03824619948863983, "rewards/rejected": -0.24627049267292023, "step": 35 }, { "epoch": 0.05673758865248227, "grad_norm": 0.7374855875968933, "learning_rate": 1.1023622047244094e-06, "log_odds_chosen": 0.5830976963043213, "log_odds_ratio": -0.44582459330558777, "logits/chosen": -0.8039961457252502, "logits/rejected": -0.06987367570400238, "logps/chosen": -1.92990243434906, "logps/rejected": -2.4467267990112305, "loss": 2.1837, "nll_loss": 2.139101266860962, "rewards/accuracies": 1.0, "rewards/chosen": -0.192990243434906, "rewards/margins": 0.05168245732784271, "rewards/rejected": -0.2446727156639099, "step": 36 }, { "epoch": 0.05831363278171789, "grad_norm": 0.6995700001716614, "learning_rate": 1.1338582677165354e-06, "log_odds_chosen": 0.4410613477230072, "log_odds_ratio": -0.5076866149902344, "logits/chosen": -0.6389314532279968, "logits/rejected": -0.06876173615455627, "logps/chosen": -1.9238637685775757, "logps/rejected": -2.3091113567352295, "loss": 2.1937, "nll_loss": 2.1429271697998047, "rewards/accuracies": 0.875, "rewards/chosen": -0.19238635897636414, "rewards/margins": 0.03852475434541702, "rewards/rejected": -0.23091113567352295, "step": 37 }, { "epoch": 0.05988967691095351, "grad_norm": 0.7543565034866333, "learning_rate": 1.1653543307086612e-06, "log_odds_chosen": 0.3667663633823395, "log_odds_ratio": -0.5343418121337891, "logits/chosen": -0.7570109367370605, "logits/rejected": -0.23115745186805725, "logps/chosen": -1.921524167060852, "logps/rejected": -2.239858865737915, "loss": 2.1977, "nll_loss": 2.1442925930023193, "rewards/accuracies": 0.875, "rewards/chosen": -0.19215241074562073, "rewards/margins": 0.03183349221944809, "rewards/rejected": -0.22398591041564941, "step": 38 }, { "epoch": 0.061465721040189124, "grad_norm": 0.7876814007759094, "learning_rate": 1.1968503937007875e-06, "log_odds_chosen": 0.2030404657125473, "log_odds_ratio": -0.6065970659255981, "logits/chosen": -0.6633272171020508, "logits/rejected": -0.040516383945941925, "logps/chosen": -2.038562774658203, "logps/rejected": -2.220552444458008, "loss": 2.3176, "nll_loss": 2.256978988647461, "rewards/accuracies": 0.75, "rewards/chosen": -0.20385627448558807, "rewards/margins": 0.01819896697998047, "rewards/rejected": -0.22205524146556854, "step": 39 }, { "epoch": 0.06304176516942474, "grad_norm": 0.7621078491210938, "learning_rate": 1.2283464566929133e-06, "log_odds_chosen": 0.37610867619514465, "log_odds_ratio": -0.5284633040428162, "logits/chosen": -0.7413150668144226, "logits/rejected": -0.18302536010742188, "logps/chosen": -1.9204038381576538, "logps/rejected": -2.2485194206237793, "loss": 2.1889, "nll_loss": 2.1360361576080322, "rewards/accuracies": 1.0, "rewards/chosen": -0.19204038381576538, "rewards/margins": 0.03281155228614807, "rewards/rejected": -0.22485193610191345, "step": 40 }, { "epoch": 0.06461780929866036, "grad_norm": 0.6990500688552856, "learning_rate": 1.2598425196850393e-06, "log_odds_chosen": 0.27543094754219055, "log_odds_ratio": -0.5724075436592102, "logits/chosen": -0.6147856712341309, "logits/rejected": -0.09826792776584625, "logps/chosen": -1.9605408906936646, "logps/rejected": -2.205174446105957, "loss": 2.2093, "nll_loss": 2.152068614959717, "rewards/accuracies": 0.75, "rewards/chosen": -0.1960541009902954, "rewards/margins": 0.024463361129164696, "rewards/rejected": -0.22051745653152466, "step": 41 }, { "epoch": 0.06619385342789598, "grad_norm": 0.6998929381370544, "learning_rate": 1.2913385826771652e-06, "log_odds_chosen": 0.3089278042316437, "log_odds_ratio": -0.5570518374443054, "logits/chosen": -0.6978001594543457, "logits/rejected": -0.15154145658016205, "logps/chosen": -1.960097074508667, "logps/rejected": -2.2320406436920166, "loss": 2.2221, "nll_loss": 2.166372537612915, "rewards/accuracies": 0.875, "rewards/chosen": -0.19600971043109894, "rewards/margins": 0.02719433605670929, "rewards/rejected": -0.22320404648780823, "step": 42 }, { "epoch": 0.0677698975571316, "grad_norm": 0.6787785291671753, "learning_rate": 1.3228346456692914e-06, "log_odds_chosen": 0.16384947299957275, "log_odds_ratio": -0.6235592365264893, "logits/chosen": -0.5829145312309265, "logits/rejected": -0.14386498928070068, "logps/chosen": -1.978913426399231, "logps/rejected": -2.123264789581299, "loss": 2.2412, "nll_loss": 2.1788277626037598, "rewards/accuracies": 0.625, "rewards/chosen": -0.19789133965969086, "rewards/margins": 0.014435119926929474, "rewards/rejected": -0.21232648193836212, "step": 43 }, { "epoch": 0.06934594168636722, "grad_norm": 0.7161246538162231, "learning_rate": 1.3543307086614172e-06, "log_odds_chosen": 0.5076345801353455, "log_odds_ratio": -0.4807063937187195, "logits/chosen": -0.7080238461494446, "logits/rejected": 0.010522328317165375, "logps/chosen": -1.9329512119293213, "logps/rejected": -2.384503126144409, "loss": 2.1908, "nll_loss": 2.142735004425049, "rewards/accuracies": 1.0, "rewards/chosen": -0.19329513609409332, "rewards/margins": 0.045155204832553864, "rewards/rejected": -0.2384503185749054, "step": 44 }, { "epoch": 0.07092198581560284, "grad_norm": 0.6544040441513062, "learning_rate": 1.3858267716535433e-06, "log_odds_chosen": 0.5975862145423889, "log_odds_ratio": -0.4529675245285034, "logits/chosen": -0.5176110863685608, "logits/rejected": -0.06739248335361481, "logps/chosen": -1.9081940650939941, "logps/rejected": -2.438753843307495, "loss": 2.1874, "nll_loss": 2.1420602798461914, "rewards/accuracies": 0.875, "rewards/chosen": -0.1908193975687027, "rewards/margins": 0.05305597186088562, "rewards/rejected": -0.24387536942958832, "step": 45 }, { "epoch": 0.07249802994483845, "grad_norm": 0.7946521043777466, "learning_rate": 1.417322834645669e-06, "log_odds_chosen": 0.5836816430091858, "log_odds_ratio": -0.458423912525177, "logits/chosen": -0.7441750764846802, "logits/rejected": -0.32784658670425415, "logps/chosen": -1.9207260608673096, "logps/rejected": -2.439450740814209, "loss": 2.1926, "nll_loss": 2.14674973487854, "rewards/accuracies": 1.0, "rewards/chosen": -0.19207260012626648, "rewards/margins": 0.05187246948480606, "rewards/rejected": -0.24394509196281433, "step": 46 }, { "epoch": 0.07407407407407407, "grad_norm": 0.6708806753158569, "learning_rate": 1.4488188976377953e-06, "log_odds_chosen": 0.2689046561717987, "log_odds_ratio": -0.578476071357727, "logits/chosen": -0.7736371159553528, "logits/rejected": -0.19334951043128967, "logps/chosen": -1.9722037315368652, "logps/rejected": -2.2119808197021484, "loss": 2.2323, "nll_loss": 2.1744863986968994, "rewards/accuracies": 0.875, "rewards/chosen": -0.19722038507461548, "rewards/margins": 0.023977704346179962, "rewards/rejected": -0.22119809687137604, "step": 47 }, { "epoch": 0.07565011820330969, "grad_norm": 0.7817642688751221, "learning_rate": 1.4803149606299211e-06, "log_odds_chosen": 0.413425475358963, "log_odds_ratio": -0.5176951289176941, "logits/chosen": -0.5976810455322266, "logits/rejected": -0.1483435034751892, "logps/chosen": -2.023184061050415, "logps/rejected": -2.3940744400024414, "loss": 2.2969, "nll_loss": 2.2451674938201904, "rewards/accuracies": 1.0, "rewards/chosen": -0.20231840014457703, "rewards/margins": 0.03708904981613159, "rewards/rejected": -0.23940744996070862, "step": 48 }, { "epoch": 0.07722616233254531, "grad_norm": 0.631397008895874, "learning_rate": 1.5118110236220472e-06, "log_odds_chosen": 0.4730543792247772, "log_odds_ratio": -0.49180155992507935, "logits/chosen": -0.622490406036377, "logits/rejected": -0.015296130441129208, "logps/chosen": -1.9089866876602173, "logps/rejected": -2.3237972259521484, "loss": 2.1401, "nll_loss": 2.0909266471862793, "rewards/accuracies": 1.0, "rewards/chosen": -0.19089870154857635, "rewards/margins": 0.04148102179169655, "rewards/rejected": -0.2323797196149826, "step": 49 }, { "epoch": 0.07880220646178093, "grad_norm": 0.6548082232475281, "learning_rate": 1.543307086614173e-06, "log_odds_chosen": 0.2945685088634491, "log_odds_ratio": -0.5625656843185425, "logits/chosen": -0.638104259967804, "logits/rejected": -0.22048690915107727, "logps/chosen": -1.9748042821884155, "logps/rejected": -2.235903024673462, "loss": 2.2419, "nll_loss": 2.185655355453491, "rewards/accuracies": 1.0, "rewards/chosen": -0.19748042523860931, "rewards/margins": 0.026109864935278893, "rewards/rejected": -0.22359029948711395, "step": 50 }, { "epoch": 0.08037825059101655, "grad_norm": 0.6355348825454712, "learning_rate": 1.5748031496062992e-06, "log_odds_chosen": 0.6154743432998657, "log_odds_ratio": -0.4387228488922119, "logits/chosen": -0.5254096984863281, "logits/rejected": -0.08727583289146423, "logps/chosen": -1.8662408590316772, "logps/rejected": -2.4071710109710693, "loss": 2.1184, "nll_loss": 2.0745527744293213, "rewards/accuracies": 1.0, "rewards/chosen": -0.18662410974502563, "rewards/margins": 0.054093025624752045, "rewards/rejected": -0.24071712791919708, "step": 51 }, { "epoch": 0.08195429472025216, "grad_norm": 0.6476663947105408, "learning_rate": 1.6062992125984253e-06, "log_odds_chosen": 0.5350648760795593, "log_odds_ratio": -0.46677446365356445, "logits/chosen": -0.4835931360721588, "logits/rejected": -0.016967706382274628, "logps/chosen": -2.0277915000915527, "logps/rejected": -2.507091522216797, "loss": 2.2718, "nll_loss": 2.2251675128936768, "rewards/accuracies": 1.0, "rewards/chosen": -0.202779158949852, "rewards/margins": 0.04792997986078262, "rewards/rejected": -0.2507091462612152, "step": 52 }, { "epoch": 0.08353033884948778, "grad_norm": 0.718711793422699, "learning_rate": 1.637795275590551e-06, "log_odds_chosen": 0.3033553957939148, "log_odds_ratio": -0.5565884709358215, "logits/chosen": -0.5977045297622681, "logits/rejected": -0.13718965649604797, "logps/chosen": -1.9643840789794922, "logps/rejected": -2.230160713195801, "loss": 2.2233, "nll_loss": 2.1675939559936523, "rewards/accuracies": 1.0, "rewards/chosen": -0.19643841683864594, "rewards/margins": 0.026577647775411606, "rewards/rejected": -0.22301605343818665, "step": 53 }, { "epoch": 0.0851063829787234, "grad_norm": 0.6878601908683777, "learning_rate": 1.6692913385826771e-06, "log_odds_chosen": 0.30879032611846924, "log_odds_ratio": -0.5532358288764954, "logits/chosen": -0.6751337051391602, "logits/rejected": -0.14412644505500793, "logps/chosen": -1.903916835784912, "logps/rejected": -2.1728036403656006, "loss": 2.1706, "nll_loss": 2.1152570247650146, "rewards/accuracies": 1.0, "rewards/chosen": -0.1903916895389557, "rewards/margins": 0.026888679713010788, "rewards/rejected": -0.2172803431749344, "step": 54 }, { "epoch": 0.08668242710795902, "grad_norm": 0.7093151211738586, "learning_rate": 1.7007874015748031e-06, "log_odds_chosen": 0.42612695693969727, "log_odds_ratio": -0.5059034824371338, "logits/chosen": -0.6053857803344727, "logits/rejected": -0.21064752340316772, "logps/chosen": -1.9867243766784668, "logps/rejected": -2.36510968208313, "loss": 2.2557, "nll_loss": 2.205127716064453, "rewards/accuracies": 1.0, "rewards/chosen": -0.19867242872714996, "rewards/margins": 0.037838518619537354, "rewards/rejected": -0.23651094734668732, "step": 55 }, { "epoch": 0.08825847123719464, "grad_norm": 0.8093518614768982, "learning_rate": 1.7322834645669292e-06, "log_odds_chosen": 0.352740615606308, "log_odds_ratio": -0.5422862768173218, "logits/chosen": -0.5116597414016724, "logits/rejected": -0.0041604433208703995, "logps/chosen": -2.0882434844970703, "logps/rejected": -2.4050841331481934, "loss": 2.3456, "nll_loss": 2.291337728500366, "rewards/accuracies": 0.875, "rewards/chosen": -0.20882436633110046, "rewards/margins": 0.03168405592441559, "rewards/rejected": -0.24050842225551605, "step": 56 }, { "epoch": 0.08983451536643026, "grad_norm": 0.6063317656517029, "learning_rate": 1.763779527559055e-06, "log_odds_chosen": 0.37979212403297424, "log_odds_ratio": -0.5244497656822205, "logits/chosen": -0.5742859840393066, "logits/rejected": -0.12732906639575958, "logps/chosen": -1.9634265899658203, "logps/rejected": -2.2987239360809326, "loss": 2.2111, "nll_loss": 2.158637285232544, "rewards/accuracies": 1.0, "rewards/chosen": -0.19634266197681427, "rewards/margins": 0.03352972865104675, "rewards/rejected": -0.22987240552902222, "step": 57 }, { "epoch": 0.09141055949566587, "grad_norm": 0.7049276828765869, "learning_rate": 1.795275590551181e-06, "log_odds_chosen": 0.38489818572998047, "log_odds_ratio": -0.5272811651229858, "logits/chosen": -0.603106677532196, "logits/rejected": -0.32046759128570557, "logps/chosen": -1.931801199913025, "logps/rejected": -2.274812936782837, "loss": 2.1909, "nll_loss": 2.138176918029785, "rewards/accuracies": 1.0, "rewards/chosen": -0.193180114030838, "rewards/margins": 0.034301191568374634, "rewards/rejected": -0.22748132050037384, "step": 58 }, { "epoch": 0.09298660362490149, "grad_norm": 0.7724094390869141, "learning_rate": 1.826771653543307e-06, "log_odds_chosen": 0.6565301418304443, "log_odds_ratio": -0.4238013029098511, "logits/chosen": -0.5493816137313843, "logits/rejected": -0.04481405392289162, "logps/chosen": -2.03456711769104, "logps/rejected": -2.6273677349090576, "loss": 2.2666, "nll_loss": 2.2242276668548584, "rewards/accuracies": 1.0, "rewards/chosen": -0.20345671474933624, "rewards/margins": 0.05928007513284683, "rewards/rejected": -0.2627367675304413, "step": 59 }, { "epoch": 0.09456264775413711, "grad_norm": 0.7019063830375671, "learning_rate": 1.858267716535433e-06, "log_odds_chosen": 0.48238804936408997, "log_odds_ratio": -0.4877777695655823, "logits/chosen": -0.6007660627365112, "logits/rejected": -0.25237271189689636, "logps/chosen": -1.9639641046524048, "logps/rejected": -2.3943371772766113, "loss": 2.223, "nll_loss": 2.174217462539673, "rewards/accuracies": 1.0, "rewards/chosen": -0.19639641046524048, "rewards/margins": 0.04303732514381409, "rewards/rejected": -0.23943373560905457, "step": 60 }, { "epoch": 0.09613869188337273, "grad_norm": 0.7680268883705139, "learning_rate": 1.889763779527559e-06, "log_odds_chosen": 0.25518718361854553, "log_odds_ratio": -0.5833750367164612, "logits/chosen": -0.5432047247886658, "logits/rejected": -0.21941693127155304, "logps/chosen": -2.100229501724243, "logps/rejected": -2.330411195755005, "loss": 2.3455, "nll_loss": 2.2871248722076416, "rewards/accuracies": 0.75, "rewards/chosen": -0.2100229412317276, "rewards/margins": 0.023018185049295425, "rewards/rejected": -0.23304113745689392, "step": 61 }, { "epoch": 0.09771473601260836, "grad_norm": 0.8415902853012085, "learning_rate": 1.9212598425196847e-06, "log_odds_chosen": 0.3082142472267151, "log_odds_ratio": -0.5612497329711914, "logits/chosen": -0.6062051653862, "logits/rejected": -0.11909964680671692, "logps/chosen": -2.0706098079681396, "logps/rejected": -2.3465754985809326, "loss": 2.3287, "nll_loss": 2.272575616836548, "rewards/accuracies": 0.75, "rewards/chosen": -0.20706097781658173, "rewards/margins": 0.02759658358991146, "rewards/rejected": -0.23465755581855774, "step": 62 }, { "epoch": 0.09929078014184398, "grad_norm": 0.6543618440628052, "learning_rate": 1.952755905511811e-06, "log_odds_chosen": 0.5749139785766602, "log_odds_ratio": -0.45239800214767456, "logits/chosen": -0.7079123258590698, "logits/rejected": -0.2599683403968811, "logps/chosen": -1.8045680522918701, "logps/rejected": -2.300361394882202, "loss": 2.0753, "nll_loss": 2.030048370361328, "rewards/accuracies": 1.0, "rewards/chosen": -0.18045681715011597, "rewards/margins": 0.04957934468984604, "rewards/rejected": -0.23003613948822021, "step": 63 }, { "epoch": 0.1008668242710796, "grad_norm": 0.6648881435394287, "learning_rate": 1.9842519685039368e-06, "log_odds_chosen": 0.6830120086669922, "log_odds_ratio": -0.4217277765274048, "logits/chosen": -0.6593011617660522, "logits/rejected": -0.08247893303632736, "logps/chosen": -1.8293054103851318, "logps/rejected": -2.4285809993743896, "loss": 2.0953, "nll_loss": 2.0531651973724365, "rewards/accuracies": 1.0, "rewards/chosen": -0.18293055891990662, "rewards/margins": 0.059927552938461304, "rewards/rejected": -0.24285811185836792, "step": 64 }, { "epoch": 0.1024428684003152, "grad_norm": 0.6505405902862549, "learning_rate": 2.015748031496063e-06, "log_odds_chosen": 0.6114057898521423, "log_odds_ratio": -0.4410432279109955, "logits/chosen": -0.6462138295173645, "logits/rejected": -0.3533778786659241, "logps/chosen": -1.9271063804626465, "logps/rejected": -2.47088360786438, "loss": 2.1845, "nll_loss": 2.1404013633728027, "rewards/accuracies": 1.0, "rewards/chosen": -0.19271063804626465, "rewards/margins": 0.0543777197599411, "rewards/rejected": -0.24708837270736694, "step": 65 }, { "epoch": 0.10401891252955082, "grad_norm": 0.6485214829444885, "learning_rate": 2.047244094488189e-06, "log_odds_chosen": 0.452458918094635, "log_odds_ratio": -0.49970927834510803, "logits/chosen": -0.6230807900428772, "logits/rejected": -0.13434045016765594, "logps/chosen": -2.0179097652435303, "logps/rejected": -2.419290781021118, "loss": 2.2838, "nll_loss": 2.233837842941284, "rewards/accuracies": 0.875, "rewards/chosen": -0.20179098844528198, "rewards/margins": 0.04013810679316521, "rewards/rejected": -0.2419290840625763, "step": 66 }, { "epoch": 0.10559495665878645, "grad_norm": 0.6492588520050049, "learning_rate": 2.0787401574803147e-06, "log_odds_chosen": 0.5289919972419739, "log_odds_ratio": -0.4675235152244568, "logits/chosen": -0.534376859664917, "logits/rejected": -0.03799459710717201, "logps/chosen": -1.955706238746643, "logps/rejected": -2.4241867065429688, "loss": 2.1946, "nll_loss": 2.1478805541992188, "rewards/accuracies": 1.0, "rewards/chosen": -0.19557063281536102, "rewards/margins": 0.04684804379940033, "rewards/rejected": -0.24241869151592255, "step": 67 }, { "epoch": 0.10717100078802207, "grad_norm": 0.774642288684845, "learning_rate": 2.1102362204724405e-06, "log_odds_chosen": 0.41134878993034363, "log_odds_ratio": -0.5170325040817261, "logits/chosen": -0.6746619939804077, "logits/rejected": -0.185140922665596, "logps/chosen": -1.947751760482788, "logps/rejected": -2.311985969543457, "loss": 2.2474, "nll_loss": 2.1957201957702637, "rewards/accuracies": 0.875, "rewards/chosen": -0.19477517902851105, "rewards/margins": 0.036423418670892715, "rewards/rejected": -0.23119859397411346, "step": 68 }, { "epoch": 0.10874704491725769, "grad_norm": 0.8252844214439392, "learning_rate": 2.141732283464567e-06, "log_odds_chosen": 0.22346967458724976, "log_odds_ratio": -0.5949736833572388, "logits/chosen": -0.41877222061157227, "logits/rejected": -0.11722514033317566, "logps/chosen": -2.0319464206695557, "logps/rejected": -2.2292232513427734, "loss": 2.3097, "nll_loss": 2.250192403793335, "rewards/accuracies": 0.875, "rewards/chosen": -0.20319463312625885, "rewards/margins": 0.019727692008018494, "rewards/rejected": -0.22292232513427734, "step": 69 }, { "epoch": 0.11032308904649331, "grad_norm": 0.6770060658454895, "learning_rate": 2.173228346456693e-06, "log_odds_chosen": 0.4207773804664612, "log_odds_ratio": -0.5108780264854431, "logits/chosen": -0.5316891670227051, "logits/rejected": -0.10831936448812485, "logps/chosen": -1.9700641632080078, "logps/rejected": -2.342879056930542, "loss": 2.2395, "nll_loss": 2.188405752182007, "rewards/accuracies": 1.0, "rewards/chosen": -0.19700641930103302, "rewards/margins": 0.03728148713707924, "rewards/rejected": -0.23428791761398315, "step": 70 }, { "epoch": 0.11189913317572892, "grad_norm": 0.8300355672836304, "learning_rate": 2.204724409448819e-06, "log_odds_chosen": 0.5063481330871582, "log_odds_ratio": -0.48265108466148376, "logits/chosen": -0.7789384126663208, "logits/rejected": -0.24630165100097656, "logps/chosen": -1.9212646484375, "logps/rejected": -2.370161771774292, "loss": 2.1808, "nll_loss": 2.1325840950012207, "rewards/accuracies": 1.0, "rewards/chosen": -0.19212648272514343, "rewards/margins": 0.04488971084356308, "rewards/rejected": -0.23701618611812592, "step": 71 }, { "epoch": 0.11347517730496454, "grad_norm": 0.6843920946121216, "learning_rate": 2.2362204724409446e-06, "log_odds_chosen": 0.32779812812805176, "log_odds_ratio": -0.5560404062271118, "logits/chosen": -0.5591788291931152, "logits/rejected": -0.06292789429426193, "logps/chosen": -1.936835527420044, "logps/rejected": -2.2265937328338623, "loss": 2.1968, "nll_loss": 2.1412172317504883, "rewards/accuracies": 0.875, "rewards/chosen": -0.19368356466293335, "rewards/margins": 0.02897578477859497, "rewards/rejected": -0.22265934944152832, "step": 72 }, { "epoch": 0.11505122143420016, "grad_norm": 0.6691780090332031, "learning_rate": 2.267716535433071e-06, "log_odds_chosen": 0.3424806594848633, "log_odds_ratio": -0.5391549468040466, "logits/chosen": -0.5084943175315857, "logits/rejected": -0.18799816071987152, "logps/chosen": -1.982399821281433, "logps/rejected": -2.2857072353363037, "loss": 2.2447, "nll_loss": 2.190774917602539, "rewards/accuracies": 1.0, "rewards/chosen": -0.1982399970293045, "rewards/margins": 0.0303307194262743, "rewards/rejected": -0.22857069969177246, "step": 73 }, { "epoch": 0.11662726556343578, "grad_norm": 0.7013347744941711, "learning_rate": 2.2992125984251967e-06, "log_odds_chosen": 0.42650213837623596, "log_odds_ratio": -0.5057096481323242, "logits/chosen": -0.6094076037406921, "logits/rejected": -0.11771736294031143, "logps/chosen": -1.8428946733474731, "logps/rejected": -2.2101123332977295, "loss": 2.1276, "nll_loss": 2.0770435333251953, "rewards/accuracies": 1.0, "rewards/chosen": -0.18428948521614075, "rewards/margins": 0.03672178089618683, "rewards/rejected": -0.22101125121116638, "step": 74 }, { "epoch": 0.1182033096926714, "grad_norm": 0.6637840867042542, "learning_rate": 2.3307086614173225e-06, "log_odds_chosen": 0.5096178650856018, "log_odds_ratio": -0.476134717464447, "logits/chosen": -0.5619401335716248, "logits/rejected": -0.2677369713783264, "logps/chosen": -1.8805265426635742, "logps/rejected": -2.32961106300354, "loss": 2.1385, "nll_loss": 2.0909037590026855, "rewards/accuracies": 1.0, "rewards/chosen": -0.18805265426635742, "rewards/margins": 0.044908471405506134, "rewards/rejected": -0.23296113312244415, "step": 75 }, { "epoch": 0.11977935382190702, "grad_norm": 0.6581621170043945, "learning_rate": 2.3622047244094483e-06, "log_odds_chosen": 0.5206456184387207, "log_odds_ratio": -0.47490978240966797, "logits/chosen": -0.5504530072212219, "logits/rejected": -0.03281405568122864, "logps/chosen": -1.9286243915557861, "logps/rejected": -2.3910675048828125, "loss": 2.1717, "nll_loss": 2.124224901199341, "rewards/accuracies": 1.0, "rewards/chosen": -0.19286245107650757, "rewards/margins": 0.04624428227543831, "rewards/rejected": -0.23910671472549438, "step": 76 }, { "epoch": 0.12135539795114263, "grad_norm": 0.7063953876495361, "learning_rate": 2.393700787401575e-06, "log_odds_chosen": 0.4808962941169739, "log_odds_ratio": -0.49262717366218567, "logits/chosen": -0.5677655339241028, "logits/rejected": -0.21073025465011597, "logps/chosen": -1.9511951208114624, "logps/rejected": -2.377624988555908, "loss": 2.2192, "nll_loss": 2.169985771179199, "rewards/accuracies": 1.0, "rewards/chosen": -0.19511950016021729, "rewards/margins": 0.042642995715141296, "rewards/rejected": -0.23776251077651978, "step": 77 }, { "epoch": 0.12293144208037825, "grad_norm": 0.5751843452453613, "learning_rate": 2.425196850393701e-06, "log_odds_chosen": 0.3792150020599365, "log_odds_ratio": -0.5299723744392395, "logits/chosen": -0.37227052450180054, "logits/rejected": -0.4450679123401642, "logps/chosen": -2.0182228088378906, "logps/rejected": -2.355861186981201, "loss": 2.2537, "nll_loss": 2.200679302215576, "rewards/accuracies": 0.875, "rewards/chosen": -0.20182228088378906, "rewards/margins": 0.03376384079456329, "rewards/rejected": -0.23558615148067474, "step": 78 }, { "epoch": 0.12450748620961387, "grad_norm": 0.6594578623771667, "learning_rate": 2.4566929133858266e-06, "log_odds_chosen": 0.518637478351593, "log_odds_ratio": -0.47430098056793213, "logits/chosen": -0.4779280126094818, "logits/rejected": -0.2910279631614685, "logps/chosen": -1.9429047107696533, "logps/rejected": -2.402376174926758, "loss": 2.1903, "nll_loss": 2.1428279876708984, "rewards/accuracies": 1.0, "rewards/chosen": -0.19429044425487518, "rewards/margins": 0.04594714939594269, "rewards/rejected": -0.24023759365081787, "step": 79 }, { "epoch": 0.12608353033884948, "grad_norm": 0.6245352625846863, "learning_rate": 2.488188976377953e-06, "log_odds_chosen": 0.5585002303123474, "log_odds_ratio": -0.45803701877593994, "logits/chosen": -0.5769734978675842, "logits/rejected": -0.25027596950531006, "logps/chosen": -1.8669335842132568, "logps/rejected": -2.356663227081299, "loss": 2.1157, "nll_loss": 2.06986403465271, "rewards/accuracies": 1.0, "rewards/chosen": -0.18669337034225464, "rewards/margins": 0.048972949385643005, "rewards/rejected": -0.23566631972789764, "step": 80 }, { "epoch": 0.1276595744680851, "grad_norm": 0.5566908717155457, "learning_rate": 2.5196850393700787e-06, "log_odds_chosen": 0.3883778750896454, "log_odds_ratio": -0.5254943370819092, "logits/chosen": -0.42693546414375305, "logits/rejected": -0.2633028030395508, "logps/chosen": -1.93135666847229, "logps/rejected": -2.2755541801452637, "loss": 2.17, "nll_loss": 2.1174182891845703, "rewards/accuracies": 1.0, "rewards/chosen": -0.19313567876815796, "rewards/margins": 0.034419745206832886, "rewards/rejected": -0.22755542397499084, "step": 81 }, { "epoch": 0.12923561859732072, "grad_norm": 0.5781261324882507, "learning_rate": 2.5511811023622045e-06, "log_odds_chosen": 0.524163544178009, "log_odds_ratio": -0.4806976020336151, "logits/chosen": -0.4674437940120697, "logits/rejected": -0.23945724964141846, "logps/chosen": -1.8828051090240479, "logps/rejected": -2.3516387939453125, "loss": 2.1336, "nll_loss": 2.085569381713867, "rewards/accuracies": 0.875, "rewards/chosen": -0.18828049302101135, "rewards/margins": 0.046883389353752136, "rewards/rejected": -0.23516389727592468, "step": 82 }, { "epoch": 0.13081166272655634, "grad_norm": 0.6088637709617615, "learning_rate": 2.5826771653543303e-06, "log_odds_chosen": 0.36324411630630493, "log_odds_ratio": -0.5395435690879822, "logits/chosen": -0.39306196570396423, "logits/rejected": -0.1700359582901001, "logps/chosen": -1.9157882928848267, "logps/rejected": -2.237780809402466, "loss": 2.1604, "nll_loss": 2.106419563293457, "rewards/accuracies": 1.0, "rewards/chosen": -0.19157883524894714, "rewards/margins": 0.032199256122112274, "rewards/rejected": -0.22377808392047882, "step": 83 }, { "epoch": 0.13238770685579196, "grad_norm": 0.6803274750709534, "learning_rate": 2.6141732283464566e-06, "log_odds_chosen": 0.5524423122406006, "log_odds_ratio": -0.46847474575042725, "logits/chosen": -0.5545064210891724, "logits/rejected": -0.24267421662807465, "logps/chosen": -1.8639458417892456, "logps/rejected": -2.3503522872924805, "loss": 2.1458, "nll_loss": 2.0989749431610107, "rewards/accuracies": 1.0, "rewards/chosen": -0.1863945871591568, "rewards/margins": 0.048640646040439606, "rewards/rejected": -0.235035240650177, "step": 84 }, { "epoch": 0.13396375098502758, "grad_norm": 0.5811371207237244, "learning_rate": 2.645669291338583e-06, "log_odds_chosen": 0.5210408568382263, "log_odds_ratio": -0.46715638041496277, "logits/chosen": -0.3822883367538452, "logits/rejected": -0.1786729395389557, "logps/chosen": -1.9316679239273071, "logps/rejected": -2.3919014930725098, "loss": 2.1698, "nll_loss": 2.1230902671813965, "rewards/accuracies": 1.0, "rewards/chosen": -0.1931667923927307, "rewards/margins": 0.04602333903312683, "rewards/rejected": -0.23919013142585754, "step": 85 }, { "epoch": 0.1355397951142632, "grad_norm": 0.6602110266685486, "learning_rate": 2.6771653543307086e-06, "log_odds_chosen": 0.48973286151885986, "log_odds_ratio": -0.48310309648513794, "logits/chosen": -0.5846769213676453, "logits/rejected": -0.20886988937854767, "logps/chosen": -1.9184874296188354, "logps/rejected": -2.349517822265625, "loss": 2.1703, "nll_loss": 2.1220102310180664, "rewards/accuracies": 1.0, "rewards/chosen": -0.1918487399816513, "rewards/margins": 0.043103061616420746, "rewards/rejected": -0.23495177924633026, "step": 86 }, { "epoch": 0.13711583924349882, "grad_norm": 0.5744991302490234, "learning_rate": 2.7086614173228344e-06, "log_odds_chosen": 0.3423335552215576, "log_odds_ratio": -0.5400257110595703, "logits/chosen": -0.37106236815452576, "logits/rejected": -0.360477089881897, "logps/chosen": -1.922955870628357, "logps/rejected": -2.2213680744171143, "loss": 2.1639, "nll_loss": 2.109863042831421, "rewards/accuracies": 1.0, "rewards/chosen": -0.19229556620121002, "rewards/margins": 0.02984124794602394, "rewards/rejected": -0.22213682532310486, "step": 87 }, { "epoch": 0.13869188337273444, "grad_norm": 0.6963973045349121, "learning_rate": 2.7401574803149607e-06, "log_odds_chosen": 0.5418673753738403, "log_odds_ratio": -0.475824773311615, "logits/chosen": -0.6239266991615295, "logits/rejected": -0.2058069109916687, "logps/chosen": -1.848305583000183, "logps/rejected": -2.326037645339966, "loss": 2.1293, "nll_loss": 2.0817408561706543, "rewards/accuracies": 0.875, "rewards/chosen": -0.18483057618141174, "rewards/margins": 0.04777318611741066, "rewards/rejected": -0.2326037585735321, "step": 88 }, { "epoch": 0.14026792750197006, "grad_norm": 0.6552391648292542, "learning_rate": 2.7716535433070865e-06, "log_odds_chosen": 0.500076174736023, "log_odds_ratio": -0.4754161834716797, "logits/chosen": -0.44256362318992615, "logits/rejected": -0.20719635486602783, "logps/chosen": -1.9562758207321167, "logps/rejected": -2.3987882137298584, "loss": 2.2135, "nll_loss": 2.165970802307129, "rewards/accuracies": 1.0, "rewards/chosen": -0.1956275850534439, "rewards/margins": 0.04425125569105148, "rewards/rejected": -0.2398788183927536, "step": 89 }, { "epoch": 0.14184397163120568, "grad_norm": 0.6141228079795837, "learning_rate": 2.8031496062992123e-06, "log_odds_chosen": 0.428195595741272, "log_odds_ratio": -0.5067068934440613, "logits/chosen": -0.3649379312992096, "logits/rejected": -0.1602931022644043, "logps/chosen": -2.002263307571411, "logps/rejected": -2.3845441341400146, "loss": 2.2267, "nll_loss": 2.1760218143463135, "rewards/accuracies": 1.0, "rewards/chosen": -0.20022635161876678, "rewards/margins": 0.038228072226047516, "rewards/rejected": -0.2384544163942337, "step": 90 }, { "epoch": 0.1434200157604413, "grad_norm": 0.5524324774742126, "learning_rate": 2.834645669291338e-06, "log_odds_chosen": 0.4524117112159729, "log_odds_ratio": -0.49862101674079895, "logits/chosen": -0.36407744884490967, "logits/rejected": -0.2811731696128845, "logps/chosen": -1.8061617612838745, "logps/rejected": -2.198903799057007, "loss": 2.0471, "nll_loss": 1.997222900390625, "rewards/accuracies": 1.0, "rewards/chosen": -0.18061619997024536, "rewards/margins": 0.0392741933465004, "rewards/rejected": -0.21989038586616516, "step": 91 }, { "epoch": 0.1449960598896769, "grad_norm": 0.5921797752380371, "learning_rate": 2.8661417322834644e-06, "log_odds_chosen": 0.5024532675743103, "log_odds_ratio": -0.47816595435142517, "logits/chosen": -0.44370609521865845, "logits/rejected": -0.18724730610847473, "logps/chosen": -1.829555869102478, "logps/rejected": -2.2681233882904053, "loss": 2.0879, "nll_loss": 2.040083169937134, "rewards/accuracies": 1.0, "rewards/chosen": -0.18295560777187347, "rewards/margins": 0.04385674372315407, "rewards/rejected": -0.22681234776973724, "step": 92 }, { "epoch": 0.14657210401891252, "grad_norm": 0.628095805644989, "learning_rate": 2.8976377952755906e-06, "log_odds_chosen": 0.43953385949134827, "log_odds_ratio": -0.5222434401512146, "logits/chosen": -0.4026286005973816, "logits/rejected": -0.23625504970550537, "logps/chosen": -1.9340780973434448, "logps/rejected": -2.3274731636047363, "loss": 2.18, "nll_loss": 2.127760410308838, "rewards/accuracies": 0.75, "rewards/chosen": -0.19340780377388, "rewards/margins": 0.03933952748775482, "rewards/rejected": -0.23274733126163483, "step": 93 }, { "epoch": 0.14814814814814814, "grad_norm": 0.5658537149429321, "learning_rate": 2.9291338582677165e-06, "log_odds_chosen": 0.45617368817329407, "log_odds_ratio": -0.4992842674255371, "logits/chosen": -0.36675694584846497, "logits/rejected": -0.3160392642021179, "logps/chosen": -1.9161105155944824, "logps/rejected": -2.315382480621338, "loss": 2.1513, "nll_loss": 2.1013565063476562, "rewards/accuracies": 0.875, "rewards/chosen": -0.19161105155944824, "rewards/margins": 0.03992719575762749, "rewards/rejected": -0.23153826594352722, "step": 94 }, { "epoch": 0.14972419227738376, "grad_norm": 0.639642596244812, "learning_rate": 2.9606299212598423e-06, "log_odds_chosen": 0.5549046993255615, "log_odds_ratio": -0.463879257440567, "logits/chosen": -0.43291109800338745, "logits/rejected": -0.15716485679149628, "logps/chosen": -1.9124903678894043, "logps/rejected": -2.4041366577148438, "loss": 2.1648, "nll_loss": 2.1183886528015137, "rewards/accuracies": 1.0, "rewards/chosen": -0.1912490427494049, "rewards/margins": 0.049164604395627975, "rewards/rejected": -0.240413635969162, "step": 95 }, { "epoch": 0.15130023640661938, "grad_norm": 0.569778323173523, "learning_rate": 2.9921259842519685e-06, "log_odds_chosen": 0.5220546722412109, "log_odds_ratio": -0.46969637274742126, "logits/chosen": -0.42379647493362427, "logits/rejected": -0.3440685570240021, "logps/chosen": -1.9186266660690308, "logps/rejected": -2.3788628578186035, "loss": 2.141, "nll_loss": 2.0940771102905273, "rewards/accuracies": 1.0, "rewards/chosen": -0.19186267256736755, "rewards/margins": 0.04602360725402832, "rewards/rejected": -0.23788626492023468, "step": 96 }, { "epoch": 0.152876280535855, "grad_norm": 0.56700599193573, "learning_rate": 3.0236220472440943e-06, "log_odds_chosen": 0.350454717874527, "log_odds_ratio": -0.5345020294189453, "logits/chosen": -0.47989675402641296, "logits/rejected": -0.17767037451267242, "logps/chosen": -1.8971889019012451, "logps/rejected": -2.200305461883545, "loss": 2.1227, "nll_loss": 2.069289445877075, "rewards/accuracies": 1.0, "rewards/chosen": -0.18971890211105347, "rewards/margins": 0.03031165711581707, "rewards/rejected": -0.22003056108951569, "step": 97 }, { "epoch": 0.15445232466509062, "grad_norm": 0.5734896063804626, "learning_rate": 3.05511811023622e-06, "log_odds_chosen": 0.4143810570240021, "log_odds_ratio": -0.512945294380188, "logits/chosen": -0.31117746233940125, "logits/rejected": -0.24713104963302612, "logps/chosen": -2.0033698081970215, "logps/rejected": -2.3717055320739746, "loss": 2.2459, "nll_loss": 2.1946518421173096, "rewards/accuracies": 1.0, "rewards/chosen": -0.2003369927406311, "rewards/margins": 0.03683357313275337, "rewards/rejected": -0.23717054724693298, "step": 98 }, { "epoch": 0.15602836879432624, "grad_norm": 0.6178714036941528, "learning_rate": 3.086614173228346e-06, "log_odds_chosen": 0.24748294055461884, "log_odds_ratio": -0.5794985890388489, "logits/chosen": -0.5359123945236206, "logits/rejected": -0.3172164559364319, "logps/chosen": -1.911259651184082, "logps/rejected": -2.12546443939209, "loss": 2.1682, "nll_loss": 2.1102778911590576, "rewards/accuracies": 1.0, "rewards/chosen": -0.19112597405910492, "rewards/margins": 0.021420463919639587, "rewards/rejected": -0.2125464230775833, "step": 99 }, { "epoch": 0.15760441292356187, "grad_norm": 0.590636670589447, "learning_rate": 3.1181102362204722e-06, "log_odds_chosen": 0.36373111605644226, "log_odds_ratio": -0.533814549446106, "logits/chosen": -0.4497278034687042, "logits/rejected": -0.2689790427684784, "logps/chosen": -1.8333114385604858, "logps/rejected": -2.1512179374694824, "loss": 2.0894, "nll_loss": 2.036029815673828, "rewards/accuracies": 1.0, "rewards/chosen": -0.18333116173744202, "rewards/margins": 0.031790636479854584, "rewards/rejected": -0.2151218056678772, "step": 100 }, { "epoch": 0.15918045705279749, "grad_norm": 0.5497896075248718, "learning_rate": 3.1496062992125985e-06, "log_odds_chosen": 0.42619574069976807, "log_odds_ratio": -0.5083091855049133, "logits/chosen": -0.3605027198791504, "logits/rejected": -0.24103917181491852, "logps/chosen": -1.8285539150238037, "logps/rejected": -2.200834035873413, "loss": 2.0895, "nll_loss": 2.038670301437378, "rewards/accuracies": 1.0, "rewards/chosen": -0.18285538256168365, "rewards/margins": 0.03722800686955452, "rewards/rejected": -0.22008340060710907, "step": 101 }, { "epoch": 0.1607565011820331, "grad_norm": 0.5178012251853943, "learning_rate": 3.1811023622047243e-06, "log_odds_chosen": 0.43138545751571655, "log_odds_ratio": -0.5122407078742981, "logits/chosen": -0.3726266324520111, "logits/rejected": -0.37165510654449463, "logps/chosen": -1.7784022092819214, "logps/rejected": -2.1532044410705566, "loss": 2.0323, "nll_loss": 1.9810512065887451, "rewards/accuracies": 1.0, "rewards/chosen": -0.1778402179479599, "rewards/margins": 0.03748023882508278, "rewards/rejected": -0.21532045304775238, "step": 102 }, { "epoch": 0.16233254531126873, "grad_norm": 0.5599470734596252, "learning_rate": 3.2125984251968505e-06, "log_odds_chosen": 0.4818297326564789, "log_odds_ratio": -0.49377134442329407, "logits/chosen": -0.2876685857772827, "logits/rejected": -0.038474664092063904, "logps/chosen": -1.7715296745300293, "logps/rejected": -2.1862025260925293, "loss": 2.0313, "nll_loss": 1.9819062948226929, "rewards/accuracies": 0.875, "rewards/chosen": -0.17715296149253845, "rewards/margins": 0.04146728664636612, "rewards/rejected": -0.21862025558948517, "step": 103 }, { "epoch": 0.16390858944050432, "grad_norm": 0.5964917540550232, "learning_rate": 3.2440944881889763e-06, "log_odds_chosen": 0.4325031638145447, "log_odds_ratio": -0.5093069672584534, "logits/chosen": -0.23736125230789185, "logits/rejected": -0.06159596145153046, "logps/chosen": -1.9128376245498657, "logps/rejected": -2.294654369354248, "loss": 2.1641, "nll_loss": 2.113180637359619, "rewards/accuracies": 1.0, "rewards/chosen": -0.19128376245498657, "rewards/margins": 0.03818168863654137, "rewards/rejected": -0.22946545481681824, "step": 104 }, { "epoch": 0.16548463356973994, "grad_norm": 0.4927206039428711, "learning_rate": 3.275590551181102e-06, "log_odds_chosen": 0.5299558043479919, "log_odds_ratio": -0.4695979058742523, "logits/chosen": -0.377352774143219, "logits/rejected": -0.17640215158462524, "logps/chosen": -1.7906301021575928, "logps/rejected": -2.250798225402832, "loss": 2.0519, "nll_loss": 2.0049400329589844, "rewards/accuracies": 1.0, "rewards/chosen": -0.17906302213668823, "rewards/margins": 0.04601679742336273, "rewards/rejected": -0.22507980465888977, "step": 105 }, { "epoch": 0.16706067769897556, "grad_norm": 0.5032868385314941, "learning_rate": 3.307086614173228e-06, "log_odds_chosen": 0.4509121775627136, "log_odds_ratio": -0.5058165788650513, "logits/chosen": -0.35679227113723755, "logits/rejected": -0.1107356995344162, "logps/chosen": -1.8084826469421387, "logps/rejected": -2.190898895263672, "loss": 2.0546, "nll_loss": 2.003988742828369, "rewards/accuracies": 0.875, "rewards/chosen": -0.18084825575351715, "rewards/margins": 0.03824164718389511, "rewards/rejected": -0.21908989548683167, "step": 106 }, { "epoch": 0.16863672182821118, "grad_norm": 0.5300337672233582, "learning_rate": 3.3385826771653542e-06, "log_odds_chosen": 0.43685051798820496, "log_odds_ratio": -0.5093865394592285, "logits/chosen": -0.2651398479938507, "logits/rejected": -0.28625091910362244, "logps/chosen": -1.8772473335266113, "logps/rejected": -2.2612218856811523, "loss": 2.118, "nll_loss": 2.0670440196990967, "rewards/accuracies": 0.875, "rewards/chosen": -0.1877247393131256, "rewards/margins": 0.03839743137359619, "rewards/rejected": -0.226122185587883, "step": 107 }, { "epoch": 0.1702127659574468, "grad_norm": 0.49163365364074707, "learning_rate": 3.37007874015748e-06, "log_odds_chosen": 0.3183455765247345, "log_odds_ratio": -0.5563209056854248, "logits/chosen": -0.23882922530174255, "logits/rejected": -0.3078491985797882, "logps/chosen": -1.85860013961792, "logps/rejected": -2.1333258152008057, "loss": 2.102, "nll_loss": 2.0463998317718506, "rewards/accuracies": 0.875, "rewards/chosen": -0.1858600229024887, "rewards/margins": 0.02747257985174656, "rewards/rejected": -0.21333259344100952, "step": 108 }, { "epoch": 0.17178881008668243, "grad_norm": 0.5118012428283691, "learning_rate": 3.4015748031496063e-06, "log_odds_chosen": 0.4563853442668915, "log_odds_ratio": -0.49264228343963623, "logits/chosen": -0.16680516302585602, "logits/rejected": -0.2830784320831299, "logps/chosen": -1.9145252704620361, "logps/rejected": -2.3134219646453857, "loss": 2.1528, "nll_loss": 2.1035213470458984, "rewards/accuracies": 1.0, "rewards/chosen": -0.19145254790782928, "rewards/margins": 0.03988967090845108, "rewards/rejected": -0.23134221136569977, "step": 109 }, { "epoch": 0.17336485421591805, "grad_norm": 0.505682110786438, "learning_rate": 3.433070866141732e-06, "log_odds_chosen": 0.49830734729766846, "log_odds_ratio": -0.48064208030700684, "logits/chosen": -0.27441835403442383, "logits/rejected": -0.24226327240467072, "logps/chosen": -1.8005558252334595, "logps/rejected": -2.2267792224884033, "loss": 2.0182, "nll_loss": 1.9701097011566162, "rewards/accuracies": 1.0, "rewards/chosen": -0.18005558848381042, "rewards/margins": 0.04262235015630722, "rewards/rejected": -0.22267794609069824, "step": 110 }, { "epoch": 0.17494089834515367, "grad_norm": 0.5464332103729248, "learning_rate": 3.4645669291338583e-06, "log_odds_chosen": 0.5071738362312317, "log_odds_ratio": -0.4801686406135559, "logits/chosen": -0.23766781389713287, "logits/rejected": -0.30691930651664734, "logps/chosen": -1.902522087097168, "logps/rejected": -2.3450286388397217, "loss": 2.1483, "nll_loss": 2.10026478767395, "rewards/accuracies": 1.0, "rewards/chosen": -0.19025221467018127, "rewards/margins": 0.044250644743442535, "rewards/rejected": -0.2345028668642044, "step": 111 }, { "epoch": 0.1765169424743893, "grad_norm": 0.5223987102508545, "learning_rate": 3.496062992125984e-06, "log_odds_chosen": 0.3665584325790405, "log_odds_ratio": -0.5315839052200317, "logits/chosen": -0.20885254442691803, "logits/rejected": -0.12856575846672058, "logps/chosen": -1.9509872198104858, "logps/rejected": -2.271317481994629, "loss": 2.1848, "nll_loss": 2.131622314453125, "rewards/accuracies": 1.0, "rewards/chosen": -0.19509869813919067, "rewards/margins": 0.0320330373942852, "rewards/rejected": -0.22713173925876617, "step": 112 }, { "epoch": 0.1780929866036249, "grad_norm": 0.5140707492828369, "learning_rate": 3.52755905511811e-06, "log_odds_chosen": 0.38427114486694336, "log_odds_ratio": -0.526176929473877, "logits/chosen": -0.3687783181667328, "logits/rejected": -0.2154863178730011, "logps/chosen": -1.7950341701507568, "logps/rejected": -2.127782106399536, "loss": 2.0463, "nll_loss": 1.9936522245407104, "rewards/accuracies": 1.0, "rewards/chosen": -0.1795034110546112, "rewards/margins": 0.03327479213476181, "rewards/rejected": -0.21277819573879242, "step": 113 }, { "epoch": 0.17966903073286053, "grad_norm": 0.48041167855262756, "learning_rate": 3.559055118110236e-06, "log_odds_chosen": 0.2929834723472595, "log_odds_ratio": -0.5727202892303467, "logits/chosen": -0.11503149569034576, "logits/rejected": -0.22482499480247498, "logps/chosen": -1.921325922012329, "logps/rejected": -2.181408405303955, "loss": 2.167, "nll_loss": 2.1096832752227783, "rewards/accuracies": 0.875, "rewards/chosen": -0.1921325922012329, "rewards/margins": 0.026008253917098045, "rewards/rejected": -0.2181408405303955, "step": 114 }, { "epoch": 0.18124507486209615, "grad_norm": 0.483804315328598, "learning_rate": 3.590551181102362e-06, "log_odds_chosen": 0.46048107743263245, "log_odds_ratio": -0.4994431436061859, "logits/chosen": -0.21135865151882172, "logits/rejected": -0.4032437205314636, "logps/chosen": -1.8342138528823853, "logps/rejected": -2.2355825901031494, "loss": 2.0671, "nll_loss": 2.017176389694214, "rewards/accuracies": 0.875, "rewards/chosen": -0.18342137336730957, "rewards/margins": 0.040136873722076416, "rewards/rejected": -0.2235582321882248, "step": 115 }, { "epoch": 0.18282111899133174, "grad_norm": 0.4813881814479828, "learning_rate": 3.622047244094488e-06, "log_odds_chosen": 0.3711977005004883, "log_odds_ratio": -0.5412157773971558, "logits/chosen": -0.22255532443523407, "logits/rejected": -0.2175833135843277, "logps/chosen": -1.8901299238204956, "logps/rejected": -2.2157669067382812, "loss": 2.1143, "nll_loss": 2.060180187225342, "rewards/accuracies": 0.875, "rewards/chosen": -0.18901298940181732, "rewards/margins": 0.032563693821430206, "rewards/rejected": -0.22157667577266693, "step": 116 }, { "epoch": 0.18439716312056736, "grad_norm": 0.4870263934135437, "learning_rate": 3.653543307086614e-06, "log_odds_chosen": 0.526931881904602, "log_odds_ratio": -0.47742602229118347, "logits/chosen": -0.2706168293952942, "logits/rejected": -0.5030975341796875, "logps/chosen": -1.8207095861434937, "logps/rejected": -2.2792084217071533, "loss": 2.0652, "nll_loss": 2.0174200534820557, "rewards/accuracies": 0.875, "rewards/chosen": -0.18207095563411713, "rewards/margins": 0.045849889516830444, "rewards/rejected": -0.22792083024978638, "step": 117 }, { "epoch": 0.18597320724980299, "grad_norm": 0.47839877009391785, "learning_rate": 3.68503937007874e-06, "log_odds_chosen": 0.30926501750946045, "log_odds_ratio": -0.5605666637420654, "logits/chosen": -0.08142746239900589, "logits/rejected": -0.2830328047275543, "logps/chosen": -1.83827805519104, "logps/rejected": -2.1060383319854736, "loss": 2.0833, "nll_loss": 2.0272810459136963, "rewards/accuracies": 0.875, "rewards/chosen": -0.18382780253887177, "rewards/margins": 0.026776034384965897, "rewards/rejected": -0.21060383319854736, "step": 118 }, { "epoch": 0.1875492513790386, "grad_norm": 0.4611856937408447, "learning_rate": 3.716535433070866e-06, "log_odds_chosen": 0.30054840445518494, "log_odds_ratio": -0.5679630041122437, "logits/chosen": -0.13648174703121185, "logits/rejected": -0.2771826982498169, "logps/chosen": -1.8114385604858398, "logps/rejected": -2.073773145675659, "loss": 2.0738, "nll_loss": 2.016970157623291, "rewards/accuracies": 0.875, "rewards/chosen": -0.1811438798904419, "rewards/margins": 0.02623344026505947, "rewards/rejected": -0.20737731456756592, "step": 119 }, { "epoch": 0.18912529550827423, "grad_norm": 0.4521031677722931, "learning_rate": 3.748031496062992e-06, "log_odds_chosen": 0.6006217002868652, "log_odds_ratio": -0.44487234950065613, "logits/chosen": -0.18391111493110657, "logits/rejected": -0.23368988931179047, "logps/chosen": -1.713165283203125, "logps/rejected": -2.2281906604766846, "loss": 1.9444, "nll_loss": 1.8999552726745605, "rewards/accuracies": 1.0, "rewards/chosen": -0.17131653428077698, "rewards/margins": 0.05150254815816879, "rewards/rejected": -0.22281907498836517, "step": 120 }, { "epoch": 0.19070133963750985, "grad_norm": 0.4375765323638916, "learning_rate": 3.779527559055118e-06, "log_odds_chosen": 0.607134222984314, "log_odds_ratio": -0.4383259415626526, "logits/chosen": -0.2432423233985901, "logits/rejected": -0.42568087577819824, "logps/chosen": -1.8271077871322632, "logps/rejected": -2.3578097820281982, "loss": 2.0471, "nll_loss": 2.0032753944396973, "rewards/accuracies": 1.0, "rewards/chosen": -0.18271078169345856, "rewards/margins": 0.05307020992040634, "rewards/rejected": -0.2357809841632843, "step": 121 }, { "epoch": 0.19227738376674547, "grad_norm": 0.4764109253883362, "learning_rate": 3.8110236220472436e-06, "log_odds_chosen": 0.5355339050292969, "log_odds_ratio": -0.47745591402053833, "logits/chosen": -0.30387943983078003, "logits/rejected": -0.23297454416751862, "logps/chosen": -1.7462990283966064, "logps/rejected": -2.2124016284942627, "loss": 1.9695, "nll_loss": 1.9217469692230225, "rewards/accuracies": 0.875, "rewards/chosen": -0.17462992668151855, "rewards/margins": 0.04661024361848831, "rewards/rejected": -0.22124016284942627, "step": 122 }, { "epoch": 0.1938534278959811, "grad_norm": 0.4778152108192444, "learning_rate": 3.8425196850393695e-06, "log_odds_chosen": 0.39439237117767334, "log_odds_ratio": -0.5168735384941101, "logits/chosen": -0.06380043923854828, "logits/rejected": -0.3037663400173187, "logps/chosen": -1.8502298593521118, "logps/rejected": -2.1919362545013428, "loss": 2.0886, "nll_loss": 2.036864757537842, "rewards/accuracies": 1.0, "rewards/chosen": -0.1850229799747467, "rewards/margins": 0.03417064994573593, "rewards/rejected": -0.21919363737106323, "step": 123 }, { "epoch": 0.1954294720252167, "grad_norm": 0.42974886298179626, "learning_rate": 3.874015748031496e-06, "log_odds_chosen": 0.4634344279766083, "log_odds_ratio": -0.5001177787780762, "logits/chosen": -0.25694918632507324, "logits/rejected": -0.22004857659339905, "logps/chosen": -1.7307007312774658, "logps/rejected": -2.1188673973083496, "loss": 1.9529, "nll_loss": 1.9028429985046387, "rewards/accuracies": 1.0, "rewards/chosen": -0.17307007312774658, "rewards/margins": 0.03881664574146271, "rewards/rejected": -0.2118867039680481, "step": 124 }, { "epoch": 0.19700551615445233, "grad_norm": 0.4418664276599884, "learning_rate": 3.905511811023622e-06, "log_odds_chosen": 0.4357145130634308, "log_odds_ratio": -0.505855143070221, "logits/chosen": -0.15186086297035217, "logits/rejected": -0.4162241220474243, "logps/chosen": -1.7670007944107056, "logps/rejected": -2.140122890472412, "loss": 2.0107, "nll_loss": 1.9601045846939087, "rewards/accuracies": 1.0, "rewards/chosen": -0.17670010030269623, "rewards/margins": 0.03731219470500946, "rewards/rejected": -0.2140122801065445, "step": 125 }, { "epoch": 0.19858156028368795, "grad_norm": 0.47172582149505615, "learning_rate": 3.937007874015748e-06, "log_odds_chosen": 0.6300610899925232, "log_odds_ratio": -0.4334939122200012, "logits/chosen": -0.1414909064769745, "logits/rejected": -0.23909084498882294, "logps/chosen": -1.773337483406067, "logps/rejected": -2.3242154121398926, "loss": 1.998, "nll_loss": 1.9546327590942383, "rewards/accuracies": 1.0, "rewards/chosen": -0.1773337423801422, "rewards/margins": 0.05508778989315033, "rewards/rejected": -0.23242153227329254, "step": 126 }, { "epoch": 0.20015760441292357, "grad_norm": 0.4991217255592346, "learning_rate": 3.9685039370078736e-06, "log_odds_chosen": 0.4967314600944519, "log_odds_ratio": -0.4841119050979614, "logits/chosen": -0.11699728667736053, "logits/rejected": -0.20840948820114136, "logps/chosen": -1.8117221593856812, "logps/rejected": -2.245494842529297, "loss": 2.0711, "nll_loss": 2.0227043628692627, "rewards/accuracies": 1.0, "rewards/chosen": -0.1811722218990326, "rewards/margins": 0.04337725043296814, "rewards/rejected": -0.22454948723316193, "step": 127 }, { "epoch": 0.2017336485421592, "grad_norm": 0.45142993330955505, "learning_rate": 4e-06, "log_odds_chosen": 0.3186233639717102, "log_odds_ratio": -0.5520759224891663, "logits/chosen": -0.007777207065373659, "logits/rejected": -0.0923052504658699, "logps/chosen": -1.8868976831436157, "logps/rejected": -2.1636693477630615, "loss": 2.1041, "nll_loss": 2.0488626956939697, "rewards/accuracies": 0.75, "rewards/chosen": -0.18868978321552277, "rewards/margins": 0.02767716720700264, "rewards/rejected": -0.2163669317960739, "step": 128 }, { "epoch": 0.2033096926713948, "grad_norm": 0.4361562430858612, "learning_rate": 3.999992445477635e-06, "log_odds_chosen": 0.45227304100990295, "log_odds_ratio": -0.5005910396575928, "logits/chosen": -0.04890897497534752, "logits/rejected": -0.3455308675765991, "logps/chosen": -1.8800231218338013, "logps/rejected": -2.2768943309783936, "loss": 2.0833, "nll_loss": 2.0331974029541016, "rewards/accuracies": 0.875, "rewards/chosen": -0.1880023181438446, "rewards/margins": 0.03968711942434311, "rewards/rejected": -0.2276894450187683, "step": 129 }, { "epoch": 0.2048857368006304, "grad_norm": 0.43126383423805237, "learning_rate": 3.999969781967615e-06, "log_odds_chosen": 0.27507030963897705, "log_odds_ratio": -0.5684081315994263, "logits/chosen": -0.03611285984516144, "logits/rejected": -0.18150243163108826, "logps/chosen": -1.980247139930725, "logps/rejected": -2.222841501235962, "loss": 2.1813, "nll_loss": 2.1244537830352783, "rewards/accuracies": 1.0, "rewards/chosen": -0.198024719953537, "rewards/margins": 0.024259435012936592, "rewards/rejected": -0.22228413820266724, "step": 130 }, { "epoch": 0.20646178092986603, "grad_norm": 0.39808744192123413, "learning_rate": 3.99993200964115e-06, "log_odds_chosen": 0.37286853790283203, "log_odds_ratio": -0.5256955623626709, "logits/chosen": -0.13878293335437775, "logits/rejected": -0.38629403710365295, "logps/chosen": -1.8050813674926758, "logps/rejected": -2.1250405311584473, "loss": 2.0229, "nll_loss": 1.9703779220581055, "rewards/accuracies": 1.0, "rewards/chosen": -0.18050813674926758, "rewards/margins": 0.031995922327041626, "rewards/rejected": -0.2125040590763092, "step": 131 }, { "epoch": 0.20803782505910165, "grad_norm": 0.49582305550575256, "learning_rate": 3.99987912878359e-06, "log_odds_chosen": 0.1634901463985443, "log_odds_ratio": -0.6200548410415649, "logits/chosen": -0.09574344009160995, "logits/rejected": -0.01687694527208805, "logps/chosen": -1.7705621719360352, "logps/rejected": -1.9035704135894775, "loss": 2.0395, "nll_loss": 1.977489709854126, "rewards/accuracies": 0.75, "rewards/chosen": -0.1770562380552292, "rewards/margins": 0.013300813734531403, "rewards/rejected": -0.19035704433918, "step": 132 }, { "epoch": 0.20961386918833727, "grad_norm": 0.42744600772857666, "learning_rate": 3.999811139794429e-06, "log_odds_chosen": 0.3788083493709564, "log_odds_ratio": -0.5247887372970581, "logits/chosen": -0.06136152893304825, "logits/rejected": -0.26565802097320557, "logps/chosen": -1.8101221323013306, "logps/rejected": -2.135667324066162, "loss": 2.0189, "nll_loss": 1.9664689302444458, "rewards/accuracies": 1.0, "rewards/chosen": -0.18101221323013306, "rewards/margins": 0.032554514706134796, "rewards/rejected": -0.21356670558452606, "step": 133 }, { "epoch": 0.2111899133175729, "grad_norm": 0.4283032715320587, "learning_rate": 3.999728043187288e-06, "log_odds_chosen": 0.35587507486343384, "log_odds_ratio": -0.5397139191627502, "logits/chosen": -0.04053102061152458, "logits/rejected": -0.2791404724121094, "logps/chosen": -1.8541502952575684, "logps/rejected": -2.164419412612915, "loss": 2.0887, "nll_loss": 2.0347094535827637, "rewards/accuracies": 0.875, "rewards/chosen": -0.18541501462459564, "rewards/margins": 0.03102692775428295, "rewards/rejected": -0.21644194424152374, "step": 134 }, { "epoch": 0.2127659574468085, "grad_norm": 0.42146554589271545, "learning_rate": 3.999629839589922e-06, "log_odds_chosen": 0.22164756059646606, "log_odds_ratio": -0.5917123556137085, "logits/chosen": 0.012096976861357689, "logits/rejected": -0.4592039883136749, "logps/chosen": -1.9410529136657715, "logps/rejected": -2.1342885494232178, "loss": 2.1714, "nll_loss": 2.1121792793273926, "rewards/accuracies": 0.875, "rewards/chosen": -0.19410529732704163, "rewards/margins": 0.01932355761528015, "rewards/rejected": -0.21342885494232178, "step": 135 }, { "epoch": 0.21434200157604413, "grad_norm": 0.4338121712207794, "learning_rate": 3.999516529744215e-06, "log_odds_chosen": 0.3292469382286072, "log_odds_ratio": -0.5459690690040588, "logits/chosen": 0.07608616352081299, "logits/rejected": -0.40737682580947876, "logps/chosen": -1.8194847106933594, "logps/rejected": -2.10274600982666, "loss": 2.0403, "nll_loss": 1.9856888055801392, "rewards/accuracies": 1.0, "rewards/chosen": -0.1819484829902649, "rewards/margins": 0.02832612209022045, "rewards/rejected": -0.2102746069431305, "step": 136 }, { "epoch": 0.21591804570527975, "grad_norm": 0.3987630605697632, "learning_rate": 3.999388114506166e-06, "log_odds_chosen": 0.4010235667228699, "log_odds_ratio": -0.518801212310791, "logits/chosen": -0.046873223036527634, "logits/rejected": -0.4238424599170685, "logps/chosen": -1.7170600891113281, "logps/rejected": -2.0600481033325195, "loss": 1.931, "nll_loss": 1.8791390657424927, "rewards/accuracies": 1.0, "rewards/chosen": -0.17170599102973938, "rewards/margins": 0.034298814833164215, "rewards/rejected": -0.206004798412323, "step": 137 }, { "epoch": 0.21749408983451538, "grad_norm": 0.4006027579307556, "learning_rate": 3.999244594845892e-06, "log_odds_chosen": 0.25786924362182617, "log_odds_ratio": -0.5790534019470215, "logits/chosen": -0.023555610328912735, "logits/rejected": -0.35695680975914, "logps/chosen": -1.7636680603027344, "logps/rejected": -1.985347032546997, "loss": 1.9912, "nll_loss": 1.9332479238510132, "rewards/accuracies": 0.875, "rewards/chosen": -0.17636680603027344, "rewards/margins": 0.02216789685189724, "rewards/rejected": -0.19853469729423523, "step": 138 }, { "epoch": 0.219070133963751, "grad_norm": 0.4049820303916931, "learning_rate": 3.999085971847616e-06, "log_odds_chosen": 0.24562738835811615, "log_odds_ratio": -0.58580482006073, "logits/chosen": -0.012129198759794235, "logits/rejected": -0.512136697769165, "logps/chosen": -1.8218495845794678, "logps/rejected": -2.0348801612854004, "loss": 2.0389, "nll_loss": 1.980314016342163, "rewards/accuracies": 0.875, "rewards/chosen": -0.18218494951725006, "rewards/margins": 0.02130305953323841, "rewards/rejected": -0.20348800718784332, "step": 139 }, { "epoch": 0.22064617809298662, "grad_norm": 0.3915075361728668, "learning_rate": 3.998912246709658e-06, "log_odds_chosen": 0.3664124310016632, "log_odds_ratio": -0.5369927287101746, "logits/chosen": -0.08141148090362549, "logits/rejected": -0.4445778429508209, "logps/chosen": -1.7210732698440552, "logps/rejected": -2.033111572265625, "loss": 1.9395, "nll_loss": 1.885791301727295, "rewards/accuracies": 0.875, "rewards/chosen": -0.17210730910301208, "rewards/margins": 0.0312038566917181, "rewards/rejected": -0.20331117510795593, "step": 140 }, { "epoch": 0.2222222222222222, "grad_norm": 0.42792966961860657, "learning_rate": 3.9987234207444295e-06, "log_odds_chosen": 0.3271371126174927, "log_odds_ratio": -0.5472476482391357, "logits/chosen": -0.05070888251066208, "logits/rejected": -0.30135494470596313, "logps/chosen": -1.817905068397522, "logps/rejected": -2.099316358566284, "loss": 2.0393, "nll_loss": 1.984588384628296, "rewards/accuracies": 1.0, "rewards/chosen": -0.1817905157804489, "rewards/margins": 0.028141150251030922, "rewards/rejected": -0.2099316567182541, "step": 141 }, { "epoch": 0.22379826635145783, "grad_norm": 0.40370792150497437, "learning_rate": 3.998519495378419e-06, "log_odds_chosen": 0.34052586555480957, "log_odds_ratio": -0.5426381826400757, "logits/chosen": 0.0529387891292572, "logits/rejected": -0.22841407358646393, "logps/chosen": -1.896314024925232, "logps/rejected": -2.194129705429077, "loss": 2.0889, "nll_loss": 2.0345916748046875, "rewards/accuracies": 1.0, "rewards/chosen": -0.189631387591362, "rewards/margins": 0.029781583696603775, "rewards/rejected": -0.21941299736499786, "step": 142 }, { "epoch": 0.22537431048069345, "grad_norm": 0.36527448892593384, "learning_rate": 3.998300472152187e-06, "log_odds_chosen": 0.3244752287864685, "log_odds_ratio": -0.5529875755310059, "logits/chosen": -0.07975448668003082, "logits/rejected": -0.5476751923561096, "logps/chosen": -1.6814563274383545, "logps/rejected": -1.9556654691696167, "loss": 1.9047, "nll_loss": 1.8494292497634888, "rewards/accuracies": 0.875, "rewards/chosen": -0.16814564168453217, "rewards/margins": 0.02742091566324234, "rewards/rejected": -0.1955665647983551, "step": 143 }, { "epoch": 0.22695035460992907, "grad_norm": 0.39946654438972473, "learning_rate": 3.998066352720347e-06, "log_odds_chosen": 0.4114699959754944, "log_odds_ratio": -0.5125808715820312, "logits/chosen": 0.10884220898151398, "logits/rejected": -0.2769298553466797, "logps/chosen": -1.807681679725647, "logps/rejected": -2.163328170776367, "loss": 2.0156, "nll_loss": 1.9643856287002563, "rewards/accuracies": 1.0, "rewards/chosen": -0.1807681769132614, "rewards/margins": 0.035564638674259186, "rewards/rejected": -0.2163328230381012, "step": 144 }, { "epoch": 0.2285263987391647, "grad_norm": 0.3883441388607025, "learning_rate": 3.997817138851562e-06, "log_odds_chosen": 0.3042789101600647, "log_odds_ratio": -0.5641068816184998, "logits/chosen": 0.047148481011390686, "logits/rejected": -0.2664666771888733, "logps/chosen": -1.894091248512268, "logps/rejected": -2.1613922119140625, "loss": 2.1037, "nll_loss": 2.0473220348358154, "rewards/accuracies": 0.875, "rewards/chosen": -0.18940910696983337, "rewards/margins": 0.0267301294952631, "rewards/rejected": -0.21613925695419312, "step": 145 }, { "epoch": 0.23010244286840031, "grad_norm": 0.3664163053035736, "learning_rate": 3.997552832428522e-06, "log_odds_chosen": 0.40218284726142883, "log_odds_ratio": -0.5150843262672424, "logits/chosen": -0.06997128576040268, "logits/rejected": -0.30114221572875977, "logps/chosen": -1.683500051498413, "logps/rejected": -2.0218000411987305, "loss": 1.9221, "nll_loss": 1.8706339597702026, "rewards/accuracies": 1.0, "rewards/chosen": -0.1683500111103058, "rewards/margins": 0.03382997214794159, "rewards/rejected": -0.20217998325824738, "step": 146 }, { "epoch": 0.23167848699763594, "grad_norm": 0.35738253593444824, "learning_rate": 3.9972734354479366e-06, "log_odds_chosen": 0.33341336250305176, "log_odds_ratio": -0.5498776435852051, "logits/chosen": 0.05102141201496124, "logits/rejected": -0.588348925113678, "logps/chosen": -1.6781635284423828, "logps/rejected": -1.957244634628296, "loss": 1.9026, "nll_loss": 1.84757661819458, "rewards/accuracies": 0.875, "rewards/chosen": -0.1678163707256317, "rewards/margins": 0.027908099815249443, "rewards/rejected": -0.1957244724035263, "step": 147 }, { "epoch": 0.23325453112687156, "grad_norm": 0.40279892086982727, "learning_rate": 3.996978950020517e-06, "log_odds_chosen": 0.21276284754276276, "log_odds_ratio": -0.6048003435134888, "logits/chosen": 0.1419999599456787, "logits/rejected": -0.28115570545196533, "logps/chosen": -1.8639137744903564, "logps/rejected": -2.0437545776367188, "loss": 2.0862, "nll_loss": 2.0256824493408203, "rewards/accuracies": 0.75, "rewards/chosen": -0.18639138340950012, "rewards/margins": 0.01798408292233944, "rewards/rejected": -0.2043754756450653, "step": 148 }, { "epoch": 0.23483057525610718, "grad_norm": 0.35266202688217163, "learning_rate": 3.996669378370959e-06, "log_odds_chosen": 0.5316495299339294, "log_odds_ratio": -0.46872708201408386, "logits/chosen": 0.010169305838644505, "logits/rejected": -0.3486666679382324, "logps/chosen": -1.6488699913024902, "logps/rejected": -2.1011159420013428, "loss": 1.8663, "nll_loss": 1.8194066286087036, "rewards/accuracies": 1.0, "rewards/chosen": -0.16488701105117798, "rewards/margins": 0.045224592089653015, "rewards/rejected": -0.2101115882396698, "step": 149 }, { "epoch": 0.2364066193853428, "grad_norm": 0.39699894189834595, "learning_rate": 3.996344722837929e-06, "log_odds_chosen": 0.49894896149635315, "log_odds_ratio": -0.4788591265678406, "logits/chosen": -0.0008517892565578222, "logits/rejected": -0.31570884585380554, "logps/chosen": -1.8067049980163574, "logps/rejected": -2.239896535873413, "loss": 2.0051, "nll_loss": 1.95723295211792, "rewards/accuracies": 1.0, "rewards/chosen": -0.18067049980163574, "rewards/margins": 0.043319158256053925, "rewards/rejected": -0.22398965060710907, "step": 150 }, { "epoch": 0.23798266351457842, "grad_norm": 0.33054688572883606, "learning_rate": 3.996004985874043e-06, "log_odds_chosen": 0.2428944855928421, "log_odds_ratio": -0.5818551778793335, "logits/chosen": 0.11997652053833008, "logits/rejected": -0.2498077005147934, "logps/chosen": -1.698814868927002, "logps/rejected": -1.9012809991836548, "loss": 1.9266, "nll_loss": 1.8684155941009521, "rewards/accuracies": 1.0, "rewards/chosen": -0.16988149285316467, "rewards/margins": 0.020246637985110283, "rewards/rejected": -0.19012810289859772, "step": 151 }, { "epoch": 0.23955870764381404, "grad_norm": 0.3768727481365204, "learning_rate": 3.995650170045855e-06, "log_odds_chosen": 0.28013092279434204, "log_odds_ratio": -0.5691770315170288, "logits/chosen": 0.045284271240234375, "logits/rejected": -0.3556942641735077, "logps/chosen": -1.7669368982315063, "logps/rejected": -2.0047860145568848, "loss": 1.9804, "nll_loss": 1.9234654903411865, "rewards/accuracies": 0.875, "rewards/chosen": -0.17669367790222168, "rewards/margins": 0.023784920573234558, "rewards/rejected": -0.20047861337661743, "step": 152 }, { "epoch": 0.24113475177304963, "grad_norm": 0.3691398501396179, "learning_rate": 3.995280278033825e-06, "log_odds_chosen": 0.20656853914260864, "log_odds_ratio": -0.6042188405990601, "logits/chosen": 0.11217048764228821, "logits/rejected": -0.24175406992435455, "logps/chosen": -1.8632197380065918, "logps/rejected": -2.039140462875366, "loss": 2.0785, "nll_loss": 2.0180578231811523, "rewards/accuracies": 0.75, "rewards/chosen": -0.18632197380065918, "rewards/margins": 0.017592042684555054, "rewards/rejected": -0.20391403138637543, "step": 153 }, { "epoch": 0.24271079590228525, "grad_norm": 0.35842734575271606, "learning_rate": 3.994895312632314e-06, "log_odds_chosen": 0.4981134533882141, "log_odds_ratio": -0.4802461564540863, "logits/chosen": 0.16929033398628235, "logits/rejected": -0.7549564838409424, "logps/chosen": -1.7397829294204712, "logps/rejected": -2.1666393280029297, "loss": 1.9514, "nll_loss": 1.903334140777588, "rewards/accuracies": 1.0, "rewards/chosen": -0.1739783138036728, "rewards/margins": 0.04268564283847809, "rewards/rejected": -0.21666395664215088, "step": 154 }, { "epoch": 0.24428684003152087, "grad_norm": 0.33668240904808044, "learning_rate": 3.994495276749549e-06, "log_odds_chosen": 0.47896263003349304, "log_odds_ratio": -0.48850810527801514, "logits/chosen": 0.07264027744531631, "logits/rejected": -0.33893775939941406, "logps/chosen": -1.7592413425445557, "logps/rejected": -2.172550916671753, "loss": 1.9568, "nll_loss": 1.9079933166503906, "rewards/accuracies": 1.0, "rewards/chosen": -0.175924152135849, "rewards/margins": 0.04133095592260361, "rewards/rejected": -0.217255100607872, "step": 155 }, { "epoch": 0.2458628841607565, "grad_norm": 0.34656822681427, "learning_rate": 3.994080173407612e-06, "log_odds_chosen": 0.23366691172122955, "log_odds_ratio": -0.5879085063934326, "logits/chosen": 0.1407233029603958, "logits/rejected": -0.3865904211997986, "logps/chosen": -1.7929620742797852, "logps/rejected": -1.9902442693710327, "loss": 1.9994, "nll_loss": 1.9406037330627441, "rewards/accuracies": 0.75, "rewards/chosen": -0.17929621040821075, "rewards/margins": 0.01972820609807968, "rewards/rejected": -0.19902442395687103, "step": 156 }, { "epoch": 0.24743892828999212, "grad_norm": 0.34390729665756226, "learning_rate": 3.993650005742409e-06, "log_odds_chosen": 0.3310392498970032, "log_odds_ratio": -0.546916127204895, "logits/chosen": 0.06716110557317734, "logits/rejected": -0.23844899237155914, "logps/chosen": -1.7643970251083374, "logps/rejected": -2.046977996826172, "loss": 1.9724, "nll_loss": 1.9176855087280273, "rewards/accuracies": 1.0, "rewards/chosen": -0.17643971741199493, "rewards/margins": 0.028258096426725388, "rewards/rejected": -0.20469780266284943, "step": 157 }, { "epoch": 0.24901497241922774, "grad_norm": 0.3150049149990082, "learning_rate": 3.993204777003652e-06, "log_odds_chosen": 0.337340384721756, "log_odds_ratio": -0.5445095896720886, "logits/chosen": 0.07445216178894043, "logits/rejected": -0.441582053899765, "logps/chosen": -1.6941965818405151, "logps/rejected": -1.980376124382019, "loss": 1.898, "nll_loss": 1.843545913696289, "rewards/accuracies": 1.0, "rewards/chosen": -0.16941964626312256, "rewards/margins": 0.02861795574426651, "rewards/rejected": -0.19803762435913086, "step": 158 }, { "epoch": 0.25059101654846333, "grad_norm": 0.3337211012840271, "learning_rate": 3.992744490554832e-06, "log_odds_chosen": 0.33180904388427734, "log_odds_ratio": -0.5461182594299316, "logits/chosen": 0.21238459646701813, "logits/rejected": -0.269389808177948, "logps/chosen": -1.7364294528961182, "logps/rejected": -2.0181472301483154, "loss": 1.9565, "nll_loss": 1.9018971920013428, "rewards/accuracies": 1.0, "rewards/chosen": -0.17364296317100525, "rewards/margins": 0.028171781450510025, "rewards/rejected": -0.20181472599506378, "step": 159 }, { "epoch": 0.25216706067769895, "grad_norm": 0.31171178817749023, "learning_rate": 3.992269149873192e-06, "log_odds_chosen": 0.32716354727745056, "log_odds_ratio": -0.547857403755188, "logits/chosen": 0.028445789590477943, "logits/rejected": -0.35958513617515564, "logps/chosen": -1.7089133262634277, "logps/rejected": -1.9816714525222778, "loss": 1.9143, "nll_loss": 1.8594882488250732, "rewards/accuracies": 1.0, "rewards/chosen": -0.17089134454727173, "rewards/margins": 0.02727578952908516, "rewards/rejected": -0.19816714525222778, "step": 160 }, { "epoch": 0.25374310480693457, "grad_norm": 0.30663299560546875, "learning_rate": 3.991778758549705e-06, "log_odds_chosen": 0.195327490568161, "log_odds_ratio": -0.6073517799377441, "logits/chosen": 0.2354024052619934, "logits/rejected": -0.21214549243450165, "logps/chosen": -1.8341491222381592, "logps/rejected": -2.0042359828948975, "loss": 2.023, "nll_loss": 1.9623081684112549, "rewards/accuracies": 0.75, "rewards/chosen": -0.18341490626335144, "rewards/margins": 0.017008693888783455, "rewards/rejected": -0.20042361319065094, "step": 161 }, { "epoch": 0.2553191489361702, "grad_norm": 0.31510376930236816, "learning_rate": 3.9912733202890415e-06, "log_odds_chosen": 0.32201409339904785, "log_odds_ratio": -0.548933744430542, "logits/chosen": 0.10407942533493042, "logits/rejected": -0.4005528688430786, "logps/chosen": -1.7570445537567139, "logps/rejected": -2.0296471118927, "loss": 1.96, "nll_loss": 1.905151128768921, "rewards/accuracies": 1.0, "rewards/chosen": -0.1757044792175293, "rewards/margins": 0.027260230854153633, "rewards/rejected": -0.20296470820903778, "step": 162 }, { "epoch": 0.2568951930654058, "grad_norm": 0.3028022050857544, "learning_rate": 3.990752838909548e-06, "log_odds_chosen": 0.1706855297088623, "log_odds_ratio": -0.6158415079116821, "logits/chosen": 0.1903667449951172, "logits/rejected": -0.2845366299152374, "logps/chosen": -1.8539068698883057, "logps/rejected": -1.9989856481552124, "loss": 2.0475, "nll_loss": 1.9859018325805664, "rewards/accuracies": 0.75, "rewards/chosen": -0.18539069592952728, "rewards/margins": 0.014507867395877838, "rewards/rejected": -0.19989855587482452, "step": 163 }, { "epoch": 0.25847123719464143, "grad_norm": 0.3319096267223358, "learning_rate": 3.990217318343213e-06, "log_odds_chosen": 0.5035750269889832, "log_odds_ratio": -0.4748569130897522, "logits/chosen": 0.15394604206085205, "logits/rejected": -0.5184996128082275, "logps/chosen": -1.921760082244873, "logps/rejected": -2.365640163421631, "loss": 2.1041, "nll_loss": 2.0565714836120605, "rewards/accuracies": 1.0, "rewards/chosen": -0.19217601418495178, "rewards/margins": 0.044387996196746826, "rewards/rejected": -0.2365640103816986, "step": 164 }, { "epoch": 0.26004728132387706, "grad_norm": 0.3104284405708313, "learning_rate": 3.989666762635637e-06, "log_odds_chosen": 0.2950212061405182, "log_odds_ratio": -0.5599942803382874, "logits/chosen": 0.1402396708726883, "logits/rejected": -0.27953556180000305, "logps/chosen": -1.767073631286621, "logps/rejected": -2.017169713973999, "loss": 1.9826, "nll_loss": 1.926632285118103, "rewards/accuracies": 1.0, "rewards/chosen": -0.17670737206935883, "rewards/margins": 0.025009607896208763, "rewards/rejected": -0.20171695947647095, "step": 165 }, { "epoch": 0.2616233254531127, "grad_norm": 0.30276185274124146, "learning_rate": 3.9891011759460056e-06, "log_odds_chosen": 0.3634149432182312, "log_odds_ratio": -0.5344185829162598, "logits/chosen": 0.1359172910451889, "logits/rejected": -0.3355481028556824, "logps/chosen": -1.75217866897583, "logps/rejected": -2.055960178375244, "loss": 1.958, "nll_loss": 1.9045321941375732, "rewards/accuracies": 1.0, "rewards/chosen": -0.1752178817987442, "rewards/margins": 0.03037814423441887, "rewards/rejected": -0.20559601485729218, "step": 166 }, { "epoch": 0.2631993695823483, "grad_norm": 0.2769591212272644, "learning_rate": 3.988520562547057e-06, "log_odds_chosen": 0.3933558762073517, "log_odds_ratio": -0.5260743498802185, "logits/chosen": 0.15155696868896484, "logits/rejected": -0.42779073119163513, "logps/chosen": -1.7146203517913818, "logps/rejected": -2.04492449760437, "loss": 1.9032, "nll_loss": 1.8506273031234741, "rewards/accuracies": 1.0, "rewards/chosen": -0.1714620441198349, "rewards/margins": 0.033030424267053604, "rewards/rejected": -0.2044924646615982, "step": 167 }, { "epoch": 0.2647754137115839, "grad_norm": 0.29901036620140076, "learning_rate": 3.987924926825047e-06, "log_odds_chosen": 0.40397408604621887, "log_odds_ratio": -0.5212303996086121, "logits/chosen": 0.12876684963703156, "logits/rejected": -0.32701608538627625, "logps/chosen": -1.790124773979187, "logps/rejected": -2.1377172470092773, "loss": 2.0015, "nll_loss": 1.9493852853775024, "rewards/accuracies": 0.875, "rewards/chosen": -0.1790124773979187, "rewards/margins": 0.03475925698876381, "rewards/rejected": -0.2137717306613922, "step": 168 }, { "epoch": 0.26635145784081954, "grad_norm": 0.2879612147808075, "learning_rate": 3.98731427327972e-06, "log_odds_chosen": 0.38767939805984497, "log_odds_ratio": -0.5189070105552673, "logits/chosen": 0.09615111351013184, "logits/rejected": -0.552105724811554, "logps/chosen": -1.7201054096221924, "logps/rejected": -2.047496795654297, "loss": 1.9089, "nll_loss": 1.8569788932800293, "rewards/accuracies": 1.0, "rewards/chosen": -0.17201054096221924, "rewards/margins": 0.03273913264274597, "rewards/rejected": -0.2047496885061264, "step": 169 }, { "epoch": 0.26792750197005516, "grad_norm": 0.2887536585330963, "learning_rate": 3.986688606524273e-06, "log_odds_chosen": 0.32007887959480286, "log_odds_ratio": -0.5568875670433044, "logits/chosen": 0.20907297730445862, "logits/rejected": -0.717089831829071, "logps/chosen": -1.7479276657104492, "logps/rejected": -2.0232884883880615, "loss": 1.9428, "nll_loss": 1.8871328830718994, "rewards/accuracies": 0.75, "rewards/chosen": -0.17479278147220612, "rewards/margins": 0.02753606252372265, "rewards/rejected": -0.20232883095741272, "step": 170 }, { "epoch": 0.2695035460992908, "grad_norm": 0.2829509675502777, "learning_rate": 3.986047931285315e-06, "log_odds_chosen": 0.493367999792099, "log_odds_ratio": -0.4808065891265869, "logits/chosen": 0.08703712373971939, "logits/rejected": -0.6400361657142639, "logps/chosen": -1.6581294536590576, "logps/rejected": -2.072545051574707, "loss": 1.848, "nll_loss": 1.7999612092971802, "rewards/accuracies": 1.0, "rewards/chosen": -0.16581295430660248, "rewards/margins": 0.041441574692726135, "rewards/rejected": -0.20725451409816742, "step": 171 }, { "epoch": 0.2710795902285264, "grad_norm": 0.27343836426734924, "learning_rate": 3.985392252402847e-06, "log_odds_chosen": 0.23880890011787415, "log_odds_ratio": -0.5835365653038025, "logits/chosen": 0.08178934454917908, "logits/rejected": -0.431425541639328, "logps/chosen": -1.655697226524353, "logps/rejected": -1.8529317378997803, "loss": 1.8557, "nll_loss": 1.7973511219024658, "rewards/accuracies": 1.0, "rewards/chosen": -0.1655697375535965, "rewards/margins": 0.0197234395891428, "rewards/rejected": -0.18529316782951355, "step": 172 }, { "epoch": 0.272655634357762, "grad_norm": 0.2862318754196167, "learning_rate": 3.984721574830206e-06, "log_odds_chosen": 0.27102503180503845, "log_odds_ratio": -0.5708762407302856, "logits/chosen": 0.14478133618831635, "logits/rejected": -0.510442316532135, "logps/chosen": -1.7600399255752563, "logps/rejected": -1.9898476600646973, "loss": 1.952, "nll_loss": 1.894890308380127, "rewards/accuracies": 1.0, "rewards/chosen": -0.17600399255752563, "rewards/margins": 0.02298077940940857, "rewards/rejected": -0.1989847719669342, "step": 173 }, { "epoch": 0.27423167848699764, "grad_norm": 0.26644009351730347, "learning_rate": 3.984035903634041e-06, "log_odds_chosen": 0.2790209650993347, "log_odds_ratio": -0.5701457262039185, "logits/chosen": 0.1971094161272049, "logits/rejected": -0.5762545466423035, "logps/chosen": -1.7363693714141846, "logps/rejected": -1.9710581302642822, "loss": 1.914, "nll_loss": 1.8569939136505127, "rewards/accuracies": 0.75, "rewards/chosen": -0.17363695800304413, "rewards/margins": 0.023468857631087303, "rewards/rejected": -0.19710581004619598, "step": 174 }, { "epoch": 0.27580772261623326, "grad_norm": 0.2771873474121094, "learning_rate": 3.983335243994273e-06, "log_odds_chosen": 0.37392759323120117, "log_odds_ratio": -0.5302526354789734, "logits/chosen": 0.21129730343818665, "logits/rejected": -0.5397756099700928, "logps/chosen": -1.726618766784668, "logps/rejected": -2.042837381362915, "loss": 1.9182, "nll_loss": 1.8651602268218994, "rewards/accuracies": 0.875, "rewards/chosen": -0.17266185581684113, "rewards/margins": 0.03162187710404396, "rewards/rejected": -0.20428375899791718, "step": 175 }, { "epoch": 0.2773837667454689, "grad_norm": 0.2980581223964691, "learning_rate": 3.982619601204049e-06, "log_odds_chosen": 0.2845771908760071, "log_odds_ratio": -0.565645158290863, "logits/chosen": 0.1371021270751953, "logits/rejected": -0.544967770576477, "logps/chosen": -1.784250259399414, "logps/rejected": -2.0266098976135254, "loss": 1.9752, "nll_loss": 1.9185881614685059, "rewards/accuracies": 1.0, "rewards/chosen": -0.17842502892017365, "rewards/margins": 0.02423596940934658, "rewards/rejected": -0.20266100764274597, "step": 176 }, { "epoch": 0.2789598108747045, "grad_norm": 0.28183117508888245, "learning_rate": 3.9818889806697085e-06, "log_odds_chosen": 0.37239906191825867, "log_odds_ratio": -0.5329021215438843, "logits/chosen": 0.24540948867797852, "logits/rejected": -0.537138044834137, "logps/chosen": -1.824477195739746, "logps/rejected": -2.1465721130371094, "loss": 1.9904, "nll_loss": 1.9371103048324585, "rewards/accuracies": 0.875, "rewards/chosen": -0.18244771659374237, "rewards/margins": 0.032209500670433044, "rewards/rejected": -0.21465720236301422, "step": 177 }, { "epoch": 0.2805358550039401, "grad_norm": 0.2755196988582611, "learning_rate": 3.98114338791074e-06, "log_odds_chosen": 0.4614989757537842, "log_odds_ratio": -0.5006829500198364, "logits/chosen": 0.11655398458242416, "logits/rejected": -0.4944975674152374, "logps/chosen": -1.6953446865081787, "logps/rejected": -2.088918685913086, "loss": 1.89, "nll_loss": 1.8398841619491577, "rewards/accuracies": 1.0, "rewards/chosen": -0.16953447461128235, "rewards/margins": 0.03935740143060684, "rewards/rejected": -0.2088918834924698, "step": 178 }, { "epoch": 0.28211189913317575, "grad_norm": 0.262363463640213, "learning_rate": 3.980382828559742e-06, "log_odds_chosen": 0.37132197618484497, "log_odds_ratio": -0.53047114610672, "logits/chosen": 0.1178860068321228, "logits/rejected": -0.5356312990188599, "logps/chosen": -1.7367223501205444, "logps/rejected": -2.051452159881592, "loss": 1.9086, "nll_loss": 1.8555084466934204, "rewards/accuracies": 0.875, "rewards/chosen": -0.17367224395275116, "rewards/margins": 0.03147297352552414, "rewards/rejected": -0.2051452100276947, "step": 179 }, { "epoch": 0.28368794326241137, "grad_norm": 0.2845723032951355, "learning_rate": 3.9796073083623774e-06, "log_odds_chosen": 0.33220580220222473, "log_odds_ratio": -0.5529859066009521, "logits/chosen": 0.13935938477516174, "logits/rejected": -0.5945897698402405, "logps/chosen": -1.805295705795288, "logps/rejected": -2.08857798576355, "loss": 1.9753, "nll_loss": 1.9200148582458496, "rewards/accuracies": 0.875, "rewards/chosen": -0.18052956461906433, "rewards/margins": 0.028328238055109978, "rewards/rejected": -0.20885780453681946, "step": 180 }, { "epoch": 0.285263987391647, "grad_norm": 0.2820357382297516, "learning_rate": 3.978816833177329e-06, "log_odds_chosen": 0.3633921444416046, "log_odds_ratio": -0.5330725312232971, "logits/chosen": 0.2247752994298935, "logits/rejected": -0.42544880509376526, "logps/chosen": -1.7320586442947388, "logps/rejected": -2.0406675338745117, "loss": 1.91, "nll_loss": 1.8566887378692627, "rewards/accuracies": 1.0, "rewards/chosen": -0.17320585250854492, "rewards/margins": 0.03086087293922901, "rewards/rejected": -0.20406673848628998, "step": 181 }, { "epoch": 0.2868400315208826, "grad_norm": 0.2795039713382721, "learning_rate": 3.978011408976261e-06, "log_odds_chosen": 0.2320139855146408, "log_odds_ratio": -0.5891825556755066, "logits/chosen": 0.19194500148296356, "logits/rejected": -0.5741883516311646, "logps/chosen": -1.750451683998108, "logps/rejected": -1.9439057111740112, "loss": 1.9425, "nll_loss": 1.8835428953170776, "rewards/accuracies": 0.75, "rewards/chosen": -0.1750451624393463, "rewards/margins": 0.019345413893461227, "rewards/rejected": -0.19439058005809784, "step": 182 }, { "epoch": 0.28841607565011823, "grad_norm": 0.2846316993236542, "learning_rate": 3.9771910418437674e-06, "log_odds_chosen": 0.3140004575252533, "log_odds_ratio": -0.54939204454422, "logits/chosen": 0.1681434065103531, "logits/rejected": -0.3001997172832489, "logps/chosen": -1.7808198928833008, "logps/rejected": -2.049476385116577, "loss": 1.956, "nll_loss": 1.9010967016220093, "rewards/accuracies": 1.0, "rewards/chosen": -0.17808198928833008, "rewards/margins": 0.026865659281611443, "rewards/rejected": -0.20494765043258667, "step": 183 }, { "epoch": 0.2899921197793538, "grad_norm": 0.2878468334674835, "learning_rate": 3.976355737977332e-06, "log_odds_chosen": 0.3773011863231659, "log_odds_ratio": -0.5303803086280823, "logits/chosen": 0.18860210478305817, "logits/rejected": -0.5023236274719238, "logps/chosen": -1.7276164293289185, "logps/rejected": -2.046980381011963, "loss": 1.9104, "nll_loss": 1.8573479652404785, "rewards/accuracies": 0.875, "rewards/chosen": -0.17276166379451752, "rewards/margins": 0.03193638473749161, "rewards/rejected": -0.20469802618026733, "step": 184 }, { "epoch": 0.2915681639085894, "grad_norm": 0.2533093988895416, "learning_rate": 3.975505503687274e-06, "log_odds_chosen": 0.3036259412765503, "log_odds_ratio": -0.5556970238685608, "logits/chosen": 0.23977245390415192, "logits/rejected": -0.28354203701019287, "logps/chosen": -1.7183681726455688, "logps/rejected": -1.9746772050857544, "loss": 1.8965, "nll_loss": 1.8409373760223389, "rewards/accuracies": 1.0, "rewards/chosen": -0.17183682322502136, "rewards/margins": 0.025630896911025047, "rewards/rejected": -0.19746771454811096, "step": 185 }, { "epoch": 0.29314420803782504, "grad_norm": 0.2554759383201599, "learning_rate": 3.974640345396708e-06, "log_odds_chosen": 0.458074152469635, "log_odds_ratio": -0.4967803359031677, "logits/chosen": 0.09675043821334839, "logits/rejected": -0.8243634104728699, "logps/chosen": -1.6910256147384644, "logps/rejected": -2.077892780303955, "loss": 1.8667, "nll_loss": 1.8170145750045776, "rewards/accuracies": 1.0, "rewards/chosen": -0.16910257935523987, "rewards/margins": 0.03868672996759415, "rewards/rejected": -0.20778930187225342, "step": 186 }, { "epoch": 0.29472025216706066, "grad_norm": 0.2935222387313843, "learning_rate": 3.9737602696414925e-06, "log_odds_chosen": 0.159349724650383, "log_odds_ratio": -0.6268662214279175, "logits/chosen": 0.21252146363258362, "logits/rejected": -0.6300286650657654, "logps/chosen": -1.78038489818573, "logps/rejected": -1.914283037185669, "loss": 1.9361, "nll_loss": 1.8734400272369385, "rewards/accuracies": 0.625, "rewards/chosen": -0.17803849279880524, "rewards/margins": 0.013389825820922852, "rewards/rejected": -0.1914283186197281, "step": 187 }, { "epoch": 0.2962962962962963, "grad_norm": 0.2445816546678543, "learning_rate": 3.972865283070179e-06, "log_odds_chosen": 0.38453128933906555, "log_odds_ratio": -0.5239704847335815, "logits/chosen": 0.16407378017902374, "logits/rejected": -0.7210597991943359, "logps/chosen": -1.6779310703277588, "logps/rejected": -2.003727436065674, "loss": 1.8617, "nll_loss": 1.8092904090881348, "rewards/accuracies": 1.0, "rewards/chosen": -0.16779311001300812, "rewards/margins": 0.032579630613327026, "rewards/rejected": -0.20037274062633514, "step": 188 }, { "epoch": 0.2978723404255319, "grad_norm": 0.2602602243423462, "learning_rate": 3.971955392443965e-06, "log_odds_chosen": 0.3589805066585541, "log_odds_ratio": -0.535193920135498, "logits/chosen": 0.19359144568443298, "logits/rejected": -0.5294786691665649, "logps/chosen": -1.7104607820510864, "logps/rejected": -2.017254114151001, "loss": 1.8819, "nll_loss": 1.8284274339675903, "rewards/accuracies": 1.0, "rewards/chosen": -0.17104607820510864, "rewards/margins": 0.03067934513092041, "rewards/rejected": -0.20172543823719025, "step": 189 }, { "epoch": 0.2994483845547675, "grad_norm": 0.2570762634277344, "learning_rate": 3.971030604636637e-06, "log_odds_chosen": 0.20797011256217957, "log_odds_ratio": -0.5995408892631531, "logits/chosen": 0.24219730496406555, "logits/rejected": -0.49577596783638, "logps/chosen": -1.726780891418457, "logps/rejected": -1.9002296924591064, "loss": 1.8985, "nll_loss": 1.8385004997253418, "rewards/accuracies": 0.875, "rewards/chosen": -0.17267809808254242, "rewards/margins": 0.017344871535897255, "rewards/rejected": -0.19002296030521393, "step": 190 }, { "epoch": 0.30102442868400314, "grad_norm": 0.27055835723876953, "learning_rate": 3.970090926634526e-06, "log_odds_chosen": 0.29205840826034546, "log_odds_ratio": -0.5609118938446045, "logits/chosen": 0.3021741211414337, "logits/rejected": -0.7257508635520935, "logps/chosen": -1.7830076217651367, "logps/rejected": -2.0307276248931885, "loss": 1.9615, "nll_loss": 1.9053698778152466, "rewards/accuracies": 1.0, "rewards/chosen": -0.17830076813697815, "rewards/margins": 0.024771984666585922, "rewards/rejected": -0.20307274162769318, "step": 191 }, { "epoch": 0.30260047281323876, "grad_norm": 0.27686426043510437, "learning_rate": 3.9691363655364526e-06, "log_odds_chosen": 0.3535913825035095, "log_odds_ratio": -0.5396946668624878, "logits/chosen": 0.19939753413200378, "logits/rejected": -0.527854859828949, "logps/chosen": -1.7547531127929688, "logps/rejected": -2.056049108505249, "loss": 1.9315, "nll_loss": 1.8775546550750732, "rewards/accuracies": 0.875, "rewards/chosen": -0.1754753142595291, "rewards/margins": 0.030129600316286087, "rewards/rejected": -0.2056049108505249, "step": 192 }, { "epoch": 0.3041765169424744, "grad_norm": 0.24814869463443756, "learning_rate": 3.968166928553666e-06, "log_odds_chosen": 0.29018789529800415, "log_odds_ratio": -0.5631506443023682, "logits/chosen": 0.23991963267326355, "logits/rejected": -0.6883436441421509, "logps/chosen": -1.708446741104126, "logps/rejected": -1.9508640766143799, "loss": 1.9016, "nll_loss": 1.8453017473220825, "rewards/accuracies": 0.875, "rewards/chosen": -0.1708446592092514, "rewards/margins": 0.02424173429608345, "rewards/rejected": -0.19508640468120575, "step": 193 }, { "epoch": 0.30575256107171, "grad_norm": 0.2546519637107849, "learning_rate": 3.967182623009804e-06, "log_odds_chosen": 0.42340725660324097, "log_odds_ratio": -0.5090901851654053, "logits/chosen": 0.18889212608337402, "logits/rejected": -0.5412212014198303, "logps/chosen": -1.7333446741104126, "logps/rejected": -2.0966084003448486, "loss": 1.8946, "nll_loss": 1.8436520099639893, "rewards/accuracies": 1.0, "rewards/chosen": -0.17333447933197021, "rewards/margins": 0.03632635623216629, "rewards/rejected": -0.2096608430147171, "step": 194 }, { "epoch": 0.3073286052009456, "grad_norm": 0.27097785472869873, "learning_rate": 3.966183456340821e-06, "log_odds_chosen": 0.19589565694332123, "log_odds_ratio": -0.6020736694335938, "logits/chosen": 0.23710918426513672, "logits/rejected": -0.5323060154914856, "logps/chosen": -1.7581830024719238, "logps/rejected": -1.9214140176773071, "loss": 1.9344, "nll_loss": 1.8741968870162964, "rewards/accuracies": 1.0, "rewards/chosen": -0.1758182942867279, "rewards/margins": 0.01632309891283512, "rewards/rejected": -0.19214141368865967, "step": 195 }, { "epoch": 0.30890464933018125, "grad_norm": 0.2723829448223114, "learning_rate": 3.965169436094947e-06, "log_odds_chosen": 0.35698461532592773, "log_odds_ratio": -0.5402282476425171, "logits/chosen": 0.22953951358795166, "logits/rejected": -0.668543815612793, "logps/chosen": -1.6529057025909424, "logps/rejected": -1.9540297985076904, "loss": 1.8363, "nll_loss": 1.7822985649108887, "rewards/accuracies": 0.875, "rewards/chosen": -0.16529057919979095, "rewards/margins": 0.030112413689494133, "rewards/rejected": -0.19540299475193024, "step": 196 }, { "epoch": 0.31048069345941687, "grad_norm": 0.26106390357017517, "learning_rate": 3.964140569932618e-06, "log_odds_chosen": 0.29018843173980713, "log_odds_ratio": -0.5803290009498596, "logits/chosen": 0.1857983022928238, "logits/rejected": -0.8773932456970215, "logps/chosen": -1.7773064374923706, "logps/rejected": -2.021932601928711, "loss": 1.9428, "nll_loss": 1.884739637374878, "rewards/accuracies": 0.75, "rewards/chosen": -0.17773064970970154, "rewards/margins": 0.02446262538433075, "rewards/rejected": -0.2021932750940323, "step": 197 }, { "epoch": 0.3120567375886525, "grad_norm": 0.25397989153862, "learning_rate": 3.9630968656264285e-06, "log_odds_chosen": 0.42598769068717957, "log_odds_ratio": -0.509061872959137, "logits/chosen": 0.21715356409549713, "logits/rejected": -0.4817379415035248, "logps/chosen": -1.7002034187316895, "logps/rejected": -2.0615005493164062, "loss": 1.8609, "nll_loss": 1.810002326965332, "rewards/accuracies": 1.0, "rewards/chosen": -0.17002034187316895, "rewards/margins": 0.036129724234342575, "rewards/rejected": -0.20615006983280182, "step": 198 }, { "epoch": 0.3136327817178881, "grad_norm": 0.26781895756721497, "learning_rate": 3.962038331061065e-06, "log_odds_chosen": 0.1461435854434967, "log_odds_ratio": -0.6345757246017456, "logits/chosen": 0.28389307856559753, "logits/rejected": -0.6472858786582947, "logps/chosen": -1.7552485466003418, "logps/rejected": -1.883392333984375, "loss": 1.9384, "nll_loss": 1.8749713897705078, "rewards/accuracies": 0.625, "rewards/chosen": -0.17552484571933746, "rewards/margins": 0.012814389541745186, "rewards/rejected": -0.1883392333984375, "step": 199 }, { "epoch": 0.31520882584712373, "grad_norm": 0.26174989342689514, "learning_rate": 3.96096497423325e-06, "log_odds_chosen": 0.26020583510398865, "log_odds_ratio": -0.5759395360946655, "logits/chosen": 0.3217354714870453, "logits/rejected": -0.2811959683895111, "logps/chosen": -1.6606853008270264, "logps/rejected": -1.8771640062332153, "loss": 1.8637, "nll_loss": 1.806121826171875, "rewards/accuracies": 1.0, "rewards/chosen": -0.16606852412223816, "rewards/margins": 0.021647876128554344, "rewards/rejected": -0.18771640956401825, "step": 200 }, { "epoch": 0.31678486997635935, "grad_norm": 0.24145232141017914, "learning_rate": 3.959876803251684e-06, "log_odds_chosen": 0.3890625238418579, "log_odds_ratio": -0.524219274520874, "logits/chosen": 0.1805240660905838, "logits/rejected": -0.5619311928749084, "logps/chosen": -1.6790175437927246, "logps/rejected": -2.00699782371521, "loss": 1.8352, "nll_loss": 1.782778263092041, "rewards/accuracies": 1.0, "rewards/chosen": -0.16790175437927246, "rewards/margins": 0.03279803693294525, "rewards/rejected": -0.2006998062133789, "step": 201 }, { "epoch": 0.31836091410559497, "grad_norm": 0.25001904368400574, "learning_rate": 3.958773826336977e-06, "log_odds_chosen": 0.27317214012145996, "log_odds_ratio": -0.5676698088645935, "logits/chosen": 0.27888408303260803, "logits/rejected": -0.415998250246048, "logps/chosen": -1.7196669578552246, "logps/rejected": -1.9488410949707031, "loss": 1.8963, "nll_loss": 1.8395037651062012, "rewards/accuracies": 1.0, "rewards/chosen": -0.17196668684482574, "rewards/margins": 0.02291741594672203, "rewards/rejected": -0.19488412141799927, "step": 202 }, { "epoch": 0.3199369582348306, "grad_norm": 0.2554585933685303, "learning_rate": 3.957656051821592e-06, "log_odds_chosen": 0.23165369033813477, "log_odds_ratio": -0.5892881751060486, "logits/chosen": 0.3021984100341797, "logits/rejected": -0.6948502063751221, "logps/chosen": -1.7531200647354126, "logps/rejected": -1.9506298303604126, "loss": 1.9028, "nll_loss": 1.8439098596572876, "rewards/accuracies": 0.875, "rewards/chosen": -0.17531201243400574, "rewards/margins": 0.019750984385609627, "rewards/rejected": -0.19506299495697021, "step": 203 }, { "epoch": 0.3215130023640662, "grad_norm": 0.26098933815956116, "learning_rate": 3.956523488149783e-06, "log_odds_chosen": 0.3122096061706543, "log_odds_ratio": -0.5535508394241333, "logits/chosen": 0.3232036828994751, "logits/rejected": -0.47777220606803894, "logps/chosen": -1.8531510829925537, "logps/rejected": -2.122044563293457, "loss": 2.0121, "nll_loss": 1.9567644596099854, "rewards/accuracies": 1.0, "rewards/chosen": -0.1853151172399521, "rewards/margins": 0.02688935585319996, "rewards/rejected": -0.2122044712305069, "step": 204 }, { "epoch": 0.32308904649330183, "grad_norm": 0.2530037462711334, "learning_rate": 3.9553761438775285e-06, "log_odds_chosen": 0.5448867678642273, "log_odds_ratio": -0.4623796045780182, "logits/chosen": 0.1774434596300125, "logits/rejected": -0.509428083896637, "logps/chosen": -1.5850623846054077, "logps/rejected": -2.0427145957946777, "loss": 1.747, "nll_loss": 1.7007546424865723, "rewards/accuracies": 1.0, "rewards/chosen": -0.15850622951984406, "rewards/margins": 0.04576525092124939, "rewards/rejected": -0.20427148044109344, "step": 205 }, { "epoch": 0.32466509062253746, "grad_norm": 0.23750121891498566, "learning_rate": 3.954214027672465e-06, "log_odds_chosen": 0.48206406831741333, "log_odds_ratio": -0.48811841011047363, "logits/chosen": 0.18673132359981537, "logits/rejected": -0.9018082022666931, "logps/chosen": -1.67806077003479, "logps/rejected": -2.0901315212249756, "loss": 1.8315, "nll_loss": 1.7826416492462158, "rewards/accuracies": 1.0, "rewards/chosen": -0.16780607402324677, "rewards/margins": 0.041207075119018555, "rewards/rejected": -0.2090131640434265, "step": 206 }, { "epoch": 0.3262411347517731, "grad_norm": 0.25891193747520447, "learning_rate": 3.953037148313825e-06, "log_odds_chosen": 0.28911662101745605, "log_odds_ratio": -0.5642296075820923, "logits/chosen": 0.27622562646865845, "logits/rejected": -0.581394612789154, "logps/chosen": -1.6668041944503784, "logps/rejected": -1.9111988544464111, "loss": 1.8538, "nll_loss": 1.7974015474319458, "rewards/accuracies": 0.875, "rewards/chosen": -0.16668042540550232, "rewards/margins": 0.024439461529254913, "rewards/rejected": -0.19111987948417664, "step": 207 }, { "epoch": 0.32781717888100864, "grad_norm": 0.2777508497238159, "learning_rate": 3.951845514692371e-06, "log_odds_chosen": 0.39269790053367615, "log_odds_ratio": -0.5250096321105957, "logits/chosen": 0.2141711413860321, "logits/rejected": -0.47686973214149475, "logps/chosen": -1.729377269744873, "logps/rejected": -2.064727783203125, "loss": 1.9033, "nll_loss": 1.8507862091064453, "rewards/accuracies": 1.0, "rewards/chosen": -0.17293773591518402, "rewards/margins": 0.03353503346443176, "rewards/rejected": -0.20647276937961578, "step": 208 }, { "epoch": 0.32939322301024426, "grad_norm": 0.2944742441177368, "learning_rate": 3.950639135810325e-06, "log_odds_chosen": 0.3137563467025757, "log_odds_ratio": -0.5539580583572388, "logits/chosen": 0.3174862265586853, "logits/rejected": -0.45376986265182495, "logps/chosen": -1.850959062576294, "logps/rejected": -2.1199288368225098, "loss": 1.9963, "nll_loss": 1.9408817291259766, "rewards/accuracies": 0.875, "rewards/chosen": -0.1850959062576294, "rewards/margins": 0.02689695730805397, "rewards/rejected": -0.21199287474155426, "step": 209 }, { "epoch": 0.3309692671394799, "grad_norm": 0.3148304224014282, "learning_rate": 3.9494180207813044e-06, "log_odds_chosen": 0.23787932097911835, "log_odds_ratio": -0.5915074348449707, "logits/chosen": 0.30660662055015564, "logits/rejected": -0.7329879403114319, "logps/chosen": -1.717742919921875, "logps/rejected": -1.916438102722168, "loss": 1.8905, "nll_loss": 1.8313184976577759, "rewards/accuracies": 0.875, "rewards/chosen": -0.17177429795265198, "rewards/margins": 0.019869530573487282, "rewards/rejected": -0.1916438341140747, "step": 210 }, { "epoch": 0.3325453112687155, "grad_norm": 0.2588319778442383, "learning_rate": 3.948182178830249e-06, "log_odds_chosen": 0.29779791831970215, "log_odds_ratio": -0.5746859908103943, "logits/chosen": 0.2758365273475647, "logits/rejected": -0.8558934926986694, "logps/chosen": -1.7486122846603394, "logps/rejected": -2.0061802864074707, "loss": 1.904, "nll_loss": 1.8465510606765747, "rewards/accuracies": 0.75, "rewards/chosen": -0.17486125230789185, "rewards/margins": 0.025756794959306717, "rewards/rejected": -0.20061802864074707, "step": 211 }, { "epoch": 0.3341213553979511, "grad_norm": 0.27347490191459656, "learning_rate": 3.9469316192933545e-06, "log_odds_chosen": 0.5377534627914429, "log_odds_ratio": -0.4662073850631714, "logits/chosen": 0.16830803453922272, "logits/rejected": -0.490761935710907, "logps/chosen": -1.6018506288528442, "logps/rejected": -2.05351185798645, "loss": 1.7708, "nll_loss": 1.7242186069488525, "rewards/accuracies": 1.0, "rewards/chosen": -0.1601850688457489, "rewards/margins": 0.04516611993312836, "rewards/rejected": -0.20535118877887726, "step": 212 }, { "epoch": 0.33569739952718675, "grad_norm": 0.2533399164676666, "learning_rate": 3.945666351618001e-06, "log_odds_chosen": 0.2882734537124634, "log_odds_ratio": -0.562584638595581, "logits/chosen": 0.2559697926044464, "logits/rejected": -0.5387569069862366, "logps/chosen": -1.6552236080169678, "logps/rejected": -1.8957173824310303, "loss": 1.823, "nll_loss": 1.7667841911315918, "rewards/accuracies": 1.0, "rewards/chosen": -0.16552235186100006, "rewards/margins": 0.02404937893152237, "rewards/rejected": -0.18957173824310303, "step": 213 }, { "epoch": 0.33727344365642237, "grad_norm": 0.25932711362838745, "learning_rate": 3.9443863853626825e-06, "log_odds_chosen": 0.3108881711959839, "log_odds_ratio": -0.5589509010314941, "logits/chosen": 0.2859116494655609, "logits/rejected": -0.6345618367195129, "logps/chosen": -1.6215680837631226, "logps/rejected": -1.8729337453842163, "loss": 1.8125, "nll_loss": 1.7566089630126953, "rewards/accuracies": 0.875, "rewards/chosen": -0.1621568202972412, "rewards/margins": 0.025136563926935196, "rewards/rejected": -0.1872933804988861, "step": 214 }, { "epoch": 0.338849487785658, "grad_norm": 0.250918984413147, "learning_rate": 3.943091730196931e-06, "log_odds_chosen": 0.2744632363319397, "log_odds_ratio": -0.5730749368667603, "logits/chosen": 0.3415977954864502, "logits/rejected": -0.41705650091171265, "logps/chosen": -1.6209944486618042, "logps/rejected": -1.8511853218078613, "loss": 1.7975, "nll_loss": 1.740174651145935, "rewards/accuracies": 1.0, "rewards/chosen": -0.1620994359254837, "rewards/margins": 0.023019105195999146, "rewards/rejected": -0.18511852622032166, "step": 215 }, { "epoch": 0.3404255319148936, "grad_norm": 0.27474015951156616, "learning_rate": 3.941782395901249e-06, "log_odds_chosen": 0.3491838574409485, "log_odds_ratio": -0.5398426055908203, "logits/chosen": 0.2213699072599411, "logits/rejected": -0.549167811870575, "logps/chosen": -1.69877028465271, "logps/rejected": -1.9932255744934082, "loss": 1.8755, "nll_loss": 1.8215399980545044, "rewards/accuracies": 0.875, "rewards/chosen": -0.16987702250480652, "rewards/margins": 0.02944553829729557, "rewards/rejected": -0.19932258129119873, "step": 216 }, { "epoch": 0.34200157604412923, "grad_norm": 0.25679028034210205, "learning_rate": 3.940458392367032e-06, "log_odds_chosen": 0.3104505240917206, "log_odds_ratio": -0.5546755194664001, "logits/chosen": 0.2595285475254059, "logits/rejected": -0.8533272743225098, "logps/chosen": -1.7510833740234375, "logps/rejected": -2.0138750076293945, "loss": 1.8934, "nll_loss": 1.8379460573196411, "rewards/accuracies": 1.0, "rewards/chosen": -0.17510835826396942, "rewards/margins": 0.026279138401150703, "rewards/rejected": -0.20138749480247498, "step": 217 }, { "epoch": 0.34357762017336485, "grad_norm": 0.2611715495586395, "learning_rate": 3.939119729596493e-06, "log_odds_chosen": 0.35739466547966003, "log_odds_ratio": -0.5335213541984558, "logits/chosen": 0.3199889659881592, "logits/rejected": -0.6550436019897461, "logps/chosen": -1.7533951997756958, "logps/rejected": -2.057891368865967, "loss": 1.9075, "nll_loss": 1.8541964292526245, "rewards/accuracies": 1.0, "rewards/chosen": -0.17533953487873077, "rewards/margins": 0.03044959530234337, "rewards/rejected": -0.20578913390636444, "step": 218 }, { "epoch": 0.34515366430260047, "grad_norm": 0.26392099261283875, "learning_rate": 3.93776641770259e-06, "log_odds_chosen": 0.2948194742202759, "log_odds_ratio": -0.562857449054718, "logits/chosen": 0.28813865780830383, "logits/rejected": -0.7187209129333496, "logps/chosen": -1.7230334281921387, "logps/rejected": -1.973144292831421, "loss": 1.8747, "nll_loss": 1.8184067010879517, "rewards/accuracies": 1.0, "rewards/chosen": -0.17230333387851715, "rewards/margins": 0.025011096149683, "rewards/rejected": -0.19731444120407104, "step": 219 }, { "epoch": 0.3467297084318361, "grad_norm": 0.26364782452583313, "learning_rate": 3.93639846690895e-06, "log_odds_chosen": 0.4254459738731384, "log_odds_ratio": -0.5071126222610474, "logits/chosen": 0.31134355068206787, "logits/rejected": -0.9344062805175781, "logps/chosen": -1.7416664361953735, "logps/rejected": -2.10628080368042, "loss": 1.8893, "nll_loss": 1.8385860919952393, "rewards/accuracies": 1.0, "rewards/chosen": -0.17416664958000183, "rewards/margins": 0.03646141290664673, "rewards/rejected": -0.21062806248664856, "step": 220 }, { "epoch": 0.3483057525610717, "grad_norm": 0.2517707943916321, "learning_rate": 3.9350158875497855e-06, "log_odds_chosen": 0.5023624300956726, "log_odds_ratio": -0.4771158695220947, "logits/chosen": 0.31009235978126526, "logits/rejected": -0.771135687828064, "logps/chosen": -1.6524683237075806, "logps/rejected": -2.075108289718628, "loss": 1.8136, "nll_loss": 1.765908122062683, "rewards/accuracies": 1.0, "rewards/chosen": -0.165246844291687, "rewards/margins": 0.04226400703191757, "rewards/rejected": -0.20751085877418518, "step": 221 }, { "epoch": 0.34988179669030733, "grad_norm": 0.24314001202583313, "learning_rate": 3.933618690069824e-06, "log_odds_chosen": 0.4676588773727417, "log_odds_ratio": -0.4905795454978943, "logits/chosen": 0.17770220339298248, "logits/rejected": -0.7084282636642456, "logps/chosen": -1.6020326614379883, "logps/rejected": -1.9884483814239502, "loss": 1.7733, "nll_loss": 1.7242058515548706, "rewards/accuracies": 1.0, "rewards/chosen": -0.16020327806472778, "rewards/margins": 0.038641560822725296, "rewards/rejected": -0.19884483516216278, "step": 222 }, { "epoch": 0.35145784081954295, "grad_norm": 0.2550933361053467, "learning_rate": 3.932206885024226e-06, "log_odds_chosen": 0.23466067016124725, "log_odds_ratio": -0.5852788090705872, "logits/chosen": 0.22302168607711792, "logits/rejected": -0.6709792017936707, "logps/chosen": -1.7109097242355347, "logps/rejected": -1.9076846837997437, "loss": 1.8546, "nll_loss": 1.7960247993469238, "rewards/accuracies": 0.875, "rewards/chosen": -0.1710909754037857, "rewards/margins": 0.019677501171827316, "rewards/rejected": -0.19076848030090332, "step": 223 }, { "epoch": 0.3530338849487786, "grad_norm": 0.22619383037090302, "learning_rate": 3.930780483078502e-06, "log_odds_chosen": 0.3614213466644287, "log_odds_ratio": -0.5334374904632568, "logits/chosen": 0.1996021270751953, "logits/rejected": -0.7123557329177856, "logps/chosen": -1.6352108716964722, "logps/rejected": -1.9351240396499634, "loss": 1.807, "nll_loss": 1.7537031173706055, "rewards/accuracies": 1.0, "rewards/chosen": -0.16352108120918274, "rewards/margins": 0.02999131567776203, "rewards/rejected": -0.19351240992546082, "step": 224 }, { "epoch": 0.3546099290780142, "grad_norm": 0.2543198764324188, "learning_rate": 3.92933949500844e-06, "log_odds_chosen": 0.39015451073646545, "log_odds_ratio": -0.5326002836227417, "logits/chosen": 0.28930386900901794, "logits/rejected": -0.7000013589859009, "logps/chosen": -1.609468936920166, "logps/rejected": -1.9277451038360596, "loss": 1.7804, "nll_loss": 1.727099895477295, "rewards/accuracies": 0.75, "rewards/chosen": -0.16094687581062317, "rewards/margins": 0.031827617436647415, "rewards/rejected": -0.19277450442314148, "step": 225 }, { "epoch": 0.3561859732072498, "grad_norm": 0.23875081539154053, "learning_rate": 3.9278839317000155e-06, "log_odds_chosen": 0.40506240725517273, "log_odds_ratio": -0.5156422853469849, "logits/chosen": 0.30614709854125977, "logits/rejected": -0.6908950805664062, "logps/chosen": -1.635833978652954, "logps/rejected": -1.9742740392684937, "loss": 1.7931, "nll_loss": 1.7415223121643066, "rewards/accuracies": 1.0, "rewards/chosen": -0.1635833978652954, "rewards/margins": 0.0338440015912056, "rewards/rejected": -0.1974273920059204, "step": 226 }, { "epoch": 0.35776201733648544, "grad_norm": 0.24680069088935852, "learning_rate": 3.926413804149314e-06, "log_odds_chosen": 0.1316530406475067, "log_odds_ratio": -0.6378493309020996, "logits/chosen": 0.28156548738479614, "logits/rejected": -0.6600248217582703, "logps/chosen": -1.7210869789123535, "logps/rejected": -1.8304848670959473, "loss": 1.8788, "nll_loss": 1.815049171447754, "rewards/accuracies": 0.625, "rewards/chosen": -0.1721086949110031, "rewards/margins": 0.010939793661236763, "rewards/rejected": -0.18304848670959473, "step": 227 }, { "epoch": 0.35933806146572106, "grad_norm": 0.2515822649002075, "learning_rate": 3.924929123462447e-06, "log_odds_chosen": 0.2854158878326416, "log_odds_ratio": -0.5666006207466125, "logits/chosen": 0.33641237020492554, "logits/rejected": -0.7290589809417725, "logps/chosen": -1.7506561279296875, "logps/rejected": -1.9922963380813599, "loss": 1.8996, "nll_loss": 1.84292733669281, "rewards/accuracies": 0.875, "rewards/chosen": -0.17506560683250427, "rewards/margins": 0.024164030328392982, "rewards/rejected": -0.1992296427488327, "step": 228 }, { "epoch": 0.3609141055949567, "grad_norm": 0.26427891850471497, "learning_rate": 3.923429900855468e-06, "log_odds_chosen": 0.21544109284877777, "log_odds_ratio": -0.5983462333679199, "logits/chosen": 0.2909620702266693, "logits/rejected": -0.438875675201416, "logps/chosen": -1.8233458995819092, "logps/rejected": -2.004505157470703, "loss": 1.9767, "nll_loss": 1.9168180227279663, "rewards/accuracies": 0.75, "rewards/chosen": -0.18233460187911987, "rewards/margins": 0.018115926533937454, "rewards/rejected": -0.20045052468776703, "step": 229 }, { "epoch": 0.3624901497241923, "grad_norm": 0.241230309009552, "learning_rate": 3.921916147654287e-06, "log_odds_chosen": 0.4597613215446472, "log_odds_ratio": -0.49515679478645325, "logits/chosen": 0.24242226779460907, "logits/rejected": -0.7508520483970642, "logps/chosen": -1.6597049236297607, "logps/rejected": -2.044846773147583, "loss": 1.7986, "nll_loss": 1.7491000890731812, "rewards/accuracies": 1.0, "rewards/chosen": -0.16597048938274384, "rewards/margins": 0.03851418197154999, "rewards/rejected": -0.20448468625545502, "step": 230 }, { "epoch": 0.3640661938534279, "grad_norm": 0.2590387761592865, "learning_rate": 3.920387875294588e-06, "log_odds_chosen": 0.30678579211235046, "log_odds_ratio": -0.5545358657836914, "logits/chosen": 0.2767443060874939, "logits/rejected": -0.5900214910507202, "logps/chosen": -1.6316543817520142, "logps/rejected": -1.8850141763687134, "loss": 1.7885, "nll_loss": 1.7330236434936523, "rewards/accuracies": 1.0, "rewards/chosen": -0.16316545009613037, "rewards/margins": 0.02533598057925701, "rewards/rejected": -0.18850143253803253, "step": 231 }, { "epoch": 0.3656422379826635, "grad_norm": 0.24193865060806274, "learning_rate": 3.918845095321737e-06, "log_odds_chosen": 0.29498574137687683, "log_odds_ratio": -0.5670905113220215, "logits/chosen": 0.23279045522212982, "logits/rejected": -0.841462254524231, "logps/chosen": -1.6641700267791748, "logps/rejected": -1.9037166833877563, "loss": 1.8125, "nll_loss": 1.755805492401123, "rewards/accuracies": 0.875, "rewards/chosen": -0.16641701757907867, "rewards/margins": 0.023954641073942184, "rewards/rejected": -0.19037166237831116, "step": 232 }, { "epoch": 0.3672182821118991, "grad_norm": 0.2594136595726013, "learning_rate": 3.9172878193907004e-06, "log_odds_chosen": 0.3280632793903351, "log_odds_ratio": -0.551827609539032, "logits/chosen": 0.18091654777526855, "logits/rejected": -0.6931451559066772, "logps/chosen": -1.7367044687271118, "logps/rejected": -2.016188859939575, "loss": 1.8981, "nll_loss": 1.8429385423660278, "rewards/accuracies": 0.75, "rewards/chosen": -0.1736704558134079, "rewards/margins": 0.02794845588505268, "rewards/rejected": -0.20161890983581543, "step": 233 }, { "epoch": 0.36879432624113473, "grad_norm": 0.2600264549255371, "learning_rate": 3.915716059265955e-06, "log_odds_chosen": 0.34488314390182495, "log_odds_ratio": -0.543495774269104, "logits/chosen": 0.25614839792251587, "logits/rejected": -0.785013735294342, "logps/chosen": -1.6906204223632812, "logps/rejected": -1.9822051525115967, "loss": 1.8406, "nll_loss": 1.7862180471420288, "rewards/accuracies": 0.75, "rewards/chosen": -0.1690620481967926, "rewards/margins": 0.02915847674012184, "rewards/rejected": -0.19822052121162415, "step": 234 }, { "epoch": 0.37037037037037035, "grad_norm": 0.22871002554893494, "learning_rate": 3.9141298268213966e-06, "log_odds_chosen": 0.4361989200115204, "log_odds_ratio": -0.5028459429740906, "logits/chosen": 0.2875756621360779, "logits/rejected": -0.6794713735580444, "logps/chosen": -1.6101529598236084, "logps/rejected": -1.9754250049591064, "loss": 1.7655, "nll_loss": 1.7152522802352905, "rewards/accuracies": 1.0, "rewards/chosen": -0.16101528704166412, "rewards/margins": 0.03652720898389816, "rewards/rejected": -0.19754250347614288, "step": 235 }, { "epoch": 0.37194641449960597, "grad_norm": 0.261535108089447, "learning_rate": 3.912529134040255e-06, "log_odds_chosen": 0.296495646238327, "log_odds_ratio": -0.5591344833374023, "logits/chosen": 0.27751684188842773, "logits/rejected": -0.5663548707962036, "logps/chosen": -1.6922770738601685, "logps/rejected": -1.9417719841003418, "loss": 1.8539, "nll_loss": 1.7979625463485718, "rewards/accuracies": 1.0, "rewards/chosen": -0.1692277193069458, "rewards/margins": 0.02494947426021099, "rewards/rejected": -0.19417718052864075, "step": 236 }, { "epoch": 0.3735224586288416, "grad_norm": 0.2609320878982544, "learning_rate": 3.910913993014998e-06, "log_odds_chosen": 0.4593818187713623, "log_odds_ratio": -0.4922163188457489, "logits/chosen": 0.3688610792160034, "logits/rejected": -0.7247602939605713, "logps/chosen": -1.687050223350525, "logps/rejected": -2.0763425827026367, "loss": 1.8363, "nll_loss": 1.7870500087738037, "rewards/accuracies": 1.0, "rewards/chosen": -0.1687050312757492, "rewards/margins": 0.038929231464862823, "rewards/rejected": -0.20763425529003143, "step": 237 }, { "epoch": 0.3750985027580772, "grad_norm": 0.25336939096450806, "learning_rate": 3.909284415947246e-06, "log_odds_chosen": 0.4314580261707306, "log_odds_ratio": -0.5070245862007141, "logits/chosen": 0.32482269406318665, "logits/rejected": -0.8647500276565552, "logps/chosen": -1.73568594455719, "logps/rejected": -2.101022243499756, "loss": 1.87, "nll_loss": 1.819305419921875, "rewards/accuracies": 1.0, "rewards/chosen": -0.17356860637664795, "rewards/margins": 0.036533623933792114, "rewards/rejected": -0.21010223031044006, "step": 238 }, { "epoch": 0.37667454688731283, "grad_norm": 0.24124158918857574, "learning_rate": 3.907640415147674e-06, "log_odds_chosen": 0.4658309817314148, "log_odds_ratio": -0.49067068099975586, "logits/chosen": 0.21756987273693085, "logits/rejected": -0.9139111042022705, "logps/chosen": -1.6312497854232788, "logps/rejected": -2.020339250564575, "loss": 1.788, "nll_loss": 1.7389440536499023, "rewards/accuracies": 1.0, "rewards/chosen": -0.16312497854232788, "rewards/margins": 0.038908950984478, "rewards/rejected": -0.20203393697738647, "step": 239 }, { "epoch": 0.37825059101654845, "grad_norm": 0.2780405282974243, "learning_rate": 3.905982003035924e-06, "log_odds_chosen": 0.3844011127948761, "log_odds_ratio": -0.5241358280181885, "logits/chosen": 0.23867914080619812, "logits/rejected": -0.8461136817932129, "logps/chosen": -1.7408959865570068, "logps/rejected": -2.067878246307373, "loss": 1.8775, "nll_loss": 1.825044870376587, "rewards/accuracies": 1.0, "rewards/chosen": -0.17408961057662964, "rewards/margins": 0.03269820660352707, "rewards/rejected": -0.2067878246307373, "step": 240 }, { "epoch": 0.3798266351457841, "grad_norm": 0.24185331165790558, "learning_rate": 3.904309192140506e-06, "log_odds_chosen": 0.278605192899704, "log_odds_ratio": -0.571584165096283, "logits/chosen": 0.24360942840576172, "logits/rejected": -0.4919203519821167, "logps/chosen": -1.6554006338119507, "logps/rejected": -1.8880599737167358, "loss": 1.8086, "nll_loss": 1.7514057159423828, "rewards/accuracies": 0.875, "rewards/chosen": -0.16554008424282074, "rewards/margins": 0.023265928030014038, "rewards/rejected": -0.18880601227283478, "step": 241 }, { "epoch": 0.3814026792750197, "grad_norm": 0.26548993587493896, "learning_rate": 3.90262199509871e-06, "log_odds_chosen": 0.2826857268810272, "log_odds_ratio": -0.5648576021194458, "logits/chosen": 0.22797901928424835, "logits/rejected": -0.5838393568992615, "logps/chosen": -1.6858417987823486, "logps/rejected": -1.9206804037094116, "loss": 1.8445, "nll_loss": 1.7879817485809326, "rewards/accuracies": 1.0, "rewards/chosen": -0.1685841828584671, "rewards/margins": 0.023483866825699806, "rewards/rejected": -0.19206805527210236, "step": 242 }, { "epoch": 0.3829787234042553, "grad_norm": 0.2655971944332123, "learning_rate": 3.900920424656501e-06, "log_odds_chosen": 0.4220297932624817, "log_odds_ratio": -0.51133131980896, "logits/chosen": 0.2471286654472351, "logits/rejected": -0.7664991021156311, "logps/chosen": -1.6024655103683472, "logps/rejected": -1.953262209892273, "loss": 1.7496, "nll_loss": 1.6984376907348633, "rewards/accuracies": 0.875, "rewards/chosen": -0.16024655103683472, "rewards/margins": 0.035079680383205414, "rewards/rejected": -0.19532622396945953, "step": 243 }, { "epoch": 0.38455476753349094, "grad_norm": 0.2719763517379761, "learning_rate": 3.899204493668432e-06, "log_odds_chosen": 0.34531235694885254, "log_odds_ratio": -0.545647382736206, "logits/chosen": 0.2916204631328583, "logits/rejected": -0.6753207445144653, "logps/chosen": -1.760868787765503, "logps/rejected": -2.058903694152832, "loss": 1.8996, "nll_loss": 1.8450438976287842, "rewards/accuracies": 0.75, "rewards/chosen": -0.17608687281608582, "rewards/margins": 0.029803497716784477, "rewards/rejected": -0.20589037239551544, "step": 244 }, { "epoch": 0.38613081166272656, "grad_norm": 0.26759397983551025, "learning_rate": 3.897474215097542e-06, "log_odds_chosen": 0.34969383478164673, "log_odds_ratio": -0.5378445386886597, "logits/chosen": 0.30676940083503723, "logits/rejected": -0.9074739217758179, "logps/chosen": -1.7014522552490234, "logps/rejected": -1.9977072477340698, "loss": 1.8594, "nll_loss": 1.805631160736084, "rewards/accuracies": 1.0, "rewards/chosen": -0.1701452136039734, "rewards/margins": 0.029625503346323967, "rewards/rejected": -0.1997707188129425, "step": 245 }, { "epoch": 0.3877068557919622, "grad_norm": 0.24695803225040436, "learning_rate": 3.8957296020152596e-06, "log_odds_chosen": 0.4582657217979431, "log_odds_ratio": -0.4948864281177521, "logits/chosen": 0.3029223680496216, "logits/rejected": -0.7880758047103882, "logps/chosen": -1.6669402122497559, "logps/rejected": -2.054736614227295, "loss": 1.8203, "nll_loss": 1.7708466053009033, "rewards/accuracies": 1.0, "rewards/chosen": -0.1666940301656723, "rewards/margins": 0.03877962380647659, "rewards/rejected": -0.2054736316204071, "step": 246 }, { "epoch": 0.3892828999211978, "grad_norm": 0.2638940215110779, "learning_rate": 3.893970667601303e-06, "log_odds_chosen": 0.3049715757369995, "log_odds_ratio": -0.5559093952178955, "logits/chosen": 0.20208218693733215, "logits/rejected": -1.0700570344924927, "logps/chosen": -1.7513179779052734, "logps/rejected": -2.0092613697052, "loss": 1.8901, "nll_loss": 1.83454430103302, "rewards/accuracies": 1.0, "rewards/chosen": -0.17513179779052734, "rewards/margins": 0.02579433098435402, "rewards/rejected": -0.20092612504959106, "step": 247 }, { "epoch": 0.3908589440504334, "grad_norm": 0.260355681180954, "learning_rate": 3.892197425143581e-06, "log_odds_chosen": 0.4682731330394745, "log_odds_ratio": -0.48940151929855347, "logits/chosen": 0.255657821893692, "logits/rejected": -0.671928882598877, "logps/chosen": -1.6768563985824585, "logps/rejected": -2.071852922439575, "loss": 1.8127, "nll_loss": 1.763710618019104, "rewards/accuracies": 1.0, "rewards/chosen": -0.16768565773963928, "rewards/margins": 0.03949964791536331, "rewards/rejected": -0.2071852833032608, "step": 248 }, { "epoch": 0.39243498817966904, "grad_norm": 0.2769114673137665, "learning_rate": 3.890409888038094e-06, "log_odds_chosen": 0.35800671577453613, "log_odds_ratio": -0.5353755354881287, "logits/chosen": 0.27717655897140503, "logits/rejected": -0.6495774984359741, "logps/chosen": -1.718163013458252, "logps/rejected": -2.023691415786743, "loss": 1.8664, "nll_loss": 1.812865138053894, "rewards/accuracies": 1.0, "rewards/chosen": -0.17181627452373505, "rewards/margins": 0.030552847310900688, "rewards/rejected": -0.20236913859844208, "step": 249 }, { "epoch": 0.39401103230890466, "grad_norm": 0.2523289620876312, "learning_rate": 3.888608069788831e-06, "log_odds_chosen": 0.3482479751110077, "log_odds_ratio": -0.5456973314285278, "logits/chosen": 0.18573682010173798, "logits/rejected": -0.5659343600273132, "logps/chosen": -1.672256350517273, "logps/rejected": -1.9685872793197632, "loss": 1.8214, "nll_loss": 1.7668198347091675, "rewards/accuracies": 0.875, "rewards/chosen": -0.16722562909126282, "rewards/margins": 0.029633095487952232, "rewards/rejected": -0.1968587189912796, "step": 250 }, { "epoch": 0.3955870764381403, "grad_norm": 0.23957766592502594, "learning_rate": 3.8867919840076685e-06, "log_odds_chosen": 0.4184243083000183, "log_odds_ratio": -0.5095526576042175, "logits/chosen": 0.22751504182815552, "logits/rejected": -0.8524928092956543, "logps/chosen": -1.6037871837615967, "logps/rejected": -1.9516103267669678, "loss": 1.7432, "nll_loss": 1.692209005355835, "rewards/accuracies": 1.0, "rewards/chosen": -0.16037872433662415, "rewards/margins": 0.03478231281042099, "rewards/rejected": -0.19516104459762573, "step": 251 }, { "epoch": 0.3971631205673759, "grad_norm": 0.2576284110546112, "learning_rate": 3.884961644414267e-06, "log_odds_chosen": 0.34130245447158813, "log_odds_ratio": -0.5387775897979736, "logits/chosen": 0.24311238527297974, "logits/rejected": -0.3957245647907257, "logps/chosen": -1.7268153429031372, "logps/rejected": -2.0152997970581055, "loss": 1.848, "nll_loss": 1.794105887413025, "rewards/accuracies": 1.0, "rewards/chosen": -0.1726815402507782, "rewards/margins": 0.028848443180322647, "rewards/rejected": -0.20152997970581055, "step": 252 }, { "epoch": 0.3987391646966115, "grad_norm": 0.24223408102989197, "learning_rate": 3.883117064835967e-06, "log_odds_chosen": 0.3959362506866455, "log_odds_ratio": -0.5255994200706482, "logits/chosen": 0.1985481083393097, "logits/rejected": -0.7414513230323792, "logps/chosen": -1.6674610376358032, "logps/rejected": -2.0037710666656494, "loss": 1.8186, "nll_loss": 1.7660025358200073, "rewards/accuracies": 0.875, "rewards/chosen": -0.1667460948228836, "rewards/margins": 0.033631011843681335, "rewards/rejected": -0.20037710666656494, "step": 253 }, { "epoch": 0.40031520882584715, "grad_norm": 0.26332396268844604, "learning_rate": 3.881258259207688e-06, "log_odds_chosen": 0.3393678367137909, "log_odds_ratio": -0.5432149171829224, "logits/chosen": 0.30877983570098877, "logits/rejected": -0.7882847785949707, "logps/chosen": -1.6376413106918335, "logps/rejected": -1.919863224029541, "loss": 1.7661, "nll_loss": 1.7118016481399536, "rewards/accuracies": 0.875, "rewards/chosen": -0.1637641191482544, "rewards/margins": 0.028222184628248215, "rewards/rejected": -0.1919863075017929, "step": 254 }, { "epoch": 0.40189125295508277, "grad_norm": 0.25575923919677734, "learning_rate": 3.8793852415718165e-06, "log_odds_chosen": 0.4229394495487213, "log_odds_ratio": -0.5054378509521484, "logits/chosen": 0.21786652505397797, "logits/rejected": -1.0462470054626465, "logps/chosen": -1.561218023300171, "logps/rejected": -1.9078154563903809, "loss": 1.694, "nll_loss": 1.6435015201568604, "rewards/accuracies": 1.0, "rewards/chosen": -0.15612182021141052, "rewards/margins": 0.0346597358584404, "rewards/rejected": -0.19078153371810913, "step": 255 }, { "epoch": 0.4034672970843184, "grad_norm": 0.24122343957424164, "learning_rate": 3.877498026078107e-06, "log_odds_chosen": 0.2585289180278778, "log_odds_ratio": -0.5783178210258484, "logits/chosen": 0.22445496916770935, "logits/rejected": -0.9274032711982727, "logps/chosen": -1.7019916772842407, "logps/rejected": -1.9216302633285522, "loss": 1.8365, "nll_loss": 1.7786757946014404, "rewards/accuracies": 0.75, "rewards/chosen": -0.1701991707086563, "rewards/margins": 0.02196386270225048, "rewards/rejected": -0.19216305017471313, "step": 256 }, { "epoch": 0.40504334121355395, "grad_norm": 0.25378501415252686, "learning_rate": 3.875596626983573e-06, "log_odds_chosen": 0.48093894124031067, "log_odds_ratio": -0.4933563768863678, "logits/chosen": 0.24534080922603607, "logits/rejected": -0.61388099193573, "logps/chosen": -1.6251795291900635, "logps/rejected": -2.02630352973938, "loss": 1.7574, "nll_loss": 1.708074688911438, "rewards/accuracies": 1.0, "rewards/chosen": -0.1625179499387741, "rewards/margins": 0.040112413465976715, "rewards/rejected": -0.20263037085533142, "step": 257 }, { "epoch": 0.4066193853427896, "grad_norm": 0.26203182339668274, "learning_rate": 3.873681058652374e-06, "log_odds_chosen": 0.27626505494117737, "log_odds_ratio": -0.5716174244880676, "logits/chosen": 0.2242291271686554, "logits/rejected": -0.705990731716156, "logps/chosen": -1.742016077041626, "logps/rejected": -1.976877212524414, "loss": 1.8763, "nll_loss": 1.819146990776062, "rewards/accuracies": 1.0, "rewards/chosen": -0.1742016226053238, "rewards/margins": 0.023486101999878883, "rewards/rejected": -0.19768773019313812, "step": 258 }, { "epoch": 0.4081954294720252, "grad_norm": 0.26595333218574524, "learning_rate": 3.871751335555715e-06, "log_odds_chosen": 0.26442596316337585, "log_odds_ratio": -0.5714837312698364, "logits/chosen": 0.20778290927410126, "logits/rejected": -0.8641613125801086, "logps/chosen": -1.6800938844680786, "logps/rejected": -1.9005059003829956, "loss": 1.809, "nll_loss": 1.7518802881240845, "rewards/accuracies": 1.0, "rewards/chosen": -0.16800937056541443, "rewards/margins": 0.0220412015914917, "rewards/rejected": -0.19005057215690613, "step": 259 }, { "epoch": 0.4097714736012608, "grad_norm": 0.23981213569641113, "learning_rate": 3.869807472271731e-06, "log_odds_chosen": 0.3423271179199219, "log_odds_ratio": -0.5396283864974976, "logits/chosen": 0.23563726246356964, "logits/rejected": -0.7463378310203552, "logps/chosen": -1.6491752862930298, "logps/rejected": -1.9362696409225464, "loss": 1.8109, "nll_loss": 1.7569705247879028, "rewards/accuracies": 1.0, "rewards/chosen": -0.16491752862930298, "rewards/margins": 0.028709445148706436, "rewards/rejected": -0.19362697005271912, "step": 260 }, { "epoch": 0.41134751773049644, "grad_norm": 0.24014748632907867, "learning_rate": 3.8678494834853826e-06, "log_odds_chosen": 0.34554576873779297, "log_odds_ratio": -0.5426000356674194, "logits/chosen": 0.20350177586078644, "logits/rejected": -0.6877405643463135, "logps/chosen": -1.6484317779541016, "logps/rejected": -1.934998631477356, "loss": 1.7826, "nll_loss": 1.728297472000122, "rewards/accuracies": 0.875, "rewards/chosen": -0.16484320163726807, "rewards/margins": 0.028656674548983574, "rewards/rejected": -0.1934998780488968, "step": 261 }, { "epoch": 0.41292356185973206, "grad_norm": 0.27845731377601624, "learning_rate": 3.865877383988339e-06, "log_odds_chosen": 0.41938862204551697, "log_odds_ratio": -0.5120880603790283, "logits/chosen": 0.20454849302768707, "logits/rejected": -0.6759002208709717, "logps/chosen": -1.6775566339492798, "logps/rejected": -2.0273048877716064, "loss": 1.8329, "nll_loss": 1.7817234992980957, "rewards/accuracies": 1.0, "rewards/chosen": -0.16775566339492798, "rewards/margins": 0.0349748432636261, "rewards/rejected": -0.20273049175739288, "step": 262 }, { "epoch": 0.4144996059889677, "grad_norm": 0.2552221417427063, "learning_rate": 3.863891188678869e-06, "log_odds_chosen": 0.16435928642749786, "log_odds_ratio": -0.6247628927230835, "logits/chosen": 0.17906969785690308, "logits/rejected": -0.8283835649490356, "logps/chosen": -1.5928330421447754, "logps/rejected": -1.7304943799972534, "loss": 1.7381, "nll_loss": 1.6756339073181152, "rewards/accuracies": 0.75, "rewards/chosen": -0.1592833399772644, "rewards/margins": 0.013766113668680191, "rewards/rejected": -0.1730494201183319, "step": 263 }, { "epoch": 0.4160756501182033, "grad_norm": 0.24987082183361053, "learning_rate": 3.8618909125617305e-06, "log_odds_chosen": 0.5285029411315918, "log_odds_ratio": -0.4670139253139496, "logits/chosen": 0.16765527427196503, "logits/rejected": -1.05913507938385, "logps/chosen": -1.5894949436187744, "logps/rejected": -2.031071662902832, "loss": 1.7407, "nll_loss": 1.693982720375061, "rewards/accuracies": 1.0, "rewards/chosen": -0.15894947946071625, "rewards/margins": 0.04415770620107651, "rewards/rejected": -0.20310717821121216, "step": 264 }, { "epoch": 0.4176516942474389, "grad_norm": 0.24382364749908447, "learning_rate": 3.859876570748054e-06, "log_odds_chosen": 0.3537558913230896, "log_odds_ratio": -0.5339797735214233, "logits/chosen": 0.2316458523273468, "logits/rejected": -0.3801443874835968, "logps/chosen": -1.6417627334594727, "logps/rejected": -1.9363051652908325, "loss": 1.792, "nll_loss": 1.7385859489440918, "rewards/accuracies": 1.0, "rewards/chosen": -0.16417627036571503, "rewards/margins": 0.02945425920188427, "rewards/rejected": -0.19363053143024445, "step": 265 }, { "epoch": 0.41922773837667454, "grad_norm": 0.23967225849628448, "learning_rate": 3.857848178455231e-06, "log_odds_chosen": 0.11959446966648102, "log_odds_ratio": -0.6412428617477417, "logits/chosen": 0.2559909522533417, "logits/rejected": -1.0443730354309082, "logps/chosen": -1.656022071838379, "logps/rejected": -1.7599290609359741, "loss": 1.8213, "nll_loss": 1.7571834325790405, "rewards/accuracies": 0.75, "rewards/chosen": -0.16560222208499908, "rewards/margins": 0.010390684939920902, "rewards/rejected": -0.1759929060935974, "step": 266 }, { "epoch": 0.42080378250591016, "grad_norm": 0.27286192774772644, "learning_rate": 3.855805751006794e-06, "log_odds_chosen": 0.4806629419326782, "log_odds_ratio": -0.49105098843574524, "logits/chosen": 0.2224351465702057, "logits/rejected": -1.0106374025344849, "logps/chosen": -1.6792700290679932, "logps/rejected": -2.0848042964935303, "loss": 1.8049, "nll_loss": 1.7558059692382812, "rewards/accuracies": 1.0, "rewards/chosen": -0.16792699694633484, "rewards/margins": 0.04055342823266983, "rewards/rejected": -0.20848044753074646, "step": 267 }, { "epoch": 0.4223798266351458, "grad_norm": 0.22964340448379517, "learning_rate": 3.853749303832308e-06, "log_odds_chosen": 0.34756800532341003, "log_odds_ratio": -0.5398483872413635, "logits/chosen": 0.19423283636569977, "logits/rejected": -0.5895228981971741, "logps/chosen": -1.6056160926818848, "logps/rejected": -1.892835021018982, "loss": 1.7439, "nll_loss": 1.6899384260177612, "rewards/accuracies": 1.0, "rewards/chosen": -0.16056160628795624, "rewards/margins": 0.02872188575565815, "rewards/rejected": -0.18928351998329163, "step": 268 }, { "epoch": 0.4239558707643814, "grad_norm": 0.2715766429901123, "learning_rate": 3.8516788524672495e-06, "log_odds_chosen": 0.2077798843383789, "log_odds_ratio": -0.6008598804473877, "logits/chosen": 0.22610792517662048, "logits/rejected": -0.9308719038963318, "logps/chosen": -1.6854466199874878, "logps/rejected": -1.8608953952789307, "loss": 1.8208, "nll_loss": 1.7607052326202393, "rewards/accuracies": 0.625, "rewards/chosen": -0.16854466497898102, "rewards/margins": 0.017544886097311974, "rewards/rejected": -0.18608956038951874, "step": 269 }, { "epoch": 0.425531914893617, "grad_norm": 0.23874664306640625, "learning_rate": 3.849594412552889e-06, "log_odds_chosen": 0.3525705933570862, "log_odds_ratio": -0.5381215810775757, "logits/chosen": 0.24739238619804382, "logits/rejected": -0.6722042560577393, "logps/chosen": -1.587384581565857, "logps/rejected": -1.871896505355835, "loss": 1.727, "nll_loss": 1.673226237297058, "rewards/accuracies": 1.0, "rewards/chosen": -0.15873846411705017, "rewards/margins": 0.028451191261410713, "rewards/rejected": -0.18718963861465454, "step": 270 }, { "epoch": 0.42710795902285265, "grad_norm": 0.2558384835720062, "learning_rate": 3.847495999836175e-06, "log_odds_chosen": 0.3543975353240967, "log_odds_ratio": -0.5356498956680298, "logits/chosen": 0.2624046504497528, "logits/rejected": -0.9314246773719788, "logps/chosen": -1.666176438331604, "logps/rejected": -1.9635179042816162, "loss": 1.7931, "nll_loss": 1.7394909858703613, "rewards/accuracies": 1.0, "rewards/chosen": -0.16661766171455383, "rewards/margins": 0.029734138399362564, "rewards/rejected": -0.1963518112897873, "step": 271 }, { "epoch": 0.42868400315208827, "grad_norm": 0.23578688502311707, "learning_rate": 3.845383630169613e-06, "log_odds_chosen": 0.3167238235473633, "log_odds_ratio": -0.5535402894020081, "logits/chosen": 0.11884280294179916, "logits/rejected": -0.9415445923805237, "logps/chosen": -1.5773344039916992, "logps/rejected": -1.837627649307251, "loss": 1.7049, "nll_loss": 1.6495603322982788, "rewards/accuracies": 0.875, "rewards/chosen": -0.15773345530033112, "rewards/margins": 0.026029333472251892, "rewards/rejected": -0.18376277387142181, "step": 272 }, { "epoch": 0.4302600472813239, "grad_norm": 0.2651219367980957, "learning_rate": 3.843257319511147e-06, "log_odds_chosen": 0.5530175566673279, "log_odds_ratio": -0.46196067333221436, "logits/chosen": 0.17332234978675842, "logits/rejected": -1.021928310394287, "logps/chosen": -1.5769469738006592, "logps/rejected": -2.0343515872955322, "loss": 1.7041, "nll_loss": 1.6578803062438965, "rewards/accuracies": 1.0, "rewards/chosen": -0.15769469738006592, "rewards/margins": 0.04574044048786163, "rewards/rejected": -0.20343513786792755, "step": 273 }, { "epoch": 0.4318360914105595, "grad_norm": 0.24971356987953186, "learning_rate": 3.841117083924039e-06, "log_odds_chosen": 0.32874542474746704, "log_odds_ratio": -0.5475614666938782, "logits/chosen": 0.16287937760353088, "logits/rejected": -1.0936428308486938, "logps/chosen": -1.6155405044555664, "logps/rejected": -1.888677716255188, "loss": 1.7473, "nll_loss": 1.692505121231079, "rewards/accuracies": 1.0, "rewards/chosen": -0.16155406832695007, "rewards/margins": 0.0273137167096138, "rewards/rejected": -0.18886777758598328, "step": 274 }, { "epoch": 0.43341213553979513, "grad_norm": 0.2504657506942749, "learning_rate": 3.838962939576746e-06, "log_odds_chosen": 0.4173721373081207, "log_odds_ratio": -0.51390141248703, "logits/chosen": 0.13147510588169098, "logits/rejected": -0.8347434401512146, "logps/chosen": -1.5679432153701782, "logps/rejected": -1.9134660959243774, "loss": 1.72, "nll_loss": 1.668624758720398, "rewards/accuracies": 1.0, "rewards/chosen": -0.15679430961608887, "rewards/margins": 0.03455227613449097, "rewards/rejected": -0.19134658575057983, "step": 275 }, { "epoch": 0.43498817966903075, "grad_norm": 0.2594338357448578, "learning_rate": 3.8367949027427985e-06, "log_odds_chosen": 0.3050197958946228, "log_odds_ratio": -0.5566756129264832, "logits/chosen": 0.19317705929279327, "logits/rejected": -0.7772097587585449, "logps/chosen": -1.683677077293396, "logps/rejected": -1.9412704706192017, "loss": 1.8395, "nll_loss": 1.7838023900985718, "rewards/accuracies": 1.0, "rewards/chosen": -0.16836771368980408, "rewards/margins": 0.02575933374464512, "rewards/rejected": -0.19412705302238464, "step": 276 }, { "epoch": 0.43656422379826637, "grad_norm": 0.26758840680122375, "learning_rate": 3.834612989800681e-06, "log_odds_chosen": 0.5289045572280884, "log_odds_ratio": -0.46756529808044434, "logits/chosen": 0.21106746792793274, "logits/rejected": -1.300022006034851, "logps/chosen": -1.6692185401916504, "logps/rejected": -2.1159589290618896, "loss": 1.7865, "nll_loss": 1.7397515773773193, "rewards/accuracies": 1.0, "rewards/chosen": -0.16692185401916504, "rewards/margins": 0.04467405378818512, "rewards/rejected": -0.21159592270851135, "step": 277 }, { "epoch": 0.438140267927502, "grad_norm": 0.37664347887039185, "learning_rate": 3.832417217233703e-06, "log_odds_chosen": 0.3042469024658203, "log_odds_ratio": -0.555115282535553, "logits/chosen": 0.17117249965667725, "logits/rejected": -0.787204921245575, "logps/chosen": -1.6579128503799438, "logps/rejected": -1.9126213788986206, "loss": 1.7924, "nll_loss": 1.7368648052215576, "rewards/accuracies": 1.0, "rewards/chosen": -0.16579128801822662, "rewards/margins": 0.02547084540128708, "rewards/rejected": -0.1912621259689331, "step": 278 }, { "epoch": 0.4397163120567376, "grad_norm": 0.2642952501773834, "learning_rate": 3.8302076016298775e-06, "log_odds_chosen": 0.4446471929550171, "log_odds_ratio": -0.49887239933013916, "logits/chosen": 0.11896737664937973, "logits/rejected": -1.0080976486206055, "logps/chosen": -1.5889983177185059, "logps/rejected": -1.9574190378189087, "loss": 1.7293, "nll_loss": 1.6794246435165405, "rewards/accuracies": 1.0, "rewards/chosen": -0.15889984369277954, "rewards/margins": 0.036842066794633865, "rewards/rejected": -0.1957419216632843, "step": 279 }, { "epoch": 0.44129235618597323, "grad_norm": 0.2610625922679901, "learning_rate": 3.827984159681796e-06, "log_odds_chosen": 0.34331244230270386, "log_odds_ratio": -0.541645884513855, "logits/chosen": 0.10684026777744293, "logits/rejected": -0.8201386332511902, "logps/chosen": -1.6439259052276611, "logps/rejected": -1.9289183616638184, "loss": 1.7716, "nll_loss": 1.7174153327941895, "rewards/accuracies": 0.875, "rewards/chosen": -0.1643926203250885, "rewards/margins": 0.028499236330389977, "rewards/rejected": -0.19289185106754303, "step": 280 }, { "epoch": 0.4428684003152088, "grad_norm": 0.24843472242355347, "learning_rate": 3.825746908186498e-06, "log_odds_chosen": 0.3994872272014618, "log_odds_ratio": -0.5212578773498535, "logits/chosen": 0.2376633733510971, "logits/rejected": -1.0532779693603516, "logps/chosen": -1.6814634799957275, "logps/rejected": -2.018228769302368, "loss": 1.7922, "nll_loss": 1.740039348602295, "rewards/accuracies": 1.0, "rewards/chosen": -0.16814635694026947, "rewards/margins": 0.03367652744054794, "rewards/rejected": -0.201822891831398, "step": 281 }, { "epoch": 0.4444444444444444, "grad_norm": 0.2748637795448303, "learning_rate": 3.823495864045352e-06, "log_odds_chosen": 0.4453040361404419, "log_odds_ratio": -0.4974423050880432, "logits/chosen": 0.20578338205814362, "logits/rejected": -0.5144822001457214, "logps/chosen": -1.6322267055511475, "logps/rejected": -2.0018556118011475, "loss": 1.7625, "nll_loss": 1.7127240896224976, "rewards/accuracies": 1.0, "rewards/chosen": -0.16322267055511475, "rewards/margins": 0.036962881684303284, "rewards/rejected": -0.20018555223941803, "step": 282 }, { "epoch": 0.44602048857368004, "grad_norm": 0.2273551970720291, "learning_rate": 3.8212310442639205e-06, "log_odds_chosen": 0.33169132471084595, "log_odds_ratio": -0.5450347065925598, "logits/chosen": 0.2068972885608673, "logits/rejected": -0.695864200592041, "logps/chosen": -1.5796043872833252, "logps/rejected": -1.8531723022460938, "loss": 1.7234, "nll_loss": 1.6689225435256958, "rewards/accuracies": 0.875, "rewards/chosen": -0.1579604595899582, "rewards/margins": 0.027356795966625214, "rewards/rejected": -0.1853172332048416, "step": 283 }, { "epoch": 0.44759653270291566, "grad_norm": 0.24065622687339783, "learning_rate": 3.8189524659518355e-06, "log_odds_chosen": 0.40192341804504395, "log_odds_ratio": -0.5161522626876831, "logits/chosen": 0.2697753310203552, "logits/rejected": -0.8584244847297668, "logps/chosen": -1.6505751609802246, "logps/rejected": -1.9848883152008057, "loss": 1.7755, "nll_loss": 1.7239261865615845, "rewards/accuracies": 0.875, "rewards/chosen": -0.16505752503871918, "rewards/margins": 0.03343129903078079, "rewards/rejected": -0.19848881661891937, "step": 284 }, { "epoch": 0.4491725768321513, "grad_norm": 0.244610995054245, "learning_rate": 3.816660146322667e-06, "log_odds_chosen": 0.36820292472839355, "log_odds_ratio": -0.5295414328575134, "logits/chosen": 0.1906885802745819, "logits/rejected": -0.700128436088562, "logps/chosen": -1.5470272302627563, "logps/rejected": -1.847809076309204, "loss": 1.6794, "nll_loss": 1.626416802406311, "rewards/accuracies": 1.0, "rewards/chosen": -0.15470272302627563, "rewards/margins": 0.03007819503545761, "rewards/rejected": -0.18478091061115265, "step": 285 }, { "epoch": 0.4507486209613869, "grad_norm": 0.23016570508480072, "learning_rate": 3.814354102693797e-06, "log_odds_chosen": 0.3836941719055176, "log_odds_ratio": -0.5216916799545288, "logits/chosen": 0.21484431624412537, "logits/rejected": -0.9419076442718506, "logps/chosen": -1.6132217645645142, "logps/rejected": -1.9288195371627808, "loss": 1.7432, "nll_loss": 1.6910346746444702, "rewards/accuracies": 1.0, "rewards/chosen": -0.16132217645645142, "rewards/margins": 0.0315597802400589, "rewards/rejected": -0.19288195669651031, "step": 286 }, { "epoch": 0.4523246650906225, "grad_norm": 0.23653464019298553, "learning_rate": 3.8120343524862814e-06, "log_odds_chosen": 0.3473433256149292, "log_odds_ratio": -0.535927951335907, "logits/chosen": 0.18108825385570526, "logits/rejected": -0.833530843257904, "logps/chosen": -1.6525256633758545, "logps/rejected": -1.9398300647735596, "loss": 1.7645, "nll_loss": 1.710868239402771, "rewards/accuracies": 1.0, "rewards/chosen": -0.16525256633758545, "rewards/margins": 0.028730444610118866, "rewards/rejected": -0.1939830183982849, "step": 287 }, { "epoch": 0.45390070921985815, "grad_norm": 0.23629866540431976, "learning_rate": 3.809700913224726e-06, "log_odds_chosen": 0.36637431383132935, "log_odds_ratio": -0.5327513217926025, "logits/chosen": 0.15463852882385254, "logits/rejected": -1.287902593612671, "logps/chosen": -1.5792927742004395, "logps/rejected": -1.8798011541366577, "loss": 1.7056, "nll_loss": 1.6522890329360962, "rewards/accuracies": 1.0, "rewards/chosen": -0.15792928636074066, "rewards/margins": 0.03005082532763481, "rewards/rejected": -0.18798011541366577, "step": 288 }, { "epoch": 0.45547675334909377, "grad_norm": 0.23628529906272888, "learning_rate": 3.8073538025371494e-06, "log_odds_chosen": 0.4771001935005188, "log_odds_ratio": -0.48583680391311646, "logits/chosen": 0.2631601393222809, "logits/rejected": -1.0782270431518555, "logps/chosen": -1.551235318183899, "logps/rejected": -1.9436652660369873, "loss": 1.6886, "nll_loss": 1.6399903297424316, "rewards/accuracies": 1.0, "rewards/chosen": -0.1551235318183899, "rewards/margins": 0.03924299776554108, "rewards/rejected": -0.19436652958393097, "step": 289 }, { "epoch": 0.4570527974783294, "grad_norm": 0.2573354244232178, "learning_rate": 3.804993038154852e-06, "log_odds_chosen": 0.3571556806564331, "log_odds_ratio": -0.5380254983901978, "logits/chosen": 0.18856900930404663, "logits/rejected": -0.9549089670181274, "logps/chosen": -1.5852500200271606, "logps/rejected": -1.88179349899292, "loss": 1.7309, "nll_loss": 1.6771280765533447, "rewards/accuracies": 0.875, "rewards/chosen": -0.1585249900817871, "rewards/margins": 0.029654357582330704, "rewards/rejected": -0.1881793588399887, "step": 290 }, { "epoch": 0.458628841607565, "grad_norm": 0.2339908331632614, "learning_rate": 3.8026186379122816e-06, "log_odds_chosen": 0.2612074613571167, "log_odds_ratio": -0.5736123323440552, "logits/chosen": 0.1575014591217041, "logits/rejected": -0.9461207985877991, "logps/chosen": -1.6632664203643799, "logps/rejected": -1.8798828125, "loss": 1.7845, "nll_loss": 1.72710120677948, "rewards/accuracies": 1.0, "rewards/chosen": -0.16632665693759918, "rewards/margins": 0.02166163921356201, "rewards/rejected": -0.18798828125, "step": 291 }, { "epoch": 0.46020488573680063, "grad_norm": 0.232927143573761, "learning_rate": 3.8002306197468983e-06, "log_odds_chosen": 0.16624774038791656, "log_odds_ratio": -0.6159506440162659, "logits/chosen": 0.15579693019390106, "logits/rejected": -1.0309805870056152, "logps/chosen": -1.6420388221740723, "logps/rejected": -1.777790904045105, "loss": 1.7816, "nll_loss": 1.7199894189834595, "rewards/accuracies": 0.75, "rewards/chosen": -0.16420388221740723, "rewards/margins": 0.013575192540884018, "rewards/rejected": -0.17777907848358154, "step": 292 }, { "epoch": 0.46178092986603625, "grad_norm": 0.26111435890197754, "learning_rate": 3.7978290016990367e-06, "log_odds_chosen": 0.4844040274620056, "log_odds_ratio": -0.4840275049209595, "logits/chosen": 0.12877169251441956, "logits/rejected": -1.0049679279327393, "logps/chosen": -1.6433277130126953, "logps/rejected": -2.0521345138549805, "loss": 1.7643, "nll_loss": 1.7158782482147217, "rewards/accuracies": 1.0, "rewards/chosen": -0.16433276236057281, "rewards/margins": 0.040880680084228516, "rewards/rejected": -0.20521345734596252, "step": 293 }, { "epoch": 0.46335697399527187, "grad_norm": 0.2526698112487793, "learning_rate": 3.795413801911776e-06, "log_odds_chosen": 0.5316009521484375, "log_odds_ratio": -0.46823519468307495, "logits/chosen": 0.1174880787730217, "logits/rejected": -1.0741727352142334, "logps/chosen": -1.5681180953979492, "logps/rejected": -2.0108184814453125, "loss": 1.7117, "nll_loss": 1.6648805141448975, "rewards/accuracies": 1.0, "rewards/chosen": -0.15681181848049164, "rewards/margins": 0.04427003860473633, "rewards/rejected": -0.20108187198638916, "step": 294 }, { "epoch": 0.4649330181245075, "grad_norm": 0.26307472586631775, "learning_rate": 3.7929850386307965e-06, "log_odds_chosen": 0.32403603196144104, "log_odds_ratio": -0.549573540687561, "logits/chosen": 0.2209596186876297, "logits/rejected": -0.8648887872695923, "logps/chosen": -1.775384783744812, "logps/rejected": -2.0520851612091064, "loss": 1.8883, "nll_loss": 1.8333498239517212, "rewards/accuracies": 0.875, "rewards/chosen": -0.17753848433494568, "rewards/margins": 0.02767005003988743, "rewards/rejected": -0.20520853996276855, "step": 295 }, { "epoch": 0.4665090622537431, "grad_norm": 0.24105559289455414, "learning_rate": 3.790542730204245e-06, "log_odds_chosen": 0.391379177570343, "log_odds_ratio": -0.5190234184265137, "logits/chosen": 0.17468759417533875, "logits/rejected": -1.050643801689148, "logps/chosen": -1.705775260925293, "logps/rejected": -2.037628412246704, "loss": 1.8141, "nll_loss": 1.7621614933013916, "rewards/accuracies": 1.0, "rewards/chosen": -0.1705775409936905, "rewards/margins": 0.033185333013534546, "rewards/rejected": -0.20376285910606384, "step": 296 }, { "epoch": 0.46808510638297873, "grad_norm": 0.2376202493906021, "learning_rate": 3.7880868950825935e-06, "log_odds_chosen": 0.40684062242507935, "log_odds_ratio": -0.510847806930542, "logits/chosen": 0.13849994540214539, "logits/rejected": -1.200305700302124, "logps/chosen": -1.6610805988311768, "logps/rejected": -2.001574754714966, "loss": 1.7604, "nll_loss": 1.7093130350112915, "rewards/accuracies": 1.0, "rewards/chosen": -0.16610805690288544, "rewards/margins": 0.034049421548843384, "rewards/rejected": -0.20015747845172882, "step": 297 }, { "epoch": 0.46966115051221435, "grad_norm": 0.2616525888442993, "learning_rate": 3.7856175518185058e-06, "log_odds_chosen": 0.3999539613723755, "log_odds_ratio": -0.5190368294715881, "logits/chosen": 0.015357280150055885, "logits/rejected": -1.1991338729858398, "logps/chosen": -1.6416277885437012, "logps/rejected": -1.9745447635650635, "loss": 1.7682, "nll_loss": 1.7162597179412842, "rewards/accuracies": 1.0, "rewards/chosen": -0.1641627848148346, "rewards/margins": 0.033291686326265335, "rewards/rejected": -0.19745448231697083, "step": 298 }, { "epoch": 0.47123719464145, "grad_norm": 0.23752158880233765, "learning_rate": 3.7831347190666883e-06, "log_odds_chosen": 0.4781672954559326, "log_odds_ratio": -0.488000750541687, "logits/chosen": 0.08399657905101776, "logits/rejected": -1.1212432384490967, "logps/chosen": -1.6346526145935059, "logps/rejected": -2.035963773727417, "loss": 1.7627, "nll_loss": 1.713881015777588, "rewards/accuracies": 0.875, "rewards/chosen": -0.16346527636051178, "rewards/margins": 0.0401310995221138, "rewards/rejected": -0.20359636843204498, "step": 299 }, { "epoch": 0.4728132387706856, "grad_norm": 0.25213325023651123, "learning_rate": 3.780638415583759e-06, "log_odds_chosen": 0.2852388620376587, "log_odds_ratio": -0.5705251097679138, "logits/chosen": 0.21283775568008423, "logits/rejected": -0.935249924659729, "logps/chosen": -1.6187589168548584, "logps/rejected": -1.8579552173614502, "loss": 1.7571, "nll_loss": 1.7000482082366943, "rewards/accuracies": 0.75, "rewards/chosen": -0.1618758887052536, "rewards/margins": 0.023919638246297836, "rewards/rejected": -0.18579553067684174, "step": 300 }, { "epoch": 0.4743892828999212, "grad_norm": 0.24251310527324677, "learning_rate": 3.7781286602280967e-06, "log_odds_chosen": 0.17071868479251862, "log_odds_ratio": -0.6216680407524109, "logits/chosen": 0.14868459105491638, "logits/rejected": -1.1415449380874634, "logps/chosen": -1.5997627973556519, "logps/rejected": -1.7335293292999268, "loss": 1.7454, "nll_loss": 1.6831833124160767, "rewards/accuracies": 0.75, "rewards/chosen": -0.1599762886762619, "rewards/margins": 0.013376658782362938, "rewards/rejected": -0.1733529418706894, "step": 301 }, { "epoch": 0.47596532702915684, "grad_norm": 0.22967545688152313, "learning_rate": 3.7756054719597044e-06, "log_odds_chosen": 0.26898688077926636, "log_odds_ratio": -0.5931567549705505, "logits/chosen": 0.033872295171022415, "logits/rejected": -1.1282514333724976, "logps/chosen": -1.6345136165618896, "logps/rejected": -1.8664180040359497, "loss": 1.7516, "nll_loss": 1.6922358274459839, "rewards/accuracies": 0.75, "rewards/chosen": -0.16345134377479553, "rewards/margins": 0.023190462961792946, "rewards/rejected": -0.18664182722568512, "step": 302 }, { "epoch": 0.47754137115839246, "grad_norm": 0.2190561145544052, "learning_rate": 3.773068869840066e-06, "log_odds_chosen": 0.19828103482723236, "log_odds_ratio": -0.6074354648590088, "logits/chosen": 0.2555277347564697, "logits/rejected": -1.1666932106018066, "logps/chosen": -1.6585826873779297, "logps/rejected": -1.8199939727783203, "loss": 1.7779, "nll_loss": 1.7171512842178345, "rewards/accuracies": 0.75, "rewards/chosen": -0.16585825383663177, "rewards/margins": 0.016141142696142197, "rewards/rejected": -0.18199938535690308, "step": 303 }, { "epoch": 0.4791174152876281, "grad_norm": 0.21842867136001587, "learning_rate": 3.770518873031997e-06, "log_odds_chosen": 0.44869300723075867, "log_odds_ratio": -0.4964669644832611, "logits/chosen": 0.03254036605358124, "logits/rejected": -0.8346494436264038, "logps/chosen": -1.5328748226165771, "logps/rejected": -1.8997446298599243, "loss": 1.674, "nll_loss": 1.6243867874145508, "rewards/accuracies": 1.0, "rewards/chosen": -0.15328750014305115, "rewards/margins": 0.03668695688247681, "rewards/rejected": -0.18997445702552795, "step": 304 }, { "epoch": 0.4806934594168637, "grad_norm": 0.253165602684021, "learning_rate": 3.7679555007995065e-06, "log_odds_chosen": 0.41193148493766785, "log_odds_ratio": -0.5260132551193237, "logits/chosen": 0.14702853560447693, "logits/rejected": -1.0447622537612915, "logps/chosen": -1.6558337211608887, "logps/rejected": -2.0012173652648926, "loss": 1.7843, "nll_loss": 1.731735110282898, "rewards/accuracies": 0.875, "rewards/chosen": -0.16558335721492767, "rewards/margins": 0.0345383882522583, "rewards/rejected": -0.20012176036834717, "step": 305 }, { "epoch": 0.48226950354609927, "grad_norm": 0.24374498426914215, "learning_rate": 3.7653787725076464e-06, "log_odds_chosen": 0.2701460123062134, "log_odds_ratio": -0.5720412731170654, "logits/chosen": 0.1489667445421219, "logits/rejected": -0.9628247618675232, "logps/chosen": -1.6143256425857544, "logps/rejected": -1.8385647535324097, "loss": 1.7374, "nll_loss": 1.680199146270752, "rewards/accuracies": 0.875, "rewards/chosen": -0.16143256425857544, "rewards/margins": 0.022423917427659035, "rewards/rejected": -0.18385647237300873, "step": 306 }, { "epoch": 0.4838455476753349, "grad_norm": 0.2575761675834656, "learning_rate": 3.7627887076223685e-06, "log_odds_chosen": 0.3698280155658722, "log_odds_ratio": -0.5293493270874023, "logits/chosen": 0.17162802815437317, "logits/rejected": -0.7795068621635437, "logps/chosen": -1.6772853136062622, "logps/rejected": -1.9871926307678223, "loss": 1.783, "nll_loss": 1.7300152778625488, "rewards/accuracies": 1.0, "rewards/chosen": -0.16772854328155518, "rewards/margins": 0.03099072352051735, "rewards/rejected": -0.19871927797794342, "step": 307 }, { "epoch": 0.4854215918045705, "grad_norm": 0.2139917016029358, "learning_rate": 3.7601853257103765e-06, "log_odds_chosen": 0.22644855082035065, "log_odds_ratio": -0.5928743481636047, "logits/chosen": 0.06793497502803802, "logits/rejected": -1.0903844833374023, "logps/chosen": -1.582783579826355, "logps/rejected": -1.7714552879333496, "loss": 1.6918, "nll_loss": 1.632529616355896, "rewards/accuracies": 0.875, "rewards/chosen": -0.15827836096286774, "rewards/margins": 0.018867187201976776, "rewards/rejected": -0.17714554071426392, "step": 308 }, { "epoch": 0.48699763593380613, "grad_norm": 0.21651345491409302, "learning_rate": 3.7575686464389767e-06, "log_odds_chosen": 0.3462998867034912, "log_odds_ratio": -0.5444170832633972, "logits/chosen": 0.10276569426059723, "logits/rejected": -1.1056041717529297, "logps/chosen": -1.5598326921463013, "logps/rejected": -1.8447059392929077, "loss": 1.6921, "nll_loss": 1.6376224756240845, "rewards/accuracies": 0.875, "rewards/chosen": -0.15598325431346893, "rewards/margins": 0.02848733589053154, "rewards/rejected": -0.18447057902812958, "step": 309 }, { "epoch": 0.48857368006304175, "grad_norm": 0.2089070826768875, "learning_rate": 3.7549386895759315e-06, "log_odds_chosen": 0.38229963183403015, "log_odds_ratio": -0.5227848291397095, "logits/chosen": 0.06746693700551987, "logits/rejected": -1.028963327407837, "logps/chosen": -1.5149016380310059, "logps/rejected": -1.8246971368789673, "loss": 1.6366, "nll_loss": 1.5843473672866821, "rewards/accuracies": 1.0, "rewards/chosen": -0.15149016678333282, "rewards/margins": 0.030979545786976814, "rewards/rejected": -0.1824697107076645, "step": 310 }, { "epoch": 0.49014972419227737, "grad_norm": 0.20956042408943176, "learning_rate": 3.7522954749893086e-06, "log_odds_chosen": 0.5883792042732239, "log_odds_ratio": -0.45304739475250244, "logits/chosen": -0.004524541087448597, "logits/rejected": -1.15907621383667, "logps/chosen": -1.5441385507583618, "logps/rejected": -2.0315380096435547, "loss": 1.6764, "nll_loss": 1.631089448928833, "rewards/accuracies": 1.0, "rewards/chosen": -0.1544138640165329, "rewards/margins": 0.048739951103925705, "rewards/rejected": -0.2031538188457489, "step": 311 }, { "epoch": 0.491725768321513, "grad_norm": 0.23161104321479797, "learning_rate": 3.749639022647332e-06, "log_odds_chosen": 0.35890865325927734, "log_odds_ratio": -0.5340113639831543, "logits/chosen": 0.06597714126110077, "logits/rejected": -1.0388400554656982, "logps/chosen": -1.6328809261322021, "logps/rejected": -1.932662010192871, "loss": 1.7561, "nll_loss": 1.7027454376220703, "rewards/accuracies": 1.0, "rewards/chosen": -0.16328811645507812, "rewards/margins": 0.02997809275984764, "rewards/rejected": -0.19326619803905487, "step": 312 }, { "epoch": 0.4933018124507486, "grad_norm": 0.20770005881786346, "learning_rate": 3.7469693526182304e-06, "log_odds_chosen": 0.513086199760437, "log_odds_ratio": -0.485725075006485, "logits/chosen": 0.0205635167658329, "logits/rejected": -1.4314470291137695, "logps/chosen": -1.581476092338562, "logps/rejected": -2.0070858001708984, "loss": 1.6904, "nll_loss": 1.6417973041534424, "rewards/accuracies": 0.875, "rewards/chosen": -0.15814761817455292, "rewards/margins": 0.04256095737218857, "rewards/rejected": -0.2007085680961609, "step": 313 }, { "epoch": 0.49487785657998423, "grad_norm": 0.21436423063278198, "learning_rate": 3.744286485070085e-06, "log_odds_chosen": 0.5086329579353333, "log_odds_ratio": -0.47319746017456055, "logits/chosen": 0.03913354501128197, "logits/rejected": -1.1685231924057007, "logps/chosen": -1.5282073020935059, "logps/rejected": -1.9466618299484253, "loss": 1.6421, "nll_loss": 1.594788908958435, "rewards/accuracies": 1.0, "rewards/chosen": -0.15282073616981506, "rewards/margins": 0.041845470666885376, "rewards/rejected": -0.19466620683670044, "step": 314 }, { "epoch": 0.49645390070921985, "grad_norm": 0.23891815543174744, "learning_rate": 3.7415904402706795e-06, "log_odds_chosen": 0.4192996621131897, "log_odds_ratio": -0.514187216758728, "logits/chosen": 0.06727111339569092, "logits/rejected": -1.1843537092208862, "logps/chosen": -1.7258471250534058, "logps/rejected": -2.082641363143921, "loss": 1.835, "nll_loss": 1.7836283445358276, "rewards/accuracies": 1.0, "rewards/chosen": -0.17258471250534058, "rewards/margins": 0.03567943722009659, "rewards/rejected": -0.20826414227485657, "step": 315 }, { "epoch": 0.4980299448384555, "grad_norm": 0.21085584163665771, "learning_rate": 3.7388812385873435e-06, "log_odds_chosen": 0.3644852042198181, "log_odds_ratio": -0.5296192169189453, "logits/chosen": 0.030755888670682907, "logits/rejected": -1.189257264137268, "logps/chosen": -1.5624669790267944, "logps/rejected": -1.8610371351242065, "loss": 1.6768, "nll_loss": 1.6238601207733154, "rewards/accuracies": 1.0, "rewards/chosen": -0.15624670684337616, "rewards/margins": 0.02985702082514763, "rewards/rejected": -0.1861037313938141, "step": 316 }, { "epoch": 0.4996059889676911, "grad_norm": 0.23275373876094818, "learning_rate": 3.7361589004868033e-06, "log_odds_chosen": 0.5013652443885803, "log_odds_ratio": -0.479345440864563, "logits/chosen": 0.14082355797290802, "logits/rejected": -1.2382307052612305, "logps/chosen": -1.6419178247451782, "logps/rejected": -2.0661187171936035, "loss": 1.7418, "nll_loss": 1.6938456296920776, "rewards/accuracies": 1.0, "rewards/chosen": -0.16419179737567902, "rewards/margins": 0.042420096695423126, "rewards/rejected": -0.20661188662052155, "step": 317 }, { "epoch": 0.5011820330969267, "grad_norm": 0.2070867121219635, "learning_rate": 3.733423446535022e-06, "log_odds_chosen": 0.5261310338973999, "log_odds_ratio": -0.47375503182411194, "logits/chosen": 0.12559077143669128, "logits/rejected": -1.3008148670196533, "logps/chosen": -1.634326457977295, "logps/rejected": -2.0824835300445557, "loss": 1.7359, "nll_loss": 1.688564658164978, "rewards/accuracies": 1.0, "rewards/chosen": -0.16343267261981964, "rewards/margins": 0.044815681874752045, "rewards/rejected": -0.20824836194515228, "step": 318 }, { "epoch": 0.5027580772261623, "grad_norm": 0.20222921669483185, "learning_rate": 3.7306748973970476e-06, "log_odds_chosen": 0.39474886655807495, "log_odds_ratio": -0.5250571370124817, "logits/chosen": 0.03667742758989334, "logits/rejected": -1.4291538000106812, "logps/chosen": -1.5048575401306152, "logps/rejected": -1.823891282081604, "loss": 1.6483, "nll_loss": 1.5958224534988403, "rewards/accuracies": 0.875, "rewards/chosen": -0.15048575401306152, "rewards/margins": 0.03190337494015694, "rewards/rejected": -0.18238912522792816, "step": 319 }, { "epoch": 0.5043341213553979, "grad_norm": 0.27620604634284973, "learning_rate": 3.7279132738368564e-06, "log_odds_chosen": 0.18474048376083374, "log_odds_ratio": -0.6092777252197266, "logits/chosen": 0.1035664826631546, "logits/rejected": -1.2585889101028442, "logps/chosen": -1.7019753456115723, "logps/rejected": -1.8543686866760254, "loss": 1.8124, "nll_loss": 1.7514902353286743, "rewards/accuracies": 0.75, "rewards/chosen": -0.17019754648208618, "rewards/margins": 0.015239320695400238, "rewards/rejected": -0.18543685972690582, "step": 320 }, { "epoch": 0.5059101654846335, "grad_norm": 0.21710968017578125, "learning_rate": 3.725138596717195e-06, "log_odds_chosen": 0.44385021924972534, "log_odds_ratio": -0.4991372227668762, "logits/chosen": 0.023607883602380753, "logits/rejected": -1.283747911453247, "logps/chosen": -1.5559569597244263, "logps/rejected": -1.9218320846557617, "loss": 1.6659, "nll_loss": 1.6159745454788208, "rewards/accuracies": 1.0, "rewards/chosen": -0.15559569001197815, "rewards/margins": 0.03658752888441086, "rewards/rejected": -0.1921832263469696, "step": 321 }, { "epoch": 0.5074862096138691, "grad_norm": 0.20364168286323547, "learning_rate": 3.7223508869994244e-06, "log_odds_chosen": 0.4654800295829773, "log_odds_ratio": -0.4937525987625122, "logits/chosen": 0.04612987861037254, "logits/rejected": -1.3605108261108398, "logps/chosen": -1.6008849143981934, "logps/rejected": -1.9879066944122314, "loss": 1.7137, "nll_loss": 1.6643142700195312, "rewards/accuracies": 1.0, "rewards/chosen": -0.16008850932121277, "rewards/margins": 0.038702160120010376, "rewards/rejected": -0.19879068434238434, "step": 322 }, { "epoch": 0.5090622537431048, "grad_norm": 0.23145321011543274, "learning_rate": 3.7195501657433594e-06, "log_odds_chosen": 0.39249351620674133, "log_odds_ratio": -0.5205338597297668, "logits/chosen": -0.0046775080263614655, "logits/rejected": -1.3155083656311035, "logps/chosen": -1.6052358150482178, "logps/rejected": -1.9290968179702759, "loss": 1.7238, "nll_loss": 1.6717445850372314, "rewards/accuracies": 1.0, "rewards/chosen": -0.16052357852458954, "rewards/margins": 0.03238610923290253, "rewards/rejected": -0.19290968775749207, "step": 323 }, { "epoch": 0.5106382978723404, "grad_norm": 0.19913478195667267, "learning_rate": 3.716736454107111e-06, "log_odds_chosen": 0.5085100531578064, "log_odds_ratio": -0.47588035464286804, "logits/chosen": 0.002479949500411749, "logits/rejected": -1.0518862009048462, "logps/chosen": -1.5002402067184448, "logps/rejected": -1.9133625030517578, "loss": 1.6117, "nll_loss": 1.5641216039657593, "rewards/accuracies": 1.0, "rewards/chosen": -0.15002401173114777, "rewards/margins": 0.04131225496530533, "rewards/rejected": -0.1913362741470337, "step": 324 }, { "epoch": 0.512214342001576, "grad_norm": 0.21510443091392517, "learning_rate": 3.7139097733469277e-06, "log_odds_chosen": 0.5286773443222046, "log_odds_ratio": -0.46826720237731934, "logits/chosen": 0.02427489310503006, "logits/rejected": -1.2997654676437378, "logps/chosen": -1.54619562625885, "logps/rejected": -1.982952356338501, "loss": 1.6601, "nll_loss": 1.6132692098617554, "rewards/accuracies": 1.0, "rewards/chosen": -0.15461957454681396, "rewards/margins": 0.04367566481232643, "rewards/rejected": -0.1982952207326889, "step": 325 }, { "epoch": 0.5137903861308116, "grad_norm": 0.22013559937477112, "learning_rate": 3.711070144817032e-06, "log_odds_chosen": 0.4152149558067322, "log_odds_ratio": -0.512770414352417, "logits/chosen": 0.005936339497566223, "logits/rejected": -1.122101068496704, "logps/chosen": -1.633888840675354, "logps/rejected": -1.9839468002319336, "loss": 1.754, "nll_loss": 1.7027238607406616, "rewards/accuracies": 1.0, "rewards/chosen": -0.16338886320590973, "rewards/margins": 0.03500579297542572, "rewards/rejected": -0.19839467108249664, "step": 326 }, { "epoch": 0.5153664302600472, "grad_norm": 0.24476923048496246, "learning_rate": 3.708217589969461e-06, "log_odds_chosen": 0.5117456912994385, "log_odds_ratio": -0.4778427481651306, "logits/chosen": -0.02480306476354599, "logits/rejected": -1.3842414617538452, "logps/chosen": -1.6081162691116333, "logps/rejected": -2.0358729362487793, "loss": 1.7307, "nll_loss": 1.682942509651184, "rewards/accuracies": 1.0, "rewards/chosen": -0.160811647772789, "rewards/margins": 0.042775679379701614, "rewards/rejected": -0.20358730852603912, "step": 327 }, { "epoch": 0.5169424743892829, "grad_norm": 0.2023211419582367, "learning_rate": 3.705352130353904e-06, "log_odds_chosen": 0.4642333984375, "log_odds_ratio": -0.4990030825138092, "logits/chosen": 0.04158155620098114, "logits/rejected": -1.2979916334152222, "logps/chosen": -1.5249441862106323, "logps/rejected": -1.9054869413375854, "loss": 1.6543, "nll_loss": 1.6043576002120972, "rewards/accuracies": 1.0, "rewards/chosen": -0.152494415640831, "rewards/margins": 0.038054272532463074, "rewards/rejected": -0.19054868817329407, "step": 328 }, { "epoch": 0.5185185185185185, "grad_norm": 0.2157369703054428, "learning_rate": 3.7024737876175404e-06, "log_odds_chosen": 0.4267793893814087, "log_odds_ratio": -0.5041587352752686, "logits/chosen": -0.02120812237262726, "logits/rejected": -1.1328731775283813, "logps/chosen": -1.5772178173065186, "logps/rejected": -1.9269858598709106, "loss": 1.683, "nll_loss": 1.6326076984405518, "rewards/accuracies": 1.0, "rewards/chosen": -0.15772177278995514, "rewards/margins": 0.03497680649161339, "rewards/rejected": -0.19269859790802002, "step": 329 }, { "epoch": 0.5200945626477541, "grad_norm": 0.22613677382469177, "learning_rate": 3.699582583504874e-06, "log_odds_chosen": 0.2962914705276489, "log_odds_ratio": -0.5632017254829407, "logits/chosen": 0.033639900386333466, "logits/rejected": -1.3186163902282715, "logps/chosen": -1.7170500755310059, "logps/rejected": -1.966399908065796, "loss": 1.815, "nll_loss": 1.7586567401885986, "rewards/accuracies": 0.875, "rewards/chosen": -0.17170500755310059, "rewards/margins": 0.024934988468885422, "rewards/rejected": -0.1966399997472763, "step": 330 }, { "epoch": 0.5216706067769897, "grad_norm": 0.20476558804512024, "learning_rate": 3.696678539857571e-06, "log_odds_chosen": 0.40065276622772217, "log_odds_ratio": -0.5229502320289612, "logits/chosen": -0.00485864095389843, "logits/rejected": -1.3043968677520752, "logps/chosen": -1.6962597370147705, "logps/rejected": -2.0372161865234375, "loss": 1.8021, "nll_loss": 1.7498358488082886, "rewards/accuracies": 0.875, "rewards/chosen": -0.16962596774101257, "rewards/margins": 0.034095652401447296, "rewards/rejected": -0.20372162759304047, "step": 331 }, { "epoch": 0.5232466509062254, "grad_norm": 0.20712663233280182, "learning_rate": 3.6937616786142956e-06, "log_odds_chosen": 0.5233447551727295, "log_odds_ratio": -0.47018271684646606, "logits/chosen": 0.010211546905338764, "logits/rejected": -1.0574318170547485, "logps/chosen": -1.5134122371673584, "logps/rejected": -1.9436025619506836, "loss": 1.6422, "nll_loss": 1.595203161239624, "rewards/accuracies": 1.0, "rewards/chosen": -0.15134122967720032, "rewards/margins": 0.04301903396844864, "rewards/rejected": -0.19436024129390717, "step": 332 }, { "epoch": 0.524822695035461, "grad_norm": 0.18907472491264343, "learning_rate": 3.6908320218105393e-06, "log_odds_chosen": 0.3291381895542145, "log_odds_ratio": -0.5515700578689575, "logits/chosen": -0.01022535003721714, "logits/rejected": -1.2733728885650635, "logps/chosen": -1.5107743740081787, "logps/rejected": -1.7811356782913208, "loss": 1.6236, "nll_loss": 1.5684043169021606, "rewards/accuracies": 0.875, "rewards/chosen": -0.15107741951942444, "rewards/margins": 0.027036139741539955, "rewards/rejected": -0.17811356484889984, "step": 333 }, { "epoch": 0.5263987391646966, "grad_norm": 0.19910985231399536, "learning_rate": 3.6878895915784607e-06, "log_odds_chosen": 0.48929572105407715, "log_odds_ratio": -0.483676016330719, "logits/chosen": 0.03388974070549011, "logits/rejected": -1.230672836303711, "logps/chosen": -1.4776809215545654, "logps/rejected": -1.8740699291229248, "loss": 1.5862, "nll_loss": 1.5378473997116089, "rewards/accuracies": 1.0, "rewards/chosen": -0.14776809513568878, "rewards/margins": 0.03963891416788101, "rewards/rejected": -0.187406986951828, "step": 334 }, { "epoch": 0.5279747832939322, "grad_norm": 0.19112317264080048, "learning_rate": 3.6849344101467147e-06, "log_odds_chosen": 0.3993302285671234, "log_odds_ratio": -0.5199868679046631, "logits/chosen": 0.02075035311281681, "logits/rejected": -1.4165470600128174, "logps/chosen": -1.5604709386825562, "logps/rejected": -1.890925407409668, "loss": 1.6795, "nll_loss": 1.6274938583374023, "rewards/accuracies": 0.875, "rewards/chosen": -0.15604707598686218, "rewards/margins": 0.03304546698927879, "rewards/rejected": -0.18909254670143127, "step": 335 }, { "epoch": 0.5295508274231678, "grad_norm": 0.2065410017967224, "learning_rate": 3.6819664998402857e-06, "log_odds_chosen": 0.3870427906513214, "log_odds_ratio": -0.521834671497345, "logits/chosen": 0.007402241230010986, "logits/rejected": -1.2406339645385742, "logps/chosen": -1.597013235092163, "logps/rejected": -1.9189107418060303, "loss": 1.7091, "nll_loss": 1.65691339969635, "rewards/accuracies": 1.0, "rewards/chosen": -0.15970134735107422, "rewards/margins": 0.032189756631851196, "rewards/rejected": -0.19189107418060303, "step": 336 }, { "epoch": 0.5311268715524035, "grad_norm": 0.22015894949436188, "learning_rate": 3.6789858830803186e-06, "log_odds_chosen": 0.4236854314804077, "log_odds_ratio": -0.5088356733322144, "logits/chosen": 0.009572651237249374, "logits/rejected": -1.2554011344909668, "logps/chosen": -1.649095058441162, "logps/rejected": -2.0025815963745117, "loss": 1.7505, "nll_loss": 1.6996192932128906, "rewards/accuracies": 1.0, "rewards/chosen": -0.16490954160690308, "rewards/margins": 0.035348646342754364, "rewards/rejected": -0.20025816559791565, "step": 337 }, { "epoch": 0.5327029156816391, "grad_norm": 0.20396317541599274, "learning_rate": 3.6759925823839486e-06, "log_odds_chosen": 0.3307921886444092, "log_odds_ratio": -0.5461194515228271, "logits/chosen": -0.017404936254024506, "logits/rejected": -1.1315640211105347, "logps/chosen": -1.5301204919815063, "logps/rejected": -1.7987494468688965, "loss": 1.6634, "nll_loss": 1.6087586879730225, "rewards/accuracies": 1.0, "rewards/chosen": -0.15301203727722168, "rewards/margins": 0.02686290442943573, "rewards/rejected": -0.1798749417066574, "step": 338 }, { "epoch": 0.5342789598108747, "grad_norm": 0.19404453039169312, "learning_rate": 3.672986620364134e-06, "log_odds_chosen": 0.4503750205039978, "log_odds_ratio": -0.49645406007766724, "logits/chosen": 0.02146240696310997, "logits/rejected": -1.14137601852417, "logps/chosen": -1.6056911945343018, "logps/rejected": -1.9799296855926514, "loss": 1.7114, "nll_loss": 1.6618030071258545, "rewards/accuracies": 1.0, "rewards/chosen": -0.16056913137435913, "rewards/margins": 0.03742384910583496, "rewards/rejected": -0.1979929804801941, "step": 339 }, { "epoch": 0.5358550039401103, "grad_norm": 0.19405515491962433, "learning_rate": 3.669968019729481e-06, "log_odds_chosen": 0.5453786849975586, "log_odds_ratio": -0.46696317195892334, "logits/chosen": -0.03269782289862633, "logits/rejected": -1.4044153690338135, "logps/chosen": -1.4841482639312744, "logps/rejected": -1.9323790073394775, "loss": 1.5994, "nll_loss": 1.5526580810546875, "rewards/accuracies": 1.0, "rewards/chosen": -0.14841482043266296, "rewards/margins": 0.04482308030128479, "rewards/rejected": -0.19323790073394775, "step": 340 }, { "epoch": 0.5374310480693459, "grad_norm": 0.20472969114780426, "learning_rate": 3.666936803284076e-06, "log_odds_chosen": 0.47615405917167664, "log_odds_ratio": -0.4855659306049347, "logits/chosen": 0.01596236228942871, "logits/rejected": -1.2273820638656616, "logps/chosen": -1.6264029741287231, "logps/rejected": -2.0267691612243652, "loss": 1.7196, "nll_loss": 1.6710734367370605, "rewards/accuracies": 1.0, "rewards/chosen": -0.1626402884721756, "rewards/margins": 0.0400366336107254, "rewards/rejected": -0.202676922082901, "step": 341 }, { "epoch": 0.5390070921985816, "grad_norm": 0.21434363722801208, "learning_rate": 3.663892993927312e-06, "log_odds_chosen": 0.5617402791976929, "log_odds_ratio": -0.4550952911376953, "logits/chosen": -0.059917159378528595, "logits/rejected": -1.359694004058838, "logps/chosen": -1.575050950050354, "logps/rejected": -2.043088674545288, "loss": 1.6888, "nll_loss": 1.643282175064087, "rewards/accuracies": 1.0, "rewards/chosen": -0.1575051099061966, "rewards/margins": 0.046803757548332214, "rewards/rejected": -0.20430885255336761, "step": 342 }, { "epoch": 0.5405831363278172, "grad_norm": 0.20112687349319458, "learning_rate": 3.6608366146537136e-06, "log_odds_chosen": 0.6060886383056641, "log_odds_ratio": -0.45338305830955505, "logits/chosen": -0.08892233669757843, "logits/rejected": -1.231791377067566, "logps/chosen": -1.5024844408035278, "logps/rejected": -2.009472608566284, "loss": 1.6204, "nll_loss": 1.575110912322998, "rewards/accuracies": 1.0, "rewards/chosen": -0.1502484530210495, "rewards/margins": 0.05069882422685623, "rewards/rejected": -0.20094728469848633, "step": 343 }, { "epoch": 0.5421591804570528, "grad_norm": 0.20276731252670288, "learning_rate": 3.6577676885527674e-06, "log_odds_chosen": 0.47172704339027405, "log_odds_ratio": -0.4901059567928314, "logits/chosen": -0.064692422747612, "logits/rejected": -1.493216633796692, "logps/chosen": -1.5645720958709717, "logps/rejected": -1.954676866531372, "loss": 1.664, "nll_loss": 1.6149814128875732, "rewards/accuracies": 1.0, "rewards/chosen": -0.15645720064640045, "rewards/margins": 0.03901049122214317, "rewards/rejected": -0.19546771049499512, "step": 344 }, { "epoch": 0.5437352245862884, "grad_norm": 0.19876694679260254, "learning_rate": 3.654686238808744e-06, "log_odds_chosen": 0.4601633548736572, "log_odds_ratio": -0.49714383482933044, "logits/chosen": -0.09634008258581161, "logits/rejected": -1.3992743492126465, "logps/chosen": -1.657270908355713, "logps/rejected": -2.0440926551818848, "loss": 1.7581, "nll_loss": 1.7084193229675293, "rewards/accuracies": 1.0, "rewards/chosen": -0.16572707891464233, "rewards/margins": 0.03868217021226883, "rewards/rejected": -0.20440925657749176, "step": 345 }, { "epoch": 0.545311268715524, "grad_norm": 0.18044152855873108, "learning_rate": 3.6515922887005245e-06, "log_odds_chosen": 0.6581941843032837, "log_odds_ratio": -0.42069223523139954, "logits/chosen": -0.19447211921215057, "logits/rejected": -1.4484854936599731, "logps/chosen": -1.4948885440826416, "logps/rejected": -2.034649133682251, "loss": 1.6052, "nll_loss": 1.563119888305664, "rewards/accuracies": 1.0, "rewards/chosen": -0.14948883652687073, "rewards/margins": 0.053976062685251236, "rewards/rejected": -0.20346491038799286, "step": 346 }, { "epoch": 0.5468873128447597, "grad_norm": 0.1821555495262146, "learning_rate": 3.6484858616014236e-06, "log_odds_chosen": 0.29319724440574646, "log_odds_ratio": -0.5593742728233337, "logits/chosen": -0.02489875629544258, "logits/rejected": -1.4301215410232544, "logps/chosen": -1.6183066368103027, "logps/rejected": -1.8595659732818604, "loss": 1.7166, "nll_loss": 1.6606316566467285, "rewards/accuracies": 1.0, "rewards/chosen": -0.16183066368103027, "rewards/margins": 0.024125942960381508, "rewards/rejected": -0.18595659732818604, "step": 347 }, { "epoch": 0.5484633569739953, "grad_norm": 0.20152583718299866, "learning_rate": 3.6453669809790154e-06, "log_odds_chosen": 0.34444230794906616, "log_odds_ratio": -0.5457963943481445, "logits/chosen": 0.003659643232822418, "logits/rejected": -1.1108278036117554, "logps/chosen": -1.548649549484253, "logps/rejected": -1.8318045139312744, "loss": 1.6556, "nll_loss": 1.6010490655899048, "rewards/accuracies": 1.0, "rewards/chosen": -0.15486496686935425, "rewards/margins": 0.02831549569964409, "rewards/rejected": -0.18318045139312744, "step": 348 }, { "epoch": 0.5500394011032309, "grad_norm": 0.19325992465019226, "learning_rate": 3.642235670394952e-06, "log_odds_chosen": 0.43656274676322937, "log_odds_ratio": -0.5032880902290344, "logits/chosen": -0.02493971772491932, "logits/rejected": -1.0908689498901367, "logps/chosen": -1.5585966110229492, "logps/rejected": -1.9134851694107056, "loss": 1.6744, "nll_loss": 1.6240770816802979, "rewards/accuracies": 1.0, "rewards/chosen": -0.15585967898368835, "rewards/margins": 0.035488829016685486, "rewards/rejected": -0.19134849309921265, "step": 349 }, { "epoch": 0.5516154452324665, "grad_norm": 0.19222001731395721, "learning_rate": 3.63909195350479e-06, "log_odds_chosen": 0.41014277935028076, "log_odds_ratio": -0.5183743238449097, "logits/chosen": 0.04076732322573662, "logits/rejected": -1.2510112524032593, "logps/chosen": -1.6457445621490479, "logps/rejected": -1.9860320091247559, "loss": 1.7474, "nll_loss": 1.6955746412277222, "rewards/accuracies": 0.875, "rewards/chosen": -0.16457447409629822, "rewards/margins": 0.03402874246239662, "rewards/rejected": -0.19860321283340454, "step": 350 }, { "epoch": 0.5531914893617021, "grad_norm": 0.19920876622200012, "learning_rate": 3.635935854057809e-06, "log_odds_chosen": 0.3992159962654114, "log_odds_ratio": -0.5221322774887085, "logits/chosen": -0.14892584085464478, "logits/rejected": -0.9987085461616516, "logps/chosen": -1.5974595546722412, "logps/rejected": -1.9293639659881592, "loss": 1.7044, "nll_loss": 1.6521823406219482, "rewards/accuracies": 0.875, "rewards/chosen": -0.1597459614276886, "rewards/margins": 0.03319043666124344, "rewards/rejected": -0.19293639063835144, "step": 351 }, { "epoch": 0.5547675334909378, "grad_norm": 0.19184619188308716, "learning_rate": 3.6327673958968327e-06, "log_odds_chosen": 0.5156201720237732, "log_odds_ratio": -0.4716494679450989, "logits/chosen": -0.10240821540355682, "logits/rejected": -1.2988749742507935, "logps/chosen": -1.4805288314819336, "logps/rejected": -1.8984272480010986, "loss": 1.6073, "nll_loss": 1.5601266622543335, "rewards/accuracies": 1.0, "rewards/chosen": -0.1480528861284256, "rewards/margins": 0.04178984463214874, "rewards/rejected": -0.18984271585941315, "step": 352 }, { "epoch": 0.5563435776201734, "grad_norm": 0.17695419490337372, "learning_rate": 3.6295866029580483e-06, "log_odds_chosen": 0.49165239930152893, "log_odds_ratio": -0.4805663824081421, "logits/chosen": -0.09917198866605759, "logits/rejected": -1.3583862781524658, "logps/chosen": -1.5266773700714111, "logps/rejected": -1.9281859397888184, "loss": 1.623, "nll_loss": 1.5749820470809937, "rewards/accuracies": 1.0, "rewards/chosen": -0.15266773104667664, "rewards/margins": 0.04015086218714714, "rewards/rejected": -0.19281861186027527, "step": 353 }, { "epoch": 0.557919621749409, "grad_norm": 0.18373258411884308, "learning_rate": 3.626393499270829e-06, "log_odds_chosen": 0.45253658294677734, "log_odds_ratio": -0.5001296997070312, "logits/chosen": -0.09345138818025589, "logits/rejected": -1.326229453086853, "logps/chosen": -1.4635684490203857, "logps/rejected": -1.8293596506118774, "loss": 1.5919, "nll_loss": 1.541857361793518, "rewards/accuracies": 1.0, "rewards/chosen": -0.14635683596134186, "rewards/margins": 0.03657911717891693, "rewards/rejected": -0.18293596804141998, "step": 354 }, { "epoch": 0.5594956658786446, "grad_norm": 0.19679243862628937, "learning_rate": 3.6231881089575466e-06, "log_odds_chosen": 0.6074280142784119, "log_odds_ratio": -0.43782979249954224, "logits/chosen": -0.16901959478855133, "logits/rejected": -1.4631741046905518, "logps/chosen": -1.5110644102096558, "logps/rejected": -2.0135817527770996, "loss": 1.5938, "nll_loss": 1.550012230873108, "rewards/accuracies": 1.0, "rewards/chosen": -0.15110644698143005, "rewards/margins": 0.05025171861052513, "rewards/rejected": -0.2013581544160843, "step": 355 }, { "epoch": 0.5610717100078803, "grad_norm": 0.2090955525636673, "learning_rate": 3.6199704562333945e-06, "log_odds_chosen": 0.4590110778808594, "log_odds_ratio": -0.4973413050174713, "logits/chosen": -0.13931547105312347, "logits/rejected": -1.3538612127304077, "logps/chosen": -1.4821869134902954, "logps/rejected": -1.8512073755264282, "loss": 1.5913, "nll_loss": 1.5415163040161133, "rewards/accuracies": 1.0, "rewards/chosen": -0.14821870625019073, "rewards/margins": 0.03690203279256821, "rewards/rejected": -0.18512074649333954, "step": 356 }, { "epoch": 0.5626477541371159, "grad_norm": 0.20289309322834015, "learning_rate": 3.6167405654062024e-06, "log_odds_chosen": 0.4794601798057556, "log_odds_ratio": -0.49567416310310364, "logits/chosen": -0.07716728746891022, "logits/rejected": -1.2874313592910767, "logps/chosen": -1.541725516319275, "logps/rejected": -1.9342597723007202, "loss": 1.6395, "nll_loss": 1.589914321899414, "rewards/accuracies": 1.0, "rewards/chosen": -0.15417256951332092, "rewards/margins": 0.03925342112779617, "rewards/rejected": -0.1934259682893753, "step": 357 }, { "epoch": 0.5642237982663515, "grad_norm": 0.1778980940580368, "learning_rate": 3.6134984608762515e-06, "log_odds_chosen": 0.5081315636634827, "log_odds_ratio": -0.4739688038825989, "logits/chosen": -0.08499579131603241, "logits/rejected": -1.3757938146591187, "logps/chosen": -1.4640223979949951, "logps/rejected": -1.8742594718933105, "loss": 1.5833, "nll_loss": 1.5359253883361816, "rewards/accuracies": 1.0, "rewards/chosen": -0.1464022397994995, "rewards/margins": 0.041023701429367065, "rewards/rejected": -0.18742592632770538, "step": 358 }, { "epoch": 0.5657998423955871, "grad_norm": 0.2062489539384842, "learning_rate": 3.6102441671360945e-06, "log_odds_chosen": 0.4876347780227661, "log_odds_ratio": -0.48857545852661133, "logits/chosen": -0.12350551038980484, "logits/rejected": -1.2674330472946167, "logps/chosen": -1.6090378761291504, "logps/rejected": -2.016418218612671, "loss": 1.7003, "nll_loss": 1.651473879814148, "rewards/accuracies": 1.0, "rewards/chosen": -0.16090378165245056, "rewards/margins": 0.04073803871870041, "rewards/rejected": -0.20164184272289276, "step": 359 }, { "epoch": 0.5673758865248227, "grad_norm": 0.19282633066177368, "learning_rate": 3.6069777087703654e-06, "log_odds_chosen": 0.5007855296134949, "log_odds_ratio": -0.48002350330352783, "logits/chosen": -0.10314866900444031, "logits/rejected": -1.2184945344924927, "logps/chosen": -1.533939242362976, "logps/rejected": -1.9453692436218262, "loss": 1.6501, "nll_loss": 1.602098822593689, "rewards/accuracies": 1.0, "rewards/chosen": -0.1533939242362976, "rewards/margins": 0.041142985224723816, "rewards/rejected": -0.19453692436218262, "step": 360 }, { "epoch": 0.5689519306540584, "grad_norm": 0.1999576985836029, "learning_rate": 3.6036991104555973e-06, "log_odds_chosen": 0.4542830288410187, "log_odds_ratio": -0.5044661164283752, "logits/chosen": -0.08808690309524536, "logits/rejected": -1.3358922004699707, "logps/chosen": -1.6718225479125977, "logps/rejected": -2.0570645332336426, "loss": 1.7592, "nll_loss": 1.7087852954864502, "rewards/accuracies": 0.875, "rewards/chosen": -0.16718226671218872, "rewards/margins": 0.03852420300245285, "rewards/rejected": -0.20570647716522217, "step": 361 }, { "epoch": 0.570527974783294, "grad_norm": 0.19183827936649323, "learning_rate": 3.600408396960034e-06, "log_odds_chosen": 0.5055266618728638, "log_odds_ratio": -0.4799540042877197, "logits/chosen": -0.06735289096832275, "logits/rejected": -0.9921278953552246, "logps/chosen": -1.4769415855407715, "logps/rejected": -1.8809959888458252, "loss": 1.5713, "nll_loss": 1.523301601409912, "rewards/accuracies": 1.0, "rewards/chosen": -0.1476941704750061, "rewards/margins": 0.04040544107556343, "rewards/rejected": -0.18809959292411804, "step": 362 }, { "epoch": 0.5721040189125296, "grad_norm": 0.19298037886619568, "learning_rate": 3.5971055931434447e-06, "log_odds_chosen": 0.4434927701950073, "log_odds_ratio": -0.5033714771270752, "logits/chosen": -0.04916198179125786, "logits/rejected": -1.233707308769226, "logps/chosen": -1.5274627208709717, "logps/rejected": -1.8934059143066406, "loss": 1.6346, "nll_loss": 1.5842169523239136, "rewards/accuracies": 0.875, "rewards/chosen": -0.1527462899684906, "rewards/margins": 0.03659432381391525, "rewards/rejected": -0.18934059143066406, "step": 363 }, { "epoch": 0.5736800630417652, "grad_norm": 0.1897989809513092, "learning_rate": 3.5937907239569343e-06, "log_odds_chosen": 0.47198542952537537, "log_odds_ratio": -0.4953606128692627, "logits/chosen": -0.1299748420715332, "logits/rejected": -1.3614236116409302, "logps/chosen": -1.579548954963684, "logps/rejected": -1.9681391716003418, "loss": 1.6868, "nll_loss": 1.6372454166412354, "rewards/accuracies": 0.875, "rewards/chosen": -0.15795490145683289, "rewards/margins": 0.038859013468027115, "rewards/rejected": -0.1968139261007309, "step": 364 }, { "epoch": 0.5752561071710008, "grad_norm": 0.22458185255527496, "learning_rate": 3.5904638144427572e-06, "log_odds_chosen": 0.2747001647949219, "log_odds_ratio": -0.5726035833358765, "logits/chosen": -0.09000087529420853, "logits/rejected": -1.1089969873428345, "logps/chosen": -1.6369290351867676, "logps/rejected": -1.8634740114212036, "loss": 1.7333, "nll_loss": 1.6760823726654053, "rewards/accuracies": 0.75, "rewards/chosen": -0.1636928915977478, "rewards/margins": 0.02265450730919838, "rewards/rejected": -0.18634741008281708, "step": 365 }, { "epoch": 0.5768321513002365, "grad_norm": 0.2086506485939026, "learning_rate": 3.5871248897341246e-06, "log_odds_chosen": 0.5135898590087891, "log_odds_ratio": -0.4752338230609894, "logits/chosen": -0.12942443788051605, "logits/rejected": -1.0795626640319824, "logps/chosen": -1.4760322570800781, "logps/rejected": -1.8992903232574463, "loss": 1.598, "nll_loss": 1.5504556894302368, "rewards/accuracies": 1.0, "rewards/chosen": -0.14760322868824005, "rewards/margins": 0.042325813323259354, "rewards/rejected": -0.1899290531873703, "step": 366 }, { "epoch": 0.578408195429472, "grad_norm": 0.18183429539203644, "learning_rate": 3.5837739750550182e-06, "log_odds_chosen": 0.4922761619091034, "log_odds_ratio": -0.4857975244522095, "logits/chosen": -0.15072286128997803, "logits/rejected": -1.6683162450790405, "logps/chosen": -1.5550795793533325, "logps/rejected": -1.9653193950653076, "loss": 1.6554, "nll_loss": 1.6068187952041626, "rewards/accuracies": 1.0, "rewards/chosen": -0.15550795197486877, "rewards/margins": 0.04102398827672005, "rewards/rejected": -0.19653193652629852, "step": 367 }, { "epoch": 0.5799842395587076, "grad_norm": 0.1786677986383438, "learning_rate": 3.5804110957199977e-06, "log_odds_chosen": 0.5304347276687622, "log_odds_ratio": -0.4684828817844391, "logits/chosen": -0.031741030514240265, "logits/rejected": -1.2201794385910034, "logps/chosen": -1.533761739730835, "logps/rejected": -1.97342848777771, "loss": 1.6311, "nll_loss": 1.584226369857788, "rewards/accuracies": 1.0, "rewards/chosen": -0.15337617695331573, "rewards/margins": 0.04396669566631317, "rewards/rejected": -0.1973428726196289, "step": 368 }, { "epoch": 0.5815602836879432, "grad_norm": 0.19358626008033752, "learning_rate": 3.577036277134011e-06, "log_odds_chosen": 0.6033509373664856, "log_odds_ratio": -0.44030019640922546, "logits/chosen": -0.12189745157957077, "logits/rejected": -1.4489309787750244, "logps/chosen": -1.5189951658248901, "logps/rejected": -2.0174660682678223, "loss": 1.6305, "nll_loss": 1.5864982604980469, "rewards/accuracies": 1.0, "rewards/chosen": -0.1518995314836502, "rewards/margins": 0.04984709620475769, "rewards/rejected": -0.2017466127872467, "step": 369 }, { "epoch": 0.5831363278171788, "grad_norm": 0.20722126960754395, "learning_rate": 3.5736495447922e-06, "log_odds_chosen": 0.38122087717056274, "log_odds_ratio": -0.5253958106040955, "logits/chosen": -0.11854588240385056, "logits/rejected": -1.445725440979004, "logps/chosen": -1.643795371055603, "logps/rejected": -1.960480809211731, "loss": 1.7461, "nll_loss": 1.693605899810791, "rewards/accuracies": 1.0, "rewards/chosen": -0.1643795371055603, "rewards/margins": 0.031668562442064285, "rewards/rejected": -0.19604811072349548, "step": 370 }, { "epoch": 0.5847123719464145, "grad_norm": 0.1782001107931137, "learning_rate": 3.5702509242797096e-06, "log_odds_chosen": 0.7012959718704224, "log_odds_ratio": -0.41842737793922424, "logits/chosen": -0.13191047310829163, "logits/rejected": -1.4372180700302124, "logps/chosen": -1.4734841585159302, "logps/rejected": -2.0532171726226807, "loss": 1.5859, "nll_loss": 1.5440880060195923, "rewards/accuracies": 1.0, "rewards/chosen": -0.14734841883182526, "rewards/margins": 0.05797329545021057, "rewards/rejected": -0.20532171428203583, "step": 371 }, { "epoch": 0.5862884160756501, "grad_norm": 0.189020574092865, "learning_rate": 3.566840441271495e-06, "log_odds_chosen": 0.663593053817749, "log_odds_ratio": -0.43108314275741577, "logits/chosen": -0.09653455764055252, "logits/rejected": -1.4999427795410156, "logps/chosen": -1.4899102449417114, "logps/rejected": -2.0343017578125, "loss": 1.578, "nll_loss": 1.5348646640777588, "rewards/accuracies": 1.0, "rewards/chosen": -0.14899101853370667, "rewards/margins": 0.05443914607167244, "rewards/rejected": -0.2034301608800888, "step": 372 }, { "epoch": 0.5878644602048857, "grad_norm": 0.1943143755197525, "learning_rate": 3.5634181215321265e-06, "log_odds_chosen": 0.6287661790847778, "log_odds_ratio": -0.43060097098350525, "logits/chosen": -0.07774099707603455, "logits/rejected": -1.3174422979354858, "logps/chosen": -1.5436866283416748, "logps/rejected": -2.067762613296509, "loss": 1.6544, "nll_loss": 1.6113276481628418, "rewards/accuracies": 1.0, "rewards/chosen": -0.15436868369579315, "rewards/margins": 0.052407585084438324, "rewards/rejected": -0.20677624642848969, "step": 373 }, { "epoch": 0.5894405043341213, "grad_norm": 0.1871887594461441, "learning_rate": 3.5599839909155947e-06, "log_odds_chosen": 0.49737420678138733, "log_odds_ratio": -0.4843059182167053, "logits/chosen": -0.10240314900875092, "logits/rejected": -1.391071081161499, "logps/chosen": -1.5068795680999756, "logps/rejected": -1.9151724576950073, "loss": 1.6079, "nll_loss": 1.5594788789749146, "rewards/accuracies": 0.875, "rewards/chosen": -0.15068796277046204, "rewards/margins": 0.04082927852869034, "rewards/rejected": -0.19151723384857178, "step": 374 }, { "epoch": 0.5910165484633569, "grad_norm": 0.1926075965166092, "learning_rate": 3.556538075365116e-06, "log_odds_chosen": 0.5647552013397217, "log_odds_ratio": -0.4604189991950989, "logits/chosen": -0.05626612901687622, "logits/rejected": -1.3970637321472168, "logps/chosen": -1.5002104043960571, "logps/rejected": -1.962066411972046, "loss": 1.6008, "nll_loss": 1.5547971725463867, "rewards/accuracies": 1.0, "rewards/chosen": -0.1500210165977478, "rewards/margins": 0.046185605227947235, "rewards/rejected": -0.19620664417743683, "step": 375 }, { "epoch": 0.5925925925925926, "grad_norm": 0.20022441446781158, "learning_rate": 3.5530804009129367e-06, "log_odds_chosen": 0.5052685737609863, "log_odds_ratio": -0.4745987355709076, "logits/chosen": -0.11242527514696121, "logits/rejected": -1.191476583480835, "logps/chosen": -1.5387141704559326, "logps/rejected": -1.9558299779891968, "loss": 1.6266, "nll_loss": 1.5791561603546143, "rewards/accuracies": 1.0, "rewards/chosen": -0.15387141704559326, "rewards/margins": 0.041711580008268356, "rewards/rejected": -0.19558300077915192, "step": 376 }, { "epoch": 0.5941686367218282, "grad_norm": 0.1937384456396103, "learning_rate": 3.5496109936801368e-06, "log_odds_chosen": 0.49315646290779114, "log_odds_ratio": -0.48415568470954895, "logits/chosen": -0.17054055631160736, "logits/rejected": -1.4667962789535522, "logps/chosen": -1.5606714487075806, "logps/rejected": -1.9743766784667969, "loss": 1.6571, "nll_loss": 1.6086863279342651, "rewards/accuracies": 1.0, "rewards/chosen": -0.1560671329498291, "rewards/margins": 0.04137052595615387, "rewards/rejected": -0.19743765890598297, "step": 377 }, { "epoch": 0.5957446808510638, "grad_norm": 0.20795175433158875, "learning_rate": 3.546129879876429e-06, "log_odds_chosen": 0.3628261685371399, "log_odds_ratio": -0.5408048629760742, "logits/chosen": -0.05689922347664833, "logits/rejected": -1.130873203277588, "logps/chosen": -1.6209430694580078, "logps/rejected": -1.9227240085601807, "loss": 1.7149, "nll_loss": 1.6608681678771973, "rewards/accuracies": 0.875, "rewards/chosen": -0.1620943248271942, "rewards/margins": 0.03017808124423027, "rewards/rejected": -0.19227240979671478, "step": 378 }, { "epoch": 0.5973207249802994, "grad_norm": 0.19484388828277588, "learning_rate": 3.5426370857999662e-06, "log_odds_chosen": 0.3801731467247009, "log_odds_ratio": -0.5251167416572571, "logits/chosen": -0.10485132038593292, "logits/rejected": -1.2549301385879517, "logps/chosen": -1.6024380922317505, "logps/rejected": -1.9180117845535278, "loss": 1.7071, "nll_loss": 1.6545681953430176, "rewards/accuracies": 1.0, "rewards/chosen": -0.16024382412433624, "rewards/margins": 0.03155737742781639, "rewards/rejected": -0.19180117547512054, "step": 379 }, { "epoch": 0.598896769109535, "grad_norm": 0.19403113424777985, "learning_rate": 3.53913263783714e-06, "log_odds_chosen": 0.4171554744243622, "log_odds_ratio": -0.5190439820289612, "logits/chosen": -0.12759403884410858, "logits/rejected": -1.392691731452942, "logps/chosen": -1.555301308631897, "logps/rejected": -1.904205083847046, "loss": 1.6481, "nll_loss": 1.5961991548538208, "rewards/accuracies": 0.875, "rewards/chosen": -0.15553012490272522, "rewards/margins": 0.03489039093255997, "rewards/rejected": -0.1904205083847046, "step": 380 }, { "epoch": 0.6004728132387707, "grad_norm": 0.1852397322654724, "learning_rate": 3.53561656246238e-06, "log_odds_chosen": 0.6395785808563232, "log_odds_ratio": -0.42720136046409607, "logits/chosen": -0.2312593162059784, "logits/rejected": -1.6027368307113647, "logps/chosen": -1.5752849578857422, "logps/rejected": -2.1103007793426514, "loss": 1.6683, "nll_loss": 1.6255991458892822, "rewards/accuracies": 1.0, "rewards/chosen": -0.15752847492694855, "rewards/margins": 0.05350159481167793, "rewards/rejected": -0.21103009581565857, "step": 381 }, { "epoch": 0.6020488573680063, "grad_norm": 0.18580475449562073, "learning_rate": 3.532088886237956e-06, "log_odds_chosen": 0.539408802986145, "log_odds_ratio": -0.4628356993198395, "logits/chosen": -0.14560621976852417, "logits/rejected": -1.546494483947754, "logps/chosen": -1.6014196872711182, "logps/rejected": -2.0526225566864014, "loss": 1.6812, "nll_loss": 1.634964942932129, "rewards/accuracies": 1.0, "rewards/chosen": -0.1601419895887375, "rewards/margins": 0.045120254158973694, "rewards/rejected": -0.20526225864887238, "step": 382 }, { "epoch": 0.6036249014972419, "grad_norm": 0.17321372032165527, "learning_rate": 3.528549635813778e-06, "log_odds_chosen": 0.5498687028884888, "log_odds_ratio": -0.46246442198753357, "logits/chosen": -0.18676355481147766, "logits/rejected": -1.3866394758224487, "logps/chosen": -1.4379013776779175, "logps/rejected": -1.884263277053833, "loss": 1.5412, "nll_loss": 1.494981288909912, "rewards/accuracies": 1.0, "rewards/chosen": -0.143790140748024, "rewards/margins": 0.04463617503643036, "rewards/rejected": -0.18842631578445435, "step": 383 }, { "epoch": 0.6052009456264775, "grad_norm": 0.20354455709457397, "learning_rate": 3.524998837927192e-06, "log_odds_chosen": 0.587373673915863, "log_odds_ratio": -0.4441196024417877, "logits/chosen": -0.14221185445785522, "logits/rejected": -1.3197717666625977, "logps/chosen": -1.5544791221618652, "logps/rejected": -2.0435311794281006, "loss": 1.6329, "nll_loss": 1.5884504318237305, "rewards/accuracies": 1.0, "rewards/chosen": -0.15544790029525757, "rewards/margins": 0.04890521243214607, "rewards/rejected": -0.20435310900211334, "step": 384 }, { "epoch": 0.6067769897557131, "grad_norm": 0.1994301825761795, "learning_rate": 3.5214365194027797e-06, "log_odds_chosen": 0.5964666604995728, "log_odds_ratio": -0.44221487641334534, "logits/chosen": -0.15480360388755798, "logits/rejected": -1.4440878629684448, "logps/chosen": -1.4780986309051514, "logps/rejected": -1.9680850505828857, "loss": 1.57, "nll_loss": 1.5257560014724731, "rewards/accuracies": 1.0, "rewards/chosen": -0.14780986309051514, "rewards/margins": 0.048998646438121796, "rewards/rejected": -0.19680851697921753, "step": 385 }, { "epoch": 0.6083530338849488, "grad_norm": 0.21158069372177124, "learning_rate": 3.517862707152157e-06, "log_odds_chosen": 0.45025360584259033, "log_odds_ratio": -0.5009865164756775, "logits/chosen": -0.06984852999448776, "logits/rejected": -1.1637235879898071, "logps/chosen": -1.6143461465835571, "logps/rejected": -1.9897408485412598, "loss": 1.695, "nll_loss": 1.6448723077774048, "rewards/accuracies": 1.0, "rewards/chosen": -0.1614346206188202, "rewards/margins": 0.03753947466611862, "rewards/rejected": -0.1989741027355194, "step": 386 }, { "epoch": 0.6099290780141844, "grad_norm": 0.21827860176563263, "learning_rate": 3.5142774281737674e-06, "log_odds_chosen": 0.6315152645111084, "log_odds_ratio": -0.4288613796234131, "logits/chosen": -0.15267856419086456, "logits/rejected": -1.3205287456512451, "logps/chosen": -1.6036657094955444, "logps/rejected": -2.1343271732330322, "loss": 1.6804, "nll_loss": 1.6375137567520142, "rewards/accuracies": 1.0, "rewards/chosen": -0.16036657989025116, "rewards/margins": 0.053066130727529526, "rewards/rejected": -0.21343271434307098, "step": 387 }, { "epoch": 0.61150512214342, "grad_norm": 0.17510953545570374, "learning_rate": 3.5106807095526817e-06, "log_odds_chosen": 0.6356069445610046, "log_odds_ratio": -0.4359210133552551, "logits/chosen": -0.1717139482498169, "logits/rejected": -1.565706729888916, "logps/chosen": -1.5656462907791138, "logps/rejected": -2.093924045562744, "loss": 1.6578, "nll_loss": 1.6142207384109497, "rewards/accuracies": 1.0, "rewards/chosen": -0.1565646231174469, "rewards/margins": 0.05282779037952423, "rewards/rejected": -0.20939242839813232, "step": 388 }, { "epoch": 0.6130811662726556, "grad_norm": 0.18727731704711914, "learning_rate": 3.5070725784603905e-06, "log_odds_chosen": 0.537490963935852, "log_odds_ratio": -0.4669988751411438, "logits/chosen": -0.24123258888721466, "logits/rejected": -1.2403115034103394, "logps/chosen": -1.439449429512024, "logps/rejected": -1.8756340742111206, "loss": 1.5259, "nll_loss": 1.4791667461395264, "rewards/accuracies": 1.0, "rewards/chosen": -0.14394494891166687, "rewards/margins": 0.04361846297979355, "rewards/rejected": -0.18756340444087982, "step": 389 }, { "epoch": 0.6146572104018913, "grad_norm": 0.22596481442451477, "learning_rate": 3.503453062154602e-06, "log_odds_chosen": 0.4628780484199524, "log_odds_ratio": -0.49970224499702454, "logits/chosen": -0.1650674045085907, "logits/rejected": -1.2386726140975952, "logps/chosen": -1.602417230606079, "logps/rejected": -1.9950282573699951, "loss": 1.6994, "nll_loss": 1.6494615077972412, "rewards/accuracies": 0.875, "rewards/chosen": -0.1602417379617691, "rewards/margins": 0.0392610989511013, "rewards/rejected": -0.1995028257369995, "step": 390 }, { "epoch": 0.6162332545311269, "grad_norm": 0.19225400686264038, "learning_rate": 3.499822187979032e-06, "log_odds_chosen": 0.45917797088623047, "log_odds_ratio": -0.4991348385810852, "logits/chosen": -0.09038020670413971, "logits/rejected": -1.4409539699554443, "logps/chosen": -1.5594383478164673, "logps/rejected": -1.9403319358825684, "loss": 1.6617, "nll_loss": 1.6117753982543945, "rewards/accuracies": 0.875, "rewards/chosen": -0.1559438407421112, "rewards/margins": 0.03808935359120369, "rewards/rejected": -0.1940331906080246, "step": 391 }, { "epoch": 0.6178092986603625, "grad_norm": 0.1856825351715088, "learning_rate": 3.496179983363202e-06, "log_odds_chosen": 0.41265982389450073, "log_odds_ratio": -0.5119627118110657, "logits/chosen": -0.11190656572580338, "logits/rejected": -1.3574274778366089, "logps/chosen": -1.5668977499008179, "logps/rejected": -1.9084656238555908, "loss": 1.6514, "nll_loss": 1.6002510786056519, "rewards/accuracies": 1.0, "rewards/chosen": -0.15668979287147522, "rewards/margins": 0.034156784415245056, "rewards/rejected": -0.19084656238555908, "step": 392 }, { "epoch": 0.6193853427895981, "grad_norm": 0.20805980265140533, "learning_rate": 3.4925264758222268e-06, "log_odds_chosen": 0.6294342279434204, "log_odds_ratio": -0.43072307109832764, "logits/chosen": -0.16953016817569733, "logits/rejected": -1.2341816425323486, "logps/chosen": -1.5001755952835083, "logps/rejected": -2.0215494632720947, "loss": 1.5961, "nll_loss": 1.5530593395233154, "rewards/accuracies": 1.0, "rewards/chosen": -0.15001757442951202, "rewards/margins": 0.05213739350438118, "rewards/rejected": -0.2021549493074417, "step": 393 }, { "epoch": 0.6209613869188337, "grad_norm": 0.24518869817256927, "learning_rate": 3.488861692956611e-06, "log_odds_chosen": 0.5471794009208679, "log_odds_ratio": -0.4630282521247864, "logits/chosen": -0.19086423516273499, "logits/rejected": -1.339902639389038, "logps/chosen": -1.5402213335037231, "logps/rejected": -1.9952548742294312, "loss": 1.6296, "nll_loss": 1.5832523107528687, "rewards/accuracies": 1.0, "rewards/chosen": -0.15402214229106903, "rewards/margins": 0.04550333693623543, "rewards/rejected": -0.19952546060085297, "step": 394 }, { "epoch": 0.6225374310480694, "grad_norm": 0.2510071098804474, "learning_rate": 3.4851856624520394e-06, "log_odds_chosen": 0.5228825807571411, "log_odds_ratio": -0.46843814849853516, "logits/chosen": -0.13727201521396637, "logits/rejected": -1.3861788511276245, "logps/chosen": -1.572906494140625, "logps/rejected": -2.005622386932373, "loss": 1.6575, "nll_loss": 1.6106876134872437, "rewards/accuracies": 1.0, "rewards/chosen": -0.15729066729545593, "rewards/margins": 0.04327157884836197, "rewards/rejected": -0.2005622535943985, "step": 395 }, { "epoch": 0.624113475177305, "grad_norm": 0.20526158809661865, "learning_rate": 3.4814984120791664e-06, "log_odds_chosen": 0.5153719782829285, "log_odds_ratio": -0.4714086055755615, "logits/chosen": -0.13660681247711182, "logits/rejected": -1.2547574043273926, "logps/chosen": -1.5410287380218506, "logps/rejected": -1.964991807937622, "loss": 1.636, "nll_loss": 1.5889039039611816, "rewards/accuracies": 1.0, "rewards/chosen": -0.1541028618812561, "rewards/margins": 0.04239630699157715, "rewards/rejected": -0.19649919867515564, "step": 396 }, { "epoch": 0.6256895193065406, "grad_norm": 0.18158891797065735, "learning_rate": 3.477799969693407e-06, "log_odds_chosen": 0.47999995946884155, "log_odds_ratio": -0.490679532289505, "logits/chosen": -0.09590557217597961, "logits/rejected": -1.3949567079544067, "logps/chosen": -1.541025996208191, "logps/rejected": -1.9312851428985596, "loss": 1.6301, "nll_loss": 1.5809931755065918, "rewards/accuracies": 1.0, "rewards/chosen": -0.15410259366035461, "rewards/margins": 0.039025940001010895, "rewards/rejected": -0.1931285411119461, "step": 397 }, { "epoch": 0.6272655634357762, "grad_norm": 0.2004203349351883, "learning_rate": 3.474090363234728e-06, "log_odds_chosen": 0.766968309879303, "log_odds_ratio": -0.39646148681640625, "logits/chosen": -0.2562759518623352, "logits/rejected": -1.4337137937545776, "logps/chosen": -1.4752384424209595, "logps/rejected": -2.112752914428711, "loss": 1.5586, "nll_loss": 1.5189671516418457, "rewards/accuracies": 1.0, "rewards/chosen": -0.14752383530139923, "rewards/margins": 0.0637514516711235, "rewards/rejected": -0.21127529442310333, "step": 398 }, { "epoch": 0.6288416075650118, "grad_norm": 0.19742122292518616, "learning_rate": 3.4703696207274325e-06, "log_odds_chosen": 0.5179776549339294, "log_odds_ratio": -0.474680095911026, "logits/chosen": -0.14589636027812958, "logits/rejected": -1.2598799467086792, "logps/chosen": -1.556343913078308, "logps/rejected": -1.9830336570739746, "loss": 1.6301, "nll_loss": 1.5826400518417358, "rewards/accuracies": 1.0, "rewards/chosen": -0.15563438832759857, "rewards/margins": 0.042668960988521576, "rewards/rejected": -0.19830335676670074, "step": 399 }, { "epoch": 0.6304176516942475, "grad_norm": 0.18192359805107117, "learning_rate": 3.4666377702799545e-06, "log_odds_chosen": 0.5299201011657715, "log_odds_ratio": -0.4676084518432617, "logits/chosen": -0.24987564980983734, "logits/rejected": -1.5932269096374512, "logps/chosen": -1.46696138381958, "logps/rejected": -1.8967633247375488, "loss": 1.568, "nll_loss": 1.521193265914917, "rewards/accuracies": 1.0, "rewards/chosen": -0.14669615030288696, "rewards/margins": 0.042980194091796875, "rewards/rejected": -0.18967632949352264, "step": 400 }, { "epoch": 0.6319936958234831, "grad_norm": 0.19527654349803925, "learning_rate": 3.4628948400846417e-06, "log_odds_chosen": 0.6314361691474915, "log_odds_ratio": -0.4365447759628296, "logits/chosen": -0.1739773005247116, "logits/rejected": -1.4769560098648071, "logps/chosen": -1.5479360818862915, "logps/rejected": -2.0758628845214844, "loss": 1.637, "nll_loss": 1.5933518409729004, "rewards/accuracies": 1.0, "rewards/chosen": -0.1547936052083969, "rewards/margins": 0.05279267579317093, "rewards/rejected": -0.20758628845214844, "step": 401 }, { "epoch": 0.6335697399527187, "grad_norm": 0.2035856693983078, "learning_rate": 3.4591408584175426e-06, "log_odds_chosen": 0.5278856158256531, "log_odds_ratio": -0.4733438193798065, "logits/chosen": -0.18014143407344818, "logits/rejected": -1.3621004819869995, "logps/chosen": -1.5849114656448364, "logps/rejected": -2.027892589569092, "loss": 1.6766, "nll_loss": 1.6292688846588135, "rewards/accuracies": 1.0, "rewards/chosen": -0.1584911346435547, "rewards/margins": 0.044298142194747925, "rewards/rejected": -0.2027892768383026, "step": 402 }, { "epoch": 0.6351457840819543, "grad_norm": 0.19495651125907898, "learning_rate": 3.4553758536381974e-06, "log_odds_chosen": 0.5365288853645325, "log_odds_ratio": -0.4719133973121643, "logits/chosen": -0.1622263491153717, "logits/rejected": -1.3355566263198853, "logps/chosen": -1.5399608612060547, "logps/rejected": -1.984842300415039, "loss": 1.6272, "nll_loss": 1.580039381980896, "rewards/accuracies": 1.0, "rewards/chosen": -0.153996080160141, "rewards/margins": 0.04448813945055008, "rewards/rejected": -0.19848422706127167, "step": 403 }, { "epoch": 0.6367218282111899, "grad_norm": 0.20605534315109253, "learning_rate": 3.451599854189418e-06, "log_odds_chosen": 0.5477701425552368, "log_odds_ratio": -0.46024447679519653, "logits/chosen": -0.13585253059864044, "logits/rejected": -1.0337761640548706, "logps/chosen": -1.54197359085083, "logps/rejected": -1.995017647743225, "loss": 1.6418, "nll_loss": 1.5958125591278076, "rewards/accuracies": 1.0, "rewards/chosen": -0.15419737994670868, "rewards/margins": 0.045304395258426666, "rewards/rejected": -0.19950176775455475, "step": 404 }, { "epoch": 0.6382978723404256, "grad_norm": 0.19387783110141754, "learning_rate": 3.4478128885970765e-06, "log_odds_chosen": 0.6080644130706787, "log_odds_ratio": -0.4360560178756714, "logits/chosen": -0.179177924990654, "logits/rejected": -1.3908740282058716, "logps/chosen": -1.604873776435852, "logps/rejected": -2.1161084175109863, "loss": 1.6909, "nll_loss": 1.6472656726837158, "rewards/accuracies": 1.0, "rewards/chosen": -0.16048739850521088, "rewards/margins": 0.051123470067977905, "rewards/rejected": -0.21161086857318878, "step": 405 }, { "epoch": 0.6398739164696612, "grad_norm": 0.190069779753685, "learning_rate": 3.44401498546989e-06, "log_odds_chosen": 0.42473104596138, "log_odds_ratio": -0.5118191242218018, "logits/chosen": -0.1188054233789444, "logits/rejected": -1.49931001663208, "logps/chosen": -1.5570849180221558, "logps/rejected": -1.9073197841644287, "loss": 1.6278, "nll_loss": 1.5765697956085205, "rewards/accuracies": 1.0, "rewards/chosen": -0.15570849180221558, "rewards/margins": 0.035023488104343414, "rewards/rejected": -0.1907319873571396, "step": 406 }, { "epoch": 0.6414499605988968, "grad_norm": 0.19107168912887573, "learning_rate": 3.4402061734992005e-06, "log_odds_chosen": 0.5350978374481201, "log_odds_ratio": -0.46566373109817505, "logits/chosen": -0.15249407291412354, "logits/rejected": -1.402602195739746, "logps/chosen": -1.5002124309539795, "logps/rejected": -1.9390612840652466, "loss": 1.5941, "nll_loss": 1.5475372076034546, "rewards/accuracies": 1.0, "rewards/chosen": -0.1500212401151657, "rewards/margins": 0.043884895741939545, "rewards/rejected": -0.19390614330768585, "step": 407 }, { "epoch": 0.6430260047281324, "grad_norm": 0.2040640264749527, "learning_rate": 3.4363864814587656e-06, "log_odds_chosen": 0.49293413758277893, "log_odds_ratio": -0.48022550344467163, "logits/chosen": -0.2637179493904114, "logits/rejected": -0.9641510248184204, "logps/chosen": -1.475205421447754, "logps/rejected": -1.8777835369110107, "loss": 1.5716, "nll_loss": 1.5235683917999268, "rewards/accuracies": 1.0, "rewards/chosen": -0.1475205421447754, "rewards/margins": 0.04025781527161598, "rewards/rejected": -0.18777838349342346, "step": 408 }, { "epoch": 0.644602048857368, "grad_norm": 0.20765496790409088, "learning_rate": 3.4325559382045343e-06, "log_odds_chosen": 0.4098273515701294, "log_odds_ratio": -0.5131589770317078, "logits/chosen": -0.23338492214679718, "logits/rejected": -1.4860385656356812, "logps/chosen": -1.4819140434265137, "logps/rejected": -1.8101638555526733, "loss": 1.5729, "nll_loss": 1.5215588808059692, "rewards/accuracies": 1.0, "rewards/chosen": -0.1481914222240448, "rewards/margins": 0.03282497450709343, "rewards/rejected": -0.18101638555526733, "step": 409 }, { "epoch": 0.6461780929866037, "grad_norm": 0.19961215555667877, "learning_rate": 3.4287145726744295e-06, "log_odds_chosen": 0.5392709374427795, "log_odds_ratio": -0.4628680646419525, "logits/chosen": -0.26481306552886963, "logits/rejected": -1.3813103437423706, "logps/chosen": -1.4664371013641357, "logps/rejected": -1.904196858406067, "loss": 1.5695, "nll_loss": 1.523188829421997, "rewards/accuracies": 1.0, "rewards/chosen": -0.146643728017807, "rewards/margins": 0.04377596825361252, "rewards/rejected": -0.19041968882083893, "step": 410 }, { "epoch": 0.6477541371158393, "grad_norm": 0.20400294661521912, "learning_rate": 3.4248624138881335e-06, "log_odds_chosen": 0.42437130212783813, "log_odds_ratio": -0.5254440307617188, "logits/chosen": -0.1493159532546997, "logits/rejected": -1.1513835191726685, "logps/chosen": -1.6029716730117798, "logps/rejected": -1.9682285785675049, "loss": 1.6887, "nll_loss": 1.6361618041992188, "rewards/accuracies": 0.875, "rewards/chosen": -0.16029717028141022, "rewards/margins": 0.03652569651603699, "rewards/rejected": -0.1968228816986084, "step": 411 }, { "epoch": 0.6493301812450749, "grad_norm": 0.20339736342430115, "learning_rate": 3.4209994909468672e-06, "log_odds_chosen": 0.6561870574951172, "log_odds_ratio": -0.42825421690940857, "logits/chosen": -0.28305602073669434, "logits/rejected": -1.04371976852417, "logps/chosen": -1.5072612762451172, "logps/rejected": -2.0518369674682617, "loss": 1.5933, "nll_loss": 1.5505071878433228, "rewards/accuracies": 1.0, "rewards/chosen": -0.15072615444660187, "rewards/margins": 0.054457567632198334, "rewards/rejected": -0.2051836997270584, "step": 412 }, { "epoch": 0.6509062253743105, "grad_norm": 0.19428101181983948, "learning_rate": 3.4171258330331667e-06, "log_odds_chosen": 0.43498852849006653, "log_odds_ratio": -0.5014755129814148, "logits/chosen": -0.12146922200918198, "logits/rejected": -1.1085965633392334, "logps/chosen": -1.593203067779541, "logps/rejected": -1.9516777992248535, "loss": 1.6774, "nll_loss": 1.627271056175232, "rewards/accuracies": 1.0, "rewards/chosen": -0.15932030975818634, "rewards/margins": 0.03584747388958931, "rewards/rejected": -0.19516779482364655, "step": 413 }, { "epoch": 0.6524822695035462, "grad_norm": 0.19590893387794495, "learning_rate": 3.4132414694106684e-06, "log_odds_chosen": 0.6712747812271118, "log_odds_ratio": -0.4207912087440491, "logits/chosen": -0.16028505563735962, "logits/rejected": -1.4717087745666504, "logps/chosen": -1.498020052909851, "logps/rejected": -2.0545201301574707, "loss": 1.5811, "nll_loss": 1.5389834642410278, "rewards/accuracies": 1.0, "rewards/chosen": -0.14980201423168182, "rewards/margins": 0.055649999529123306, "rewards/rejected": -0.20545199513435364, "step": 414 }, { "epoch": 0.6540583136327817, "grad_norm": 0.20174475014209747, "learning_rate": 3.409346429423884e-06, "log_odds_chosen": 0.4537242650985718, "log_odds_ratio": -0.4941532015800476, "logits/chosen": -0.010996952652931213, "logits/rejected": -1.3585155010223389, "logps/chosen": -1.5672545433044434, "logps/rejected": -1.9409823417663574, "loss": 1.6455, "nll_loss": 1.5961326360702515, "rewards/accuracies": 1.0, "rewards/chosen": -0.1567254513502121, "rewards/margins": 0.03737279772758484, "rewards/rejected": -0.19409826397895813, "step": 415 }, { "epoch": 0.6556343577620173, "grad_norm": 0.19159899652004242, "learning_rate": 3.40544074249798e-06, "log_odds_chosen": 0.6167906522750854, "log_odds_ratio": -0.4354286193847656, "logits/chosen": -0.18299099802970886, "logits/rejected": -1.46063232421875, "logps/chosen": -1.5565929412841797, "logps/rejected": -2.0718507766723633, "loss": 1.6407, "nll_loss": 1.5971307754516602, "rewards/accuracies": 1.0, "rewards/chosen": -0.15565930306911469, "rewards/margins": 0.0515257902443409, "rewards/rejected": -0.20718510448932648, "step": 416 }, { "epoch": 0.6572104018912529, "grad_norm": 0.20851117372512817, "learning_rate": 3.401524438138556e-06, "log_odds_chosen": 0.45928677916526794, "log_odds_ratio": -0.49514341354370117, "logits/chosen": -0.13402409851551056, "logits/rejected": -1.1145453453063965, "logps/chosen": -1.5616354942321777, "logps/rejected": -1.9419609308242798, "loss": 1.6266, "nll_loss": 1.5770922899246216, "rewards/accuracies": 1.0, "rewards/chosen": -0.1561635434627533, "rewards/margins": 0.03803255409002304, "rewards/rejected": -0.19419609010219574, "step": 417 }, { "epoch": 0.6587864460204885, "grad_norm": 0.19792215526103973, "learning_rate": 3.39759754593142e-06, "log_odds_chosen": 0.4961496889591217, "log_odds_ratio": -0.4824981689453125, "logits/chosen": -0.1806434839963913, "logits/rejected": -1.2893097400665283, "logps/chosen": -1.6215399503707886, "logps/rejected": -2.039522409439087, "loss": 1.6951, "nll_loss": 1.646848440170288, "rewards/accuracies": 1.0, "rewards/chosen": -0.16215398907661438, "rewards/margins": 0.04179824888706207, "rewards/rejected": -0.20395225286483765, "step": 418 }, { "epoch": 0.6603624901497241, "grad_norm": 0.2001093477010727, "learning_rate": 3.3936600955423683e-06, "log_odds_chosen": 0.7080036997795105, "log_odds_ratio": -0.4158882200717926, "logits/chosen": -0.10217966884374619, "logits/rejected": -1.1086851358413696, "logps/chosen": -1.5155251026153564, "logps/rejected": -2.1040117740631104, "loss": 1.603, "nll_loss": 1.5614415407180786, "rewards/accuracies": 1.0, "rewards/chosen": -0.15155251324176788, "rewards/margins": 0.05884869024157524, "rewards/rejected": -0.21040120720863342, "step": 419 }, { "epoch": 0.6619385342789598, "grad_norm": 0.1921870857477188, "learning_rate": 3.3897121167169573e-06, "log_odds_chosen": 0.39313969016075134, "log_odds_ratio": -0.5258656144142151, "logits/chosen": -0.22022226452827454, "logits/rejected": -1.3231024742126465, "logps/chosen": -1.4893752336502075, "logps/rejected": -1.808854341506958, "loss": 1.5882, "nll_loss": 1.535656452178955, "rewards/accuracies": 0.875, "rewards/chosen": -0.14893752336502075, "rewards/margins": 0.03194789960980415, "rewards/rejected": -0.1808854341506958, "step": 420 }, { "epoch": 0.6635145784081954, "grad_norm": 0.20021358132362366, "learning_rate": 3.38575363928028e-06, "log_odds_chosen": 0.6059412360191345, "log_odds_ratio": -0.4419128894805908, "logits/chosen": -0.14556629955768585, "logits/rejected": -1.1496225595474243, "logps/chosen": -1.4978344440460205, "logps/rejected": -1.997003197669983, "loss": 1.5713, "nll_loss": 1.52711820602417, "rewards/accuracies": 1.0, "rewards/chosen": -0.1497834473848343, "rewards/margins": 0.04991687089204788, "rewards/rejected": -0.19970029592514038, "step": 421 }, { "epoch": 0.665090622537431, "grad_norm": 0.19506679475307465, "learning_rate": 3.3817846931367452e-06, "log_odds_chosen": 0.415115624666214, "log_odds_ratio": -0.5117157101631165, "logits/chosen": -0.22510936856269836, "logits/rejected": -1.2167201042175293, "logps/chosen": -1.4807124137878418, "logps/rejected": -1.8138527870178223, "loss": 1.5701, "nll_loss": 1.5189181566238403, "rewards/accuracies": 1.0, "rewards/chosen": -0.1480712592601776, "rewards/margins": 0.033314019441604614, "rewards/rejected": -0.18138529360294342, "step": 422 }, { "epoch": 0.6666666666666666, "grad_norm": 0.19481457769870758, "learning_rate": 3.377805308269844e-06, "log_odds_chosen": 0.6872407793998718, "log_odds_ratio": -0.42233163118362427, "logits/chosen": -0.11342249810695648, "logits/rejected": -1.5072932243347168, "logps/chosen": -1.5372941493988037, "logps/rejected": -2.1134226322174072, "loss": 1.6132, "nll_loss": 1.5709176063537598, "rewards/accuracies": 1.0, "rewards/chosen": -0.15372943878173828, "rewards/margins": 0.05761285126209259, "rewards/rejected": -0.21134227514266968, "step": 423 }, { "epoch": 0.6682427107959023, "grad_norm": 0.18579819798469543, "learning_rate": 3.3738155147419275e-06, "log_odds_chosen": 0.6220219135284424, "log_odds_ratio": -0.44024914503097534, "logits/chosen": -0.235686257481575, "logits/rejected": -1.2404701709747314, "logps/chosen": -1.5340425968170166, "logps/rejected": -2.0529518127441406, "loss": 1.5954, "nll_loss": 1.5513544082641602, "rewards/accuracies": 1.0, "rewards/chosen": -0.15340426564216614, "rewards/margins": 0.05189090967178345, "rewards/rejected": -0.2052951604127884, "step": 424 }, { "epoch": 0.6698187549251379, "grad_norm": 0.19320693612098694, "learning_rate": 3.3698153426939824e-06, "log_odds_chosen": 0.7084161639213562, "log_odds_ratio": -0.4107830226421356, "logits/chosen": -0.2280699759721756, "logits/rejected": -1.0708644390106201, "logps/chosen": -1.463280439376831, "logps/rejected": -2.0446677207946777, "loss": 1.5378, "nll_loss": 1.4967448711395264, "rewards/accuracies": 1.0, "rewards/chosen": -0.14632803201675415, "rewards/margins": 0.05813872069120407, "rewards/rejected": -0.2044667750597, "step": 425 }, { "epoch": 0.6713947990543735, "grad_norm": 0.20736993849277496, "learning_rate": 3.3658048223453954e-06, "log_odds_chosen": 0.662844717502594, "log_odds_ratio": -0.4233693480491638, "logits/chosen": -0.1891903281211853, "logits/rejected": -1.40634024143219, "logps/chosen": -1.6040951013565063, "logps/rejected": -2.1638996601104736, "loss": 1.6514, "nll_loss": 1.6090670824050903, "rewards/accuracies": 1.0, "rewards/chosen": -0.16040951013565063, "rewards/margins": 0.05598045140504837, "rewards/rejected": -0.2163899689912796, "step": 426 }, { "epoch": 0.6729708431836091, "grad_norm": 0.1907954066991806, "learning_rate": 3.3617839839937337e-06, "log_odds_chosen": 0.6645872592926025, "log_odds_ratio": -0.4254325330257416, "logits/chosen": -0.11419974267482758, "logits/rejected": -1.4858274459838867, "logps/chosen": -1.5667697191238403, "logps/rejected": -2.1278765201568604, "loss": 1.6334, "nll_loss": 1.5908530950546265, "rewards/accuracies": 1.0, "rewards/chosen": -0.15667694807052612, "rewards/margins": 0.056110695004463196, "rewards/rejected": -0.2127876579761505, "step": 427 }, { "epoch": 0.6745468873128447, "grad_norm": 0.20678722858428955, "learning_rate": 3.3577528580145107e-06, "log_odds_chosen": 0.3481862545013428, "log_odds_ratio": -0.5371396541595459, "logits/chosen": -0.14534202218055725, "logits/rejected": -1.29691481590271, "logps/chosen": -1.585597276687622, "logps/rejected": -1.8742812871932983, "loss": 1.6676, "nll_loss": 1.6138746738433838, "rewards/accuracies": 1.0, "rewards/chosen": -0.15855972468852997, "rewards/margins": 0.028868405148386955, "rewards/rejected": -0.18742814660072327, "step": 428 }, { "epoch": 0.6761229314420804, "grad_norm": 0.19353458285331726, "learning_rate": 3.353711474860956e-06, "log_odds_chosen": 0.5981341600418091, "log_odds_ratio": -0.45493775606155396, "logits/chosen": -0.1791481226682663, "logits/rejected": -1.231849193572998, "logps/chosen": -1.5348036289215088, "logps/rejected": -2.0394744873046875, "loss": 1.597, "nll_loss": 1.5514580011367798, "rewards/accuracies": 1.0, "rewards/chosen": -0.15348035097122192, "rewards/margins": 0.05046708881855011, "rewards/rejected": -0.20394745469093323, "step": 429 }, { "epoch": 0.677698975571316, "grad_norm": 0.18463017046451569, "learning_rate": 3.3496598650637916e-06, "log_odds_chosen": 0.569009006023407, "log_odds_ratio": -0.45516982674598694, "logits/chosen": -0.20293018221855164, "logits/rejected": -1.1590790748596191, "logps/chosen": -1.435080647468567, "logps/rejected": -1.8941256999969482, "loss": 1.5186, "nll_loss": 1.4731093645095825, "rewards/accuracies": 1.0, "rewards/chosen": -0.14350807666778564, "rewards/margins": 0.0459044985473156, "rewards/rejected": -0.18941256403923035, "step": 430 }, { "epoch": 0.6792750197005516, "grad_norm": 0.18710088729858398, "learning_rate": 3.3455980592309923e-06, "log_odds_chosen": 0.619581401348114, "log_odds_ratio": -0.4386385977268219, "logits/chosen": -0.23650380969047546, "logits/rejected": -1.2405811548233032, "logps/chosen": -1.4119963645935059, "logps/rejected": -1.9160082340240479, "loss": 1.5042, "nll_loss": 1.460310459136963, "rewards/accuracies": 1.0, "rewards/chosen": -0.14119963347911835, "rewards/margins": 0.05040118098258972, "rewards/rejected": -0.19160079956054688, "step": 431 }, { "epoch": 0.6808510638297872, "grad_norm": 0.1992560178041458, "learning_rate": 3.341526088047562e-06, "log_odds_chosen": 0.5567487478256226, "log_odds_ratio": -0.4619826078414917, "logits/chosen": -0.17194384336471558, "logits/rejected": -1.293556571006775, "logps/chosen": -1.6019841432571411, "logps/rejected": -2.070882558822632, "loss": 1.662, "nll_loss": 1.6158294677734375, "rewards/accuracies": 1.0, "rewards/chosen": -0.1601984202861786, "rewards/margins": 0.046889837831258774, "rewards/rejected": -0.20708826184272766, "step": 432 }, { "epoch": 0.6824271079590228, "grad_norm": 0.226112961769104, "learning_rate": 3.3374439822752972e-06, "log_odds_chosen": 0.44421204924583435, "log_odds_ratio": -0.49906378984451294, "logits/chosen": -0.12680430710315704, "logits/rejected": -1.1431465148925781, "logps/chosen": -1.5594537258148193, "logps/rejected": -1.9281821250915527, "loss": 1.649, "nll_loss": 1.5991100072860718, "rewards/accuracies": 1.0, "rewards/chosen": -0.15594536066055298, "rewards/margins": 0.036872848868370056, "rewards/rejected": -0.19281822443008423, "step": 433 }, { "epoch": 0.6840031520882585, "grad_norm": 0.207797572016716, "learning_rate": 3.333351772752559e-06, "log_odds_chosen": 0.5869032144546509, "log_odds_ratio": -0.4558618366718292, "logits/chosen": -0.23912350833415985, "logits/rejected": -1.287145972251892, "logps/chosen": -1.6291857957839966, "logps/rejected": -2.120633602142334, "loss": 1.6932, "nll_loss": 1.6475987434387207, "rewards/accuracies": 1.0, "rewards/chosen": -0.1629185825586319, "rewards/margins": 0.049144770950078964, "rewards/rejected": -0.21206337213516235, "step": 434 }, { "epoch": 0.6855791962174941, "grad_norm": 0.20063838362693787, "learning_rate": 3.3292494903940338e-06, "log_odds_chosen": 0.7061201333999634, "log_odds_ratio": -0.40912342071533203, "logits/chosen": -0.1544758826494217, "logits/rejected": -1.3016716241836548, "logps/chosen": -1.5344663858413696, "logps/rejected": -2.1211295127868652, "loss": 1.6071, "nll_loss": 1.5661424398422241, "rewards/accuracies": 1.0, "rewards/chosen": -0.15344664454460144, "rewards/margins": 0.05866629630327225, "rewards/rejected": -0.2121129333972931, "step": 435 }, { "epoch": 0.6871552403467297, "grad_norm": 0.1838790476322174, "learning_rate": 3.3251371661905063e-06, "log_odds_chosen": 0.6065340638160706, "log_odds_ratio": -0.44656994938850403, "logits/chosen": -0.1865832805633545, "logits/rejected": -1.140378713607788, "logps/chosen": -1.3614064455032349, "logps/rejected": -1.8547477722167969, "loss": 1.4488, "nll_loss": 1.4041305780410767, "rewards/accuracies": 1.0, "rewards/chosen": -0.1361406445503235, "rewards/margins": 0.04933411255478859, "rewards/rejected": -0.18547475337982178, "step": 436 }, { "epoch": 0.6887312844759653, "grad_norm": 0.21491163969039917, "learning_rate": 3.321014831208622e-06, "log_odds_chosen": 0.5981911420822144, "log_odds_ratio": -0.44269564747810364, "logits/chosen": -0.19441677629947662, "logits/rejected": -1.1200268268585205, "logps/chosen": -1.4213942289352417, "logps/rejected": -1.9019947052001953, "loss": 1.5169, "nll_loss": 1.4725940227508545, "rewards/accuracies": 1.0, "rewards/chosen": -0.14213941991329193, "rewards/margins": 0.04806005209684372, "rewards/rejected": -0.19019947946071625, "step": 437 }, { "epoch": 0.6903073286052009, "grad_norm": 0.2163185477256775, "learning_rate": 3.316882516590652e-06, "log_odds_chosen": 0.6079857349395752, "log_odds_ratio": -0.43692946434020996, "logits/chosen": -0.18331696093082428, "logits/rejected": -1.3628792762756348, "logps/chosen": -1.4688149690628052, "logps/rejected": -1.966677188873291, "loss": 1.5508, "nll_loss": 1.5071358680725098, "rewards/accuracies": 1.0, "rewards/chosen": -0.14688150584697723, "rewards/margins": 0.04978622496128082, "rewards/rejected": -0.19666773080825806, "step": 438 }, { "epoch": 0.6918833727344366, "grad_norm": 0.21325580775737762, "learning_rate": 3.31274025355426e-06, "log_odds_chosen": 0.5819729566574097, "log_odds_ratio": -0.4491526484489441, "logits/chosen": -0.22322604060173035, "logits/rejected": -1.189731478691101, "logps/chosen": -1.5592323541641235, "logps/rejected": -2.043937921524048, "loss": 1.6116, "nll_loss": 1.5666409730911255, "rewards/accuracies": 1.0, "rewards/chosen": -0.15592323243618011, "rewards/margins": 0.04847054183483124, "rewards/rejected": -0.20439377427101135, "step": 439 }, { "epoch": 0.6934594168636722, "grad_norm": 0.20833250880241394, "learning_rate": 3.308588073392265e-06, "log_odds_chosen": 0.5521525144577026, "log_odds_ratio": -0.46361684799194336, "logits/chosen": -0.2142220437526703, "logits/rejected": -1.1978386640548706, "logps/chosen": -1.5905332565307617, "logps/rejected": -2.054898738861084, "loss": 1.6463, "nll_loss": 1.599968433380127, "rewards/accuracies": 1.0, "rewards/chosen": -0.15905332565307617, "rewards/margins": 0.046436551958322525, "rewards/rejected": -0.2054898738861084, "step": 440 }, { "epoch": 0.6950354609929078, "grad_norm": 0.19477160274982452, "learning_rate": 3.3044260074724035e-06, "log_odds_chosen": 0.6352304220199585, "log_odds_ratio": -0.43725699186325073, "logits/chosen": -0.20722348988056183, "logits/rejected": -1.4436430931091309, "logps/chosen": -1.5010461807250977, "logps/rejected": -2.0235297679901123, "loss": 1.5782, "nll_loss": 1.5344798564910889, "rewards/accuracies": 1.0, "rewards/chosen": -0.15010464191436768, "rewards/margins": 0.05224834755063057, "rewards/rejected": -0.20235297083854675, "step": 441 }, { "epoch": 0.6966115051221434, "grad_norm": 0.18534111976623535, "learning_rate": 3.300254087237097e-06, "log_odds_chosen": 0.5580976605415344, "log_odds_ratio": -0.457084059715271, "logits/chosen": -0.16514423489570618, "logits/rejected": -1.3200604915618896, "logps/chosen": -1.3929381370544434, "logps/rejected": -1.8415474891662598, "loss": 1.4993, "nll_loss": 1.4535483121871948, "rewards/accuracies": 1.0, "rewards/chosen": -0.1392938196659088, "rewards/margins": 0.044860921800136566, "rewards/rejected": -0.18415474891662598, "step": 442 }, { "epoch": 0.698187549251379, "grad_norm": 0.26748111844062805, "learning_rate": 3.2960723442032105e-06, "log_odds_chosen": 0.7100386619567871, "log_odds_ratio": -0.4068644642829895, "logits/chosen": -0.23274515569210052, "logits/rejected": -1.637979507446289, "logps/chosen": -1.590896725654602, "logps/rejected": -2.1930923461914062, "loss": 1.6465, "nll_loss": 1.6058528423309326, "rewards/accuracies": 1.0, "rewards/chosen": -0.15908968448638916, "rewards/margins": 0.06021953374147415, "rewards/rejected": -0.21930919587612152, "step": 443 }, { "epoch": 0.6997635933806147, "grad_norm": 0.1990683227777481, "learning_rate": 3.291880809961814e-06, "log_odds_chosen": 0.6279516220092773, "log_odds_ratio": -0.4397643506526947, "logits/chosen": -0.15238967537879944, "logits/rejected": -1.2192305326461792, "logps/chosen": -1.5531084537506104, "logps/rejected": -2.074535369873047, "loss": 1.6241, "nll_loss": 1.5800902843475342, "rewards/accuracies": 1.0, "rewards/chosen": -0.15531083941459656, "rewards/margins": 0.0521426796913147, "rewards/rejected": -0.20745351910591125, "step": 444 }, { "epoch": 0.7013396375098503, "grad_norm": 0.19820798933506012, "learning_rate": 3.2876795161779473e-06, "log_odds_chosen": 0.7250336408615112, "log_odds_ratio": -0.3975331783294678, "logits/chosen": -0.18375059962272644, "logits/rejected": -1.543222188949585, "logps/chosen": -1.5645023584365845, "logps/rejected": -2.1766436100006104, "loss": 1.6162, "nll_loss": 1.5764946937561035, "rewards/accuracies": 1.0, "rewards/chosen": -0.15645024180412292, "rewards/margins": 0.0612141489982605, "rewards/rejected": -0.21766439080238342, "step": 445 }, { "epoch": 0.7029156816390859, "grad_norm": 0.19689838588237762, "learning_rate": 3.2834684945903776e-06, "log_odds_chosen": 0.5597304105758667, "log_odds_ratio": -0.45593225955963135, "logits/chosen": -0.2338670790195465, "logits/rejected": -1.2486504316329956, "logps/chosen": -1.4915810823440552, "logps/rejected": -1.9466543197631836, "loss": 1.572, "nll_loss": 1.5264508724212646, "rewards/accuracies": 1.0, "rewards/chosen": -0.14915812015533447, "rewards/margins": 0.045507319271564484, "rewards/rejected": -0.19466543197631836, "step": 446 }, { "epoch": 0.7044917257683215, "grad_norm": 0.2056231051683426, "learning_rate": 3.2792477770113624e-06, "log_odds_chosen": 0.5060315728187561, "log_odds_ratio": -0.47748908400535583, "logits/chosen": -0.32436949014663696, "logits/rejected": -1.376452922821045, "logps/chosen": -1.5770741701126099, "logps/rejected": -1.9976955652236938, "loss": 1.6348, "nll_loss": 1.5870327949523926, "rewards/accuracies": 1.0, "rewards/chosen": -0.15770742297172546, "rewards/margins": 0.04206214100122452, "rewards/rejected": -0.19976955652236938, "step": 447 }, { "epoch": 0.7060677698975572, "grad_norm": 0.2036747932434082, "learning_rate": 3.275017395326407e-06, "log_odds_chosen": 0.4934311509132385, "log_odds_ratio": -0.48520928621292114, "logits/chosen": -0.13775332272052765, "logits/rejected": -1.1801694631576538, "logps/chosen": -1.4680533409118652, "logps/rejected": -1.8637882471084595, "loss": 1.5528, "nll_loss": 1.5042613744735718, "rewards/accuracies": 0.875, "rewards/chosen": -0.14680534601211548, "rewards/margins": 0.039573490619659424, "rewards/rejected": -0.1863788366317749, "step": 448 }, { "epoch": 0.7076438140267928, "grad_norm": 0.1903351992368698, "learning_rate": 3.2707773814940244e-06, "log_odds_chosen": 0.6554431915283203, "log_odds_ratio": -0.42446404695510864, "logits/chosen": -0.15731389820575714, "logits/rejected": -1.3429402112960815, "logps/chosen": -1.4967751502990723, "logps/rejected": -2.0305981636047363, "loss": 1.5792, "nll_loss": 1.5367555618286133, "rewards/accuracies": 1.0, "rewards/chosen": -0.14967751502990723, "rewards/margins": 0.05338229984045029, "rewards/rejected": -0.20305980741977692, "step": 449 }, { "epoch": 0.7092198581560284, "grad_norm": 0.18632066249847412, "learning_rate": 3.2665277675454935e-06, "log_odds_chosen": 0.6589217185974121, "log_odds_ratio": -0.43469709157943726, "logits/chosen": -0.22264309227466583, "logits/rejected": -1.58710515499115, "logps/chosen": -1.5806881189346313, "logps/rejected": -2.1421871185302734, "loss": 1.6344, "nll_loss": 1.590897798538208, "rewards/accuracies": 1.0, "rewards/chosen": -0.15806882083415985, "rewards/margins": 0.05614989995956421, "rewards/rejected": -0.21421872079372406, "step": 450 }, { "epoch": 0.710795902285264, "grad_norm": 0.1898472160100937, "learning_rate": 3.262268585584619e-06, "log_odds_chosen": 0.6008990406990051, "log_odds_ratio": -0.440044105052948, "logits/chosen": -0.1463158279657364, "logits/rejected": -1.3233999013900757, "logps/chosen": -1.5258179903030396, "logps/rejected": -2.0252106189727783, "loss": 1.5941, "nll_loss": 1.5501309633255005, "rewards/accuracies": 1.0, "rewards/chosen": -0.15258179605007172, "rewards/margins": 0.04993927478790283, "rewards/rejected": -0.20252105593681335, "step": 451 }, { "epoch": 0.7123719464144996, "grad_norm": 0.20325587689876556, "learning_rate": 3.2579998677874853e-06, "log_odds_chosen": 0.691182017326355, "log_odds_ratio": -0.41538918018341064, "logits/chosen": -0.23037710785865784, "logits/rejected": -1.2760132551193237, "logps/chosen": -1.5629483461380005, "logps/rejected": -2.1457886695861816, "loss": 1.6173, "nll_loss": 1.5757336616516113, "rewards/accuracies": 1.0, "rewards/chosen": -0.1562948226928711, "rewards/margins": 0.05828403681516647, "rewards/rejected": -0.21457885205745697, "step": 452 }, { "epoch": 0.7139479905437353, "grad_norm": 0.18862774968147278, "learning_rate": 3.2537216464022155e-06, "log_odds_chosen": 0.6802084445953369, "log_odds_ratio": -0.41921448707580566, "logits/chosen": -0.20463165640830994, "logits/rejected": -1.2661035060882568, "logps/chosen": -1.5623295307159424, "logps/rejected": -2.129488945007324, "loss": 1.6273, "nll_loss": 1.585338830947876, "rewards/accuracies": 1.0, "rewards/chosen": -0.15623293817043304, "rewards/margins": 0.0567159429192543, "rewards/rejected": -0.21294888854026794, "step": 453 }, { "epoch": 0.7155240346729709, "grad_norm": 0.1871948093175888, "learning_rate": 3.2494339537487314e-06, "log_odds_chosen": 0.5408557653427124, "log_odds_ratio": -0.46073442697525024, "logits/chosen": -0.23007997870445251, "logits/rejected": -1.5315394401550293, "logps/chosen": -1.5641494989395142, "logps/rejected": -2.0117013454437256, "loss": 1.6397, "nll_loss": 1.5936379432678223, "rewards/accuracies": 1.0, "rewards/chosen": -0.1564149558544159, "rewards/margins": 0.04475518316030502, "rewards/rejected": -0.2011701464653015, "step": 454 }, { "epoch": 0.7171000788022065, "grad_norm": 0.18707512319087982, "learning_rate": 3.2451368222185006e-06, "log_odds_chosen": 0.454687237739563, "log_odds_ratio": -0.4977053105831146, "logits/chosen": -0.2190241664648056, "logits/rejected": -1.1657060384750366, "logps/chosen": -1.5094616413116455, "logps/rejected": -1.8778409957885742, "loss": 1.5921, "nll_loss": 1.5422812700271606, "rewards/accuracies": 1.0, "rewards/chosen": -0.15094618499279022, "rewards/margins": 0.03683791682124138, "rewards/rejected": -0.1877840906381607, "step": 455 }, { "epoch": 0.7186761229314421, "grad_norm": 0.17670530080795288, "learning_rate": 3.2408302842743007e-06, "log_odds_chosen": 0.6599798798561096, "log_odds_ratio": -0.4245462715625763, "logits/chosen": -0.1710800975561142, "logits/rejected": -1.3067679405212402, "logps/chosen": -1.4607981443405151, "logps/rejected": -2.002277374267578, "loss": 1.5484, "nll_loss": 1.505940556526184, "rewards/accuracies": 1.0, "rewards/chosen": -0.14607983827590942, "rewards/margins": 0.05414789542555809, "rewards/rejected": -0.20022772252559662, "step": 456 }, { "epoch": 0.7202521670606777, "grad_norm": 0.18531948328018188, "learning_rate": 3.2365143724499684e-06, "log_odds_chosen": 0.590911865234375, "log_odds_ratio": -0.4425351023674011, "logits/chosen": -0.1856268346309662, "logits/rejected": -1.3595974445343018, "logps/chosen": -1.5068074464797974, "logps/rejected": -1.9935933351516724, "loss": 1.5948, "nll_loss": 1.5505702495574951, "rewards/accuracies": 1.0, "rewards/chosen": -0.1506807506084442, "rewards/margins": 0.04867858439683914, "rewards/rejected": -0.19935932755470276, "step": 457 }, { "epoch": 0.7218282111899134, "grad_norm": 0.19168265163898468, "learning_rate": 3.2321891193501564e-06, "log_odds_chosen": 0.5883606672286987, "log_odds_ratio": -0.4528968632221222, "logits/chosen": -0.16482576727867126, "logits/rejected": -0.8930314779281616, "logps/chosen": -1.510907530784607, "logps/rejected": -1.9958248138427734, "loss": 1.5783, "nll_loss": 1.532994031906128, "rewards/accuracies": 1.0, "rewards/chosen": -0.15109075605869293, "rewards/margins": 0.04849172383546829, "rewards/rejected": -0.19958247244358063, "step": 458 }, { "epoch": 0.723404255319149, "grad_norm": 0.18463407456874847, "learning_rate": 3.2278545576500858e-06, "log_odds_chosen": 0.8210570812225342, "log_odds_ratio": -0.37329068779945374, "logits/chosen": -0.08877343684434891, "logits/rejected": -1.078304648399353, "logps/chosen": -1.3643009662628174, "logps/rejected": -2.0353879928588867, "loss": 1.4512, "nll_loss": 1.4138658046722412, "rewards/accuracies": 1.0, "rewards/chosen": -0.13643008470535278, "rewards/margins": 0.06710872799158096, "rewards/rejected": -0.20353882014751434, "step": 459 }, { "epoch": 0.7249802994483846, "grad_norm": 0.19575412571430206, "learning_rate": 3.223510720095299e-06, "log_odds_chosen": 0.785904049873352, "log_odds_ratio": -0.38319867849349976, "logits/chosen": -0.2679174542427063, "logits/rejected": -1.3926105499267578, "logps/chosen": -1.5412414073944092, "logps/rejected": -2.202934980392456, "loss": 1.5979, "nll_loss": 1.5595486164093018, "rewards/accuracies": 1.0, "rewards/chosen": -0.15412414073944092, "rewards/margins": 0.0661693587899208, "rewards/rejected": -0.22029350697994232, "step": 460 }, { "epoch": 0.7265563435776202, "grad_norm": 0.18656474351882935, "learning_rate": 3.2191576395014158e-06, "log_odds_chosen": 0.7210904359817505, "log_odds_ratio": -0.3992398679256439, "logits/chosen": -0.20938719809055328, "logits/rejected": -1.283248782157898, "logps/chosen": -1.4764115810394287, "logps/rejected": -2.072150707244873, "loss": 1.5461, "nll_loss": 1.5061570405960083, "rewards/accuracies": 1.0, "rewards/chosen": -0.1476411670446396, "rewards/margins": 0.059573911130428314, "rewards/rejected": -0.2072150707244873, "step": 461 }, { "epoch": 0.7281323877068558, "grad_norm": 0.18474119901657104, "learning_rate": 3.2147953487538794e-06, "log_odds_chosen": 0.6938648223876953, "log_odds_ratio": -0.4105943739414215, "logits/chosen": -0.14119039475917816, "logits/rejected": -1.3704811334609985, "logps/chosen": -1.4421080350875854, "logps/rejected": -2.0102922916412354, "loss": 1.514, "nll_loss": 1.4729448556900024, "rewards/accuracies": 1.0, "rewards/chosen": -0.1442108154296875, "rewards/margins": 0.05681842565536499, "rewards/rejected": -0.2010292410850525, "step": 462 }, { "epoch": 0.7297084318360915, "grad_norm": 0.19482626020908356, "learning_rate": 3.2104238808077133e-06, "log_odds_chosen": 0.5768538117408752, "log_odds_ratio": -0.45441049337387085, "logits/chosen": -0.169452965259552, "logits/rejected": -1.0683661699295044, "logps/chosen": -1.4814167022705078, "logps/rejected": -1.9551403522491455, "loss": 1.5563, "nll_loss": 1.5108129978179932, "rewards/accuracies": 1.0, "rewards/chosen": -0.14814168214797974, "rewards/margins": 0.04737236350774765, "rewards/rejected": -0.1955140382051468, "step": 463 }, { "epoch": 0.731284475965327, "grad_norm": 0.18169742822647095, "learning_rate": 3.2060432686872704e-06, "log_odds_chosen": 0.8345743417739868, "log_odds_ratio": -0.3699982464313507, "logits/chosen": -0.2313491553068161, "logits/rejected": -1.2104275226593018, "logps/chosen": -1.387764811515808, "logps/rejected": -2.067291736602783, "loss": 1.4733, "nll_loss": 1.436316728591919, "rewards/accuracies": 1.0, "rewards/chosen": -0.138776496052742, "rewards/margins": 0.06795267760753632, "rewards/rejected": -0.20672915875911713, "step": 464 }, { "epoch": 0.7328605200945626, "grad_norm": 0.18539521098136902, "learning_rate": 3.201653545485982e-06, "log_odds_chosen": 0.6590836048126221, "log_odds_ratio": -0.42436298727989197, "logits/chosen": -0.14550940692424774, "logits/rejected": -1.2948503494262695, "logps/chosen": -1.5323055982589722, "logps/rejected": -2.083099365234375, "loss": 1.5889, "nll_loss": 1.5465004444122314, "rewards/accuracies": 1.0, "rewards/chosen": -0.15323056280612946, "rewards/margins": 0.0550793781876564, "rewards/rejected": -0.20830994844436646, "step": 465 }, { "epoch": 0.7344365642237982, "grad_norm": 0.2062307447195053, "learning_rate": 3.197254744366111e-06, "log_odds_chosen": 0.67624831199646, "log_odds_ratio": -0.41811689734458923, "logits/chosen": -0.1269284039735794, "logits/rejected": -1.2436720132827759, "logps/chosen": -1.442671775817871, "logps/rejected": -1.994502305984497, "loss": 1.519, "nll_loss": 1.4771640300750732, "rewards/accuracies": 1.0, "rewards/chosen": -0.14426717162132263, "rewards/margins": 0.0551830530166626, "rewards/rejected": -0.19945020973682404, "step": 466 }, { "epoch": 0.7360126083530338, "grad_norm": 0.19510377943515778, "learning_rate": 3.192846898558498e-06, "log_odds_chosen": 0.5479187369346619, "log_odds_ratio": -0.4584289491176605, "logits/chosen": -0.17342276871204376, "logits/rejected": -1.2843399047851562, "logps/chosen": -1.5753339529037476, "logps/rejected": -2.0328989028930664, "loss": 1.6463, "nll_loss": 1.600473403930664, "rewards/accuracies": 1.0, "rewards/chosen": -0.15753339231014252, "rewards/margins": 0.045756496489048004, "rewards/rejected": -0.20328989624977112, "step": 467 }, { "epoch": 0.7375886524822695, "grad_norm": 0.19615499675273895, "learning_rate": 3.188430041362313e-06, "log_odds_chosen": 0.5022854804992676, "log_odds_ratio": -0.4808230698108673, "logits/chosen": -0.1259315460920334, "logits/rejected": -1.3108328580856323, "logps/chosen": -1.5695524215698242, "logps/rejected": -1.9884154796600342, "loss": 1.6285, "nll_loss": 1.580439567565918, "rewards/accuracies": 1.0, "rewards/chosen": -0.15695525705814362, "rewards/margins": 0.04188628867268562, "rewards/rejected": -0.19884154200553894, "step": 468 }, { "epoch": 0.7391646966115051, "grad_norm": 0.20162400603294373, "learning_rate": 3.184004206144803e-06, "log_odds_chosen": 0.7329556941986084, "log_odds_ratio": -0.4027223289012909, "logits/chosen": -0.23247480392456055, "logits/rejected": -1.2680878639221191, "logps/chosen": -1.4653428792953491, "logps/rejected": -2.0662801265716553, "loss": 1.5357, "nll_loss": 1.4954301118850708, "rewards/accuracies": 1.0, "rewards/chosen": -0.1465342789888382, "rewards/margins": 0.06009373068809509, "rewards/rejected": -0.20662802457809448, "step": 469 }, { "epoch": 0.7407407407407407, "grad_norm": 0.18583819270133972, "learning_rate": 3.1795694263410386e-06, "log_odds_chosen": 0.7982565760612488, "log_odds_ratio": -0.38432639837265015, "logits/chosen": -0.19295667111873627, "logits/rejected": -1.293751835823059, "logps/chosen": -1.4557034969329834, "logps/rejected": -2.122990608215332, "loss": 1.5197, "nll_loss": 1.48124098777771, "rewards/accuracies": 1.0, "rewards/chosen": -0.14557035267353058, "rewards/margins": 0.06672872602939606, "rewards/rejected": -0.21229907870292664, "step": 470 }, { "epoch": 0.7423167848699763, "grad_norm": 0.1854601353406906, "learning_rate": 3.1751257354536634e-06, "log_odds_chosen": 0.552662193775177, "log_odds_ratio": -0.4665309488773346, "logits/chosen": -0.1528901308774948, "logits/rejected": -1.371885061264038, "logps/chosen": -1.4518961906433105, "logps/rejected": -1.9063466787338257, "loss": 1.5332, "nll_loss": 1.4865120649337769, "rewards/accuracies": 0.875, "rewards/chosen": -0.14518961310386658, "rewards/margins": 0.04544505476951599, "rewards/rejected": -0.19063468277454376, "step": 471 }, { "epoch": 0.7438928289992119, "grad_norm": 0.1899978667497635, "learning_rate": 3.1706731670526394e-06, "log_odds_chosen": 0.6217374205589294, "log_odds_ratio": -0.43196773529052734, "logits/chosen": -0.2525237500667572, "logits/rejected": -1.3043723106384277, "logps/chosen": -1.5042320489883423, "logps/rejected": -2.0150396823883057, "loss": 1.5598, "nll_loss": 1.5165841579437256, "rewards/accuracies": 1.0, "rewards/chosen": -0.15042319893836975, "rewards/margins": 0.05108076333999634, "rewards/rejected": -0.2015039622783661, "step": 472 }, { "epoch": 0.7454688731284476, "grad_norm": 0.1951638162136078, "learning_rate": 3.166211754774994e-06, "log_odds_chosen": 0.6629724502563477, "log_odds_ratio": -0.422730028629303, "logits/chosen": -0.20018966495990753, "logits/rejected": -1.4212433099746704, "logps/chosen": -1.5557457208633423, "logps/rejected": -2.1103403568267822, "loss": 1.6437, "nll_loss": 1.6013872623443604, "rewards/accuracies": 1.0, "rewards/chosen": -0.15557457506656647, "rewards/margins": 0.055459462106227875, "rewards/rejected": -0.21103402972221375, "step": 473 }, { "epoch": 0.7470449172576832, "grad_norm": 0.19168664515018463, "learning_rate": 3.1617415323245665e-06, "log_odds_chosen": 0.6726161241531372, "log_odds_ratio": -0.42290619015693665, "logits/chosen": -0.2400115728378296, "logits/rejected": -1.3783491849899292, "logps/chosen": -1.4658328294754028, "logps/rejected": -2.0105373859405518, "loss": 1.5266, "nll_loss": 1.4843122959136963, "rewards/accuracies": 1.0, "rewards/chosen": -0.14658328890800476, "rewards/margins": 0.054470453411340714, "rewards/rejected": -0.20105375349521637, "step": 474 }, { "epoch": 0.7486209613869188, "grad_norm": 0.18189279735088348, "learning_rate": 3.157262533471752e-06, "log_odds_chosen": 0.7140947580337524, "log_odds_ratio": -0.40235432982444763, "logits/chosen": -0.15209892392158508, "logits/rejected": -1.3825089931488037, "logps/chosen": -1.5169684886932373, "logps/rejected": -2.1144258975982666, "loss": 1.5855, "nll_loss": 1.54523503780365, "rewards/accuracies": 1.0, "rewards/chosen": -0.15169686079025269, "rewards/margins": 0.05974572151899338, "rewards/rejected": -0.21144257485866547, "step": 475 }, { "epoch": 0.7501970055161544, "grad_norm": 0.19609789550304413, "learning_rate": 3.1527747920532468e-06, "log_odds_chosen": 0.565768837928772, "log_odds_ratio": -0.4514068365097046, "logits/chosen": -0.16712833940982819, "logits/rejected": -1.1320550441741943, "logps/chosen": -1.4766864776611328, "logps/rejected": -1.9391474723815918, "loss": 1.5527, "nll_loss": 1.5075533390045166, "rewards/accuracies": 1.0, "rewards/chosen": -0.14766864478588104, "rewards/margins": 0.04624609276652336, "rewards/rejected": -0.1939147412776947, "step": 476 }, { "epoch": 0.75177304964539, "grad_norm": 0.19328206777572632, "learning_rate": 3.148278341971795e-06, "log_odds_chosen": 0.65244460105896, "log_odds_ratio": -0.4249870181083679, "logits/chosen": -0.22436018288135529, "logits/rejected": -1.223185420036316, "logps/chosen": -1.5077931880950928, "logps/rejected": -2.0486738681793213, "loss": 1.5739, "nll_loss": 1.5314006805419922, "rewards/accuracies": 1.0, "rewards/chosen": -0.15077932178974152, "rewards/margins": 0.054088056087493896, "rewards/rejected": -0.2048673778772354, "step": 477 }, { "epoch": 0.7533490937746257, "grad_norm": 0.20173610746860504, "learning_rate": 3.143773217195929e-06, "log_odds_chosen": 0.73219895362854, "log_odds_ratio": -0.4004945158958435, "logits/chosen": -0.2401634305715561, "logits/rejected": -1.3643122911453247, "logps/chosen": -1.5717616081237793, "logps/rejected": -2.18953800201416, "loss": 1.6288, "nll_loss": 1.5887385606765747, "rewards/accuracies": 1.0, "rewards/chosen": -0.1571761518716812, "rewards/margins": 0.06177765130996704, "rewards/rejected": -0.21895381808280945, "step": 478 }, { "epoch": 0.7549251379038613, "grad_norm": 0.19656141102313995, "learning_rate": 3.139259451759714e-06, "log_odds_chosen": 0.5744882225990295, "log_odds_ratio": -0.4510755240917206, "logits/chosen": -0.24257460236549377, "logits/rejected": -1.3083908557891846, "logps/chosen": -1.5712709426879883, "logps/rejected": -2.050100564956665, "loss": 1.644, "nll_loss": 1.5989316701889038, "rewards/accuracies": 1.0, "rewards/chosen": -0.15712709724903107, "rewards/margins": 0.047882966697216034, "rewards/rejected": -0.2050100713968277, "step": 479 }, { "epoch": 0.7565011820330969, "grad_norm": 0.18602579832077026, "learning_rate": 3.134737079762493e-06, "log_odds_chosen": 0.6003292202949524, "log_odds_ratio": -0.4424300193786621, "logits/chosen": -0.1941242814064026, "logits/rejected": -1.311800479888916, "logps/chosen": -1.501556634902954, "logps/rejected": -1.9944710731506348, "loss": 1.5673, "nll_loss": 1.5230939388275146, "rewards/accuracies": 1.0, "rewards/chosen": -0.15015564858913422, "rewards/margins": 0.04929143935441971, "rewards/rejected": -0.19944709539413452, "step": 480 }, { "epoch": 0.7580772261623325, "grad_norm": 0.20337559282779694, "learning_rate": 3.130206135368626e-06, "log_odds_chosen": 0.6041734218597412, "log_odds_ratio": -0.4474312365055084, "logits/chosen": -0.24175474047660828, "logits/rejected": -0.9721249341964722, "logps/chosen": -1.425898790359497, "logps/rejected": -1.9149250984191895, "loss": 1.5167, "nll_loss": 1.471928596496582, "rewards/accuracies": 1.0, "rewards/chosen": -0.14258988201618195, "rewards/margins": 0.04890260472893715, "rewards/rejected": -0.19149249792099, "step": 481 }, { "epoch": 0.7596532702915682, "grad_norm": 0.19193829596042633, "learning_rate": 3.1256666528072327e-06, "log_odds_chosen": 0.7867165803909302, "log_odds_ratio": -0.38077130913734436, "logits/chosen": -0.23528993129730225, "logits/rejected": -1.075020670890808, "logps/chosen": -1.472687840461731, "logps/rejected": -2.124319553375244, "loss": 1.5375, "nll_loss": 1.499396800994873, "rewards/accuracies": 1.0, "rewards/chosen": -0.14726878702640533, "rewards/margins": 0.06516318768262863, "rewards/rejected": -0.21243198215961456, "step": 482 }, { "epoch": 0.7612293144208038, "grad_norm": 0.18599654734134674, "learning_rate": 3.121118666371937e-06, "log_odds_chosen": 0.595000147819519, "log_odds_ratio": -0.4452923834323883, "logits/chosen": -0.15184305608272552, "logits/rejected": -1.538995623588562, "logps/chosen": -1.5383219718933105, "logps/rejected": -2.033515453338623, "loss": 1.5951, "nll_loss": 1.550559639930725, "rewards/accuracies": 1.0, "rewards/chosen": -0.15383221209049225, "rewards/margins": 0.049519333988428116, "rewards/rejected": -0.20335155725479126, "step": 483 }, { "epoch": 0.7628053585500394, "grad_norm": 0.21291442215442657, "learning_rate": 3.1165622104206034e-06, "log_odds_chosen": 0.770659863948822, "log_odds_ratio": -0.3848019242286682, "logits/chosen": -0.3678995370864868, "logits/rejected": -1.2183688879013062, "logps/chosen": -1.4637348651885986, "logps/rejected": -2.1029365062713623, "loss": 1.5321, "nll_loss": 1.4936531782150269, "rewards/accuracies": 1.0, "rewards/chosen": -0.14637349545955658, "rewards/margins": 0.06392017006874084, "rewards/rejected": -0.21029365062713623, "step": 484 }, { "epoch": 0.764381402679275, "grad_norm": 0.18177950382232666, "learning_rate": 3.1119973193750816e-06, "log_odds_chosen": 0.6704604029655457, "log_odds_ratio": -0.41766875982284546, "logits/chosen": -0.2663368880748749, "logits/rejected": -1.2851604223251343, "logps/chosen": -1.475534439086914, "logps/rejected": -2.027827739715576, "loss": 1.536, "nll_loss": 1.4942355155944824, "rewards/accuracies": 1.0, "rewards/chosen": -0.1475534588098526, "rewards/margins": 0.05522932484745979, "rewards/rejected": -0.2027827799320221, "step": 485 }, { "epoch": 0.7659574468085106, "grad_norm": 0.21737127006053925, "learning_rate": 3.1074240277209408e-06, "log_odds_chosen": 0.6194983124732971, "log_odds_ratio": -0.4379619061946869, "logits/chosen": -0.24437181651592255, "logits/rejected": -1.2440481185913086, "logps/chosen": -1.494814157485962, "logps/rejected": -2.00154709815979, "loss": 1.5423, "nll_loss": 1.4985466003417969, "rewards/accuracies": 1.0, "rewards/chosen": -0.1494814157485962, "rewards/margins": 0.05067329481244087, "rewards/rejected": -0.20015469193458557, "step": 486 }, { "epoch": 0.7675334909377463, "grad_norm": 0.20863457024097443, "learning_rate": 3.102842370007217e-06, "log_odds_chosen": 0.6833222508430481, "log_odds_ratio": -0.41733595728874207, "logits/chosen": -0.16676893830299377, "logits/rejected": -1.15752375125885, "logps/chosen": -1.5041608810424805, "logps/rejected": -2.0647218227386475, "loss": 1.5703, "nll_loss": 1.5285258293151855, "rewards/accuracies": 1.0, "rewards/chosen": -0.1504160761833191, "rewards/margins": 0.05605611205101013, "rewards/rejected": -0.20647220313549042, "step": 487 }, { "epoch": 0.7691095350669819, "grad_norm": 0.1944776177406311, "learning_rate": 3.0982523808461454e-06, "log_odds_chosen": 0.5529102683067322, "log_odds_ratio": -0.4605258107185364, "logits/chosen": -0.15431943535804749, "logits/rejected": -1.2589623928070068, "logps/chosen": -1.5484925508499146, "logps/rejected": -2.0021567344665527, "loss": 1.6075, "nll_loss": 1.561496376991272, "rewards/accuracies": 1.0, "rewards/chosen": -0.15484926104545593, "rewards/margins": 0.04536642134189606, "rewards/rejected": -0.2002156674861908, "step": 488 }, { "epoch": 0.7706855791962175, "grad_norm": 0.18545880913734436, "learning_rate": 3.0936540949129006e-06, "log_odds_chosen": 0.6402296423912048, "log_odds_ratio": -0.4339551031589508, "logits/chosen": -0.2207900583744049, "logits/rejected": -1.1871285438537598, "logps/chosen": -1.5241451263427734, "logps/rejected": -2.057253122329712, "loss": 1.5905, "nll_loss": 1.5471105575561523, "rewards/accuracies": 1.0, "rewards/chosen": -0.15241453051567078, "rewards/margins": 0.053310781717300415, "rewards/rejected": -0.20572529733181, "step": 489 }, { "epoch": 0.7722616233254531, "grad_norm": 0.178094744682312, "learning_rate": 3.0890475469453378e-06, "log_odds_chosen": 0.7427234053611755, "log_odds_ratio": -0.40040361881256104, "logits/chosen": -0.2081877589225769, "logits/rejected": -1.3924274444580078, "logps/chosen": -1.4031827449798584, "logps/rejected": -2.006999969482422, "loss": 1.4721, "nll_loss": 1.4321045875549316, "rewards/accuracies": 1.0, "rewards/chosen": -0.14031827449798584, "rewards/margins": 0.06038173660635948, "rewards/rejected": -0.20069998502731323, "step": 490 }, { "epoch": 0.7738376674546887, "grad_norm": 0.18201249837875366, "learning_rate": 3.0844327717437263e-06, "log_odds_chosen": 0.6974368691444397, "log_odds_ratio": -0.41516321897506714, "logits/chosen": -0.21113747358322144, "logits/rejected": -1.2565526962280273, "logps/chosen": -1.4272940158843994, "logps/rejected": -2.0010313987731934, "loss": 1.4995, "nll_loss": 1.4580097198486328, "rewards/accuracies": 1.0, "rewards/chosen": -0.14272941648960114, "rewards/margins": 0.057373758405447006, "rewards/rejected": -0.20010316371917725, "step": 491 }, { "epoch": 0.7754137115839244, "grad_norm": 0.1853957176208496, "learning_rate": 3.0798098041704892e-06, "log_odds_chosen": 0.5092182159423828, "log_odds_ratio": -0.47541776299476624, "logits/chosen": -0.15838466584682465, "logits/rejected": -1.3052377700805664, "logps/chosen": -1.361024260520935, "logps/rejected": -1.7661519050598145, "loss": 1.4535, "nll_loss": 1.405916690826416, "rewards/accuracies": 1.0, "rewards/chosen": -0.13610242307186127, "rewards/margins": 0.04051277041435242, "rewards/rejected": -0.1766151785850525, "step": 492 }, { "epoch": 0.77698975571316, "grad_norm": 0.1893599033355713, "learning_rate": 3.0751786791499368e-06, "log_odds_chosen": 0.6547857522964478, "log_odds_ratio": -0.42588385939598083, "logits/chosen": -0.17713405191898346, "logits/rejected": -1.2708368301391602, "logps/chosen": -1.5448169708251953, "logps/rejected": -2.0935890674591064, "loss": 1.6139, "nll_loss": 1.5712815523147583, "rewards/accuracies": 1.0, "rewards/chosen": -0.1544817090034485, "rewards/margins": 0.05487719178199768, "rewards/rejected": -0.20935890078544617, "step": 493 }, { "epoch": 0.7785657998423956, "grad_norm": 0.18722397089004517, "learning_rate": 3.070539431668008e-06, "log_odds_chosen": 0.6233894228935242, "log_odds_ratio": -0.43421441316604614, "logits/chosen": -0.1989862620830536, "logits/rejected": -1.2033601999282837, "logps/chosen": -1.5463478565216064, "logps/rejected": -2.0686848163604736, "loss": 1.6068, "nll_loss": 1.5633586645126343, "rewards/accuracies": 1.0, "rewards/chosen": -0.15463480353355408, "rewards/margins": 0.05223367363214493, "rewards/rejected": -0.2068684697151184, "step": 494 }, { "epoch": 0.7801418439716312, "grad_norm": 0.19007954001426697, "learning_rate": 3.0658920967720018e-06, "log_odds_chosen": 0.7926431894302368, "log_odds_ratio": -0.38211071491241455, "logits/chosen": -0.3403100073337555, "logits/rejected": -1.2223634719848633, "logps/chosen": -1.4851934909820557, "logps/rejected": -2.1482720375061035, "loss": 1.5559, "nll_loss": 1.5176681280136108, "rewards/accuracies": 1.0, "rewards/chosen": -0.148519366979599, "rewards/margins": 0.06630785018205643, "rewards/rejected": -0.21482720971107483, "step": 495 }, { "epoch": 0.7817178881008668, "grad_norm": 0.1839301884174347, "learning_rate": 3.0612367095703116e-06, "log_odds_chosen": 0.7492038607597351, "log_odds_ratio": -0.39107295870780945, "logits/chosen": -0.18261493742465973, "logits/rejected": -1.4831030368804932, "logps/chosen": -1.5249770879745483, "logps/rejected": -2.153897762298584, "loss": 1.5805, "nll_loss": 1.5413737297058105, "rewards/accuracies": 1.0, "rewards/chosen": -0.15249772369861603, "rewards/margins": 0.06289205700159073, "rewards/rejected": -0.21538978815078735, "step": 496 }, { "epoch": 0.7832939322301025, "grad_norm": 0.19066324830055237, "learning_rate": 3.056573305232167e-06, "log_odds_chosen": 0.7923998832702637, "log_odds_ratio": -0.380237877368927, "logits/chosen": -0.20253872871398926, "logits/rejected": -1.41769278049469, "logps/chosen": -1.4783601760864258, "logps/rejected": -2.1322264671325684, "loss": 1.5446, "nll_loss": 1.5065717697143555, "rewards/accuracies": 1.0, "rewards/chosen": -0.14783601462841034, "rewards/margins": 0.06538661569356918, "rewards/rejected": -0.21322263777256012, "step": 497 }, { "epoch": 0.7848699763593381, "grad_norm": 0.19244952499866486, "learning_rate": 3.051901918987359e-06, "log_odds_chosen": 0.7435587048530579, "log_odds_ratio": -0.39330264925956726, "logits/chosen": -0.33268722891807556, "logits/rejected": -1.3394025564193726, "logps/chosen": -1.4223885536193848, "logps/rejected": -2.0310678482055664, "loss": 1.4913, "nll_loss": 1.4519734382629395, "rewards/accuracies": 1.0, "rewards/chosen": -0.14223885536193848, "rewards/margins": 0.06086793541908264, "rewards/rejected": -0.20310677587985992, "step": 498 }, { "epoch": 0.7864460204885737, "grad_norm": 0.22321587800979614, "learning_rate": 3.047222586125979e-06, "log_odds_chosen": 0.7735339999198914, "log_odds_ratio": -0.3840080797672272, "logits/chosen": -0.1548488885164261, "logits/rejected": -0.9053974151611328, "logps/chosen": -1.431384801864624, "logps/rejected": -2.068115234375, "loss": 1.51, "nll_loss": 1.4715591669082642, "rewards/accuracies": 1.0, "rewards/chosen": -0.14313849806785583, "rewards/margins": 0.06367303431034088, "rewards/rejected": -0.20681151747703552, "step": 499 }, { "epoch": 0.7880220646178093, "grad_norm": 0.18041643500328064, "learning_rate": 3.042535341998152e-06, "log_odds_chosen": 0.5752748847007751, "log_odds_ratio": -0.4517236649990082, "logits/chosen": -0.07348179817199707, "logits/rejected": -1.2616822719573975, "logps/chosen": -1.578834056854248, "logps/rejected": -2.061535120010376, "loss": 1.6247, "nll_loss": 1.579504370689392, "rewards/accuracies": 1.0, "rewards/chosen": -0.1578833907842636, "rewards/margins": 0.04827011376619339, "rewards/rejected": -0.2061535120010376, "step": 500 } ], "logging_steps": 1, "max_steps": 1270, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }