{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.575256107171001, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0015760441292356187, "grad_norm": 0.711133599281311, "learning_rate": 0.0, "log_odds_chosen": 0.38395223021507263, "log_odds_ratio": -0.5362357497215271, "logits/chosen": -0.6395432353019714, "logits/rejected": -0.1508907973766327, "logps/chosen": -1.973930835723877, "logps/rejected": -2.3123726844787598, "loss": 2.2406, "nll_loss": 2.1870110034942627, "rewards/accuracies": 0.875, "rewards/chosen": -0.19739308953285217, "rewards/margins": 0.03384416550397873, "rewards/rejected": -0.2312372624874115, "step": 1 }, { "epoch": 0.0031520882584712374, "grad_norm": 0.6591355204582214, "learning_rate": 3.1496062992125985e-08, "log_odds_chosen": 0.39076143503189087, "log_odds_ratio": -0.5237792730331421, "logits/chosen": -0.6357220411300659, "logits/rejected": -0.09711451828479767, "logps/chosen": -1.8780229091644287, "logps/rejected": -2.2161149978637695, "loss": 2.1349, "nll_loss": 2.082494020462036, "rewards/accuracies": 1.0, "rewards/chosen": -0.1878022849559784, "rewards/margins": 0.03380918130278587, "rewards/rejected": -0.22161146998405457, "step": 2 }, { "epoch": 0.004728132387706856, "grad_norm": 0.74098140001297, "learning_rate": 6.299212598425197e-08, "log_odds_chosen": 0.5195883512496948, "log_odds_ratio": -0.4742986857891083, "logits/chosen": -0.7715582251548767, "logits/rejected": -0.26645177602767944, "logps/chosen": -1.9943277835845947, "logps/rejected": -2.4578633308410645, "loss": 2.2471, "nll_loss": 2.199704647064209, "rewards/accuracies": 1.0, "rewards/chosen": -0.19943277537822723, "rewards/margins": 0.046353571116924286, "rewards/rejected": -0.24578633904457092, "step": 3 }, { "epoch": 0.006304176516942475, "grad_norm": 0.6662443280220032, "learning_rate": 9.448818897637795e-08, "log_odds_chosen": 0.55958491563797, "log_odds_ratio": -0.46615538001060486, "logits/chosen": -0.5838385820388794, "logits/rejected": -0.1573001742362976, "logps/chosen": -2.007845163345337, "logps/rejected": -2.5115368366241455, "loss": 2.2568, "nll_loss": 2.210216760635376, "rewards/accuracies": 0.875, "rewards/chosen": -0.20078451931476593, "rewards/margins": 0.05036917328834534, "rewards/rejected": -0.2511536777019501, "step": 4 }, { "epoch": 0.007880220646178092, "grad_norm": 0.7787235379219055, "learning_rate": 1.2598425196850394e-07, "log_odds_chosen": 0.705410361289978, "log_odds_ratio": -0.42067474126815796, "logits/chosen": -0.7229734063148499, "logits/rejected": -0.27978262305259705, "logps/chosen": -1.9303616285324097, "logps/rejected": -2.555299758911133, "loss": 2.1734, "nll_loss": 2.1313021183013916, "rewards/accuracies": 1.0, "rewards/chosen": -0.19303615391254425, "rewards/margins": 0.06249381601810455, "rewards/rejected": -0.2555299699306488, "step": 5 }, { "epoch": 0.009456264775413711, "grad_norm": 0.6482278108596802, "learning_rate": 1.5748031496062992e-07, "log_odds_chosen": 0.40409255027770996, "log_odds_ratio": -0.5151315331459045, "logits/chosen": -0.5276838541030884, "logits/rejected": -0.05200649052858353, "logps/chosen": -1.8561073541641235, "logps/rejected": -2.208005905151367, "loss": 2.1449, "nll_loss": 2.0933837890625, "rewards/accuracies": 1.0, "rewards/chosen": -0.18561072647571564, "rewards/margins": 0.03518984466791153, "rewards/rejected": -0.22080056369304657, "step": 6 }, { "epoch": 0.01103230890464933, "grad_norm": 0.6663646697998047, "learning_rate": 1.889763779527559e-07, "log_odds_chosen": 0.546273410320282, "log_odds_ratio": -0.46661460399627686, "logits/chosen": -0.5808312296867371, "logits/rejected": -0.19844059646129608, "logps/chosen": -1.9260658025741577, "logps/rejected": -2.4119839668273926, "loss": 2.1774, "nll_loss": 2.1307828426361084, "rewards/accuracies": 1.0, "rewards/chosen": -0.1926065981388092, "rewards/margins": 0.04859180375933647, "rewards/rejected": -0.24119840562343597, "step": 7 }, { "epoch": 0.01260835303388495, "grad_norm": 0.782015323638916, "learning_rate": 2.2047244094488187e-07, "log_odds_chosen": 0.8110038638114929, "log_odds_ratio": -0.3770079016685486, "logits/chosen": -0.6198790669441223, "logits/rejected": -0.24129487574100494, "logps/chosen": -2.014923572540283, "logps/rejected": -2.744842290878296, "loss": 2.2715, "nll_loss": 2.2338366508483887, "rewards/accuracies": 1.0, "rewards/chosen": -0.20149235427379608, "rewards/margins": 0.07299190014600754, "rewards/rejected": -0.274484246969223, "step": 8 }, { "epoch": 0.014184397163120567, "grad_norm": 0.6285175085067749, "learning_rate": 2.519685039370079e-07, "log_odds_chosen": 0.4325932562351227, "log_odds_ratio": -0.5066515207290649, "logits/chosen": -0.5963254570960999, "logits/rejected": -0.07179142534732819, "logps/chosen": -1.9378471374511719, "logps/rejected": -2.31904673576355, "loss": 2.1726, "nll_loss": 2.1219170093536377, "rewards/accuracies": 1.0, "rewards/chosen": -0.1937847137451172, "rewards/margins": 0.038119956851005554, "rewards/rejected": -0.23190467059612274, "step": 9 }, { "epoch": 0.015760441292356184, "grad_norm": 0.7205284833908081, "learning_rate": 2.8346456692913386e-07, "log_odds_chosen": 0.4166225790977478, "log_odds_ratio": -0.5110099911689758, "logits/chosen": -0.6733875274658203, "logits/rejected": -0.2730729877948761, "logps/chosen": -1.96082603931427, "logps/rejected": -2.3293237686157227, "loss": 2.21, "nll_loss": 2.158905267715454, "rewards/accuracies": 1.0, "rewards/chosen": -0.19608259201049805, "rewards/margins": 0.03684981167316437, "rewards/rejected": -0.2329324334859848, "step": 10 }, { "epoch": 0.017336485421591805, "grad_norm": 0.7679106593132019, "learning_rate": 3.1496062992125984e-07, "log_odds_chosen": 0.48609045147895813, "log_odds_ratio": -0.48330000042915344, "logits/chosen": -0.5869827270507812, "logits/rejected": -0.24110490083694458, "logps/chosen": -2.035675048828125, "logps/rejected": -2.4683971405029297, "loss": 2.2835, "nll_loss": 2.2351536750793457, "rewards/accuracies": 1.0, "rewards/chosen": -0.2035675048828125, "rewards/margins": 0.043272241950035095, "rewards/rejected": -0.2468397617340088, "step": 11 }, { "epoch": 0.018912529550827423, "grad_norm": 0.8531121015548706, "learning_rate": 3.464566929133858e-07, "log_odds_chosen": 0.5449747443199158, "log_odds_ratio": -0.461398184299469, "logits/chosen": -0.7053269147872925, "logits/rejected": -0.12370388209819794, "logps/chosen": -2.076099157333374, "logps/rejected": -2.565109968185425, "loss": 2.3324, "nll_loss": 2.286276340484619, "rewards/accuracies": 1.0, "rewards/chosen": -0.20760990679264069, "rewards/margins": 0.04890113323926926, "rewards/rejected": -0.25651103258132935, "step": 12 }, { "epoch": 0.02048857368006304, "grad_norm": 0.8578523397445679, "learning_rate": 3.779527559055118e-07, "log_odds_chosen": 0.5090824365615845, "log_odds_ratio": -0.47858256101608276, "logits/chosen": -0.7868019342422485, "logits/rejected": -0.07148027420043945, "logps/chosen": -1.9688328504562378, "logps/rejected": -2.4192914962768555, "loss": 2.2258, "nll_loss": 2.1779398918151855, "rewards/accuracies": 1.0, "rewards/chosen": -0.19688329100608826, "rewards/margins": 0.045045845210552216, "rewards/rejected": -0.24192912876605988, "step": 13 }, { "epoch": 0.02206461780929866, "grad_norm": 0.675309956073761, "learning_rate": 4.0944881889763777e-07, "log_odds_chosen": 0.3657957911491394, "log_odds_ratio": -0.5386834144592285, "logits/chosen": -0.6320536136627197, "logits/rejected": -0.3780551254749298, "logps/chosen": -1.9759397506713867, "logps/rejected": -2.300447463989258, "loss": 2.2257, "nll_loss": 2.171861171722412, "rewards/accuracies": 1.0, "rewards/chosen": -0.19759398698806763, "rewards/margins": 0.03245077282190323, "rewards/rejected": -0.23004476726055145, "step": 14 }, { "epoch": 0.02364066193853428, "grad_norm": 0.7733155488967896, "learning_rate": 4.4094488188976375e-07, "log_odds_chosen": 0.41122347116470337, "log_odds_ratio": -0.5111778378486633, "logits/chosen": -0.7448755502700806, "logits/rejected": -0.08966228365898132, "logps/chosen": -1.9629881381988525, "logps/rejected": -2.32719087600708, "loss": 2.2273, "nll_loss": 2.176145076751709, "rewards/accuracies": 1.0, "rewards/chosen": -0.19629880785942078, "rewards/margins": 0.03642028942704201, "rewards/rejected": -0.23271909356117249, "step": 15 }, { "epoch": 0.0252167060677699, "grad_norm": 0.7707588076591492, "learning_rate": 4.7244094488188973e-07, "log_odds_chosen": 0.4248766303062439, "log_odds_ratio": -0.5078074336051941, "logits/chosen": -0.6721003651618958, "logits/rejected": -0.19612114131450653, "logps/chosen": -1.9606484174728394, "logps/rejected": -2.336439609527588, "loss": 2.22, "nll_loss": 2.1692113876342773, "rewards/accuracies": 1.0, "rewards/chosen": -0.19606485962867737, "rewards/margins": 0.03757911175489426, "rewards/rejected": -0.23364394903182983, "step": 16 }, { "epoch": 0.026792750197005517, "grad_norm": 0.6388130784034729, "learning_rate": 5.039370078740158e-07, "log_odds_chosen": 0.6333746314048767, "log_odds_ratio": -0.43810510635375977, "logits/chosen": -0.5735284686088562, "logits/rejected": -0.31118011474609375, "logps/chosen": -1.9192208051681519, "logps/rejected": -2.4831156730651855, "loss": 2.1769, "nll_loss": 2.133085012435913, "rewards/accuracies": 1.0, "rewards/chosen": -0.19192209839820862, "rewards/margins": 0.056389469653367996, "rewards/rejected": -0.24831153452396393, "step": 17 }, { "epoch": 0.028368794326241134, "grad_norm": 0.7463440299034119, "learning_rate": 5.354330708661418e-07, "log_odds_chosen": 0.3546241819858551, "log_odds_ratio": -0.544340193271637, "logits/chosen": -0.6276163458824158, "logits/rejected": 0.05960750952363014, "logps/chosen": -1.9554523229599, "logps/rejected": -2.2666845321655273, "loss": 2.2226, "nll_loss": 2.168125629425049, "rewards/accuracies": 0.875, "rewards/chosen": -0.1955452263355255, "rewards/margins": 0.031123224645853043, "rewards/rejected": -0.22666846215724945, "step": 18 }, { "epoch": 0.029944838455476755, "grad_norm": 0.6933729648590088, "learning_rate": 5.669291338582677e-07, "log_odds_chosen": 0.6329823732376099, "log_odds_ratio": -0.4341467022895813, "logits/chosen": -0.753471851348877, "logits/rejected": -0.27794983983039856, "logps/chosen": -1.8416026830673218, "logps/rejected": -2.3966176509857178, "loss": 2.115, "nll_loss": 2.07161283493042, "rewards/accuracies": 1.0, "rewards/chosen": -0.1841602772474289, "rewards/margins": 0.05550149455666542, "rewards/rejected": -0.23966176807880402, "step": 19 }, { "epoch": 0.03152088258471237, "grad_norm": 0.7374937534332275, "learning_rate": 5.984251968503937e-07, "log_odds_chosen": 0.5668491125106812, "log_odds_ratio": -0.46694958209991455, "logits/chosen": -0.562913179397583, "logits/rejected": -0.25034084916114807, "logps/chosen": -1.995664119720459, "logps/rejected": -2.5026793479919434, "loss": 2.262, "nll_loss": 2.215284824371338, "rewards/accuracies": 0.875, "rewards/chosen": -0.19956642389297485, "rewards/margins": 0.05070152133703232, "rewards/rejected": -0.25026795268058777, "step": 20 }, { "epoch": 0.03309692671394799, "grad_norm": 0.75541090965271, "learning_rate": 6.299212598425197e-07, "log_odds_chosen": 0.35937565565109253, "log_odds_ratio": -0.5362535715103149, "logits/chosen": -0.65813809633255, "logits/rejected": -0.1279284954071045, "logps/chosen": -2.04636812210083, "logps/rejected": -2.3680672645568848, "loss": 2.3058, "nll_loss": 2.2522225379943848, "rewards/accuracies": 0.75, "rewards/chosen": -0.20463679730892181, "rewards/margins": 0.032169945538043976, "rewards/rejected": -0.2368067502975464, "step": 21 }, { "epoch": 0.03467297084318361, "grad_norm": 0.7607430219650269, "learning_rate": 6.614173228346457e-07, "log_odds_chosen": 0.36671358346939087, "log_odds_ratio": -0.5375123620033264, "logits/chosen": -0.7311565279960632, "logits/rejected": -0.08119023591279984, "logps/chosen": -1.9978885650634766, "logps/rejected": -2.3232674598693848, "loss": 2.2612, "nll_loss": 2.207477331161499, "rewards/accuracies": 0.875, "rewards/chosen": -0.1997888684272766, "rewards/margins": 0.03253789618611336, "rewards/rejected": -0.23232676088809967, "step": 22 }, { "epoch": 0.036249014972419225, "grad_norm": 0.7234435081481934, "learning_rate": 6.929133858267716e-07, "log_odds_chosen": 0.39767685532569885, "log_odds_ratio": -0.5170127153396606, "logits/chosen": -0.7005606293678284, "logits/rejected": -0.19814857840538025, "logps/chosen": -2.0349388122558594, "logps/rejected": -2.389338254928589, "loss": 2.2728, "nll_loss": 2.221050977706909, "rewards/accuracies": 1.0, "rewards/chosen": -0.2034938931465149, "rewards/margins": 0.03543993830680847, "rewards/rejected": -0.23893383145332336, "step": 23 }, { "epoch": 0.037825059101654845, "grad_norm": 0.7151917815208435, "learning_rate": 7.244094488188977e-07, "log_odds_chosen": 0.4232009947299957, "log_odds_ratio": -0.5118768215179443, "logits/chosen": -0.42520439624786377, "logits/rejected": -0.13911336660385132, "logps/chosen": -1.974929690361023, "logps/rejected": -2.349339723587036, "loss": 2.2336, "nll_loss": 2.1824424266815186, "rewards/accuracies": 1.0, "rewards/chosen": -0.19749295711517334, "rewards/margins": 0.03744099289178848, "rewards/rejected": -0.23493395745754242, "step": 24 }, { "epoch": 0.039401103230890466, "grad_norm": 0.7771602272987366, "learning_rate": 7.559055118110236e-07, "log_odds_chosen": 0.3602335751056671, "log_odds_ratio": -0.5373751521110535, "logits/chosen": -0.7194356918334961, "logits/rejected": -0.2223992645740509, "logps/chosen": -2.008389711380005, "logps/rejected": -2.332648277282715, "loss": 2.2619, "nll_loss": 2.208117961883545, "rewards/accuracies": 0.875, "rewards/chosen": -0.20083898305892944, "rewards/margins": 0.03242585435509682, "rewards/rejected": -0.23326483368873596, "step": 25 }, { "epoch": 0.04097714736012608, "grad_norm": 0.7429983615875244, "learning_rate": 7.874015748031496e-07, "log_odds_chosen": 0.46550512313842773, "log_odds_ratio": -0.4939187467098236, "logits/chosen": -0.8255457282066345, "logits/rejected": -0.3358853757381439, "logps/chosen": -1.954911708831787, "logps/rejected": -2.3684439659118652, "loss": 2.2146, "nll_loss": 2.1652259826660156, "rewards/accuracies": 1.0, "rewards/chosen": -0.19549117982387543, "rewards/margins": 0.041353195905685425, "rewards/rejected": -0.23684436082839966, "step": 26 }, { "epoch": 0.0425531914893617, "grad_norm": 0.7892085313796997, "learning_rate": 8.188976377952755e-07, "log_odds_chosen": 0.7098885774612427, "log_odds_ratio": -0.41376522183418274, "logits/chosen": -0.6670718789100647, "logits/rejected": -0.18330247700214386, "logps/chosen": -2.0041563510894775, "logps/rejected": -2.642397165298462, "loss": 2.2622, "nll_loss": 2.2208266258239746, "rewards/accuracies": 1.0, "rewards/chosen": -0.20041564106941223, "rewards/margins": 0.06382407993078232, "rewards/rejected": -0.26423972845077515, "step": 27 }, { "epoch": 0.04412923561859732, "grad_norm": 0.7302277684211731, "learning_rate": 8.503937007874016e-07, "log_odds_chosen": 0.4039226770401001, "log_odds_ratio": -0.5200110673904419, "logits/chosen": -0.6893348097801208, "logits/rejected": -0.09423862397670746, "logps/chosen": -1.9387571811676025, "logps/rejected": -2.295835494995117, "loss": 2.2167, "nll_loss": 2.1646535396575928, "rewards/accuracies": 0.875, "rewards/chosen": -0.19387571513652802, "rewards/margins": 0.03570783883333206, "rewards/rejected": -0.22958354651927948, "step": 28 }, { "epoch": 0.045705279747832936, "grad_norm": 0.6707211136817932, "learning_rate": 8.818897637795275e-07, "log_odds_chosen": 0.35679179430007935, "log_odds_ratio": -0.5352319478988647, "logits/chosen": -0.5461763739585876, "logits/rejected": -0.14010290801525116, "logps/chosen": -2.050537109375, "logps/rejected": -2.372122287750244, "loss": 2.3054, "nll_loss": 2.2518763542175293, "rewards/accuracies": 1.0, "rewards/chosen": -0.20505373179912567, "rewards/margins": 0.032158493995666504, "rewards/rejected": -0.23721222579479218, "step": 29 }, { "epoch": 0.04728132387706856, "grad_norm": 0.8279486894607544, "learning_rate": 9.133858267716535e-07, "log_odds_chosen": 0.5706252455711365, "log_odds_ratio": -0.45796477794647217, "logits/chosen": -0.6770057678222656, "logits/rejected": -0.2806178331375122, "logps/chosen": -2.0110249519348145, "logps/rejected": -2.521954298019409, "loss": 2.3011, "nll_loss": 2.2553153038024902, "rewards/accuracies": 0.875, "rewards/chosen": -0.20110251009464264, "rewards/margins": 0.05109292268753052, "rewards/rejected": -0.25219541788101196, "step": 30 }, { "epoch": 0.04885736800630418, "grad_norm": 0.7405450344085693, "learning_rate": 9.448818897637795e-07, "log_odds_chosen": 0.6274739503860474, "log_odds_ratio": -0.4411022663116455, "logits/chosen": -0.6367984414100647, "logits/rejected": 0.004103410989046097, "logps/chosen": -2.0546810626983643, "logps/rejected": -2.6223583221435547, "loss": 2.3002, "nll_loss": 2.2560579776763916, "rewards/accuracies": 1.0, "rewards/chosen": -0.20546811819076538, "rewards/margins": 0.056767746806144714, "rewards/rejected": -0.2622358202934265, "step": 31 }, { "epoch": 0.0504334121355398, "grad_norm": 0.638395369052887, "learning_rate": 9.763779527559055e-07, "log_odds_chosen": 0.6788234114646912, "log_odds_ratio": -0.42549028992652893, "logits/chosen": -0.570993185043335, "logits/rejected": -0.3356379568576813, "logps/chosen": -1.9266096353530884, "logps/rejected": -2.533726692199707, "loss": 2.1793, "nll_loss": 2.136709690093994, "rewards/accuracies": 1.0, "rewards/chosen": -0.19266097247600555, "rewards/margins": 0.060711681842803955, "rewards/rejected": -0.2533726394176483, "step": 32 }, { "epoch": 0.05200945626477541, "grad_norm": 0.6255950331687927, "learning_rate": 1.0078740157480315e-06, "log_odds_chosen": 0.5599108338356018, "log_odds_ratio": -0.4542999565601349, "logits/chosen": -0.5288444757461548, "logits/rejected": -0.19333161413669586, "logps/chosen": -1.9585925340652466, "logps/rejected": -2.4559030532836914, "loss": 2.2007, "nll_loss": 2.155308961868286, "rewards/accuracies": 1.0, "rewards/chosen": -0.19585925340652466, "rewards/margins": 0.04973103851079941, "rewards/rejected": -0.24559029936790466, "step": 33 }, { "epoch": 0.05358550039401103, "grad_norm": 0.6921458840370178, "learning_rate": 1.0393700787401573e-06, "log_odds_chosen": 0.3519511818885803, "log_odds_ratio": -0.5428643226623535, "logits/chosen": -0.536325216293335, "logits/rejected": -0.2403833270072937, "logps/chosen": -1.9013547897338867, "logps/rejected": -2.205166816711426, "loss": 2.1703, "nll_loss": 2.116063117980957, "rewards/accuracies": 1.0, "rewards/chosen": -0.19013547897338867, "rewards/margins": 0.030381204560399055, "rewards/rejected": -0.22051668167114258, "step": 34 }, { "epoch": 0.055161544523246654, "grad_norm": 0.8423165082931519, "learning_rate": 1.0708661417322836e-06, "log_odds_chosen": 0.42774710059165955, "log_odds_ratio": -0.5038249492645264, "logits/chosen": -0.7048341631889343, "logits/rejected": 0.0643918514251709, "logps/chosen": -2.080242872238159, "logps/rejected": -2.46270489692688, "loss": 2.3413, "nll_loss": 2.2909011840820312, "rewards/accuracies": 1.0, "rewards/chosen": -0.2080242931842804, "rewards/margins": 0.03824619948863983, "rewards/rejected": -0.24627049267292023, "step": 35 }, { "epoch": 0.05673758865248227, "grad_norm": 0.7374855875968933, "learning_rate": 1.1023622047244094e-06, "log_odds_chosen": 0.5830976963043213, "log_odds_ratio": -0.44582459330558777, "logits/chosen": -0.8039961457252502, "logits/rejected": -0.06987367570400238, "logps/chosen": -1.92990243434906, "logps/rejected": -2.4467267990112305, "loss": 2.1837, "nll_loss": 2.139101266860962, "rewards/accuracies": 1.0, "rewards/chosen": -0.192990243434906, "rewards/margins": 0.05168245732784271, "rewards/rejected": -0.2446727156639099, "step": 36 }, { "epoch": 0.05831363278171789, "grad_norm": 0.6995700001716614, "learning_rate": 1.1338582677165354e-06, "log_odds_chosen": 0.4410613477230072, "log_odds_ratio": -0.5076866149902344, "logits/chosen": -0.6389314532279968, "logits/rejected": -0.06876173615455627, "logps/chosen": -1.9238637685775757, "logps/rejected": -2.3091113567352295, "loss": 2.1937, "nll_loss": 2.1429271697998047, "rewards/accuracies": 0.875, "rewards/chosen": -0.19238635897636414, "rewards/margins": 0.03852475434541702, "rewards/rejected": -0.23091113567352295, "step": 37 }, { "epoch": 0.05988967691095351, "grad_norm": 0.7543565034866333, "learning_rate": 1.1653543307086612e-06, "log_odds_chosen": 0.3667663633823395, "log_odds_ratio": -0.5343418121337891, "logits/chosen": -0.7570109367370605, "logits/rejected": -0.23115745186805725, "logps/chosen": -1.921524167060852, "logps/rejected": -2.239858865737915, "loss": 2.1977, "nll_loss": 2.1442925930023193, "rewards/accuracies": 0.875, "rewards/chosen": -0.19215241074562073, "rewards/margins": 0.03183349221944809, "rewards/rejected": -0.22398591041564941, "step": 38 }, { "epoch": 0.061465721040189124, "grad_norm": 0.7876814007759094, "learning_rate": 1.1968503937007875e-06, "log_odds_chosen": 0.2030404657125473, "log_odds_ratio": -0.6065970659255981, "logits/chosen": -0.6633272171020508, "logits/rejected": -0.040516383945941925, "logps/chosen": -2.038562774658203, "logps/rejected": -2.220552444458008, "loss": 2.3176, "nll_loss": 2.256978988647461, "rewards/accuracies": 0.75, "rewards/chosen": -0.20385627448558807, "rewards/margins": 0.01819896697998047, "rewards/rejected": -0.22205524146556854, "step": 39 }, { "epoch": 0.06304176516942474, "grad_norm": 0.7621078491210938, "learning_rate": 1.2283464566929133e-06, "log_odds_chosen": 0.37610867619514465, "log_odds_ratio": -0.5284633040428162, "logits/chosen": -0.7413150668144226, "logits/rejected": -0.18302536010742188, "logps/chosen": -1.9204038381576538, "logps/rejected": -2.2485194206237793, "loss": 2.1889, "nll_loss": 2.1360361576080322, "rewards/accuracies": 1.0, "rewards/chosen": -0.19204038381576538, "rewards/margins": 0.03281155228614807, "rewards/rejected": -0.22485193610191345, "step": 40 }, { "epoch": 0.06461780929866036, "grad_norm": 0.6990500688552856, "learning_rate": 1.2598425196850393e-06, "log_odds_chosen": 0.27543094754219055, "log_odds_ratio": -0.5724075436592102, "logits/chosen": -0.6147856712341309, "logits/rejected": -0.09826792776584625, "logps/chosen": -1.9605408906936646, "logps/rejected": -2.205174446105957, "loss": 2.2093, "nll_loss": 2.152068614959717, "rewards/accuracies": 0.75, "rewards/chosen": -0.1960541009902954, "rewards/margins": 0.024463361129164696, "rewards/rejected": -0.22051745653152466, "step": 41 }, { "epoch": 0.06619385342789598, "grad_norm": 0.6998929381370544, "learning_rate": 1.2913385826771652e-06, "log_odds_chosen": 0.3089278042316437, "log_odds_ratio": -0.5570518374443054, "logits/chosen": -0.6978001594543457, "logits/rejected": -0.15154145658016205, "logps/chosen": -1.960097074508667, "logps/rejected": -2.2320406436920166, "loss": 2.2221, "nll_loss": 2.166372537612915, "rewards/accuracies": 0.875, "rewards/chosen": -0.19600971043109894, "rewards/margins": 0.02719433605670929, "rewards/rejected": -0.22320404648780823, "step": 42 }, { "epoch": 0.0677698975571316, "grad_norm": 0.6787785291671753, "learning_rate": 1.3228346456692914e-06, "log_odds_chosen": 0.16384947299957275, "log_odds_ratio": -0.6235592365264893, "logits/chosen": -0.5829145312309265, "logits/rejected": -0.14386498928070068, "logps/chosen": -1.978913426399231, "logps/rejected": -2.123264789581299, "loss": 2.2412, "nll_loss": 2.1788277626037598, "rewards/accuracies": 0.625, "rewards/chosen": -0.19789133965969086, "rewards/margins": 0.014435119926929474, "rewards/rejected": -0.21232648193836212, "step": 43 }, { "epoch": 0.06934594168636722, "grad_norm": 0.7161246538162231, "learning_rate": 1.3543307086614172e-06, "log_odds_chosen": 0.5076345801353455, "log_odds_ratio": -0.4807063937187195, "logits/chosen": -0.7080238461494446, "logits/rejected": 0.010522328317165375, "logps/chosen": -1.9329512119293213, "logps/rejected": -2.384503126144409, "loss": 2.1908, "nll_loss": 2.142735004425049, "rewards/accuracies": 1.0, "rewards/chosen": -0.19329513609409332, "rewards/margins": 0.045155204832553864, "rewards/rejected": -0.2384503185749054, "step": 44 }, { "epoch": 0.07092198581560284, "grad_norm": 0.6544040441513062, "learning_rate": 1.3858267716535433e-06, "log_odds_chosen": 0.5975862145423889, "log_odds_ratio": -0.4529675245285034, "logits/chosen": -0.5176110863685608, "logits/rejected": -0.06739248335361481, "logps/chosen": -1.9081940650939941, "logps/rejected": -2.438753843307495, "loss": 2.1874, "nll_loss": 2.1420602798461914, "rewards/accuracies": 0.875, "rewards/chosen": -0.1908193975687027, "rewards/margins": 0.05305597186088562, "rewards/rejected": -0.24387536942958832, "step": 45 }, { "epoch": 0.07249802994483845, "grad_norm": 0.7946521043777466, "learning_rate": 1.417322834645669e-06, "log_odds_chosen": 0.5836816430091858, "log_odds_ratio": -0.458423912525177, "logits/chosen": -0.7441750764846802, "logits/rejected": -0.32784658670425415, "logps/chosen": -1.9207260608673096, "logps/rejected": -2.439450740814209, "loss": 2.1926, "nll_loss": 2.14674973487854, "rewards/accuracies": 1.0, "rewards/chosen": -0.19207260012626648, "rewards/margins": 0.05187246948480606, "rewards/rejected": -0.24394509196281433, "step": 46 }, { "epoch": 0.07407407407407407, "grad_norm": 0.6708806753158569, "learning_rate": 1.4488188976377953e-06, "log_odds_chosen": 0.2689046561717987, "log_odds_ratio": -0.578476071357727, "logits/chosen": -0.7736371159553528, "logits/rejected": -0.19334951043128967, "logps/chosen": -1.9722037315368652, "logps/rejected": -2.2119808197021484, "loss": 2.2323, "nll_loss": 2.1744863986968994, "rewards/accuracies": 0.875, "rewards/chosen": -0.19722038507461548, "rewards/margins": 0.023977704346179962, "rewards/rejected": -0.22119809687137604, "step": 47 }, { "epoch": 0.07565011820330969, "grad_norm": 0.7817642688751221, "learning_rate": 1.4803149606299211e-06, "log_odds_chosen": 0.413425475358963, "log_odds_ratio": -0.5176951289176941, "logits/chosen": -0.5976810455322266, "logits/rejected": -0.1483435034751892, "logps/chosen": -2.023184061050415, "logps/rejected": -2.3940744400024414, "loss": 2.2969, "nll_loss": 2.2451674938201904, "rewards/accuracies": 1.0, "rewards/chosen": -0.20231840014457703, "rewards/margins": 0.03708904981613159, "rewards/rejected": -0.23940744996070862, "step": 48 }, { "epoch": 0.07722616233254531, "grad_norm": 0.631397008895874, "learning_rate": 1.5118110236220472e-06, "log_odds_chosen": 0.4730543792247772, "log_odds_ratio": -0.49180155992507935, "logits/chosen": -0.622490406036377, "logits/rejected": -0.015296130441129208, "logps/chosen": -1.9089866876602173, "logps/rejected": -2.3237972259521484, "loss": 2.1401, "nll_loss": 2.0909266471862793, "rewards/accuracies": 1.0, "rewards/chosen": -0.19089870154857635, "rewards/margins": 0.04148102179169655, "rewards/rejected": -0.2323797196149826, "step": 49 }, { "epoch": 0.07880220646178093, "grad_norm": 0.6548082232475281, "learning_rate": 1.543307086614173e-06, "log_odds_chosen": 0.2945685088634491, "log_odds_ratio": -0.5625656843185425, "logits/chosen": -0.638104259967804, "logits/rejected": -0.22048690915107727, "logps/chosen": -1.9748042821884155, "logps/rejected": -2.235903024673462, "loss": 2.2419, "nll_loss": 2.185655355453491, "rewards/accuracies": 1.0, "rewards/chosen": -0.19748042523860931, "rewards/margins": 0.026109864935278893, "rewards/rejected": -0.22359029948711395, "step": 50 }, { "epoch": 0.08037825059101655, "grad_norm": 0.6355348825454712, "learning_rate": 1.5748031496062992e-06, "log_odds_chosen": 0.6154743432998657, "log_odds_ratio": -0.4387228488922119, "logits/chosen": -0.5254096984863281, "logits/rejected": -0.08727583289146423, "logps/chosen": -1.8662408590316772, "logps/rejected": -2.4071710109710693, "loss": 2.1184, "nll_loss": 2.0745527744293213, "rewards/accuracies": 1.0, "rewards/chosen": -0.18662410974502563, "rewards/margins": 0.054093025624752045, "rewards/rejected": -0.24071712791919708, "step": 51 }, { "epoch": 0.08195429472025216, "grad_norm": 0.6476663947105408, "learning_rate": 1.6062992125984253e-06, "log_odds_chosen": 0.5350648760795593, "log_odds_ratio": -0.46677446365356445, "logits/chosen": -0.4835931360721588, "logits/rejected": -0.016967706382274628, "logps/chosen": -2.0277915000915527, "logps/rejected": -2.507091522216797, "loss": 2.2718, "nll_loss": 2.2251675128936768, "rewards/accuracies": 1.0, "rewards/chosen": -0.202779158949852, "rewards/margins": 0.04792997986078262, "rewards/rejected": -0.2507091462612152, "step": 52 }, { "epoch": 0.08353033884948778, "grad_norm": 0.718711793422699, "learning_rate": 1.637795275590551e-06, "log_odds_chosen": 0.3033553957939148, "log_odds_ratio": -0.5565884709358215, "logits/chosen": -0.5977045297622681, "logits/rejected": -0.13718965649604797, "logps/chosen": -1.9643840789794922, "logps/rejected": -2.230160713195801, "loss": 2.2233, "nll_loss": 2.1675939559936523, "rewards/accuracies": 1.0, "rewards/chosen": -0.19643841683864594, "rewards/margins": 0.026577647775411606, "rewards/rejected": -0.22301605343818665, "step": 53 }, { "epoch": 0.0851063829787234, "grad_norm": 0.6878601908683777, "learning_rate": 1.6692913385826771e-06, "log_odds_chosen": 0.30879032611846924, "log_odds_ratio": -0.5532358288764954, "logits/chosen": -0.6751337051391602, "logits/rejected": -0.14412644505500793, "logps/chosen": -1.903916835784912, "logps/rejected": -2.1728036403656006, "loss": 2.1706, "nll_loss": 2.1152570247650146, "rewards/accuracies": 1.0, "rewards/chosen": -0.1903916895389557, "rewards/margins": 0.026888679713010788, "rewards/rejected": -0.2172803431749344, "step": 54 }, { "epoch": 0.08668242710795902, "grad_norm": 0.7093151211738586, "learning_rate": 1.7007874015748031e-06, "log_odds_chosen": 0.42612695693969727, "log_odds_ratio": -0.5059034824371338, "logits/chosen": -0.6053857803344727, "logits/rejected": -0.21064752340316772, "logps/chosen": -1.9867243766784668, "logps/rejected": -2.36510968208313, "loss": 2.2557, "nll_loss": 2.205127716064453, "rewards/accuracies": 1.0, "rewards/chosen": -0.19867242872714996, "rewards/margins": 0.037838518619537354, "rewards/rejected": -0.23651094734668732, "step": 55 }, { "epoch": 0.08825847123719464, "grad_norm": 0.8093518614768982, "learning_rate": 1.7322834645669292e-06, "log_odds_chosen": 0.352740615606308, "log_odds_ratio": -0.5422862768173218, "logits/chosen": -0.5116597414016724, "logits/rejected": -0.0041604433208703995, "logps/chosen": -2.0882434844970703, "logps/rejected": -2.4050841331481934, "loss": 2.3456, "nll_loss": 2.291337728500366, "rewards/accuracies": 0.875, "rewards/chosen": -0.20882436633110046, "rewards/margins": 0.03168405592441559, "rewards/rejected": -0.24050842225551605, "step": 56 }, { "epoch": 0.08983451536643026, "grad_norm": 0.6063317656517029, "learning_rate": 1.763779527559055e-06, "log_odds_chosen": 0.37979212403297424, "log_odds_ratio": -0.5244497656822205, "logits/chosen": -0.5742859840393066, "logits/rejected": -0.12732906639575958, "logps/chosen": -1.9634265899658203, "logps/rejected": -2.2987239360809326, "loss": 2.2111, "nll_loss": 2.158637285232544, "rewards/accuracies": 1.0, "rewards/chosen": -0.19634266197681427, "rewards/margins": 0.03352972865104675, "rewards/rejected": -0.22987240552902222, "step": 57 }, { "epoch": 0.09141055949566587, "grad_norm": 0.7049276828765869, "learning_rate": 1.795275590551181e-06, "log_odds_chosen": 0.38489818572998047, "log_odds_ratio": -0.5272811651229858, "logits/chosen": -0.603106677532196, "logits/rejected": -0.32046759128570557, "logps/chosen": -1.931801199913025, "logps/rejected": -2.274812936782837, "loss": 2.1909, "nll_loss": 2.138176918029785, "rewards/accuracies": 1.0, "rewards/chosen": -0.193180114030838, "rewards/margins": 0.034301191568374634, "rewards/rejected": -0.22748132050037384, "step": 58 }, { "epoch": 0.09298660362490149, "grad_norm": 0.7724094390869141, "learning_rate": 1.826771653543307e-06, "log_odds_chosen": 0.6565301418304443, "log_odds_ratio": -0.4238013029098511, "logits/chosen": -0.5493816137313843, "logits/rejected": -0.04481405392289162, "logps/chosen": -2.03456711769104, "logps/rejected": -2.6273677349090576, "loss": 2.2666, "nll_loss": 2.2242276668548584, "rewards/accuracies": 1.0, "rewards/chosen": -0.20345671474933624, "rewards/margins": 0.05928007513284683, "rewards/rejected": -0.2627367675304413, "step": 59 }, { "epoch": 0.09456264775413711, "grad_norm": 0.7019063830375671, "learning_rate": 1.858267716535433e-06, "log_odds_chosen": 0.48238804936408997, "log_odds_ratio": -0.4877777695655823, "logits/chosen": -0.6007660627365112, "logits/rejected": -0.25237271189689636, "logps/chosen": -1.9639641046524048, "logps/rejected": -2.3943371772766113, "loss": 2.223, "nll_loss": 2.174217462539673, "rewards/accuracies": 1.0, "rewards/chosen": -0.19639641046524048, "rewards/margins": 0.04303732514381409, "rewards/rejected": -0.23943373560905457, "step": 60 }, { "epoch": 0.09613869188337273, "grad_norm": 0.7680268883705139, "learning_rate": 1.889763779527559e-06, "log_odds_chosen": 0.25518718361854553, "log_odds_ratio": -0.5833750367164612, "logits/chosen": -0.5432047247886658, "logits/rejected": -0.21941693127155304, "logps/chosen": -2.100229501724243, "logps/rejected": -2.330411195755005, "loss": 2.3455, "nll_loss": 2.2871248722076416, "rewards/accuracies": 0.75, "rewards/chosen": -0.2100229412317276, "rewards/margins": 0.023018185049295425, "rewards/rejected": -0.23304113745689392, "step": 61 }, { "epoch": 0.09771473601260836, "grad_norm": 0.8415902853012085, "learning_rate": 1.9212598425196847e-06, "log_odds_chosen": 0.3082142472267151, "log_odds_ratio": -0.5612497329711914, "logits/chosen": -0.6062051653862, "logits/rejected": -0.11909964680671692, "logps/chosen": -2.0706098079681396, "logps/rejected": -2.3465754985809326, "loss": 2.3287, "nll_loss": 2.272575616836548, "rewards/accuracies": 0.75, "rewards/chosen": -0.20706097781658173, "rewards/margins": 0.02759658358991146, "rewards/rejected": -0.23465755581855774, "step": 62 }, { "epoch": 0.09929078014184398, "grad_norm": 0.6543618440628052, "learning_rate": 1.952755905511811e-06, "log_odds_chosen": 0.5749139785766602, "log_odds_ratio": -0.45239800214767456, "logits/chosen": -0.7079123258590698, "logits/rejected": -0.2599683403968811, "logps/chosen": -1.8045680522918701, "logps/rejected": -2.300361394882202, "loss": 2.0753, "nll_loss": 2.030048370361328, "rewards/accuracies": 1.0, "rewards/chosen": -0.18045681715011597, "rewards/margins": 0.04957934468984604, "rewards/rejected": -0.23003613948822021, "step": 63 }, { "epoch": 0.1008668242710796, "grad_norm": 0.6648881435394287, "learning_rate": 1.9842519685039368e-06, "log_odds_chosen": 0.6830120086669922, "log_odds_ratio": -0.4217277765274048, "logits/chosen": -0.6593011617660522, "logits/rejected": -0.08247893303632736, "logps/chosen": -1.8293054103851318, "logps/rejected": -2.4285809993743896, "loss": 2.0953, "nll_loss": 2.0531651973724365, "rewards/accuracies": 1.0, "rewards/chosen": -0.18293055891990662, "rewards/margins": 0.059927552938461304, "rewards/rejected": -0.24285811185836792, "step": 64 }, { "epoch": 0.1024428684003152, "grad_norm": 0.6505405902862549, "learning_rate": 2.015748031496063e-06, "log_odds_chosen": 0.6114057898521423, "log_odds_ratio": -0.4410432279109955, "logits/chosen": -0.6462138295173645, "logits/rejected": -0.3533778786659241, "logps/chosen": -1.9271063804626465, "logps/rejected": -2.47088360786438, "loss": 2.1845, "nll_loss": 2.1404013633728027, "rewards/accuracies": 1.0, "rewards/chosen": -0.19271063804626465, "rewards/margins": 0.0543777197599411, "rewards/rejected": -0.24708837270736694, "step": 65 }, { "epoch": 0.10401891252955082, "grad_norm": 0.6485214829444885, "learning_rate": 2.047244094488189e-06, "log_odds_chosen": 0.452458918094635, "log_odds_ratio": -0.49970927834510803, "logits/chosen": -0.6230807900428772, "logits/rejected": -0.13434045016765594, "logps/chosen": -2.0179097652435303, "logps/rejected": -2.419290781021118, "loss": 2.2838, "nll_loss": 2.233837842941284, "rewards/accuracies": 0.875, "rewards/chosen": -0.20179098844528198, "rewards/margins": 0.04013810679316521, "rewards/rejected": -0.2419290840625763, "step": 66 }, { "epoch": 0.10559495665878645, "grad_norm": 0.6492588520050049, "learning_rate": 2.0787401574803147e-06, "log_odds_chosen": 0.5289919972419739, "log_odds_ratio": -0.4675235152244568, "logits/chosen": -0.534376859664917, "logits/rejected": -0.03799459710717201, "logps/chosen": -1.955706238746643, "logps/rejected": -2.4241867065429688, "loss": 2.1946, "nll_loss": 2.1478805541992188, "rewards/accuracies": 1.0, "rewards/chosen": -0.19557063281536102, "rewards/margins": 0.04684804379940033, "rewards/rejected": -0.24241869151592255, "step": 67 }, { "epoch": 0.10717100078802207, "grad_norm": 0.774642288684845, "learning_rate": 2.1102362204724405e-06, "log_odds_chosen": 0.41134878993034363, "log_odds_ratio": -0.5170325040817261, "logits/chosen": -0.6746619939804077, "logits/rejected": -0.185140922665596, "logps/chosen": -1.947751760482788, "logps/rejected": -2.311985969543457, "loss": 2.2474, "nll_loss": 2.1957201957702637, "rewards/accuracies": 0.875, "rewards/chosen": -0.19477517902851105, "rewards/margins": 0.036423418670892715, "rewards/rejected": -0.23119859397411346, "step": 68 }, { "epoch": 0.10874704491725769, "grad_norm": 0.8252844214439392, "learning_rate": 2.141732283464567e-06, "log_odds_chosen": 0.22346967458724976, "log_odds_ratio": -0.5949736833572388, "logits/chosen": -0.41877222061157227, "logits/rejected": -0.11722514033317566, "logps/chosen": -2.0319464206695557, "logps/rejected": -2.2292232513427734, "loss": 2.3097, "nll_loss": 2.250192403793335, "rewards/accuracies": 0.875, "rewards/chosen": -0.20319463312625885, "rewards/margins": 0.019727692008018494, "rewards/rejected": -0.22292232513427734, "step": 69 }, { "epoch": 0.11032308904649331, "grad_norm": 0.6770060658454895, "learning_rate": 2.173228346456693e-06, "log_odds_chosen": 0.4207773804664612, "log_odds_ratio": -0.5108780264854431, "logits/chosen": -0.5316891670227051, "logits/rejected": -0.10831936448812485, "logps/chosen": -1.9700641632080078, "logps/rejected": -2.342879056930542, "loss": 2.2395, "nll_loss": 2.188405752182007, "rewards/accuracies": 1.0, "rewards/chosen": -0.19700641930103302, "rewards/margins": 0.03728148713707924, "rewards/rejected": -0.23428791761398315, "step": 70 }, { "epoch": 0.11189913317572892, "grad_norm": 0.8300355672836304, "learning_rate": 2.204724409448819e-06, "log_odds_chosen": 0.5063481330871582, "log_odds_ratio": -0.48265108466148376, "logits/chosen": -0.7789384126663208, "logits/rejected": -0.24630165100097656, "logps/chosen": -1.9212646484375, "logps/rejected": -2.370161771774292, "loss": 2.1808, "nll_loss": 2.1325840950012207, "rewards/accuracies": 1.0, "rewards/chosen": -0.19212648272514343, "rewards/margins": 0.04488971084356308, "rewards/rejected": -0.23701618611812592, "step": 71 }, { "epoch": 0.11347517730496454, "grad_norm": 0.6843920946121216, "learning_rate": 2.2362204724409446e-06, "log_odds_chosen": 0.32779812812805176, "log_odds_ratio": -0.5560404062271118, "logits/chosen": -0.5591788291931152, "logits/rejected": -0.06292789429426193, "logps/chosen": -1.936835527420044, "logps/rejected": -2.2265937328338623, "loss": 2.1968, "nll_loss": 2.1412172317504883, "rewards/accuracies": 0.875, "rewards/chosen": -0.19368356466293335, "rewards/margins": 0.02897578477859497, "rewards/rejected": -0.22265934944152832, "step": 72 }, { "epoch": 0.11505122143420016, "grad_norm": 0.6691780090332031, "learning_rate": 2.267716535433071e-06, "log_odds_chosen": 0.3424806594848633, "log_odds_ratio": -0.5391549468040466, "logits/chosen": -0.5084943175315857, "logits/rejected": -0.18799816071987152, "logps/chosen": -1.982399821281433, "logps/rejected": -2.2857072353363037, "loss": 2.2447, "nll_loss": 2.190774917602539, "rewards/accuracies": 1.0, "rewards/chosen": -0.1982399970293045, "rewards/margins": 0.0303307194262743, "rewards/rejected": -0.22857069969177246, "step": 73 }, { "epoch": 0.11662726556343578, "grad_norm": 0.7013347744941711, "learning_rate": 2.2992125984251967e-06, "log_odds_chosen": 0.42650213837623596, "log_odds_ratio": -0.5057096481323242, "logits/chosen": -0.6094076037406921, "logits/rejected": -0.11771736294031143, "logps/chosen": -1.8428946733474731, "logps/rejected": -2.2101123332977295, "loss": 2.1276, "nll_loss": 2.0770435333251953, "rewards/accuracies": 1.0, "rewards/chosen": -0.18428948521614075, "rewards/margins": 0.03672178089618683, "rewards/rejected": -0.22101125121116638, "step": 74 }, { "epoch": 0.1182033096926714, "grad_norm": 0.6637840867042542, "learning_rate": 2.3307086614173225e-06, "log_odds_chosen": 0.5096178650856018, "log_odds_ratio": -0.476134717464447, "logits/chosen": -0.5619401335716248, "logits/rejected": -0.2677369713783264, "logps/chosen": -1.8805265426635742, "logps/rejected": -2.32961106300354, "loss": 2.1385, "nll_loss": 2.0909037590026855, "rewards/accuracies": 1.0, "rewards/chosen": -0.18805265426635742, "rewards/margins": 0.044908471405506134, "rewards/rejected": -0.23296113312244415, "step": 75 }, { "epoch": 0.11977935382190702, "grad_norm": 0.6581621170043945, "learning_rate": 2.3622047244094483e-06, "log_odds_chosen": 0.5206456184387207, "log_odds_ratio": -0.47490978240966797, "logits/chosen": -0.5504530072212219, "logits/rejected": -0.03281405568122864, "logps/chosen": -1.9286243915557861, "logps/rejected": -2.3910675048828125, "loss": 2.1717, "nll_loss": 2.124224901199341, "rewards/accuracies": 1.0, "rewards/chosen": -0.19286245107650757, "rewards/margins": 0.04624428227543831, "rewards/rejected": -0.23910671472549438, "step": 76 }, { "epoch": 0.12135539795114263, "grad_norm": 0.7063953876495361, "learning_rate": 2.393700787401575e-06, "log_odds_chosen": 0.4808962941169739, "log_odds_ratio": -0.49262717366218567, "logits/chosen": -0.5677655339241028, "logits/rejected": -0.21073025465011597, "logps/chosen": -1.9511951208114624, "logps/rejected": -2.377624988555908, "loss": 2.2192, "nll_loss": 2.169985771179199, "rewards/accuracies": 1.0, "rewards/chosen": -0.19511950016021729, "rewards/margins": 0.042642995715141296, "rewards/rejected": -0.23776251077651978, "step": 77 }, { "epoch": 0.12293144208037825, "grad_norm": 0.5751843452453613, "learning_rate": 2.425196850393701e-06, "log_odds_chosen": 0.3792150020599365, "log_odds_ratio": -0.5299723744392395, "logits/chosen": -0.37227052450180054, "logits/rejected": -0.4450679123401642, "logps/chosen": -2.0182228088378906, "logps/rejected": -2.355861186981201, "loss": 2.2537, "nll_loss": 2.200679302215576, "rewards/accuracies": 0.875, "rewards/chosen": -0.20182228088378906, "rewards/margins": 0.03376384079456329, "rewards/rejected": -0.23558615148067474, "step": 78 }, { "epoch": 0.12450748620961387, "grad_norm": 0.6594578623771667, "learning_rate": 2.4566929133858266e-06, "log_odds_chosen": 0.518637478351593, "log_odds_ratio": -0.47430098056793213, "logits/chosen": -0.4779280126094818, "logits/rejected": -0.2910279631614685, "logps/chosen": -1.9429047107696533, "logps/rejected": -2.402376174926758, "loss": 2.1903, "nll_loss": 2.1428279876708984, "rewards/accuracies": 1.0, "rewards/chosen": -0.19429044425487518, "rewards/margins": 0.04594714939594269, "rewards/rejected": -0.24023759365081787, "step": 79 }, { "epoch": 0.12608353033884948, "grad_norm": 0.6245352625846863, "learning_rate": 2.488188976377953e-06, "log_odds_chosen": 0.5585002303123474, "log_odds_ratio": -0.45803701877593994, "logits/chosen": -0.5769734978675842, "logits/rejected": -0.25027596950531006, "logps/chosen": -1.8669335842132568, "logps/rejected": -2.356663227081299, "loss": 2.1157, "nll_loss": 2.06986403465271, "rewards/accuracies": 1.0, "rewards/chosen": -0.18669337034225464, "rewards/margins": 0.048972949385643005, "rewards/rejected": -0.23566631972789764, "step": 80 }, { "epoch": 0.1276595744680851, "grad_norm": 0.5566908717155457, "learning_rate": 2.5196850393700787e-06, "log_odds_chosen": 0.3883778750896454, "log_odds_ratio": -0.5254943370819092, "logits/chosen": -0.42693546414375305, "logits/rejected": -0.2633028030395508, "logps/chosen": -1.93135666847229, "logps/rejected": -2.2755541801452637, "loss": 2.17, "nll_loss": 2.1174182891845703, "rewards/accuracies": 1.0, "rewards/chosen": -0.19313567876815796, "rewards/margins": 0.034419745206832886, "rewards/rejected": -0.22755542397499084, "step": 81 }, { "epoch": 0.12923561859732072, "grad_norm": 0.5781261324882507, "learning_rate": 2.5511811023622045e-06, "log_odds_chosen": 0.524163544178009, "log_odds_ratio": -0.4806976020336151, "logits/chosen": -0.4674437940120697, "logits/rejected": -0.23945724964141846, "logps/chosen": -1.8828051090240479, "logps/rejected": -2.3516387939453125, "loss": 2.1336, "nll_loss": 2.085569381713867, "rewards/accuracies": 0.875, "rewards/chosen": -0.18828049302101135, "rewards/margins": 0.046883389353752136, "rewards/rejected": -0.23516389727592468, "step": 82 }, { "epoch": 0.13081166272655634, "grad_norm": 0.6088637709617615, "learning_rate": 2.5826771653543303e-06, "log_odds_chosen": 0.36324411630630493, "log_odds_ratio": -0.5395435690879822, "logits/chosen": -0.39306196570396423, "logits/rejected": -0.1700359582901001, "logps/chosen": -1.9157882928848267, "logps/rejected": -2.237780809402466, "loss": 2.1604, "nll_loss": 2.106419563293457, "rewards/accuracies": 1.0, "rewards/chosen": -0.19157883524894714, "rewards/margins": 0.032199256122112274, "rewards/rejected": -0.22377808392047882, "step": 83 }, { "epoch": 0.13238770685579196, "grad_norm": 0.6803274750709534, "learning_rate": 2.6141732283464566e-06, "log_odds_chosen": 0.5524423122406006, "log_odds_ratio": -0.46847474575042725, "logits/chosen": -0.5545064210891724, "logits/rejected": -0.24267421662807465, "logps/chosen": -1.8639458417892456, "logps/rejected": -2.3503522872924805, "loss": 2.1458, "nll_loss": 2.0989749431610107, "rewards/accuracies": 1.0, "rewards/chosen": -0.1863945871591568, "rewards/margins": 0.048640646040439606, "rewards/rejected": -0.235035240650177, "step": 84 }, { "epoch": 0.13396375098502758, "grad_norm": 0.5811371207237244, "learning_rate": 2.645669291338583e-06, "log_odds_chosen": 0.5210408568382263, "log_odds_ratio": -0.46715638041496277, "logits/chosen": -0.3822883367538452, "logits/rejected": -0.1786729395389557, "logps/chosen": -1.9316679239273071, "logps/rejected": -2.3919014930725098, "loss": 2.1698, "nll_loss": 2.1230902671813965, "rewards/accuracies": 1.0, "rewards/chosen": -0.1931667923927307, "rewards/margins": 0.04602333903312683, "rewards/rejected": -0.23919013142585754, "step": 85 }, { "epoch": 0.1355397951142632, "grad_norm": 0.6602110266685486, "learning_rate": 2.6771653543307086e-06, "log_odds_chosen": 0.48973286151885986, "log_odds_ratio": -0.48310309648513794, "logits/chosen": -0.5846769213676453, "logits/rejected": -0.20886988937854767, "logps/chosen": -1.9184874296188354, "logps/rejected": -2.349517822265625, "loss": 2.1703, "nll_loss": 2.1220102310180664, "rewards/accuracies": 1.0, "rewards/chosen": -0.1918487399816513, "rewards/margins": 0.043103061616420746, "rewards/rejected": -0.23495177924633026, "step": 86 }, { "epoch": 0.13711583924349882, "grad_norm": 0.5744991302490234, "learning_rate": 2.7086614173228344e-06, "log_odds_chosen": 0.3423335552215576, "log_odds_ratio": -0.5400257110595703, "logits/chosen": -0.37106236815452576, "logits/rejected": -0.360477089881897, "logps/chosen": -1.922955870628357, "logps/rejected": -2.2213680744171143, "loss": 2.1639, "nll_loss": 2.109863042831421, "rewards/accuracies": 1.0, "rewards/chosen": -0.19229556620121002, "rewards/margins": 0.02984124794602394, "rewards/rejected": -0.22213682532310486, "step": 87 }, { "epoch": 0.13869188337273444, "grad_norm": 0.6963973045349121, "learning_rate": 2.7401574803149607e-06, "log_odds_chosen": 0.5418673753738403, "log_odds_ratio": -0.475824773311615, "logits/chosen": -0.6239266991615295, "logits/rejected": -0.2058069109916687, "logps/chosen": -1.848305583000183, "logps/rejected": -2.326037645339966, "loss": 2.1293, "nll_loss": 2.0817408561706543, "rewards/accuracies": 0.875, "rewards/chosen": -0.18483057618141174, "rewards/margins": 0.04777318611741066, "rewards/rejected": -0.2326037585735321, "step": 88 }, { "epoch": 0.14026792750197006, "grad_norm": 0.6552391648292542, "learning_rate": 2.7716535433070865e-06, "log_odds_chosen": 0.500076174736023, "log_odds_ratio": -0.4754161834716797, "logits/chosen": -0.44256362318992615, "logits/rejected": -0.20719635486602783, "logps/chosen": -1.9562758207321167, "logps/rejected": -2.3987882137298584, "loss": 2.2135, "nll_loss": 2.165970802307129, "rewards/accuracies": 1.0, "rewards/chosen": -0.1956275850534439, "rewards/margins": 0.04425125569105148, "rewards/rejected": -0.2398788183927536, "step": 89 }, { "epoch": 0.14184397163120568, "grad_norm": 0.6141228079795837, "learning_rate": 2.8031496062992123e-06, "log_odds_chosen": 0.428195595741272, "log_odds_ratio": -0.5067068934440613, "logits/chosen": -0.3649379312992096, "logits/rejected": -0.1602931022644043, "logps/chosen": -2.002263307571411, "logps/rejected": -2.3845441341400146, "loss": 2.2267, "nll_loss": 2.1760218143463135, "rewards/accuracies": 1.0, "rewards/chosen": -0.20022635161876678, "rewards/margins": 0.038228072226047516, "rewards/rejected": -0.2384544163942337, "step": 90 }, { "epoch": 0.1434200157604413, "grad_norm": 0.5524324774742126, "learning_rate": 2.834645669291338e-06, "log_odds_chosen": 0.4524117112159729, "log_odds_ratio": -0.49862101674079895, "logits/chosen": -0.36407744884490967, "logits/rejected": -0.2811731696128845, "logps/chosen": -1.8061617612838745, "logps/rejected": -2.198903799057007, "loss": 2.0471, "nll_loss": 1.997222900390625, "rewards/accuracies": 1.0, "rewards/chosen": -0.18061619997024536, "rewards/margins": 0.0392741933465004, "rewards/rejected": -0.21989038586616516, "step": 91 }, { "epoch": 0.1449960598896769, "grad_norm": 0.5921797752380371, "learning_rate": 2.8661417322834644e-06, "log_odds_chosen": 0.5024532675743103, "log_odds_ratio": -0.47816595435142517, "logits/chosen": -0.44370609521865845, "logits/rejected": -0.18724730610847473, "logps/chosen": -1.829555869102478, "logps/rejected": -2.2681233882904053, "loss": 2.0879, "nll_loss": 2.040083169937134, "rewards/accuracies": 1.0, "rewards/chosen": -0.18295560777187347, "rewards/margins": 0.04385674372315407, "rewards/rejected": -0.22681234776973724, "step": 92 }, { "epoch": 0.14657210401891252, "grad_norm": 0.628095805644989, "learning_rate": 2.8976377952755906e-06, "log_odds_chosen": 0.43953385949134827, "log_odds_ratio": -0.5222434401512146, "logits/chosen": -0.4026286005973816, "logits/rejected": -0.23625504970550537, "logps/chosen": -1.9340780973434448, "logps/rejected": -2.3274731636047363, "loss": 2.18, "nll_loss": 2.127760410308838, "rewards/accuracies": 0.75, "rewards/chosen": -0.19340780377388, "rewards/margins": 0.03933952748775482, "rewards/rejected": -0.23274733126163483, "step": 93 }, { "epoch": 0.14814814814814814, "grad_norm": 0.5658537149429321, "learning_rate": 2.9291338582677165e-06, "log_odds_chosen": 0.45617368817329407, "log_odds_ratio": -0.4992842674255371, "logits/chosen": -0.36675694584846497, "logits/rejected": -0.3160392642021179, "logps/chosen": -1.9161105155944824, "logps/rejected": -2.315382480621338, "loss": 2.1513, "nll_loss": 2.1013565063476562, "rewards/accuracies": 0.875, "rewards/chosen": -0.19161105155944824, "rewards/margins": 0.03992719575762749, "rewards/rejected": -0.23153826594352722, "step": 94 }, { "epoch": 0.14972419227738376, "grad_norm": 0.639642596244812, "learning_rate": 2.9606299212598423e-06, "log_odds_chosen": 0.5549046993255615, "log_odds_ratio": -0.463879257440567, "logits/chosen": -0.43291109800338745, "logits/rejected": -0.15716485679149628, "logps/chosen": -1.9124903678894043, "logps/rejected": -2.4041366577148438, "loss": 2.1648, "nll_loss": 2.1183886528015137, "rewards/accuracies": 1.0, "rewards/chosen": -0.1912490427494049, "rewards/margins": 0.049164604395627975, "rewards/rejected": -0.240413635969162, "step": 95 }, { "epoch": 0.15130023640661938, "grad_norm": 0.569778323173523, "learning_rate": 2.9921259842519685e-06, "log_odds_chosen": 0.5220546722412109, "log_odds_ratio": -0.46969637274742126, "logits/chosen": -0.42379647493362427, "logits/rejected": -0.3440685570240021, "logps/chosen": -1.9186266660690308, "logps/rejected": -2.3788628578186035, "loss": 2.141, "nll_loss": 2.0940771102905273, "rewards/accuracies": 1.0, "rewards/chosen": -0.19186267256736755, "rewards/margins": 0.04602360725402832, "rewards/rejected": -0.23788626492023468, "step": 96 }, { "epoch": 0.152876280535855, "grad_norm": 0.56700599193573, "learning_rate": 3.0236220472440943e-06, "log_odds_chosen": 0.350454717874527, "log_odds_ratio": -0.5345020294189453, "logits/chosen": -0.47989675402641296, "logits/rejected": -0.17767037451267242, "logps/chosen": -1.8971889019012451, "logps/rejected": -2.200305461883545, "loss": 2.1227, "nll_loss": 2.069289445877075, "rewards/accuracies": 1.0, "rewards/chosen": -0.18971890211105347, "rewards/margins": 0.03031165711581707, "rewards/rejected": -0.22003056108951569, "step": 97 }, { "epoch": 0.15445232466509062, "grad_norm": 0.5734896063804626, "learning_rate": 3.05511811023622e-06, "log_odds_chosen": 0.4143810570240021, "log_odds_ratio": -0.512945294380188, "logits/chosen": -0.31117746233940125, "logits/rejected": -0.24713104963302612, "logps/chosen": -2.0033698081970215, "logps/rejected": -2.3717055320739746, "loss": 2.2459, "nll_loss": 2.1946518421173096, "rewards/accuracies": 1.0, "rewards/chosen": -0.2003369927406311, "rewards/margins": 0.03683357313275337, "rewards/rejected": -0.23717054724693298, "step": 98 }, { "epoch": 0.15602836879432624, "grad_norm": 0.6178714036941528, "learning_rate": 3.086614173228346e-06, "log_odds_chosen": 0.24748294055461884, "log_odds_ratio": -0.5794985890388489, "logits/chosen": -0.5359123945236206, "logits/rejected": -0.3172164559364319, "logps/chosen": -1.911259651184082, "logps/rejected": -2.12546443939209, "loss": 2.1682, "nll_loss": 2.1102778911590576, "rewards/accuracies": 1.0, "rewards/chosen": -0.19112597405910492, "rewards/margins": 0.021420463919639587, "rewards/rejected": -0.2125464230775833, "step": 99 }, { "epoch": 0.15760441292356187, "grad_norm": 0.590636670589447, "learning_rate": 3.1181102362204722e-06, "log_odds_chosen": 0.36373111605644226, "log_odds_ratio": -0.533814549446106, "logits/chosen": -0.4497278034687042, "logits/rejected": -0.2689790427684784, "logps/chosen": -1.8333114385604858, "logps/rejected": -2.1512179374694824, "loss": 2.0894, "nll_loss": 2.036029815673828, "rewards/accuracies": 1.0, "rewards/chosen": -0.18333116173744202, "rewards/margins": 0.031790636479854584, "rewards/rejected": -0.2151218056678772, "step": 100 }, { "epoch": 0.15918045705279749, "grad_norm": 0.5497896075248718, "learning_rate": 3.1496062992125985e-06, "log_odds_chosen": 0.42619574069976807, "log_odds_ratio": -0.5083091855049133, "logits/chosen": -0.3605027198791504, "logits/rejected": -0.24103917181491852, "logps/chosen": -1.8285539150238037, "logps/rejected": -2.200834035873413, "loss": 2.0895, "nll_loss": 2.038670301437378, "rewards/accuracies": 1.0, "rewards/chosen": -0.18285538256168365, "rewards/margins": 0.03722800686955452, "rewards/rejected": -0.22008340060710907, "step": 101 }, { "epoch": 0.1607565011820331, "grad_norm": 0.5178012251853943, "learning_rate": 3.1811023622047243e-06, "log_odds_chosen": 0.43138545751571655, "log_odds_ratio": -0.5122407078742981, "logits/chosen": -0.3726266324520111, "logits/rejected": -0.37165510654449463, "logps/chosen": -1.7784022092819214, "logps/rejected": -2.1532044410705566, "loss": 2.0323, "nll_loss": 1.9810512065887451, "rewards/accuracies": 1.0, "rewards/chosen": -0.1778402179479599, "rewards/margins": 0.03748023882508278, "rewards/rejected": -0.21532045304775238, "step": 102 }, { "epoch": 0.16233254531126873, "grad_norm": 0.5599470734596252, "learning_rate": 3.2125984251968505e-06, "log_odds_chosen": 0.4818297326564789, "log_odds_ratio": -0.49377134442329407, "logits/chosen": -0.2876685857772827, "logits/rejected": -0.038474664092063904, "logps/chosen": -1.7715296745300293, "logps/rejected": -2.1862025260925293, "loss": 2.0313, "nll_loss": 1.9819062948226929, "rewards/accuracies": 0.875, "rewards/chosen": -0.17715296149253845, "rewards/margins": 0.04146728664636612, "rewards/rejected": -0.21862025558948517, "step": 103 }, { "epoch": 0.16390858944050432, "grad_norm": 0.5964917540550232, "learning_rate": 3.2440944881889763e-06, "log_odds_chosen": 0.4325031638145447, "log_odds_ratio": -0.5093069672584534, "logits/chosen": -0.23736125230789185, "logits/rejected": -0.06159596145153046, "logps/chosen": -1.9128376245498657, "logps/rejected": -2.294654369354248, "loss": 2.1641, "nll_loss": 2.113180637359619, "rewards/accuracies": 1.0, "rewards/chosen": -0.19128376245498657, "rewards/margins": 0.03818168863654137, "rewards/rejected": -0.22946545481681824, "step": 104 }, { "epoch": 0.16548463356973994, "grad_norm": 0.4927206039428711, "learning_rate": 3.275590551181102e-06, "log_odds_chosen": 0.5299558043479919, "log_odds_ratio": -0.4695979058742523, "logits/chosen": -0.377352774143219, "logits/rejected": -0.17640215158462524, "logps/chosen": -1.7906301021575928, "logps/rejected": -2.250798225402832, "loss": 2.0519, "nll_loss": 2.0049400329589844, "rewards/accuracies": 1.0, "rewards/chosen": -0.17906302213668823, "rewards/margins": 0.04601679742336273, "rewards/rejected": -0.22507980465888977, "step": 105 }, { "epoch": 0.16706067769897556, "grad_norm": 0.5032868385314941, "learning_rate": 3.307086614173228e-06, "log_odds_chosen": 0.4509121775627136, "log_odds_ratio": -0.5058165788650513, "logits/chosen": -0.35679227113723755, "logits/rejected": -0.1107356995344162, "logps/chosen": -1.8084826469421387, "logps/rejected": -2.190898895263672, "loss": 2.0546, "nll_loss": 2.003988742828369, "rewards/accuracies": 0.875, "rewards/chosen": -0.18084825575351715, "rewards/margins": 0.03824164718389511, "rewards/rejected": -0.21908989548683167, "step": 106 }, { "epoch": 0.16863672182821118, "grad_norm": 0.5300337672233582, "learning_rate": 3.3385826771653542e-06, "log_odds_chosen": 0.43685051798820496, "log_odds_ratio": -0.5093865394592285, "logits/chosen": -0.2651398479938507, "logits/rejected": -0.28625091910362244, "logps/chosen": -1.8772473335266113, "logps/rejected": -2.2612218856811523, "loss": 2.118, "nll_loss": 2.0670440196990967, "rewards/accuracies": 0.875, "rewards/chosen": -0.1877247393131256, "rewards/margins": 0.03839743137359619, "rewards/rejected": -0.226122185587883, "step": 107 }, { "epoch": 0.1702127659574468, "grad_norm": 0.49163365364074707, "learning_rate": 3.37007874015748e-06, "log_odds_chosen": 0.3183455765247345, "log_odds_ratio": -0.5563209056854248, "logits/chosen": -0.23882922530174255, "logits/rejected": -0.3078491985797882, "logps/chosen": -1.85860013961792, "logps/rejected": -2.1333258152008057, "loss": 2.102, "nll_loss": 2.0463998317718506, "rewards/accuracies": 0.875, "rewards/chosen": -0.1858600229024887, "rewards/margins": 0.02747257985174656, "rewards/rejected": -0.21333259344100952, "step": 108 }, { "epoch": 0.17178881008668243, "grad_norm": 0.5118012428283691, "learning_rate": 3.4015748031496063e-06, "log_odds_chosen": 0.4563853442668915, "log_odds_ratio": -0.49264228343963623, "logits/chosen": -0.16680516302585602, "logits/rejected": -0.2830784320831299, "logps/chosen": -1.9145252704620361, "logps/rejected": -2.3134219646453857, "loss": 2.1528, "nll_loss": 2.1035213470458984, "rewards/accuracies": 1.0, "rewards/chosen": -0.19145254790782928, "rewards/margins": 0.03988967090845108, "rewards/rejected": -0.23134221136569977, "step": 109 }, { "epoch": 0.17336485421591805, "grad_norm": 0.505682110786438, "learning_rate": 3.433070866141732e-06, "log_odds_chosen": 0.49830734729766846, "log_odds_ratio": -0.48064208030700684, "logits/chosen": -0.27441835403442383, "logits/rejected": -0.24226327240467072, "logps/chosen": -1.8005558252334595, "logps/rejected": -2.2267792224884033, "loss": 2.0182, "nll_loss": 1.9701097011566162, "rewards/accuracies": 1.0, "rewards/chosen": -0.18005558848381042, "rewards/margins": 0.04262235015630722, "rewards/rejected": -0.22267794609069824, "step": 110 }, { "epoch": 0.17494089834515367, "grad_norm": 0.5464332103729248, "learning_rate": 3.4645669291338583e-06, "log_odds_chosen": 0.5071738362312317, "log_odds_ratio": -0.4801686406135559, "logits/chosen": -0.23766781389713287, "logits/rejected": -0.30691930651664734, "logps/chosen": -1.902522087097168, "logps/rejected": -2.3450286388397217, "loss": 2.1483, "nll_loss": 2.10026478767395, "rewards/accuracies": 1.0, "rewards/chosen": -0.19025221467018127, "rewards/margins": 0.044250644743442535, "rewards/rejected": -0.2345028668642044, "step": 111 }, { "epoch": 0.1765169424743893, "grad_norm": 0.5223987102508545, "learning_rate": 3.496062992125984e-06, "log_odds_chosen": 0.3665584325790405, "log_odds_ratio": -0.5315839052200317, "logits/chosen": -0.20885254442691803, "logits/rejected": -0.12856575846672058, "logps/chosen": -1.9509872198104858, "logps/rejected": -2.271317481994629, "loss": 2.1848, "nll_loss": 2.131622314453125, "rewards/accuracies": 1.0, "rewards/chosen": -0.19509869813919067, "rewards/margins": 0.0320330373942852, "rewards/rejected": -0.22713173925876617, "step": 112 }, { "epoch": 0.1780929866036249, "grad_norm": 0.5140707492828369, "learning_rate": 3.52755905511811e-06, "log_odds_chosen": 0.38427114486694336, "log_odds_ratio": -0.526176929473877, "logits/chosen": -0.3687783181667328, "logits/rejected": -0.2154863178730011, "logps/chosen": -1.7950341701507568, "logps/rejected": -2.127782106399536, "loss": 2.0463, "nll_loss": 1.9936522245407104, "rewards/accuracies": 1.0, "rewards/chosen": -0.1795034110546112, "rewards/margins": 0.03327479213476181, "rewards/rejected": -0.21277819573879242, "step": 113 }, { "epoch": 0.17966903073286053, "grad_norm": 0.48041167855262756, "learning_rate": 3.559055118110236e-06, "log_odds_chosen": 0.2929834723472595, "log_odds_ratio": -0.5727202892303467, "logits/chosen": -0.11503149569034576, "logits/rejected": -0.22482499480247498, "logps/chosen": -1.921325922012329, "logps/rejected": -2.181408405303955, "loss": 2.167, "nll_loss": 2.1096832752227783, "rewards/accuracies": 0.875, "rewards/chosen": -0.1921325922012329, "rewards/margins": 0.026008253917098045, "rewards/rejected": -0.2181408405303955, "step": 114 }, { "epoch": 0.18124507486209615, "grad_norm": 0.483804315328598, "learning_rate": 3.590551181102362e-06, "log_odds_chosen": 0.46048107743263245, "log_odds_ratio": -0.4994431436061859, "logits/chosen": -0.21135865151882172, "logits/rejected": -0.4032437205314636, "logps/chosen": -1.8342138528823853, "logps/rejected": -2.2355825901031494, "loss": 2.0671, "nll_loss": 2.017176389694214, "rewards/accuracies": 0.875, "rewards/chosen": -0.18342137336730957, "rewards/margins": 0.040136873722076416, "rewards/rejected": -0.2235582321882248, "step": 115 }, { "epoch": 0.18282111899133174, "grad_norm": 0.4813881814479828, "learning_rate": 3.622047244094488e-06, "log_odds_chosen": 0.3711977005004883, "log_odds_ratio": -0.5412157773971558, "logits/chosen": -0.22255532443523407, "logits/rejected": -0.2175833135843277, "logps/chosen": -1.8901299238204956, "logps/rejected": -2.2157669067382812, "loss": 2.1143, "nll_loss": 2.060180187225342, "rewards/accuracies": 0.875, "rewards/chosen": -0.18901298940181732, "rewards/margins": 0.032563693821430206, "rewards/rejected": -0.22157667577266693, "step": 116 }, { "epoch": 0.18439716312056736, "grad_norm": 0.4870263934135437, "learning_rate": 3.653543307086614e-06, "log_odds_chosen": 0.526931881904602, "log_odds_ratio": -0.47742602229118347, "logits/chosen": -0.2706168293952942, "logits/rejected": -0.5030975341796875, "logps/chosen": -1.8207095861434937, "logps/rejected": -2.2792084217071533, "loss": 2.0652, "nll_loss": 2.0174200534820557, "rewards/accuracies": 0.875, "rewards/chosen": -0.18207095563411713, "rewards/margins": 0.045849889516830444, "rewards/rejected": -0.22792083024978638, "step": 117 }, { "epoch": 0.18597320724980299, "grad_norm": 0.47839877009391785, "learning_rate": 3.68503937007874e-06, "log_odds_chosen": 0.30926501750946045, "log_odds_ratio": -0.5605666637420654, "logits/chosen": -0.08142746239900589, "logits/rejected": -0.2830328047275543, "logps/chosen": -1.83827805519104, "logps/rejected": -2.1060383319854736, "loss": 2.0833, "nll_loss": 2.0272810459136963, "rewards/accuracies": 0.875, "rewards/chosen": -0.18382780253887177, "rewards/margins": 0.026776034384965897, "rewards/rejected": -0.21060383319854736, "step": 118 }, { "epoch": 0.1875492513790386, "grad_norm": 0.4611856937408447, "learning_rate": 3.716535433070866e-06, "log_odds_chosen": 0.30054840445518494, "log_odds_ratio": -0.5679630041122437, "logits/chosen": -0.13648174703121185, "logits/rejected": -0.2771826982498169, "logps/chosen": -1.8114385604858398, "logps/rejected": -2.073773145675659, "loss": 2.0738, "nll_loss": 2.016970157623291, "rewards/accuracies": 0.875, "rewards/chosen": -0.1811438798904419, "rewards/margins": 0.02623344026505947, "rewards/rejected": -0.20737731456756592, "step": 119 }, { "epoch": 0.18912529550827423, "grad_norm": 0.4521031677722931, "learning_rate": 3.748031496062992e-06, "log_odds_chosen": 0.6006217002868652, "log_odds_ratio": -0.44487234950065613, "logits/chosen": -0.18391111493110657, "logits/rejected": -0.23368988931179047, "logps/chosen": -1.713165283203125, "logps/rejected": -2.2281906604766846, "loss": 1.9444, "nll_loss": 1.8999552726745605, "rewards/accuracies": 1.0, "rewards/chosen": -0.17131653428077698, "rewards/margins": 0.05150254815816879, "rewards/rejected": -0.22281907498836517, "step": 120 }, { "epoch": 0.19070133963750985, "grad_norm": 0.4375765323638916, "learning_rate": 3.779527559055118e-06, "log_odds_chosen": 0.607134222984314, "log_odds_ratio": -0.4383259415626526, "logits/chosen": -0.2432423233985901, "logits/rejected": -0.42568087577819824, "logps/chosen": -1.8271077871322632, "logps/rejected": -2.3578097820281982, "loss": 2.0471, "nll_loss": 2.0032753944396973, "rewards/accuracies": 1.0, "rewards/chosen": -0.18271078169345856, "rewards/margins": 0.05307020992040634, "rewards/rejected": -0.2357809841632843, "step": 121 }, { "epoch": 0.19227738376674547, "grad_norm": 0.4764109253883362, "learning_rate": 3.8110236220472436e-06, "log_odds_chosen": 0.5355339050292969, "log_odds_ratio": -0.47745591402053833, "logits/chosen": -0.30387943983078003, "logits/rejected": -0.23297454416751862, "logps/chosen": -1.7462990283966064, "logps/rejected": -2.2124016284942627, "loss": 1.9695, "nll_loss": 1.9217469692230225, "rewards/accuracies": 0.875, "rewards/chosen": -0.17462992668151855, "rewards/margins": 0.04661024361848831, "rewards/rejected": -0.22124016284942627, "step": 122 }, { "epoch": 0.1938534278959811, "grad_norm": 0.4778152108192444, "learning_rate": 3.8425196850393695e-06, "log_odds_chosen": 0.39439237117767334, "log_odds_ratio": -0.5168735384941101, "logits/chosen": -0.06380043923854828, "logits/rejected": -0.3037663400173187, "logps/chosen": -1.8502298593521118, "logps/rejected": -2.1919362545013428, "loss": 2.0886, "nll_loss": 2.036864757537842, "rewards/accuracies": 1.0, "rewards/chosen": -0.1850229799747467, "rewards/margins": 0.03417064994573593, "rewards/rejected": -0.21919363737106323, "step": 123 }, { "epoch": 0.1954294720252167, "grad_norm": 0.42974886298179626, "learning_rate": 3.874015748031496e-06, "log_odds_chosen": 0.4634344279766083, "log_odds_ratio": -0.5001177787780762, "logits/chosen": -0.25694918632507324, "logits/rejected": -0.22004857659339905, "logps/chosen": -1.7307007312774658, "logps/rejected": -2.1188673973083496, "loss": 1.9529, "nll_loss": 1.9028429985046387, "rewards/accuracies": 1.0, "rewards/chosen": -0.17307007312774658, "rewards/margins": 0.03881664574146271, "rewards/rejected": -0.2118867039680481, "step": 124 }, { "epoch": 0.19700551615445233, "grad_norm": 0.4418664276599884, "learning_rate": 3.905511811023622e-06, "log_odds_chosen": 0.4357145130634308, "log_odds_ratio": -0.505855143070221, "logits/chosen": -0.15186086297035217, "logits/rejected": -0.4162241220474243, "logps/chosen": -1.7670007944107056, "logps/rejected": -2.140122890472412, "loss": 2.0107, "nll_loss": 1.9601045846939087, "rewards/accuracies": 1.0, "rewards/chosen": -0.17670010030269623, "rewards/margins": 0.03731219470500946, "rewards/rejected": -0.2140122801065445, "step": 125 }, { "epoch": 0.19858156028368795, "grad_norm": 0.47172582149505615, "learning_rate": 3.937007874015748e-06, "log_odds_chosen": 0.6300610899925232, "log_odds_ratio": -0.4334939122200012, "logits/chosen": -0.1414909064769745, "logits/rejected": -0.23909084498882294, "logps/chosen": -1.773337483406067, "logps/rejected": -2.3242154121398926, "loss": 1.998, "nll_loss": 1.9546327590942383, "rewards/accuracies": 1.0, "rewards/chosen": -0.1773337423801422, "rewards/margins": 0.05508778989315033, "rewards/rejected": -0.23242153227329254, "step": 126 }, { "epoch": 0.20015760441292357, "grad_norm": 0.4991217255592346, "learning_rate": 3.9685039370078736e-06, "log_odds_chosen": 0.4967314600944519, "log_odds_ratio": -0.4841119050979614, "logits/chosen": -0.11699728667736053, "logits/rejected": -0.20840948820114136, "logps/chosen": -1.8117221593856812, "logps/rejected": -2.245494842529297, "loss": 2.0711, "nll_loss": 2.0227043628692627, "rewards/accuracies": 1.0, "rewards/chosen": -0.1811722218990326, "rewards/margins": 0.04337725043296814, "rewards/rejected": -0.22454948723316193, "step": 127 }, { "epoch": 0.2017336485421592, "grad_norm": 0.45142993330955505, "learning_rate": 4e-06, "log_odds_chosen": 0.3186233639717102, "log_odds_ratio": -0.5520759224891663, "logits/chosen": -0.007777207065373659, "logits/rejected": -0.0923052504658699, "logps/chosen": -1.8868976831436157, "logps/rejected": -2.1636693477630615, "loss": 2.1041, "nll_loss": 2.0488626956939697, "rewards/accuracies": 0.75, "rewards/chosen": -0.18868978321552277, "rewards/margins": 0.02767716720700264, "rewards/rejected": -0.2163669317960739, "step": 128 }, { "epoch": 0.2033096926713948, "grad_norm": 0.4361562430858612, "learning_rate": 3.999992445477635e-06, "log_odds_chosen": 0.45227304100990295, "log_odds_ratio": -0.5005910396575928, "logits/chosen": -0.04890897497534752, "logits/rejected": -0.3455308675765991, "logps/chosen": -1.8800231218338013, "logps/rejected": -2.2768943309783936, "loss": 2.0833, "nll_loss": 2.0331974029541016, "rewards/accuracies": 0.875, "rewards/chosen": -0.1880023181438446, "rewards/margins": 0.03968711942434311, "rewards/rejected": -0.2276894450187683, "step": 129 }, { "epoch": 0.2048857368006304, "grad_norm": 0.43126383423805237, "learning_rate": 3.999969781967615e-06, "log_odds_chosen": 0.27507030963897705, "log_odds_ratio": -0.5684081315994263, "logits/chosen": -0.03611285984516144, "logits/rejected": -0.18150243163108826, "logps/chosen": -1.980247139930725, "logps/rejected": -2.222841501235962, "loss": 2.1813, "nll_loss": 2.1244537830352783, "rewards/accuracies": 1.0, "rewards/chosen": -0.198024719953537, "rewards/margins": 0.024259435012936592, "rewards/rejected": -0.22228413820266724, "step": 130 }, { "epoch": 0.20646178092986603, "grad_norm": 0.39808744192123413, "learning_rate": 3.99993200964115e-06, "log_odds_chosen": 0.37286853790283203, "log_odds_ratio": -0.5256955623626709, "logits/chosen": -0.13878293335437775, "logits/rejected": -0.38629403710365295, "logps/chosen": -1.8050813674926758, "logps/rejected": -2.1250405311584473, "loss": 2.0229, "nll_loss": 1.9703779220581055, "rewards/accuracies": 1.0, "rewards/chosen": -0.18050813674926758, "rewards/margins": 0.031995922327041626, "rewards/rejected": -0.2125040590763092, "step": 131 }, { "epoch": 0.20803782505910165, "grad_norm": 0.49582305550575256, "learning_rate": 3.99987912878359e-06, "log_odds_chosen": 0.1634901463985443, "log_odds_ratio": -0.6200548410415649, "logits/chosen": -0.09574344009160995, "logits/rejected": -0.01687694527208805, "logps/chosen": -1.7705621719360352, "logps/rejected": -1.9035704135894775, "loss": 2.0395, "nll_loss": 1.977489709854126, "rewards/accuracies": 0.75, "rewards/chosen": -0.1770562380552292, "rewards/margins": 0.013300813734531403, "rewards/rejected": -0.19035704433918, "step": 132 }, { "epoch": 0.20961386918833727, "grad_norm": 0.42744600772857666, "learning_rate": 3.999811139794429e-06, "log_odds_chosen": 0.3788083493709564, "log_odds_ratio": -0.5247887372970581, "logits/chosen": -0.06136152893304825, "logits/rejected": -0.26565802097320557, "logps/chosen": -1.8101221323013306, "logps/rejected": -2.135667324066162, "loss": 2.0189, "nll_loss": 1.9664689302444458, "rewards/accuracies": 1.0, "rewards/chosen": -0.18101221323013306, "rewards/margins": 0.032554514706134796, "rewards/rejected": -0.21356670558452606, "step": 133 }, { "epoch": 0.2111899133175729, "grad_norm": 0.4283032715320587, "learning_rate": 3.999728043187288e-06, "log_odds_chosen": 0.35587507486343384, "log_odds_ratio": -0.5397139191627502, "logits/chosen": -0.04053102061152458, "logits/rejected": -0.2791404724121094, "logps/chosen": -1.8541502952575684, "logps/rejected": -2.164419412612915, "loss": 2.0887, "nll_loss": 2.0347094535827637, "rewards/accuracies": 0.875, "rewards/chosen": -0.18541501462459564, "rewards/margins": 0.03102692775428295, "rewards/rejected": -0.21644194424152374, "step": 134 }, { "epoch": 0.2127659574468085, "grad_norm": 0.42146554589271545, "learning_rate": 3.999629839589922e-06, "log_odds_chosen": 0.22164756059646606, "log_odds_ratio": -0.5917123556137085, "logits/chosen": 0.012096976861357689, "logits/rejected": -0.4592039883136749, "logps/chosen": -1.9410529136657715, "logps/rejected": -2.1342885494232178, "loss": 2.1714, "nll_loss": 2.1121792793273926, "rewards/accuracies": 0.875, "rewards/chosen": -0.19410529732704163, "rewards/margins": 0.01932355761528015, "rewards/rejected": -0.21342885494232178, "step": 135 }, { "epoch": 0.21434200157604413, "grad_norm": 0.4338121712207794, "learning_rate": 3.999516529744215e-06, "log_odds_chosen": 0.3292469382286072, "log_odds_ratio": -0.5459690690040588, "logits/chosen": 0.07608616352081299, "logits/rejected": -0.40737682580947876, "logps/chosen": -1.8194847106933594, "logps/rejected": -2.10274600982666, "loss": 2.0403, "nll_loss": 1.9856888055801392, "rewards/accuracies": 1.0, "rewards/chosen": -0.1819484829902649, "rewards/margins": 0.02832612209022045, "rewards/rejected": -0.2102746069431305, "step": 136 }, { "epoch": 0.21591804570527975, "grad_norm": 0.3987630605697632, "learning_rate": 3.999388114506166e-06, "log_odds_chosen": 0.4010235667228699, "log_odds_ratio": -0.518801212310791, "logits/chosen": -0.046873223036527634, "logits/rejected": -0.4238424599170685, "logps/chosen": -1.7170600891113281, "logps/rejected": -2.0600481033325195, "loss": 1.931, "nll_loss": 1.8791390657424927, "rewards/accuracies": 1.0, "rewards/chosen": -0.17170599102973938, "rewards/margins": 0.034298814833164215, "rewards/rejected": -0.206004798412323, "step": 137 }, { "epoch": 0.21749408983451538, "grad_norm": 0.4006027579307556, "learning_rate": 3.999244594845892e-06, "log_odds_chosen": 0.25786924362182617, "log_odds_ratio": -0.5790534019470215, "logits/chosen": -0.023555610328912735, "logits/rejected": -0.35695680975914, "logps/chosen": -1.7636680603027344, "logps/rejected": -1.985347032546997, "loss": 1.9912, "nll_loss": 1.9332479238510132, "rewards/accuracies": 0.875, "rewards/chosen": -0.17636680603027344, "rewards/margins": 0.02216789685189724, "rewards/rejected": -0.19853469729423523, "step": 138 }, { "epoch": 0.219070133963751, "grad_norm": 0.4049820303916931, "learning_rate": 3.999085971847616e-06, "log_odds_chosen": 0.24562738835811615, "log_odds_ratio": -0.58580482006073, "logits/chosen": -0.012129198759794235, "logits/rejected": -0.512136697769165, "logps/chosen": -1.8218495845794678, "logps/rejected": -2.0348801612854004, "loss": 2.0389, "nll_loss": 1.980314016342163, "rewards/accuracies": 0.875, "rewards/chosen": -0.18218494951725006, "rewards/margins": 0.02130305953323841, "rewards/rejected": -0.20348800718784332, "step": 139 }, { "epoch": 0.22064617809298662, "grad_norm": 0.3915075361728668, "learning_rate": 3.998912246709658e-06, "log_odds_chosen": 0.3664124310016632, "log_odds_ratio": -0.5369927287101746, "logits/chosen": -0.08141148090362549, "logits/rejected": -0.4445778429508209, "logps/chosen": -1.7210732698440552, "logps/rejected": -2.033111572265625, "loss": 1.9395, "nll_loss": 1.885791301727295, "rewards/accuracies": 0.875, "rewards/chosen": -0.17210730910301208, "rewards/margins": 0.0312038566917181, "rewards/rejected": -0.20331117510795593, "step": 140 }, { "epoch": 0.2222222222222222, "grad_norm": 0.42792966961860657, "learning_rate": 3.9987234207444295e-06, "log_odds_chosen": 0.3271371126174927, "log_odds_ratio": -0.5472476482391357, "logits/chosen": -0.05070888251066208, "logits/rejected": -0.30135494470596313, "logps/chosen": -1.817905068397522, "logps/rejected": -2.099316358566284, "loss": 2.0393, "nll_loss": 1.984588384628296, "rewards/accuracies": 1.0, "rewards/chosen": -0.1817905157804489, "rewards/margins": 0.028141150251030922, "rewards/rejected": -0.2099316567182541, "step": 141 }, { "epoch": 0.22379826635145783, "grad_norm": 0.40370792150497437, "learning_rate": 3.998519495378419e-06, "log_odds_chosen": 0.34052586555480957, "log_odds_ratio": -0.5426381826400757, "logits/chosen": 0.0529387891292572, "logits/rejected": -0.22841407358646393, "logps/chosen": -1.896314024925232, "logps/rejected": -2.194129705429077, "loss": 2.0889, "nll_loss": 2.0345916748046875, "rewards/accuracies": 1.0, "rewards/chosen": -0.189631387591362, "rewards/margins": 0.029781583696603775, "rewards/rejected": -0.21941299736499786, "step": 142 }, { "epoch": 0.22537431048069345, "grad_norm": 0.36527448892593384, "learning_rate": 3.998300472152187e-06, "log_odds_chosen": 0.3244752287864685, "log_odds_ratio": -0.5529875755310059, "logits/chosen": -0.07975448668003082, "logits/rejected": -0.5476751923561096, "logps/chosen": -1.6814563274383545, "logps/rejected": -1.9556654691696167, "loss": 1.9047, "nll_loss": 1.8494292497634888, "rewards/accuracies": 0.875, "rewards/chosen": -0.16814564168453217, "rewards/margins": 0.02742091566324234, "rewards/rejected": -0.1955665647983551, "step": 143 }, { "epoch": 0.22695035460992907, "grad_norm": 0.39946654438972473, "learning_rate": 3.998066352720347e-06, "log_odds_chosen": 0.4114699959754944, "log_odds_ratio": -0.5125808715820312, "logits/chosen": 0.10884220898151398, "logits/rejected": -0.2769298553466797, "logps/chosen": -1.807681679725647, "logps/rejected": -2.163328170776367, "loss": 2.0156, "nll_loss": 1.9643856287002563, "rewards/accuracies": 1.0, "rewards/chosen": -0.1807681769132614, "rewards/margins": 0.035564638674259186, "rewards/rejected": -0.2163328230381012, "step": 144 }, { "epoch": 0.2285263987391647, "grad_norm": 0.3883441388607025, "learning_rate": 3.997817138851562e-06, "log_odds_chosen": 0.3042789101600647, "log_odds_ratio": -0.5641068816184998, "logits/chosen": 0.047148481011390686, "logits/rejected": -0.2664666771888733, "logps/chosen": -1.894091248512268, "logps/rejected": -2.1613922119140625, "loss": 2.1037, "nll_loss": 2.0473220348358154, "rewards/accuracies": 0.875, "rewards/chosen": -0.18940910696983337, "rewards/margins": 0.0267301294952631, "rewards/rejected": -0.21613925695419312, "step": 145 }, { "epoch": 0.23010244286840031, "grad_norm": 0.3664163053035736, "learning_rate": 3.997552832428522e-06, "log_odds_chosen": 0.40218284726142883, "log_odds_ratio": -0.5150843262672424, "logits/chosen": -0.06997128576040268, "logits/rejected": -0.30114221572875977, "logps/chosen": -1.683500051498413, "logps/rejected": -2.0218000411987305, "loss": 1.9221, "nll_loss": 1.8706339597702026, "rewards/accuracies": 1.0, "rewards/chosen": -0.1683500111103058, "rewards/margins": 0.03382997214794159, "rewards/rejected": -0.20217998325824738, "step": 146 }, { "epoch": 0.23167848699763594, "grad_norm": 0.35738253593444824, "learning_rate": 3.9972734354479366e-06, "log_odds_chosen": 0.33341336250305176, "log_odds_ratio": -0.5498776435852051, "logits/chosen": 0.05102141201496124, "logits/rejected": -0.588348925113678, "logps/chosen": -1.6781635284423828, "logps/rejected": -1.957244634628296, "loss": 1.9026, "nll_loss": 1.84757661819458, "rewards/accuracies": 0.875, "rewards/chosen": -0.1678163707256317, "rewards/margins": 0.027908099815249443, "rewards/rejected": -0.1957244724035263, "step": 147 }, { "epoch": 0.23325453112687156, "grad_norm": 0.40279892086982727, "learning_rate": 3.996978950020517e-06, "log_odds_chosen": 0.21276284754276276, "log_odds_ratio": -0.6048003435134888, "logits/chosen": 0.1419999599456787, "logits/rejected": -0.28115570545196533, "logps/chosen": -1.8639137744903564, "logps/rejected": -2.0437545776367188, "loss": 2.0862, "nll_loss": 2.0256824493408203, "rewards/accuracies": 0.75, "rewards/chosen": -0.18639138340950012, "rewards/margins": 0.01798408292233944, "rewards/rejected": -0.2043754756450653, "step": 148 }, { "epoch": 0.23483057525610718, "grad_norm": 0.35266202688217163, "learning_rate": 3.996669378370959e-06, "log_odds_chosen": 0.5316495299339294, "log_odds_ratio": -0.46872708201408386, "logits/chosen": 0.010169305838644505, "logits/rejected": -0.3486666679382324, "logps/chosen": -1.6488699913024902, "logps/rejected": -2.1011159420013428, "loss": 1.8663, "nll_loss": 1.8194066286087036, "rewards/accuracies": 1.0, "rewards/chosen": -0.16488701105117798, "rewards/margins": 0.045224592089653015, "rewards/rejected": -0.2101115882396698, "step": 149 }, { "epoch": 0.2364066193853428, "grad_norm": 0.39699894189834595, "learning_rate": 3.996344722837929e-06, "log_odds_chosen": 0.49894896149635315, "log_odds_ratio": -0.4788591265678406, "logits/chosen": -0.0008517892565578222, "logits/rejected": -0.31570884585380554, "logps/chosen": -1.8067049980163574, "logps/rejected": -2.239896535873413, "loss": 2.0051, "nll_loss": 1.95723295211792, "rewards/accuracies": 1.0, "rewards/chosen": -0.18067049980163574, "rewards/margins": 0.043319158256053925, "rewards/rejected": -0.22398965060710907, "step": 150 }, { "epoch": 0.23798266351457842, "grad_norm": 0.33054688572883606, "learning_rate": 3.996004985874043e-06, "log_odds_chosen": 0.2428944855928421, "log_odds_ratio": -0.5818551778793335, "logits/chosen": 0.11997652053833008, "logits/rejected": -0.2498077005147934, "logps/chosen": -1.698814868927002, "logps/rejected": -1.9012809991836548, "loss": 1.9266, "nll_loss": 1.8684155941009521, "rewards/accuracies": 1.0, "rewards/chosen": -0.16988149285316467, "rewards/margins": 0.020246637985110283, "rewards/rejected": -0.19012810289859772, "step": 151 }, { "epoch": 0.23955870764381404, "grad_norm": 0.3768727481365204, "learning_rate": 3.995650170045855e-06, "log_odds_chosen": 0.28013092279434204, "log_odds_ratio": -0.5691770315170288, "logits/chosen": 0.045284271240234375, "logits/rejected": -0.3556942641735077, "logps/chosen": -1.7669368982315063, "logps/rejected": -2.0047860145568848, "loss": 1.9804, "nll_loss": 1.9234654903411865, "rewards/accuracies": 0.875, "rewards/chosen": -0.17669367790222168, "rewards/margins": 0.023784920573234558, "rewards/rejected": -0.20047861337661743, "step": 152 }, { "epoch": 0.24113475177304963, "grad_norm": 0.3691398501396179, "learning_rate": 3.995280278033825e-06, "log_odds_chosen": 0.20656853914260864, "log_odds_ratio": -0.6042188405990601, "logits/chosen": 0.11217048764228821, "logits/rejected": -0.24175406992435455, "logps/chosen": -1.8632197380065918, "logps/rejected": -2.039140462875366, "loss": 2.0785, "nll_loss": 2.0180578231811523, "rewards/accuracies": 0.75, "rewards/chosen": -0.18632197380065918, "rewards/margins": 0.017592042684555054, "rewards/rejected": -0.20391403138637543, "step": 153 }, { "epoch": 0.24271079590228525, "grad_norm": 0.35842734575271606, "learning_rate": 3.994895312632314e-06, "log_odds_chosen": 0.4981134533882141, "log_odds_ratio": -0.4802461564540863, "logits/chosen": 0.16929033398628235, "logits/rejected": -0.7549564838409424, "logps/chosen": -1.7397829294204712, "logps/rejected": -2.1666393280029297, "loss": 1.9514, "nll_loss": 1.903334140777588, "rewards/accuracies": 1.0, "rewards/chosen": -0.1739783138036728, "rewards/margins": 0.04268564283847809, "rewards/rejected": -0.21666395664215088, "step": 154 }, { "epoch": 0.24428684003152087, "grad_norm": 0.33668240904808044, "learning_rate": 3.994495276749549e-06, "log_odds_chosen": 0.47896263003349304, "log_odds_ratio": -0.48850810527801514, "logits/chosen": 0.07264027744531631, "logits/rejected": -0.33893775939941406, "logps/chosen": -1.7592413425445557, "logps/rejected": -2.172550916671753, "loss": 1.9568, "nll_loss": 1.9079933166503906, "rewards/accuracies": 1.0, "rewards/chosen": -0.175924152135849, "rewards/margins": 0.04133095592260361, "rewards/rejected": -0.217255100607872, "step": 155 }, { "epoch": 0.2458628841607565, "grad_norm": 0.34656822681427, "learning_rate": 3.994080173407612e-06, "log_odds_chosen": 0.23366691172122955, "log_odds_ratio": -0.5879085063934326, "logits/chosen": 0.1407233029603958, "logits/rejected": -0.3865904211997986, "logps/chosen": -1.7929620742797852, "logps/rejected": -1.9902442693710327, "loss": 1.9994, "nll_loss": 1.9406037330627441, "rewards/accuracies": 0.75, "rewards/chosen": -0.17929621040821075, "rewards/margins": 0.01972820609807968, "rewards/rejected": -0.19902442395687103, "step": 156 }, { "epoch": 0.24743892828999212, "grad_norm": 0.34390729665756226, "learning_rate": 3.993650005742409e-06, "log_odds_chosen": 0.3310392498970032, "log_odds_ratio": -0.546916127204895, "logits/chosen": 0.06716110557317734, "logits/rejected": -0.23844899237155914, "logps/chosen": -1.7643970251083374, "logps/rejected": -2.046977996826172, "loss": 1.9724, "nll_loss": 1.9176855087280273, "rewards/accuracies": 1.0, "rewards/chosen": -0.17643971741199493, "rewards/margins": 0.028258096426725388, "rewards/rejected": -0.20469780266284943, "step": 157 }, { "epoch": 0.24901497241922774, "grad_norm": 0.3150049149990082, "learning_rate": 3.993204777003652e-06, "log_odds_chosen": 0.337340384721756, "log_odds_ratio": -0.5445095896720886, "logits/chosen": 0.07445216178894043, "logits/rejected": -0.441582053899765, "logps/chosen": -1.6941965818405151, "logps/rejected": -1.980376124382019, "loss": 1.898, "nll_loss": 1.843545913696289, "rewards/accuracies": 1.0, "rewards/chosen": -0.16941964626312256, "rewards/margins": 0.02861795574426651, "rewards/rejected": -0.19803762435913086, "step": 158 }, { "epoch": 0.25059101654846333, "grad_norm": 0.3337211012840271, "learning_rate": 3.992744490554832e-06, "log_odds_chosen": 0.33180904388427734, "log_odds_ratio": -0.5461182594299316, "logits/chosen": 0.21238459646701813, "logits/rejected": -0.269389808177948, "logps/chosen": -1.7364294528961182, "logps/rejected": -2.0181472301483154, "loss": 1.9565, "nll_loss": 1.9018971920013428, "rewards/accuracies": 1.0, "rewards/chosen": -0.17364296317100525, "rewards/margins": 0.028171781450510025, "rewards/rejected": -0.20181472599506378, "step": 159 }, { "epoch": 0.25216706067769895, "grad_norm": 0.31171178817749023, "learning_rate": 3.992269149873192e-06, "log_odds_chosen": 0.32716354727745056, "log_odds_ratio": -0.547857403755188, "logits/chosen": 0.028445789590477943, "logits/rejected": -0.35958513617515564, "logps/chosen": -1.7089133262634277, "logps/rejected": -1.9816714525222778, "loss": 1.9143, "nll_loss": 1.8594882488250732, "rewards/accuracies": 1.0, "rewards/chosen": -0.17089134454727173, "rewards/margins": 0.02727578952908516, "rewards/rejected": -0.19816714525222778, "step": 160 }, { "epoch": 0.25374310480693457, "grad_norm": 0.30663299560546875, "learning_rate": 3.991778758549705e-06, "log_odds_chosen": 0.195327490568161, "log_odds_ratio": -0.6073517799377441, "logits/chosen": 0.2354024052619934, "logits/rejected": -0.21214549243450165, "logps/chosen": -1.8341491222381592, "logps/rejected": -2.0042359828948975, "loss": 2.023, "nll_loss": 1.9623081684112549, "rewards/accuracies": 0.75, "rewards/chosen": -0.18341490626335144, "rewards/margins": 0.017008693888783455, "rewards/rejected": -0.20042361319065094, "step": 161 }, { "epoch": 0.2553191489361702, "grad_norm": 0.31510376930236816, "learning_rate": 3.9912733202890415e-06, "log_odds_chosen": 0.32201409339904785, "log_odds_ratio": -0.548933744430542, "logits/chosen": 0.10407942533493042, "logits/rejected": -0.4005528688430786, "logps/chosen": -1.7570445537567139, "logps/rejected": -2.0296471118927, "loss": 1.96, "nll_loss": 1.905151128768921, "rewards/accuracies": 1.0, "rewards/chosen": -0.1757044792175293, "rewards/margins": 0.027260230854153633, "rewards/rejected": -0.20296470820903778, "step": 162 }, { "epoch": 0.2568951930654058, "grad_norm": 0.3028022050857544, "learning_rate": 3.990752838909548e-06, "log_odds_chosen": 0.1706855297088623, "log_odds_ratio": -0.6158415079116821, "logits/chosen": 0.1903667449951172, "logits/rejected": -0.2845366299152374, "logps/chosen": -1.8539068698883057, "logps/rejected": -1.9989856481552124, "loss": 2.0475, "nll_loss": 1.9859018325805664, "rewards/accuracies": 0.75, "rewards/chosen": -0.18539069592952728, "rewards/margins": 0.014507867395877838, "rewards/rejected": -0.19989855587482452, "step": 163 }, { "epoch": 0.25847123719464143, "grad_norm": 0.3319096267223358, "learning_rate": 3.990217318343213e-06, "log_odds_chosen": 0.5035750269889832, "log_odds_ratio": -0.4748569130897522, "logits/chosen": 0.15394604206085205, "logits/rejected": -0.5184996128082275, "logps/chosen": -1.921760082244873, "logps/rejected": -2.365640163421631, "loss": 2.1041, "nll_loss": 2.0565714836120605, "rewards/accuracies": 1.0, "rewards/chosen": -0.19217601418495178, "rewards/margins": 0.044387996196746826, "rewards/rejected": -0.2365640103816986, "step": 164 }, { "epoch": 0.26004728132387706, "grad_norm": 0.3104284405708313, "learning_rate": 3.989666762635637e-06, "log_odds_chosen": 0.2950212061405182, "log_odds_ratio": -0.5599942803382874, "logits/chosen": 0.1402396708726883, "logits/rejected": -0.27953556180000305, "logps/chosen": -1.767073631286621, "logps/rejected": -2.017169713973999, "loss": 1.9826, "nll_loss": 1.926632285118103, "rewards/accuracies": 1.0, "rewards/chosen": -0.17670737206935883, "rewards/margins": 0.025009607896208763, "rewards/rejected": -0.20171695947647095, "step": 165 }, { "epoch": 0.2616233254531127, "grad_norm": 0.30276185274124146, "learning_rate": 3.9891011759460056e-06, "log_odds_chosen": 0.3634149432182312, "log_odds_ratio": -0.5344185829162598, "logits/chosen": 0.1359172910451889, "logits/rejected": -0.3355481028556824, "logps/chosen": -1.75217866897583, "logps/rejected": -2.055960178375244, "loss": 1.958, "nll_loss": 1.9045321941375732, "rewards/accuracies": 1.0, "rewards/chosen": -0.1752178817987442, "rewards/margins": 0.03037814423441887, "rewards/rejected": -0.20559601485729218, "step": 166 }, { "epoch": 0.2631993695823483, "grad_norm": 0.2769591212272644, "learning_rate": 3.988520562547057e-06, "log_odds_chosen": 0.3933558762073517, "log_odds_ratio": -0.5260743498802185, "logits/chosen": 0.15155696868896484, "logits/rejected": -0.42779073119163513, "logps/chosen": -1.7146203517913818, "logps/rejected": -2.04492449760437, "loss": 1.9032, "nll_loss": 1.8506273031234741, "rewards/accuracies": 1.0, "rewards/chosen": -0.1714620441198349, "rewards/margins": 0.033030424267053604, "rewards/rejected": -0.2044924646615982, "step": 167 }, { "epoch": 0.2647754137115839, "grad_norm": 0.29901036620140076, "learning_rate": 3.987924926825047e-06, "log_odds_chosen": 0.40397408604621887, "log_odds_ratio": -0.5212303996086121, "logits/chosen": 0.12876684963703156, "logits/rejected": -0.32701608538627625, "logps/chosen": -1.790124773979187, "logps/rejected": -2.1377172470092773, "loss": 2.0015, "nll_loss": 1.9493852853775024, "rewards/accuracies": 0.875, "rewards/chosen": -0.1790124773979187, "rewards/margins": 0.03475925698876381, "rewards/rejected": -0.2137717306613922, "step": 168 }, { "epoch": 0.26635145784081954, "grad_norm": 0.2879612147808075, "learning_rate": 3.98731427327972e-06, "log_odds_chosen": 0.38767939805984497, "log_odds_ratio": -0.5189070105552673, "logits/chosen": 0.09615111351013184, "logits/rejected": -0.552105724811554, "logps/chosen": -1.7201054096221924, "logps/rejected": -2.047496795654297, "loss": 1.9089, "nll_loss": 1.8569788932800293, "rewards/accuracies": 1.0, "rewards/chosen": -0.17201054096221924, "rewards/margins": 0.03273913264274597, "rewards/rejected": -0.2047496885061264, "step": 169 }, { "epoch": 0.26792750197005516, "grad_norm": 0.2887536585330963, "learning_rate": 3.986688606524273e-06, "log_odds_chosen": 0.32007887959480286, "log_odds_ratio": -0.5568875670433044, "logits/chosen": 0.20907297730445862, "logits/rejected": -0.717089831829071, "logps/chosen": -1.7479276657104492, "logps/rejected": -2.0232884883880615, "loss": 1.9428, "nll_loss": 1.8871328830718994, "rewards/accuracies": 0.75, "rewards/chosen": -0.17479278147220612, "rewards/margins": 0.02753606252372265, "rewards/rejected": -0.20232883095741272, "step": 170 }, { "epoch": 0.2695035460992908, "grad_norm": 0.2829509675502777, "learning_rate": 3.986047931285315e-06, "log_odds_chosen": 0.493367999792099, "log_odds_ratio": -0.4808065891265869, "logits/chosen": 0.08703712373971939, "logits/rejected": -0.6400361657142639, "logps/chosen": -1.6581294536590576, "logps/rejected": -2.072545051574707, "loss": 1.848, "nll_loss": 1.7999612092971802, "rewards/accuracies": 1.0, "rewards/chosen": -0.16581295430660248, "rewards/margins": 0.041441574692726135, "rewards/rejected": -0.20725451409816742, "step": 171 }, { "epoch": 0.2710795902285264, "grad_norm": 0.27343836426734924, "learning_rate": 3.985392252402847e-06, "log_odds_chosen": 0.23880890011787415, "log_odds_ratio": -0.5835365653038025, "logits/chosen": 0.08178934454917908, "logits/rejected": -0.431425541639328, "logps/chosen": -1.655697226524353, "logps/rejected": -1.8529317378997803, "loss": 1.8557, "nll_loss": 1.7973511219024658, "rewards/accuracies": 1.0, "rewards/chosen": -0.1655697375535965, "rewards/margins": 0.0197234395891428, "rewards/rejected": -0.18529316782951355, "step": 172 }, { "epoch": 0.272655634357762, "grad_norm": 0.2862318754196167, "learning_rate": 3.984721574830206e-06, "log_odds_chosen": 0.27102503180503845, "log_odds_ratio": -0.5708762407302856, "logits/chosen": 0.14478133618831635, "logits/rejected": -0.510442316532135, "logps/chosen": -1.7600399255752563, "logps/rejected": -1.9898476600646973, "loss": 1.952, "nll_loss": 1.894890308380127, "rewards/accuracies": 1.0, "rewards/chosen": -0.17600399255752563, "rewards/margins": 0.02298077940940857, "rewards/rejected": -0.1989847719669342, "step": 173 }, { "epoch": 0.27423167848699764, "grad_norm": 0.26644009351730347, "learning_rate": 3.984035903634041e-06, "log_odds_chosen": 0.2790209650993347, "log_odds_ratio": -0.5701457262039185, "logits/chosen": 0.1971094161272049, "logits/rejected": -0.5762545466423035, "logps/chosen": -1.7363693714141846, "logps/rejected": -1.9710581302642822, "loss": 1.914, "nll_loss": 1.8569939136505127, "rewards/accuracies": 0.75, "rewards/chosen": -0.17363695800304413, "rewards/margins": 0.023468857631087303, "rewards/rejected": -0.19710581004619598, "step": 174 }, { "epoch": 0.27580772261623326, "grad_norm": 0.2771873474121094, "learning_rate": 3.983335243994273e-06, "log_odds_chosen": 0.37392759323120117, "log_odds_ratio": -0.5302526354789734, "logits/chosen": 0.21129730343818665, "logits/rejected": -0.5397756099700928, "logps/chosen": -1.726618766784668, "logps/rejected": -2.042837381362915, "loss": 1.9182, "nll_loss": 1.8651602268218994, "rewards/accuracies": 0.875, "rewards/chosen": -0.17266185581684113, "rewards/margins": 0.03162187710404396, "rewards/rejected": -0.20428375899791718, "step": 175 }, { "epoch": 0.2773837667454689, "grad_norm": 0.2980581223964691, "learning_rate": 3.982619601204049e-06, "log_odds_chosen": 0.2845771908760071, "log_odds_ratio": -0.565645158290863, "logits/chosen": 0.1371021270751953, "logits/rejected": -0.544967770576477, "logps/chosen": -1.784250259399414, "logps/rejected": -2.0266098976135254, "loss": 1.9752, "nll_loss": 1.9185881614685059, "rewards/accuracies": 1.0, "rewards/chosen": -0.17842502892017365, "rewards/margins": 0.02423596940934658, "rewards/rejected": -0.20266100764274597, "step": 176 }, { "epoch": 0.2789598108747045, "grad_norm": 0.28183117508888245, "learning_rate": 3.9818889806697085e-06, "log_odds_chosen": 0.37239906191825867, "log_odds_ratio": -0.5329021215438843, "logits/chosen": 0.24540948867797852, "logits/rejected": -0.537138044834137, "logps/chosen": -1.824477195739746, "logps/rejected": -2.1465721130371094, "loss": 1.9904, "nll_loss": 1.9371103048324585, "rewards/accuracies": 0.875, "rewards/chosen": -0.18244771659374237, "rewards/margins": 0.032209500670433044, "rewards/rejected": -0.21465720236301422, "step": 177 }, { "epoch": 0.2805358550039401, "grad_norm": 0.2755196988582611, "learning_rate": 3.98114338791074e-06, "log_odds_chosen": 0.4614989757537842, "log_odds_ratio": -0.5006829500198364, "logits/chosen": 0.11655398458242416, "logits/rejected": -0.4944975674152374, "logps/chosen": -1.6953446865081787, "logps/rejected": -2.088918685913086, "loss": 1.89, "nll_loss": 1.8398841619491577, "rewards/accuracies": 1.0, "rewards/chosen": -0.16953447461128235, "rewards/margins": 0.03935740143060684, "rewards/rejected": -0.2088918834924698, "step": 178 }, { "epoch": 0.28211189913317575, "grad_norm": 0.262363463640213, "learning_rate": 3.980382828559742e-06, "log_odds_chosen": 0.37132197618484497, "log_odds_ratio": -0.53047114610672, "logits/chosen": 0.1178860068321228, "logits/rejected": -0.5356312990188599, "logps/chosen": -1.7367223501205444, "logps/rejected": -2.051452159881592, "loss": 1.9086, "nll_loss": 1.8555084466934204, "rewards/accuracies": 0.875, "rewards/chosen": -0.17367224395275116, "rewards/margins": 0.03147297352552414, "rewards/rejected": -0.2051452100276947, "step": 179 }, { "epoch": 0.28368794326241137, "grad_norm": 0.2845723032951355, "learning_rate": 3.9796073083623774e-06, "log_odds_chosen": 0.33220580220222473, "log_odds_ratio": -0.5529859066009521, "logits/chosen": 0.13935938477516174, "logits/rejected": -0.5945897698402405, "logps/chosen": -1.805295705795288, "logps/rejected": -2.08857798576355, "loss": 1.9753, "nll_loss": 1.9200148582458496, "rewards/accuracies": 0.875, "rewards/chosen": -0.18052956461906433, "rewards/margins": 0.028328238055109978, "rewards/rejected": -0.20885780453681946, "step": 180 }, { "epoch": 0.285263987391647, "grad_norm": 0.2820357382297516, "learning_rate": 3.978816833177329e-06, "log_odds_chosen": 0.3633921444416046, "log_odds_ratio": -0.5330725312232971, "logits/chosen": 0.2247752994298935, "logits/rejected": -0.42544880509376526, "logps/chosen": -1.7320586442947388, "logps/rejected": -2.0406675338745117, "loss": 1.91, "nll_loss": 1.8566887378692627, "rewards/accuracies": 1.0, "rewards/chosen": -0.17320585250854492, "rewards/margins": 0.03086087293922901, "rewards/rejected": -0.20406673848628998, "step": 181 }, { "epoch": 0.2868400315208826, "grad_norm": 0.2795039713382721, "learning_rate": 3.978011408976261e-06, "log_odds_chosen": 0.2320139855146408, "log_odds_ratio": -0.5891825556755066, "logits/chosen": 0.19194500148296356, "logits/rejected": -0.5741883516311646, "logps/chosen": -1.750451683998108, "logps/rejected": -1.9439057111740112, "loss": 1.9425, "nll_loss": 1.8835428953170776, "rewards/accuracies": 0.75, "rewards/chosen": -0.1750451624393463, "rewards/margins": 0.019345413893461227, "rewards/rejected": -0.19439058005809784, "step": 182 }, { "epoch": 0.28841607565011823, "grad_norm": 0.2846316993236542, "learning_rate": 3.9771910418437674e-06, "log_odds_chosen": 0.3140004575252533, "log_odds_ratio": -0.54939204454422, "logits/chosen": 0.1681434065103531, "logits/rejected": -0.3001997172832489, "logps/chosen": -1.7808198928833008, "logps/rejected": -2.049476385116577, "loss": 1.956, "nll_loss": 1.9010967016220093, "rewards/accuracies": 1.0, "rewards/chosen": -0.17808198928833008, "rewards/margins": 0.026865659281611443, "rewards/rejected": -0.20494765043258667, "step": 183 }, { "epoch": 0.2899921197793538, "grad_norm": 0.2878468334674835, "learning_rate": 3.976355737977332e-06, "log_odds_chosen": 0.3773011863231659, "log_odds_ratio": -0.5303803086280823, "logits/chosen": 0.18860210478305817, "logits/rejected": -0.5023236274719238, "logps/chosen": -1.7276164293289185, "logps/rejected": -2.046980381011963, "loss": 1.9104, "nll_loss": 1.8573479652404785, "rewards/accuracies": 0.875, "rewards/chosen": -0.17276166379451752, "rewards/margins": 0.03193638473749161, "rewards/rejected": -0.20469802618026733, "step": 184 }, { "epoch": 0.2915681639085894, "grad_norm": 0.2533093988895416, "learning_rate": 3.975505503687274e-06, "log_odds_chosen": 0.3036259412765503, "log_odds_ratio": -0.5556970238685608, "logits/chosen": 0.23977245390415192, "logits/rejected": -0.28354203701019287, "logps/chosen": -1.7183681726455688, "logps/rejected": -1.9746772050857544, "loss": 1.8965, "nll_loss": 1.8409373760223389, "rewards/accuracies": 1.0, "rewards/chosen": -0.17183682322502136, "rewards/margins": 0.025630896911025047, "rewards/rejected": -0.19746771454811096, "step": 185 }, { "epoch": 0.29314420803782504, "grad_norm": 0.2554759383201599, "learning_rate": 3.974640345396708e-06, "log_odds_chosen": 0.458074152469635, "log_odds_ratio": -0.4967803359031677, "logits/chosen": 0.09675043821334839, "logits/rejected": -0.8243634104728699, "logps/chosen": -1.6910256147384644, "logps/rejected": -2.077892780303955, "loss": 1.8667, "nll_loss": 1.8170145750045776, "rewards/accuracies": 1.0, "rewards/chosen": -0.16910257935523987, "rewards/margins": 0.03868672996759415, "rewards/rejected": -0.20778930187225342, "step": 186 }, { "epoch": 0.29472025216706066, "grad_norm": 0.2935222387313843, "learning_rate": 3.9737602696414925e-06, "log_odds_chosen": 0.159349724650383, "log_odds_ratio": -0.6268662214279175, "logits/chosen": 0.21252146363258362, "logits/rejected": -0.6300286650657654, "logps/chosen": -1.78038489818573, "logps/rejected": -1.914283037185669, "loss": 1.9361, "nll_loss": 1.8734400272369385, "rewards/accuracies": 0.625, "rewards/chosen": -0.17803849279880524, "rewards/margins": 0.013389825820922852, "rewards/rejected": -0.1914283186197281, "step": 187 }, { "epoch": 0.2962962962962963, "grad_norm": 0.2445816546678543, "learning_rate": 3.972865283070179e-06, "log_odds_chosen": 0.38453128933906555, "log_odds_ratio": -0.5239704847335815, "logits/chosen": 0.16407378017902374, "logits/rejected": -0.7210597991943359, "logps/chosen": -1.6779310703277588, "logps/rejected": -2.003727436065674, "loss": 1.8617, "nll_loss": 1.8092904090881348, "rewards/accuracies": 1.0, "rewards/chosen": -0.16779311001300812, "rewards/margins": 0.032579630613327026, "rewards/rejected": -0.20037274062633514, "step": 188 }, { "epoch": 0.2978723404255319, "grad_norm": 0.2602602243423462, "learning_rate": 3.971955392443965e-06, "log_odds_chosen": 0.3589805066585541, "log_odds_ratio": -0.535193920135498, "logits/chosen": 0.19359144568443298, "logits/rejected": -0.5294786691665649, "logps/chosen": -1.7104607820510864, "logps/rejected": -2.017254114151001, "loss": 1.8819, "nll_loss": 1.8284274339675903, "rewards/accuracies": 1.0, "rewards/chosen": -0.17104607820510864, "rewards/margins": 0.03067934513092041, "rewards/rejected": -0.20172543823719025, "step": 189 }, { "epoch": 0.2994483845547675, "grad_norm": 0.2570762634277344, "learning_rate": 3.971030604636637e-06, "log_odds_chosen": 0.20797011256217957, "log_odds_ratio": -0.5995408892631531, "logits/chosen": 0.24219730496406555, "logits/rejected": -0.49577596783638, "logps/chosen": -1.726780891418457, "logps/rejected": -1.9002296924591064, "loss": 1.8985, "nll_loss": 1.8385004997253418, "rewards/accuracies": 0.875, "rewards/chosen": -0.17267809808254242, "rewards/margins": 0.017344871535897255, "rewards/rejected": -0.19002296030521393, "step": 190 }, { "epoch": 0.30102442868400314, "grad_norm": 0.27055835723876953, "learning_rate": 3.970090926634526e-06, "log_odds_chosen": 0.29205840826034546, "log_odds_ratio": -0.5609118938446045, "logits/chosen": 0.3021741211414337, "logits/rejected": -0.7257508635520935, "logps/chosen": -1.7830076217651367, "logps/rejected": -2.0307276248931885, "loss": 1.9615, "nll_loss": 1.9053698778152466, "rewards/accuracies": 1.0, "rewards/chosen": -0.17830076813697815, "rewards/margins": 0.024771984666585922, "rewards/rejected": -0.20307274162769318, "step": 191 }, { "epoch": 0.30260047281323876, "grad_norm": 0.27686426043510437, "learning_rate": 3.9691363655364526e-06, "log_odds_chosen": 0.3535913825035095, "log_odds_ratio": -0.5396946668624878, "logits/chosen": 0.19939753413200378, "logits/rejected": -0.527854859828949, "logps/chosen": -1.7547531127929688, "logps/rejected": -2.056049108505249, "loss": 1.9315, "nll_loss": 1.8775546550750732, "rewards/accuracies": 0.875, "rewards/chosen": -0.1754753142595291, "rewards/margins": 0.030129600316286087, "rewards/rejected": -0.2056049108505249, "step": 192 }, { "epoch": 0.3041765169424744, "grad_norm": 0.24814869463443756, "learning_rate": 3.968166928553666e-06, "log_odds_chosen": 0.29018789529800415, "log_odds_ratio": -0.5631506443023682, "logits/chosen": 0.23991963267326355, "logits/rejected": -0.6883436441421509, "logps/chosen": -1.708446741104126, "logps/rejected": -1.9508640766143799, "loss": 1.9016, "nll_loss": 1.8453017473220825, "rewards/accuracies": 0.875, "rewards/chosen": -0.1708446592092514, "rewards/margins": 0.02424173429608345, "rewards/rejected": -0.19508640468120575, "step": 193 }, { "epoch": 0.30575256107171, "grad_norm": 0.2546519637107849, "learning_rate": 3.967182623009804e-06, "log_odds_chosen": 0.42340725660324097, "log_odds_ratio": -0.5090901851654053, "logits/chosen": 0.18889212608337402, "logits/rejected": -0.5412212014198303, "logps/chosen": -1.7333446741104126, "logps/rejected": -2.0966084003448486, "loss": 1.8946, "nll_loss": 1.8436520099639893, "rewards/accuracies": 1.0, "rewards/chosen": -0.17333447933197021, "rewards/margins": 0.03632635623216629, "rewards/rejected": -0.2096608430147171, "step": 194 }, { "epoch": 0.3073286052009456, "grad_norm": 0.27097785472869873, "learning_rate": 3.966183456340821e-06, "log_odds_chosen": 0.19589565694332123, "log_odds_ratio": -0.6020736694335938, "logits/chosen": 0.23710918426513672, "logits/rejected": -0.5323060154914856, "logps/chosen": -1.7581830024719238, "logps/rejected": -1.9214140176773071, "loss": 1.9344, "nll_loss": 1.8741968870162964, "rewards/accuracies": 1.0, "rewards/chosen": -0.1758182942867279, "rewards/margins": 0.01632309891283512, "rewards/rejected": -0.19214141368865967, "step": 195 }, { "epoch": 0.30890464933018125, "grad_norm": 0.2723829448223114, "learning_rate": 3.965169436094947e-06, "log_odds_chosen": 0.35698461532592773, "log_odds_ratio": -0.5402282476425171, "logits/chosen": 0.22953951358795166, "logits/rejected": -0.668543815612793, "logps/chosen": -1.6529057025909424, "logps/rejected": -1.9540297985076904, "loss": 1.8363, "nll_loss": 1.7822985649108887, "rewards/accuracies": 0.875, "rewards/chosen": -0.16529057919979095, "rewards/margins": 0.030112413689494133, "rewards/rejected": -0.19540299475193024, "step": 196 }, { "epoch": 0.31048069345941687, "grad_norm": 0.26106390357017517, "learning_rate": 3.964140569932618e-06, "log_odds_chosen": 0.29018843173980713, "log_odds_ratio": -0.5803290009498596, "logits/chosen": 0.1857983022928238, "logits/rejected": -0.8773932456970215, "logps/chosen": -1.7773064374923706, "logps/rejected": -2.021932601928711, "loss": 1.9428, "nll_loss": 1.884739637374878, "rewards/accuracies": 0.75, "rewards/chosen": -0.17773064970970154, "rewards/margins": 0.02446262538433075, "rewards/rejected": -0.2021932750940323, "step": 197 }, { "epoch": 0.3120567375886525, "grad_norm": 0.25397989153862, "learning_rate": 3.9630968656264285e-06, "log_odds_chosen": 0.42598769068717957, "log_odds_ratio": -0.509061872959137, "logits/chosen": 0.21715356409549713, "logits/rejected": -0.4817379415035248, "logps/chosen": -1.7002034187316895, "logps/rejected": -2.0615005493164062, "loss": 1.8609, "nll_loss": 1.810002326965332, "rewards/accuracies": 1.0, "rewards/chosen": -0.17002034187316895, "rewards/margins": 0.036129724234342575, "rewards/rejected": -0.20615006983280182, "step": 198 }, { "epoch": 0.3136327817178881, "grad_norm": 0.26781895756721497, "learning_rate": 3.962038331061065e-06, "log_odds_chosen": 0.1461435854434967, "log_odds_ratio": -0.6345757246017456, "logits/chosen": 0.28389307856559753, "logits/rejected": -0.6472858786582947, "logps/chosen": -1.7552485466003418, "logps/rejected": -1.883392333984375, "loss": 1.9384, "nll_loss": 1.8749713897705078, "rewards/accuracies": 0.625, "rewards/chosen": -0.17552484571933746, "rewards/margins": 0.012814389541745186, "rewards/rejected": -0.1883392333984375, "step": 199 }, { "epoch": 0.31520882584712373, "grad_norm": 0.26174989342689514, "learning_rate": 3.96096497423325e-06, "log_odds_chosen": 0.26020583510398865, "log_odds_ratio": -0.5759395360946655, "logits/chosen": 0.3217354714870453, "logits/rejected": -0.2811959683895111, "logps/chosen": -1.6606853008270264, "logps/rejected": -1.8771640062332153, "loss": 1.8637, "nll_loss": 1.806121826171875, "rewards/accuracies": 1.0, "rewards/chosen": -0.16606852412223816, "rewards/margins": 0.021647876128554344, "rewards/rejected": -0.18771640956401825, "step": 200 }, { "epoch": 0.31678486997635935, "grad_norm": 0.24145232141017914, "learning_rate": 3.959876803251684e-06, "log_odds_chosen": 0.3890625238418579, "log_odds_ratio": -0.524219274520874, "logits/chosen": 0.1805240660905838, "logits/rejected": -0.5619311928749084, "logps/chosen": -1.6790175437927246, "logps/rejected": -2.00699782371521, "loss": 1.8352, "nll_loss": 1.782778263092041, "rewards/accuracies": 1.0, "rewards/chosen": -0.16790175437927246, "rewards/margins": 0.03279803693294525, "rewards/rejected": -0.2006998062133789, "step": 201 }, { "epoch": 0.31836091410559497, "grad_norm": 0.25001904368400574, "learning_rate": 3.958773826336977e-06, "log_odds_chosen": 0.27317214012145996, "log_odds_ratio": -0.5676698088645935, "logits/chosen": 0.27888408303260803, "logits/rejected": -0.415998250246048, "logps/chosen": -1.7196669578552246, "logps/rejected": -1.9488410949707031, "loss": 1.8963, "nll_loss": 1.8395037651062012, "rewards/accuracies": 1.0, "rewards/chosen": -0.17196668684482574, "rewards/margins": 0.02291741594672203, "rewards/rejected": -0.19488412141799927, "step": 202 }, { "epoch": 0.3199369582348306, "grad_norm": 0.2554585933685303, "learning_rate": 3.957656051821592e-06, "log_odds_chosen": 0.23165369033813477, "log_odds_ratio": -0.5892881751060486, "logits/chosen": 0.3021984100341797, "logits/rejected": -0.6948502063751221, "logps/chosen": -1.7531200647354126, "logps/rejected": -1.9506298303604126, "loss": 1.9028, "nll_loss": 1.8439098596572876, "rewards/accuracies": 0.875, "rewards/chosen": -0.17531201243400574, "rewards/margins": 0.019750984385609627, "rewards/rejected": -0.19506299495697021, "step": 203 }, { "epoch": 0.3215130023640662, "grad_norm": 0.26098933815956116, "learning_rate": 3.956523488149783e-06, "log_odds_chosen": 0.3122096061706543, "log_odds_ratio": -0.5535508394241333, "logits/chosen": 0.3232036828994751, "logits/rejected": -0.47777220606803894, "logps/chosen": -1.8531510829925537, "logps/rejected": -2.122044563293457, "loss": 2.0121, "nll_loss": 1.9567644596099854, "rewards/accuracies": 1.0, "rewards/chosen": -0.1853151172399521, "rewards/margins": 0.02688935585319996, "rewards/rejected": -0.2122044712305069, "step": 204 }, { "epoch": 0.32308904649330183, "grad_norm": 0.2530037462711334, "learning_rate": 3.9553761438775285e-06, "log_odds_chosen": 0.5448867678642273, "log_odds_ratio": -0.4623796045780182, "logits/chosen": 0.1774434596300125, "logits/rejected": -0.509428083896637, "logps/chosen": -1.5850623846054077, "logps/rejected": -2.0427145957946777, "loss": 1.747, "nll_loss": 1.7007546424865723, "rewards/accuracies": 1.0, "rewards/chosen": -0.15850622951984406, "rewards/margins": 0.04576525092124939, "rewards/rejected": -0.20427148044109344, "step": 205 }, { "epoch": 0.32466509062253746, "grad_norm": 0.23750121891498566, "learning_rate": 3.954214027672465e-06, "log_odds_chosen": 0.48206406831741333, "log_odds_ratio": -0.48811841011047363, "logits/chosen": 0.18673132359981537, "logits/rejected": -0.9018082022666931, "logps/chosen": -1.67806077003479, "logps/rejected": -2.0901315212249756, "loss": 1.8315, "nll_loss": 1.7826416492462158, "rewards/accuracies": 1.0, "rewards/chosen": -0.16780607402324677, "rewards/margins": 0.041207075119018555, "rewards/rejected": -0.2090131640434265, "step": 206 }, { "epoch": 0.3262411347517731, "grad_norm": 0.25891193747520447, "learning_rate": 3.953037148313825e-06, "log_odds_chosen": 0.28911662101745605, "log_odds_ratio": -0.5642296075820923, "logits/chosen": 0.27622562646865845, "logits/rejected": -0.581394612789154, "logps/chosen": -1.6668041944503784, "logps/rejected": -1.9111988544464111, "loss": 1.8538, "nll_loss": 1.7974015474319458, "rewards/accuracies": 0.875, "rewards/chosen": -0.16668042540550232, "rewards/margins": 0.024439461529254913, "rewards/rejected": -0.19111987948417664, "step": 207 }, { "epoch": 0.32781717888100864, "grad_norm": 0.2777508497238159, "learning_rate": 3.951845514692371e-06, "log_odds_chosen": 0.39269790053367615, "log_odds_ratio": -0.5250096321105957, "logits/chosen": 0.2141711413860321, "logits/rejected": -0.47686973214149475, "logps/chosen": -1.729377269744873, "logps/rejected": -2.064727783203125, "loss": 1.9033, "nll_loss": 1.8507862091064453, "rewards/accuracies": 1.0, "rewards/chosen": -0.17293773591518402, "rewards/margins": 0.03353503346443176, "rewards/rejected": -0.20647276937961578, "step": 208 }, { "epoch": 0.32939322301024426, "grad_norm": 0.2944742441177368, "learning_rate": 3.950639135810325e-06, "log_odds_chosen": 0.3137563467025757, "log_odds_ratio": -0.5539580583572388, "logits/chosen": 0.3174862265586853, "logits/rejected": -0.45376986265182495, "logps/chosen": -1.850959062576294, "logps/rejected": -2.1199288368225098, "loss": 1.9963, "nll_loss": 1.9408817291259766, "rewards/accuracies": 0.875, "rewards/chosen": -0.1850959062576294, "rewards/margins": 0.02689695730805397, "rewards/rejected": -0.21199287474155426, "step": 209 }, { "epoch": 0.3309692671394799, "grad_norm": 0.3148304224014282, "learning_rate": 3.9494180207813044e-06, "log_odds_chosen": 0.23787932097911835, "log_odds_ratio": -0.5915074348449707, "logits/chosen": 0.30660662055015564, "logits/rejected": -0.7329879403114319, "logps/chosen": -1.717742919921875, "logps/rejected": -1.916438102722168, "loss": 1.8905, "nll_loss": 1.8313184976577759, "rewards/accuracies": 0.875, "rewards/chosen": -0.17177429795265198, "rewards/margins": 0.019869530573487282, "rewards/rejected": -0.1916438341140747, "step": 210 }, { "epoch": 0.3325453112687155, "grad_norm": 0.2588319778442383, "learning_rate": 3.948182178830249e-06, "log_odds_chosen": 0.29779791831970215, "log_odds_ratio": -0.5746859908103943, "logits/chosen": 0.2758365273475647, "logits/rejected": -0.8558934926986694, "logps/chosen": -1.7486122846603394, "logps/rejected": -2.0061802864074707, "loss": 1.904, "nll_loss": 1.8465510606765747, "rewards/accuracies": 0.75, "rewards/chosen": -0.17486125230789185, "rewards/margins": 0.025756794959306717, "rewards/rejected": -0.20061802864074707, "step": 211 }, { "epoch": 0.3341213553979511, "grad_norm": 0.27347490191459656, "learning_rate": 3.9469316192933545e-06, "log_odds_chosen": 0.5377534627914429, "log_odds_ratio": -0.4662073850631714, "logits/chosen": 0.16830803453922272, "logits/rejected": -0.490761935710907, "logps/chosen": -1.6018506288528442, "logps/rejected": -2.05351185798645, "loss": 1.7708, "nll_loss": 1.7242186069488525, "rewards/accuracies": 1.0, "rewards/chosen": -0.1601850688457489, "rewards/margins": 0.04516611993312836, "rewards/rejected": -0.20535118877887726, "step": 212 }, { "epoch": 0.33569739952718675, "grad_norm": 0.2533399164676666, "learning_rate": 3.945666351618001e-06, "log_odds_chosen": 0.2882734537124634, "log_odds_ratio": -0.562584638595581, "logits/chosen": 0.2559697926044464, "logits/rejected": -0.5387569069862366, "logps/chosen": -1.6552236080169678, "logps/rejected": -1.8957173824310303, "loss": 1.823, "nll_loss": 1.7667841911315918, "rewards/accuracies": 1.0, "rewards/chosen": -0.16552235186100006, "rewards/margins": 0.02404937893152237, "rewards/rejected": -0.18957173824310303, "step": 213 }, { "epoch": 0.33727344365642237, "grad_norm": 0.25932711362838745, "learning_rate": 3.9443863853626825e-06, "log_odds_chosen": 0.3108881711959839, "log_odds_ratio": -0.5589509010314941, "logits/chosen": 0.2859116494655609, "logits/rejected": -0.6345618367195129, "logps/chosen": -1.6215680837631226, "logps/rejected": -1.8729337453842163, "loss": 1.8125, "nll_loss": 1.7566089630126953, "rewards/accuracies": 0.875, "rewards/chosen": -0.1621568202972412, "rewards/margins": 0.025136563926935196, "rewards/rejected": -0.1872933804988861, "step": 214 }, { "epoch": 0.338849487785658, "grad_norm": 0.250918984413147, "learning_rate": 3.943091730196931e-06, "log_odds_chosen": 0.2744632363319397, "log_odds_ratio": -0.5730749368667603, "logits/chosen": 0.3415977954864502, "logits/rejected": -0.41705650091171265, "logps/chosen": -1.6209944486618042, "logps/rejected": -1.8511853218078613, "loss": 1.7975, "nll_loss": 1.740174651145935, "rewards/accuracies": 1.0, "rewards/chosen": -0.1620994359254837, "rewards/margins": 0.023019105195999146, "rewards/rejected": -0.18511852622032166, "step": 215 }, { "epoch": 0.3404255319148936, "grad_norm": 0.27474015951156616, "learning_rate": 3.941782395901249e-06, "log_odds_chosen": 0.3491838574409485, "log_odds_ratio": -0.5398426055908203, "logits/chosen": 0.2213699072599411, "logits/rejected": -0.549167811870575, "logps/chosen": -1.69877028465271, "logps/rejected": -1.9932255744934082, "loss": 1.8755, "nll_loss": 1.8215399980545044, "rewards/accuracies": 0.875, "rewards/chosen": -0.16987702250480652, "rewards/margins": 0.02944553829729557, "rewards/rejected": -0.19932258129119873, "step": 216 }, { "epoch": 0.34200157604412923, "grad_norm": 0.25679028034210205, "learning_rate": 3.940458392367032e-06, "log_odds_chosen": 0.3104505240917206, "log_odds_ratio": -0.5546755194664001, "logits/chosen": 0.2595285475254059, "logits/rejected": -0.8533272743225098, "logps/chosen": -1.7510833740234375, "logps/rejected": -2.0138750076293945, "loss": 1.8934, "nll_loss": 1.8379460573196411, "rewards/accuracies": 1.0, "rewards/chosen": -0.17510835826396942, "rewards/margins": 0.026279138401150703, "rewards/rejected": -0.20138749480247498, "step": 217 }, { "epoch": 0.34357762017336485, "grad_norm": 0.2611715495586395, "learning_rate": 3.939119729596493e-06, "log_odds_chosen": 0.35739466547966003, "log_odds_ratio": -0.5335213541984558, "logits/chosen": 0.3199889659881592, "logits/rejected": -0.6550436019897461, "logps/chosen": -1.7533951997756958, "logps/rejected": -2.057891368865967, "loss": 1.9075, "nll_loss": 1.8541964292526245, "rewards/accuracies": 1.0, "rewards/chosen": -0.17533953487873077, "rewards/margins": 0.03044959530234337, "rewards/rejected": -0.20578913390636444, "step": 218 }, { "epoch": 0.34515366430260047, "grad_norm": 0.26392099261283875, "learning_rate": 3.93776641770259e-06, "log_odds_chosen": 0.2948194742202759, "log_odds_ratio": -0.562857449054718, "logits/chosen": 0.28813865780830383, "logits/rejected": -0.7187209129333496, "logps/chosen": -1.7230334281921387, "logps/rejected": -1.973144292831421, "loss": 1.8747, "nll_loss": 1.8184067010879517, "rewards/accuracies": 1.0, "rewards/chosen": -0.17230333387851715, "rewards/margins": 0.025011096149683, "rewards/rejected": -0.19731444120407104, "step": 219 }, { "epoch": 0.3467297084318361, "grad_norm": 0.26364782452583313, "learning_rate": 3.93639846690895e-06, "log_odds_chosen": 0.4254459738731384, "log_odds_ratio": -0.5071126222610474, "logits/chosen": 0.31134355068206787, "logits/rejected": -0.9344062805175781, "logps/chosen": -1.7416664361953735, "logps/rejected": -2.10628080368042, "loss": 1.8893, "nll_loss": 1.8385860919952393, "rewards/accuracies": 1.0, "rewards/chosen": -0.17416664958000183, "rewards/margins": 0.03646141290664673, "rewards/rejected": -0.21062806248664856, "step": 220 }, { "epoch": 0.3483057525610717, "grad_norm": 0.2517707943916321, "learning_rate": 3.9350158875497855e-06, "log_odds_chosen": 0.5023624300956726, "log_odds_ratio": -0.4771158695220947, "logits/chosen": 0.31009235978126526, "logits/rejected": -0.771135687828064, "logps/chosen": -1.6524683237075806, "logps/rejected": -2.075108289718628, "loss": 1.8136, "nll_loss": 1.765908122062683, "rewards/accuracies": 1.0, "rewards/chosen": -0.165246844291687, "rewards/margins": 0.04226400703191757, "rewards/rejected": -0.20751085877418518, "step": 221 }, { "epoch": 0.34988179669030733, "grad_norm": 0.24314001202583313, "learning_rate": 3.933618690069824e-06, "log_odds_chosen": 0.4676588773727417, "log_odds_ratio": -0.4905795454978943, "logits/chosen": 0.17770220339298248, "logits/rejected": -0.7084282636642456, "logps/chosen": -1.6020326614379883, "logps/rejected": -1.9884483814239502, "loss": 1.7733, "nll_loss": 1.7242058515548706, "rewards/accuracies": 1.0, "rewards/chosen": -0.16020327806472778, "rewards/margins": 0.038641560822725296, "rewards/rejected": -0.19884483516216278, "step": 222 }, { "epoch": 0.35145784081954295, "grad_norm": 0.2550933361053467, "learning_rate": 3.932206885024226e-06, "log_odds_chosen": 0.23466067016124725, "log_odds_ratio": -0.5852788090705872, "logits/chosen": 0.22302168607711792, "logits/rejected": -0.6709792017936707, "logps/chosen": -1.7109097242355347, "logps/rejected": -1.9076846837997437, "loss": 1.8546, "nll_loss": 1.7960247993469238, "rewards/accuracies": 0.875, "rewards/chosen": -0.1710909754037857, "rewards/margins": 0.019677501171827316, "rewards/rejected": -0.19076848030090332, "step": 223 }, { "epoch": 0.3530338849487786, "grad_norm": 0.22619383037090302, "learning_rate": 3.930780483078502e-06, "log_odds_chosen": 0.3614213466644287, "log_odds_ratio": -0.5334374904632568, "logits/chosen": 0.1996021270751953, "logits/rejected": -0.7123557329177856, "logps/chosen": -1.6352108716964722, "logps/rejected": -1.9351240396499634, "loss": 1.807, "nll_loss": 1.7537031173706055, "rewards/accuracies": 1.0, "rewards/chosen": -0.16352108120918274, "rewards/margins": 0.02999131567776203, "rewards/rejected": -0.19351240992546082, "step": 224 }, { "epoch": 0.3546099290780142, "grad_norm": 0.2543198764324188, "learning_rate": 3.92933949500844e-06, "log_odds_chosen": 0.39015451073646545, "log_odds_ratio": -0.5326002836227417, "logits/chosen": 0.28930386900901794, "logits/rejected": -0.7000013589859009, "logps/chosen": -1.609468936920166, "logps/rejected": -1.9277451038360596, "loss": 1.7804, "nll_loss": 1.727099895477295, "rewards/accuracies": 0.75, "rewards/chosen": -0.16094687581062317, "rewards/margins": 0.031827617436647415, "rewards/rejected": -0.19277450442314148, "step": 225 }, { "epoch": 0.3561859732072498, "grad_norm": 0.23875081539154053, "learning_rate": 3.9278839317000155e-06, "log_odds_chosen": 0.40506240725517273, "log_odds_ratio": -0.5156422853469849, "logits/chosen": 0.30614709854125977, "logits/rejected": -0.6908950805664062, "logps/chosen": -1.635833978652954, "logps/rejected": -1.9742740392684937, "loss": 1.7931, "nll_loss": 1.7415223121643066, "rewards/accuracies": 1.0, "rewards/chosen": -0.1635833978652954, "rewards/margins": 0.0338440015912056, "rewards/rejected": -0.1974273920059204, "step": 226 }, { "epoch": 0.35776201733648544, "grad_norm": 0.24680069088935852, "learning_rate": 3.926413804149314e-06, "log_odds_chosen": 0.1316530406475067, "log_odds_ratio": -0.6378493309020996, "logits/chosen": 0.28156548738479614, "logits/rejected": -0.6600248217582703, "logps/chosen": -1.7210869789123535, "logps/rejected": -1.8304848670959473, "loss": 1.8788, "nll_loss": 1.815049171447754, "rewards/accuracies": 0.625, "rewards/chosen": -0.1721086949110031, "rewards/margins": 0.010939793661236763, "rewards/rejected": -0.18304848670959473, "step": 227 }, { "epoch": 0.35933806146572106, "grad_norm": 0.2515822649002075, "learning_rate": 3.924929123462447e-06, "log_odds_chosen": 0.2854158878326416, "log_odds_ratio": -0.5666006207466125, "logits/chosen": 0.33641237020492554, "logits/rejected": -0.7290589809417725, "logps/chosen": -1.7506561279296875, "logps/rejected": -1.9922963380813599, "loss": 1.8996, "nll_loss": 1.84292733669281, "rewards/accuracies": 0.875, "rewards/chosen": -0.17506560683250427, "rewards/margins": 0.024164030328392982, "rewards/rejected": -0.1992296427488327, "step": 228 }, { "epoch": 0.3609141055949567, "grad_norm": 0.26427891850471497, "learning_rate": 3.923429900855468e-06, "log_odds_chosen": 0.21544109284877777, "log_odds_ratio": -0.5983462333679199, "logits/chosen": 0.2909620702266693, "logits/rejected": -0.438875675201416, "logps/chosen": -1.8233458995819092, "logps/rejected": -2.004505157470703, "loss": 1.9767, "nll_loss": 1.9168180227279663, "rewards/accuracies": 0.75, "rewards/chosen": -0.18233460187911987, "rewards/margins": 0.018115926533937454, "rewards/rejected": -0.20045052468776703, "step": 229 }, { "epoch": 0.3624901497241923, "grad_norm": 0.241230309009552, "learning_rate": 3.921916147654287e-06, "log_odds_chosen": 0.4597613215446472, "log_odds_ratio": -0.49515679478645325, "logits/chosen": 0.24242226779460907, "logits/rejected": -0.7508520483970642, "logps/chosen": -1.6597049236297607, "logps/rejected": -2.044846773147583, "loss": 1.7986, "nll_loss": 1.7491000890731812, "rewards/accuracies": 1.0, "rewards/chosen": -0.16597048938274384, "rewards/margins": 0.03851418197154999, "rewards/rejected": -0.20448468625545502, "step": 230 }, { "epoch": 0.3640661938534279, "grad_norm": 0.2590387761592865, "learning_rate": 3.920387875294588e-06, "log_odds_chosen": 0.30678579211235046, "log_odds_ratio": -0.5545358657836914, "logits/chosen": 0.2767443060874939, "logits/rejected": -0.5900214910507202, "logps/chosen": -1.6316543817520142, "logps/rejected": -1.8850141763687134, "loss": 1.7885, "nll_loss": 1.7330236434936523, "rewards/accuracies": 1.0, "rewards/chosen": -0.16316545009613037, "rewards/margins": 0.02533598057925701, "rewards/rejected": -0.18850143253803253, "step": 231 }, { "epoch": 0.3656422379826635, "grad_norm": 0.24193865060806274, "learning_rate": 3.918845095321737e-06, "log_odds_chosen": 0.29498574137687683, "log_odds_ratio": -0.5670905113220215, "logits/chosen": 0.23279045522212982, "logits/rejected": -0.841462254524231, "logps/chosen": -1.6641700267791748, "logps/rejected": -1.9037166833877563, "loss": 1.8125, "nll_loss": 1.755805492401123, "rewards/accuracies": 0.875, "rewards/chosen": -0.16641701757907867, "rewards/margins": 0.023954641073942184, "rewards/rejected": -0.19037166237831116, "step": 232 }, { "epoch": 0.3672182821118991, "grad_norm": 0.2594136595726013, "learning_rate": 3.9172878193907004e-06, "log_odds_chosen": 0.3280632793903351, "log_odds_ratio": -0.551827609539032, "logits/chosen": 0.18091654777526855, "logits/rejected": -0.6931451559066772, "logps/chosen": -1.7367044687271118, "logps/rejected": -2.016188859939575, "loss": 1.8981, "nll_loss": 1.8429385423660278, "rewards/accuracies": 0.75, "rewards/chosen": -0.1736704558134079, "rewards/margins": 0.02794845588505268, "rewards/rejected": -0.20161890983581543, "step": 233 }, { "epoch": 0.36879432624113473, "grad_norm": 0.2600264549255371, "learning_rate": 3.915716059265955e-06, "log_odds_chosen": 0.34488314390182495, "log_odds_ratio": -0.543495774269104, "logits/chosen": 0.25614839792251587, "logits/rejected": -0.785013735294342, "logps/chosen": -1.6906204223632812, "logps/rejected": -1.9822051525115967, "loss": 1.8406, "nll_loss": 1.7862180471420288, "rewards/accuracies": 0.75, "rewards/chosen": -0.1690620481967926, "rewards/margins": 0.02915847674012184, "rewards/rejected": -0.19822052121162415, "step": 234 }, { "epoch": 0.37037037037037035, "grad_norm": 0.22871002554893494, "learning_rate": 3.9141298268213966e-06, "log_odds_chosen": 0.4361989200115204, "log_odds_ratio": -0.5028459429740906, "logits/chosen": 0.2875756621360779, "logits/rejected": -0.6794713735580444, "logps/chosen": -1.6101529598236084, "logps/rejected": -1.9754250049591064, "loss": 1.7655, "nll_loss": 1.7152522802352905, "rewards/accuracies": 1.0, "rewards/chosen": -0.16101528704166412, "rewards/margins": 0.03652720898389816, "rewards/rejected": -0.19754250347614288, "step": 235 }, { "epoch": 0.37194641449960597, "grad_norm": 0.261535108089447, "learning_rate": 3.912529134040255e-06, "log_odds_chosen": 0.296495646238327, "log_odds_ratio": -0.5591344833374023, "logits/chosen": 0.27751684188842773, "logits/rejected": -0.5663548707962036, "logps/chosen": -1.6922770738601685, "logps/rejected": -1.9417719841003418, "loss": 1.8539, "nll_loss": 1.7979625463485718, "rewards/accuracies": 1.0, "rewards/chosen": -0.1692277193069458, "rewards/margins": 0.02494947426021099, "rewards/rejected": -0.19417718052864075, "step": 236 }, { "epoch": 0.3735224586288416, "grad_norm": 0.2609320878982544, "learning_rate": 3.910913993014998e-06, "log_odds_chosen": 0.4593818187713623, "log_odds_ratio": -0.4922163188457489, "logits/chosen": 0.3688610792160034, "logits/rejected": -0.7247602939605713, "logps/chosen": -1.687050223350525, "logps/rejected": -2.0763425827026367, "loss": 1.8363, "nll_loss": 1.7870500087738037, "rewards/accuracies": 1.0, "rewards/chosen": -0.1687050312757492, "rewards/margins": 0.038929231464862823, "rewards/rejected": -0.20763425529003143, "step": 237 }, { "epoch": 0.3750985027580772, "grad_norm": 0.25336939096450806, "learning_rate": 3.909284415947246e-06, "log_odds_chosen": 0.4314580261707306, "log_odds_ratio": -0.5070245862007141, "logits/chosen": 0.32482269406318665, "logits/rejected": -0.8647500276565552, "logps/chosen": -1.73568594455719, "logps/rejected": -2.101022243499756, "loss": 1.87, "nll_loss": 1.819305419921875, "rewards/accuracies": 1.0, "rewards/chosen": -0.17356860637664795, "rewards/margins": 0.036533623933792114, "rewards/rejected": -0.21010223031044006, "step": 238 }, { "epoch": 0.37667454688731283, "grad_norm": 0.24124158918857574, "learning_rate": 3.907640415147674e-06, "log_odds_chosen": 0.4658309817314148, "log_odds_ratio": -0.49067068099975586, "logits/chosen": 0.21756987273693085, "logits/rejected": -0.9139111042022705, "logps/chosen": -1.6312497854232788, "logps/rejected": -2.020339250564575, "loss": 1.788, "nll_loss": 1.7389440536499023, "rewards/accuracies": 1.0, "rewards/chosen": -0.16312497854232788, "rewards/margins": 0.038908950984478, "rewards/rejected": -0.20203393697738647, "step": 239 }, { "epoch": 0.37825059101654845, "grad_norm": 0.2780405282974243, "learning_rate": 3.905982003035924e-06, "log_odds_chosen": 0.3844011127948761, "log_odds_ratio": -0.5241358280181885, "logits/chosen": 0.23867914080619812, "logits/rejected": -0.8461136817932129, "logps/chosen": -1.7408959865570068, "logps/rejected": -2.067878246307373, "loss": 1.8775, "nll_loss": 1.825044870376587, "rewards/accuracies": 1.0, "rewards/chosen": -0.17408961057662964, "rewards/margins": 0.03269820660352707, "rewards/rejected": -0.2067878246307373, "step": 240 }, { "epoch": 0.3798266351457841, "grad_norm": 0.24185331165790558, "learning_rate": 3.904309192140506e-06, "log_odds_chosen": 0.278605192899704, "log_odds_ratio": -0.571584165096283, "logits/chosen": 0.24360942840576172, "logits/rejected": -0.4919203519821167, "logps/chosen": -1.6554006338119507, "logps/rejected": -1.8880599737167358, "loss": 1.8086, "nll_loss": 1.7514057159423828, "rewards/accuracies": 0.875, "rewards/chosen": -0.16554008424282074, "rewards/margins": 0.023265928030014038, "rewards/rejected": -0.18880601227283478, "step": 241 }, { "epoch": 0.3814026792750197, "grad_norm": 0.26548993587493896, "learning_rate": 3.90262199509871e-06, "log_odds_chosen": 0.2826857268810272, "log_odds_ratio": -0.5648576021194458, "logits/chosen": 0.22797901928424835, "logits/rejected": -0.5838393568992615, "logps/chosen": -1.6858417987823486, "logps/rejected": -1.9206804037094116, "loss": 1.8445, "nll_loss": 1.7879817485809326, "rewards/accuracies": 1.0, "rewards/chosen": -0.1685841828584671, "rewards/margins": 0.023483866825699806, "rewards/rejected": -0.19206805527210236, "step": 242 }, { "epoch": 0.3829787234042553, "grad_norm": 0.2655971944332123, "learning_rate": 3.900920424656501e-06, "log_odds_chosen": 0.4220297932624817, "log_odds_ratio": -0.51133131980896, "logits/chosen": 0.2471286654472351, "logits/rejected": -0.7664991021156311, "logps/chosen": -1.6024655103683472, "logps/rejected": -1.953262209892273, "loss": 1.7496, "nll_loss": 1.6984376907348633, "rewards/accuracies": 0.875, "rewards/chosen": -0.16024655103683472, "rewards/margins": 0.035079680383205414, "rewards/rejected": -0.19532622396945953, "step": 243 }, { "epoch": 0.38455476753349094, "grad_norm": 0.2719763517379761, "learning_rate": 3.899204493668432e-06, "log_odds_chosen": 0.34531235694885254, "log_odds_ratio": -0.545647382736206, "logits/chosen": 0.2916204631328583, "logits/rejected": -0.6753207445144653, "logps/chosen": -1.760868787765503, "logps/rejected": -2.058903694152832, "loss": 1.8996, "nll_loss": 1.8450438976287842, "rewards/accuracies": 0.75, "rewards/chosen": -0.17608687281608582, "rewards/margins": 0.029803497716784477, "rewards/rejected": -0.20589037239551544, "step": 244 }, { "epoch": 0.38613081166272656, "grad_norm": 0.26759397983551025, "learning_rate": 3.897474215097542e-06, "log_odds_chosen": 0.34969383478164673, "log_odds_ratio": -0.5378445386886597, "logits/chosen": 0.30676940083503723, "logits/rejected": -0.9074739217758179, "logps/chosen": -1.7014522552490234, "logps/rejected": -1.9977072477340698, "loss": 1.8594, "nll_loss": 1.805631160736084, "rewards/accuracies": 1.0, "rewards/chosen": -0.1701452136039734, "rewards/margins": 0.029625503346323967, "rewards/rejected": -0.1997707188129425, "step": 245 }, { "epoch": 0.3877068557919622, "grad_norm": 0.24695803225040436, "learning_rate": 3.8957296020152596e-06, "log_odds_chosen": 0.4582657217979431, "log_odds_ratio": -0.4948864281177521, "logits/chosen": 0.3029223680496216, "logits/rejected": -0.7880758047103882, "logps/chosen": -1.6669402122497559, "logps/rejected": -2.054736614227295, "loss": 1.8203, "nll_loss": 1.7708466053009033, "rewards/accuracies": 1.0, "rewards/chosen": -0.1666940301656723, "rewards/margins": 0.03877962380647659, "rewards/rejected": -0.2054736316204071, "step": 246 }, { "epoch": 0.3892828999211978, "grad_norm": 0.2638940215110779, "learning_rate": 3.893970667601303e-06, "log_odds_chosen": 0.3049715757369995, "log_odds_ratio": -0.5559093952178955, "logits/chosen": 0.20208218693733215, "logits/rejected": -1.0700570344924927, "logps/chosen": -1.7513179779052734, "logps/rejected": -2.0092613697052, "loss": 1.8901, "nll_loss": 1.83454430103302, "rewards/accuracies": 1.0, "rewards/chosen": -0.17513179779052734, "rewards/margins": 0.02579433098435402, "rewards/rejected": -0.20092612504959106, "step": 247 }, { "epoch": 0.3908589440504334, "grad_norm": 0.260355681180954, "learning_rate": 3.892197425143581e-06, "log_odds_chosen": 0.4682731330394745, "log_odds_ratio": -0.48940151929855347, "logits/chosen": 0.255657821893692, "logits/rejected": -0.671928882598877, "logps/chosen": -1.6768563985824585, "logps/rejected": -2.071852922439575, "loss": 1.8127, "nll_loss": 1.763710618019104, "rewards/accuracies": 1.0, "rewards/chosen": -0.16768565773963928, "rewards/margins": 0.03949964791536331, "rewards/rejected": -0.2071852833032608, "step": 248 }, { "epoch": 0.39243498817966904, "grad_norm": 0.2769114673137665, "learning_rate": 3.890409888038094e-06, "log_odds_chosen": 0.35800671577453613, "log_odds_ratio": -0.5353755354881287, "logits/chosen": 0.27717655897140503, "logits/rejected": -0.6495774984359741, "logps/chosen": -1.718163013458252, "logps/rejected": -2.023691415786743, "loss": 1.8664, "nll_loss": 1.812865138053894, "rewards/accuracies": 1.0, "rewards/chosen": -0.17181627452373505, "rewards/margins": 0.030552847310900688, "rewards/rejected": -0.20236913859844208, "step": 249 }, { "epoch": 0.39401103230890466, "grad_norm": 0.2523289620876312, "learning_rate": 3.888608069788831e-06, "log_odds_chosen": 0.3482479751110077, "log_odds_ratio": -0.5456973314285278, "logits/chosen": 0.18573682010173798, "logits/rejected": -0.5659343600273132, "logps/chosen": -1.672256350517273, "logps/rejected": -1.9685872793197632, "loss": 1.8214, "nll_loss": 1.7668198347091675, "rewards/accuracies": 0.875, "rewards/chosen": -0.16722562909126282, "rewards/margins": 0.029633095487952232, "rewards/rejected": -0.1968587189912796, "step": 250 }, { "epoch": 0.3955870764381403, "grad_norm": 0.23957766592502594, "learning_rate": 3.8867919840076685e-06, "log_odds_chosen": 0.4184243083000183, "log_odds_ratio": -0.5095526576042175, "logits/chosen": 0.22751504182815552, "logits/rejected": -0.8524928092956543, "logps/chosen": -1.6037871837615967, "logps/rejected": -1.9516103267669678, "loss": 1.7432, "nll_loss": 1.692209005355835, "rewards/accuracies": 1.0, "rewards/chosen": -0.16037872433662415, "rewards/margins": 0.03478231281042099, "rewards/rejected": -0.19516104459762573, "step": 251 }, { "epoch": 0.3971631205673759, "grad_norm": 0.2576284110546112, "learning_rate": 3.884961644414267e-06, "log_odds_chosen": 0.34130245447158813, "log_odds_ratio": -0.5387775897979736, "logits/chosen": 0.24311238527297974, "logits/rejected": -0.3957245647907257, "logps/chosen": -1.7268153429031372, "logps/rejected": -2.0152997970581055, "loss": 1.848, "nll_loss": 1.794105887413025, "rewards/accuracies": 1.0, "rewards/chosen": -0.1726815402507782, "rewards/margins": 0.028848443180322647, "rewards/rejected": -0.20152997970581055, "step": 252 }, { "epoch": 0.3987391646966115, "grad_norm": 0.24223408102989197, "learning_rate": 3.883117064835967e-06, "log_odds_chosen": 0.3959362506866455, "log_odds_ratio": -0.5255994200706482, "logits/chosen": 0.1985481083393097, "logits/rejected": -0.7414513230323792, "logps/chosen": -1.6674610376358032, "logps/rejected": -2.0037710666656494, "loss": 1.8186, "nll_loss": 1.7660025358200073, "rewards/accuracies": 0.875, "rewards/chosen": -0.1667460948228836, "rewards/margins": 0.033631011843681335, "rewards/rejected": -0.20037710666656494, "step": 253 }, { "epoch": 0.40031520882584715, "grad_norm": 0.26332396268844604, "learning_rate": 3.881258259207688e-06, "log_odds_chosen": 0.3393678367137909, "log_odds_ratio": -0.5432149171829224, "logits/chosen": 0.30877983570098877, "logits/rejected": -0.7882847785949707, "logps/chosen": -1.6376413106918335, "logps/rejected": -1.919863224029541, "loss": 1.7661, "nll_loss": 1.7118016481399536, "rewards/accuracies": 0.875, "rewards/chosen": -0.1637641191482544, "rewards/margins": 0.028222184628248215, "rewards/rejected": -0.1919863075017929, "step": 254 }, { "epoch": 0.40189125295508277, "grad_norm": 0.25575923919677734, "learning_rate": 3.8793852415718165e-06, "log_odds_chosen": 0.4229394495487213, "log_odds_ratio": -0.5054378509521484, "logits/chosen": 0.21786652505397797, "logits/rejected": -1.0462470054626465, "logps/chosen": -1.561218023300171, "logps/rejected": -1.9078154563903809, "loss": 1.694, "nll_loss": 1.6435015201568604, "rewards/accuracies": 1.0, "rewards/chosen": -0.15612182021141052, "rewards/margins": 0.0346597358584404, "rewards/rejected": -0.19078153371810913, "step": 255 }, { "epoch": 0.4034672970843184, "grad_norm": 0.24122343957424164, "learning_rate": 3.877498026078107e-06, "log_odds_chosen": 0.2585289180278778, "log_odds_ratio": -0.5783178210258484, "logits/chosen": 0.22445496916770935, "logits/rejected": -0.9274032711982727, "logps/chosen": -1.7019916772842407, "logps/rejected": -1.9216302633285522, "loss": 1.8365, "nll_loss": 1.7786757946014404, "rewards/accuracies": 0.75, "rewards/chosen": -0.1701991707086563, "rewards/margins": 0.02196386270225048, "rewards/rejected": -0.19216305017471313, "step": 256 }, { "epoch": 0.40504334121355395, "grad_norm": 0.25378501415252686, "learning_rate": 3.875596626983573e-06, "log_odds_chosen": 0.48093894124031067, "log_odds_ratio": -0.4933563768863678, "logits/chosen": 0.24534080922603607, "logits/rejected": -0.61388099193573, "logps/chosen": -1.6251795291900635, "logps/rejected": -2.02630352973938, "loss": 1.7574, "nll_loss": 1.708074688911438, "rewards/accuracies": 1.0, "rewards/chosen": -0.1625179499387741, "rewards/margins": 0.040112413465976715, "rewards/rejected": -0.20263037085533142, "step": 257 }, { "epoch": 0.4066193853427896, "grad_norm": 0.26203182339668274, "learning_rate": 3.873681058652374e-06, "log_odds_chosen": 0.27626505494117737, "log_odds_ratio": -0.5716174244880676, "logits/chosen": 0.2242291271686554, "logits/rejected": -0.705990731716156, "logps/chosen": -1.742016077041626, "logps/rejected": -1.976877212524414, "loss": 1.8763, "nll_loss": 1.819146990776062, "rewards/accuracies": 1.0, "rewards/chosen": -0.1742016226053238, "rewards/margins": 0.023486101999878883, "rewards/rejected": -0.19768773019313812, "step": 258 }, { "epoch": 0.4081954294720252, "grad_norm": 0.26595333218574524, "learning_rate": 3.871751335555715e-06, "log_odds_chosen": 0.26442596316337585, "log_odds_ratio": -0.5714837312698364, "logits/chosen": 0.20778290927410126, "logits/rejected": -0.8641613125801086, "logps/chosen": -1.6800938844680786, "logps/rejected": -1.9005059003829956, "loss": 1.809, "nll_loss": 1.7518802881240845, "rewards/accuracies": 1.0, "rewards/chosen": -0.16800937056541443, "rewards/margins": 0.0220412015914917, "rewards/rejected": -0.19005057215690613, "step": 259 }, { "epoch": 0.4097714736012608, "grad_norm": 0.23981213569641113, "learning_rate": 3.869807472271731e-06, "log_odds_chosen": 0.3423271179199219, "log_odds_ratio": -0.5396283864974976, "logits/chosen": 0.23563726246356964, "logits/rejected": -0.7463378310203552, "logps/chosen": -1.6491752862930298, "logps/rejected": -1.9362696409225464, "loss": 1.8109, "nll_loss": 1.7569705247879028, "rewards/accuracies": 1.0, "rewards/chosen": -0.16491752862930298, "rewards/margins": 0.028709445148706436, "rewards/rejected": -0.19362697005271912, "step": 260 }, { "epoch": 0.41134751773049644, "grad_norm": 0.24014748632907867, "learning_rate": 3.8678494834853826e-06, "log_odds_chosen": 0.34554576873779297, "log_odds_ratio": -0.5426000356674194, "logits/chosen": 0.20350177586078644, "logits/rejected": -0.6877405643463135, "logps/chosen": -1.6484317779541016, "logps/rejected": -1.934998631477356, "loss": 1.7826, "nll_loss": 1.728297472000122, "rewards/accuracies": 0.875, "rewards/chosen": -0.16484320163726807, "rewards/margins": 0.028656674548983574, "rewards/rejected": -0.1934998780488968, "step": 261 }, { "epoch": 0.41292356185973206, "grad_norm": 0.27845731377601624, "learning_rate": 3.865877383988339e-06, "log_odds_chosen": 0.41938862204551697, "log_odds_ratio": -0.5120880603790283, "logits/chosen": 0.20454849302768707, "logits/rejected": -0.6759002208709717, "logps/chosen": -1.6775566339492798, "logps/rejected": -2.0273048877716064, "loss": 1.8329, "nll_loss": 1.7817234992980957, "rewards/accuracies": 1.0, "rewards/chosen": -0.16775566339492798, "rewards/margins": 0.0349748432636261, "rewards/rejected": -0.20273049175739288, "step": 262 }, { "epoch": 0.4144996059889677, "grad_norm": 0.2552221417427063, "learning_rate": 3.863891188678869e-06, "log_odds_chosen": 0.16435928642749786, "log_odds_ratio": -0.6247628927230835, "logits/chosen": 0.17906969785690308, "logits/rejected": -0.8283835649490356, "logps/chosen": -1.5928330421447754, "logps/rejected": -1.7304943799972534, "loss": 1.7381, "nll_loss": 1.6756339073181152, "rewards/accuracies": 0.75, "rewards/chosen": -0.1592833399772644, "rewards/margins": 0.013766113668680191, "rewards/rejected": -0.1730494201183319, "step": 263 }, { "epoch": 0.4160756501182033, "grad_norm": 0.24987082183361053, "learning_rate": 3.8618909125617305e-06, "log_odds_chosen": 0.5285029411315918, "log_odds_ratio": -0.4670139253139496, "logits/chosen": 0.16765527427196503, "logits/rejected": -1.05913507938385, "logps/chosen": -1.5894949436187744, "logps/rejected": -2.031071662902832, "loss": 1.7407, "nll_loss": 1.693982720375061, "rewards/accuracies": 1.0, "rewards/chosen": -0.15894947946071625, "rewards/margins": 0.04415770620107651, "rewards/rejected": -0.20310717821121216, "step": 264 }, { "epoch": 0.4176516942474389, "grad_norm": 0.24382364749908447, "learning_rate": 3.859876570748054e-06, "log_odds_chosen": 0.3537558913230896, "log_odds_ratio": -0.5339797735214233, "logits/chosen": 0.2316458523273468, "logits/rejected": -0.3801443874835968, "logps/chosen": -1.6417627334594727, "logps/rejected": -1.9363051652908325, "loss": 1.792, "nll_loss": 1.7385859489440918, "rewards/accuracies": 1.0, "rewards/chosen": -0.16417627036571503, "rewards/margins": 0.02945425920188427, "rewards/rejected": -0.19363053143024445, "step": 265 }, { "epoch": 0.41922773837667454, "grad_norm": 0.23967225849628448, "learning_rate": 3.857848178455231e-06, "log_odds_chosen": 0.11959446966648102, "log_odds_ratio": -0.6412428617477417, "logits/chosen": 0.2559909522533417, "logits/rejected": -1.0443730354309082, "logps/chosen": -1.656022071838379, "logps/rejected": -1.7599290609359741, "loss": 1.8213, "nll_loss": 1.7571834325790405, "rewards/accuracies": 0.75, "rewards/chosen": -0.16560222208499908, "rewards/margins": 0.010390684939920902, "rewards/rejected": -0.1759929060935974, "step": 266 }, { "epoch": 0.42080378250591016, "grad_norm": 0.27286192774772644, "learning_rate": 3.855805751006794e-06, "log_odds_chosen": 0.4806629419326782, "log_odds_ratio": -0.49105098843574524, "logits/chosen": 0.2224351465702057, "logits/rejected": -1.0106374025344849, "logps/chosen": -1.6792700290679932, "logps/rejected": -2.0848042964935303, "loss": 1.8049, "nll_loss": 1.7558059692382812, "rewards/accuracies": 1.0, "rewards/chosen": -0.16792699694633484, "rewards/margins": 0.04055342823266983, "rewards/rejected": -0.20848044753074646, "step": 267 }, { "epoch": 0.4223798266351458, "grad_norm": 0.22964340448379517, "learning_rate": 3.853749303832308e-06, "log_odds_chosen": 0.34756800532341003, "log_odds_ratio": -0.5398483872413635, "logits/chosen": 0.19423283636569977, "logits/rejected": -0.5895228981971741, "logps/chosen": -1.6056160926818848, "logps/rejected": -1.892835021018982, "loss": 1.7439, "nll_loss": 1.6899384260177612, "rewards/accuracies": 1.0, "rewards/chosen": -0.16056160628795624, "rewards/margins": 0.02872188575565815, "rewards/rejected": -0.18928351998329163, "step": 268 }, { "epoch": 0.4239558707643814, "grad_norm": 0.2715766429901123, "learning_rate": 3.8516788524672495e-06, "log_odds_chosen": 0.2077798843383789, "log_odds_ratio": -0.6008598804473877, "logits/chosen": 0.22610792517662048, "logits/rejected": -0.9308719038963318, "logps/chosen": -1.6854466199874878, "logps/rejected": -1.8608953952789307, "loss": 1.8208, "nll_loss": 1.7607052326202393, "rewards/accuracies": 0.625, "rewards/chosen": -0.16854466497898102, "rewards/margins": 0.017544886097311974, "rewards/rejected": -0.18608956038951874, "step": 269 }, { "epoch": 0.425531914893617, "grad_norm": 0.23874664306640625, "learning_rate": 3.849594412552889e-06, "log_odds_chosen": 0.3525705933570862, "log_odds_ratio": -0.5381215810775757, "logits/chosen": 0.24739238619804382, "logits/rejected": -0.6722042560577393, "logps/chosen": -1.587384581565857, "logps/rejected": -1.871896505355835, "loss": 1.727, "nll_loss": 1.673226237297058, "rewards/accuracies": 1.0, "rewards/chosen": -0.15873846411705017, "rewards/margins": 0.028451191261410713, "rewards/rejected": -0.18718963861465454, "step": 270 }, { "epoch": 0.42710795902285265, "grad_norm": 0.2558384835720062, "learning_rate": 3.847495999836175e-06, "log_odds_chosen": 0.3543975353240967, "log_odds_ratio": -0.5356498956680298, "logits/chosen": 0.2624046504497528, "logits/rejected": -0.9314246773719788, "logps/chosen": -1.666176438331604, "logps/rejected": -1.9635179042816162, "loss": 1.7931, "nll_loss": 1.7394909858703613, "rewards/accuracies": 1.0, "rewards/chosen": -0.16661766171455383, "rewards/margins": 0.029734138399362564, "rewards/rejected": -0.1963518112897873, "step": 271 }, { "epoch": 0.42868400315208827, "grad_norm": 0.23578688502311707, "learning_rate": 3.845383630169613e-06, "log_odds_chosen": 0.3167238235473633, "log_odds_ratio": -0.5535402894020081, "logits/chosen": 0.11884280294179916, "logits/rejected": -0.9415445923805237, "logps/chosen": -1.5773344039916992, "logps/rejected": -1.837627649307251, "loss": 1.7049, "nll_loss": 1.6495603322982788, "rewards/accuracies": 0.875, "rewards/chosen": -0.15773345530033112, "rewards/margins": 0.026029333472251892, "rewards/rejected": -0.18376277387142181, "step": 272 }, { "epoch": 0.4302600472813239, "grad_norm": 0.2651219367980957, "learning_rate": 3.843257319511147e-06, "log_odds_chosen": 0.5530175566673279, "log_odds_ratio": -0.46196067333221436, "logits/chosen": 0.17332234978675842, "logits/rejected": -1.021928310394287, "logps/chosen": -1.5769469738006592, "logps/rejected": -2.0343515872955322, "loss": 1.7041, "nll_loss": 1.6578803062438965, "rewards/accuracies": 1.0, "rewards/chosen": -0.15769469738006592, "rewards/margins": 0.04574044048786163, "rewards/rejected": -0.20343513786792755, "step": 273 }, { "epoch": 0.4318360914105595, "grad_norm": 0.24971356987953186, "learning_rate": 3.841117083924039e-06, "log_odds_chosen": 0.32874542474746704, "log_odds_ratio": -0.5475614666938782, "logits/chosen": 0.16287937760353088, "logits/rejected": -1.0936428308486938, "logps/chosen": -1.6155405044555664, "logps/rejected": -1.888677716255188, "loss": 1.7473, "nll_loss": 1.692505121231079, "rewards/accuracies": 1.0, "rewards/chosen": -0.16155406832695007, "rewards/margins": 0.0273137167096138, "rewards/rejected": -0.18886777758598328, "step": 274 }, { "epoch": 0.43341213553979513, "grad_norm": 0.2504657506942749, "learning_rate": 3.838962939576746e-06, "log_odds_chosen": 0.4173721373081207, "log_odds_ratio": -0.51390141248703, "logits/chosen": 0.13147510588169098, "logits/rejected": -0.8347434401512146, "logps/chosen": -1.5679432153701782, "logps/rejected": -1.9134660959243774, "loss": 1.72, "nll_loss": 1.668624758720398, "rewards/accuracies": 1.0, "rewards/chosen": -0.15679430961608887, "rewards/margins": 0.03455227613449097, "rewards/rejected": -0.19134658575057983, "step": 275 }, { "epoch": 0.43498817966903075, "grad_norm": 0.2594338357448578, "learning_rate": 3.8367949027427985e-06, "log_odds_chosen": 0.3050197958946228, "log_odds_ratio": -0.5566756129264832, "logits/chosen": 0.19317705929279327, "logits/rejected": -0.7772097587585449, "logps/chosen": -1.683677077293396, "logps/rejected": -1.9412704706192017, "loss": 1.8395, "nll_loss": 1.7838023900985718, "rewards/accuracies": 1.0, "rewards/chosen": -0.16836771368980408, "rewards/margins": 0.02575933374464512, "rewards/rejected": -0.19412705302238464, "step": 276 }, { "epoch": 0.43656422379826637, "grad_norm": 0.26758840680122375, "learning_rate": 3.834612989800681e-06, "log_odds_chosen": 0.5289045572280884, "log_odds_ratio": -0.46756529808044434, "logits/chosen": 0.21106746792793274, "logits/rejected": -1.300022006034851, "logps/chosen": -1.6692185401916504, "logps/rejected": -2.1159589290618896, "loss": 1.7865, "nll_loss": 1.7397515773773193, "rewards/accuracies": 1.0, "rewards/chosen": -0.16692185401916504, "rewards/margins": 0.04467405378818512, "rewards/rejected": -0.21159592270851135, "step": 277 }, { "epoch": 0.438140267927502, "grad_norm": 0.37664347887039185, "learning_rate": 3.832417217233703e-06, "log_odds_chosen": 0.3042469024658203, "log_odds_ratio": -0.555115282535553, "logits/chosen": 0.17117249965667725, "logits/rejected": -0.787204921245575, "logps/chosen": -1.6579128503799438, "logps/rejected": -1.9126213788986206, "loss": 1.7924, "nll_loss": 1.7368648052215576, "rewards/accuracies": 1.0, "rewards/chosen": -0.16579128801822662, "rewards/margins": 0.02547084540128708, "rewards/rejected": -0.1912621259689331, "step": 278 }, { "epoch": 0.4397163120567376, "grad_norm": 0.2642952501773834, "learning_rate": 3.8302076016298775e-06, "log_odds_chosen": 0.4446471929550171, "log_odds_ratio": -0.49887239933013916, "logits/chosen": 0.11896737664937973, "logits/rejected": -1.0080976486206055, "logps/chosen": -1.5889983177185059, "logps/rejected": -1.9574190378189087, "loss": 1.7293, "nll_loss": 1.6794246435165405, "rewards/accuracies": 1.0, "rewards/chosen": -0.15889984369277954, "rewards/margins": 0.036842066794633865, "rewards/rejected": -0.1957419216632843, "step": 279 }, { "epoch": 0.44129235618597323, "grad_norm": 0.2610625922679901, "learning_rate": 3.827984159681796e-06, "log_odds_chosen": 0.34331244230270386, "log_odds_ratio": -0.541645884513855, "logits/chosen": 0.10684026777744293, "logits/rejected": -0.8201386332511902, "logps/chosen": -1.6439259052276611, "logps/rejected": -1.9289183616638184, "loss": 1.7716, "nll_loss": 1.7174153327941895, "rewards/accuracies": 0.875, "rewards/chosen": -0.1643926203250885, "rewards/margins": 0.028499236330389977, "rewards/rejected": -0.19289185106754303, "step": 280 }, { "epoch": 0.4428684003152088, "grad_norm": 0.24843472242355347, "learning_rate": 3.825746908186498e-06, "log_odds_chosen": 0.3994872272014618, "log_odds_ratio": -0.5212578773498535, "logits/chosen": 0.2376633733510971, "logits/rejected": -1.0532779693603516, "logps/chosen": -1.6814634799957275, "logps/rejected": -2.018228769302368, "loss": 1.7922, "nll_loss": 1.740039348602295, "rewards/accuracies": 1.0, "rewards/chosen": -0.16814635694026947, "rewards/margins": 0.03367652744054794, "rewards/rejected": -0.201822891831398, "step": 281 }, { "epoch": 0.4444444444444444, "grad_norm": 0.2748637795448303, "learning_rate": 3.823495864045352e-06, "log_odds_chosen": 0.4453040361404419, "log_odds_ratio": -0.4974423050880432, "logits/chosen": 0.20578338205814362, "logits/rejected": -0.5144822001457214, "logps/chosen": -1.6322267055511475, "logps/rejected": -2.0018556118011475, "loss": 1.7625, "nll_loss": 1.7127240896224976, "rewards/accuracies": 1.0, "rewards/chosen": -0.16322267055511475, "rewards/margins": 0.036962881684303284, "rewards/rejected": -0.20018555223941803, "step": 282 }, { "epoch": 0.44602048857368004, "grad_norm": 0.2273551970720291, "learning_rate": 3.8212310442639205e-06, "log_odds_chosen": 0.33169132471084595, "log_odds_ratio": -0.5450347065925598, "logits/chosen": 0.2068972885608673, "logits/rejected": -0.695864200592041, "logps/chosen": -1.5796043872833252, "logps/rejected": -1.8531723022460938, "loss": 1.7234, "nll_loss": 1.6689225435256958, "rewards/accuracies": 0.875, "rewards/chosen": -0.1579604595899582, "rewards/margins": 0.027356795966625214, "rewards/rejected": -0.1853172332048416, "step": 283 }, { "epoch": 0.44759653270291566, "grad_norm": 0.24065622687339783, "learning_rate": 3.8189524659518355e-06, "log_odds_chosen": 0.40192341804504395, "log_odds_ratio": -0.5161522626876831, "logits/chosen": 0.2697753310203552, "logits/rejected": -0.8584244847297668, "logps/chosen": -1.6505751609802246, "logps/rejected": -1.9848883152008057, "loss": 1.7755, "nll_loss": 1.7239261865615845, "rewards/accuracies": 0.875, "rewards/chosen": -0.16505752503871918, "rewards/margins": 0.03343129903078079, "rewards/rejected": -0.19848881661891937, "step": 284 }, { "epoch": 0.4491725768321513, "grad_norm": 0.244610995054245, "learning_rate": 3.816660146322667e-06, "log_odds_chosen": 0.36820292472839355, "log_odds_ratio": -0.5295414328575134, "logits/chosen": 0.1906885802745819, "logits/rejected": -0.700128436088562, "logps/chosen": -1.5470272302627563, "logps/rejected": -1.847809076309204, "loss": 1.6794, "nll_loss": 1.626416802406311, "rewards/accuracies": 1.0, "rewards/chosen": -0.15470272302627563, "rewards/margins": 0.03007819503545761, "rewards/rejected": -0.18478091061115265, "step": 285 }, { "epoch": 0.4507486209613869, "grad_norm": 0.23016570508480072, "learning_rate": 3.814354102693797e-06, "log_odds_chosen": 0.3836941719055176, "log_odds_ratio": -0.5216916799545288, "logits/chosen": 0.21484431624412537, "logits/rejected": -0.9419076442718506, "logps/chosen": -1.6132217645645142, "logps/rejected": -1.9288195371627808, "loss": 1.7432, "nll_loss": 1.6910346746444702, "rewards/accuracies": 1.0, "rewards/chosen": -0.16132217645645142, "rewards/margins": 0.0315597802400589, "rewards/rejected": -0.19288195669651031, "step": 286 }, { "epoch": 0.4523246650906225, "grad_norm": 0.23653464019298553, "learning_rate": 3.8120343524862814e-06, "log_odds_chosen": 0.3473433256149292, "log_odds_ratio": -0.535927951335907, "logits/chosen": 0.18108825385570526, "logits/rejected": -0.833530843257904, "logps/chosen": -1.6525256633758545, "logps/rejected": -1.9398300647735596, "loss": 1.7645, "nll_loss": 1.710868239402771, "rewards/accuracies": 1.0, "rewards/chosen": -0.16525256633758545, "rewards/margins": 0.028730444610118866, "rewards/rejected": -0.1939830183982849, "step": 287 }, { "epoch": 0.45390070921985815, "grad_norm": 0.23629866540431976, "learning_rate": 3.809700913224726e-06, "log_odds_chosen": 0.36637431383132935, "log_odds_ratio": -0.5327513217926025, "logits/chosen": 0.15463852882385254, "logits/rejected": -1.287902593612671, "logps/chosen": -1.5792927742004395, "logps/rejected": -1.8798011541366577, "loss": 1.7056, "nll_loss": 1.6522890329360962, "rewards/accuracies": 1.0, "rewards/chosen": -0.15792928636074066, "rewards/margins": 0.03005082532763481, "rewards/rejected": -0.18798011541366577, "step": 288 }, { "epoch": 0.45547675334909377, "grad_norm": 0.23628529906272888, "learning_rate": 3.8073538025371494e-06, "log_odds_chosen": 0.4771001935005188, "log_odds_ratio": -0.48583680391311646, "logits/chosen": 0.2631601393222809, "logits/rejected": -1.0782270431518555, "logps/chosen": -1.551235318183899, "logps/rejected": -1.9436652660369873, "loss": 1.6886, "nll_loss": 1.6399903297424316, "rewards/accuracies": 1.0, "rewards/chosen": -0.1551235318183899, "rewards/margins": 0.03924299776554108, "rewards/rejected": -0.19436652958393097, "step": 289 }, { "epoch": 0.4570527974783294, "grad_norm": 0.2573354244232178, "learning_rate": 3.804993038154852e-06, "log_odds_chosen": 0.3571556806564331, "log_odds_ratio": -0.5380254983901978, "logits/chosen": 0.18856900930404663, "logits/rejected": -0.9549089670181274, "logps/chosen": -1.5852500200271606, "logps/rejected": -1.88179349899292, "loss": 1.7309, "nll_loss": 1.6771280765533447, "rewards/accuracies": 0.875, "rewards/chosen": -0.1585249900817871, "rewards/margins": 0.029654357582330704, "rewards/rejected": -0.1881793588399887, "step": 290 }, { "epoch": 0.458628841607565, "grad_norm": 0.2339908331632614, "learning_rate": 3.8026186379122816e-06, "log_odds_chosen": 0.2612074613571167, "log_odds_ratio": -0.5736123323440552, "logits/chosen": 0.1575014591217041, "logits/rejected": -0.9461207985877991, "logps/chosen": -1.6632664203643799, "logps/rejected": -1.8798828125, "loss": 1.7845, "nll_loss": 1.72710120677948, "rewards/accuracies": 1.0, "rewards/chosen": -0.16632665693759918, "rewards/margins": 0.02166163921356201, "rewards/rejected": -0.18798828125, "step": 291 }, { "epoch": 0.46020488573680063, "grad_norm": 0.232927143573761, "learning_rate": 3.8002306197468983e-06, "log_odds_chosen": 0.16624774038791656, "log_odds_ratio": -0.6159506440162659, "logits/chosen": 0.15579693019390106, "logits/rejected": -1.0309805870056152, "logps/chosen": -1.6420388221740723, "logps/rejected": -1.777790904045105, "loss": 1.7816, "nll_loss": 1.7199894189834595, "rewards/accuracies": 0.75, "rewards/chosen": -0.16420388221740723, "rewards/margins": 0.013575192540884018, "rewards/rejected": -0.17777907848358154, "step": 292 }, { "epoch": 0.46178092986603625, "grad_norm": 0.26111435890197754, "learning_rate": 3.7978290016990367e-06, "log_odds_chosen": 0.4844040274620056, "log_odds_ratio": -0.4840275049209595, "logits/chosen": 0.12877169251441956, "logits/rejected": -1.0049679279327393, "logps/chosen": -1.6433277130126953, "logps/rejected": -2.0521345138549805, "loss": 1.7643, "nll_loss": 1.7158782482147217, "rewards/accuracies": 1.0, "rewards/chosen": -0.16433276236057281, "rewards/margins": 0.040880680084228516, "rewards/rejected": -0.20521345734596252, "step": 293 }, { "epoch": 0.46335697399527187, "grad_norm": 0.2526698112487793, "learning_rate": 3.795413801911776e-06, "log_odds_chosen": 0.5316009521484375, "log_odds_ratio": -0.46823519468307495, "logits/chosen": 0.1174880787730217, "logits/rejected": -1.0741727352142334, "logps/chosen": -1.5681180953979492, "logps/rejected": -2.0108184814453125, "loss": 1.7117, "nll_loss": 1.6648805141448975, "rewards/accuracies": 1.0, "rewards/chosen": -0.15681181848049164, "rewards/margins": 0.04427003860473633, "rewards/rejected": -0.20108187198638916, "step": 294 }, { "epoch": 0.4649330181245075, "grad_norm": 0.26307472586631775, "learning_rate": 3.7929850386307965e-06, "log_odds_chosen": 0.32403603196144104, "log_odds_ratio": -0.549573540687561, "logits/chosen": 0.2209596186876297, "logits/rejected": -0.8648887872695923, "logps/chosen": -1.775384783744812, "logps/rejected": -2.0520851612091064, "loss": 1.8883, "nll_loss": 1.8333498239517212, "rewards/accuracies": 0.875, "rewards/chosen": -0.17753848433494568, "rewards/margins": 0.02767005003988743, "rewards/rejected": -0.20520853996276855, "step": 295 }, { "epoch": 0.4665090622537431, "grad_norm": 0.24105559289455414, "learning_rate": 3.790542730204245e-06, "log_odds_chosen": 0.391379177570343, "log_odds_ratio": -0.5190234184265137, "logits/chosen": 0.17468759417533875, "logits/rejected": -1.050643801689148, "logps/chosen": -1.705775260925293, "logps/rejected": -2.037628412246704, "loss": 1.8141, "nll_loss": 1.7621614933013916, "rewards/accuracies": 1.0, "rewards/chosen": -0.1705775409936905, "rewards/margins": 0.033185333013534546, "rewards/rejected": -0.20376285910606384, "step": 296 }, { "epoch": 0.46808510638297873, "grad_norm": 0.2376202493906021, "learning_rate": 3.7880868950825935e-06, "log_odds_chosen": 0.40684062242507935, "log_odds_ratio": -0.510847806930542, "logits/chosen": 0.13849994540214539, "logits/rejected": -1.200305700302124, "logps/chosen": -1.6610805988311768, "logps/rejected": -2.001574754714966, "loss": 1.7604, "nll_loss": 1.7093130350112915, "rewards/accuracies": 1.0, "rewards/chosen": -0.16610805690288544, "rewards/margins": 0.034049421548843384, "rewards/rejected": -0.20015747845172882, "step": 297 }, { "epoch": 0.46966115051221435, "grad_norm": 0.2616525888442993, "learning_rate": 3.7856175518185058e-06, "log_odds_chosen": 0.3999539613723755, "log_odds_ratio": -0.5190368294715881, "logits/chosen": 0.015357280150055885, "logits/rejected": -1.1991338729858398, "logps/chosen": -1.6416277885437012, "logps/rejected": -1.9745447635650635, "loss": 1.7682, "nll_loss": 1.7162597179412842, "rewards/accuracies": 1.0, "rewards/chosen": -0.1641627848148346, "rewards/margins": 0.033291686326265335, "rewards/rejected": -0.19745448231697083, "step": 298 }, { "epoch": 0.47123719464145, "grad_norm": 0.23752158880233765, "learning_rate": 3.7831347190666883e-06, "log_odds_chosen": 0.4781672954559326, "log_odds_ratio": -0.488000750541687, "logits/chosen": 0.08399657905101776, "logits/rejected": -1.1212432384490967, "logps/chosen": -1.6346526145935059, "logps/rejected": -2.035963773727417, "loss": 1.7627, "nll_loss": 1.713881015777588, "rewards/accuracies": 0.875, "rewards/chosen": -0.16346527636051178, "rewards/margins": 0.0401310995221138, "rewards/rejected": -0.20359636843204498, "step": 299 }, { "epoch": 0.4728132387706856, "grad_norm": 0.25213325023651123, "learning_rate": 3.780638415583759e-06, "log_odds_chosen": 0.2852388620376587, "log_odds_ratio": -0.5705251097679138, "logits/chosen": 0.21283775568008423, "logits/rejected": -0.935249924659729, "logps/chosen": -1.6187589168548584, "logps/rejected": -1.8579552173614502, "loss": 1.7571, "nll_loss": 1.7000482082366943, "rewards/accuracies": 0.75, "rewards/chosen": -0.1618758887052536, "rewards/margins": 0.023919638246297836, "rewards/rejected": -0.18579553067684174, "step": 300 }, { "epoch": 0.4743892828999212, "grad_norm": 0.24251310527324677, "learning_rate": 3.7781286602280967e-06, "log_odds_chosen": 0.17071868479251862, "log_odds_ratio": -0.6216680407524109, "logits/chosen": 0.14868459105491638, "logits/rejected": -1.1415449380874634, "logps/chosen": -1.5997627973556519, "logps/rejected": -1.7335293292999268, "loss": 1.7454, "nll_loss": 1.6831833124160767, "rewards/accuracies": 0.75, "rewards/chosen": -0.1599762886762619, "rewards/margins": 0.013376658782362938, "rewards/rejected": -0.1733529418706894, "step": 301 }, { "epoch": 0.47596532702915684, "grad_norm": 0.22967545688152313, "learning_rate": 3.7756054719597044e-06, "log_odds_chosen": 0.26898688077926636, "log_odds_ratio": -0.5931567549705505, "logits/chosen": 0.033872295171022415, "logits/rejected": -1.1282514333724976, "logps/chosen": -1.6345136165618896, "logps/rejected": -1.8664180040359497, "loss": 1.7516, "nll_loss": 1.6922358274459839, "rewards/accuracies": 0.75, "rewards/chosen": -0.16345134377479553, "rewards/margins": 0.023190462961792946, "rewards/rejected": -0.18664182722568512, "step": 302 }, { "epoch": 0.47754137115839246, "grad_norm": 0.2190561145544052, "learning_rate": 3.773068869840066e-06, "log_odds_chosen": 0.19828103482723236, "log_odds_ratio": -0.6074354648590088, "logits/chosen": 0.2555277347564697, "logits/rejected": -1.1666932106018066, "logps/chosen": -1.6585826873779297, "logps/rejected": -1.8199939727783203, "loss": 1.7779, "nll_loss": 1.7171512842178345, "rewards/accuracies": 0.75, "rewards/chosen": -0.16585825383663177, "rewards/margins": 0.016141142696142197, "rewards/rejected": -0.18199938535690308, "step": 303 }, { "epoch": 0.4791174152876281, "grad_norm": 0.21842867136001587, "learning_rate": 3.770518873031997e-06, "log_odds_chosen": 0.44869300723075867, "log_odds_ratio": -0.4964669644832611, "logits/chosen": 0.03254036605358124, "logits/rejected": -0.8346494436264038, "logps/chosen": -1.5328748226165771, "logps/rejected": -1.8997446298599243, "loss": 1.674, "nll_loss": 1.6243867874145508, "rewards/accuracies": 1.0, "rewards/chosen": -0.15328750014305115, "rewards/margins": 0.03668695688247681, "rewards/rejected": -0.18997445702552795, "step": 304 }, { "epoch": 0.4806934594168637, "grad_norm": 0.253165602684021, "learning_rate": 3.7679555007995065e-06, "log_odds_chosen": 0.41193148493766785, "log_odds_ratio": -0.5260132551193237, "logits/chosen": 0.14702853560447693, "logits/rejected": -1.0447622537612915, "logps/chosen": -1.6558337211608887, "logps/rejected": -2.0012173652648926, "loss": 1.7843, "nll_loss": 1.731735110282898, "rewards/accuracies": 0.875, "rewards/chosen": -0.16558335721492767, "rewards/margins": 0.0345383882522583, "rewards/rejected": -0.20012176036834717, "step": 305 }, { "epoch": 0.48226950354609927, "grad_norm": 0.24374498426914215, "learning_rate": 3.7653787725076464e-06, "log_odds_chosen": 0.2701460123062134, "log_odds_ratio": -0.5720412731170654, "logits/chosen": 0.1489667445421219, "logits/rejected": -0.9628247618675232, "logps/chosen": -1.6143256425857544, "logps/rejected": -1.8385647535324097, "loss": 1.7374, "nll_loss": 1.680199146270752, "rewards/accuracies": 0.875, "rewards/chosen": -0.16143256425857544, "rewards/margins": 0.022423917427659035, "rewards/rejected": -0.18385647237300873, "step": 306 }, { "epoch": 0.4838455476753349, "grad_norm": 0.2575761675834656, "learning_rate": 3.7627887076223685e-06, "log_odds_chosen": 0.3698280155658722, "log_odds_ratio": -0.5293493270874023, "logits/chosen": 0.17162802815437317, "logits/rejected": -0.7795068621635437, "logps/chosen": -1.6772853136062622, "logps/rejected": -1.9871926307678223, "loss": 1.783, "nll_loss": 1.7300152778625488, "rewards/accuracies": 1.0, "rewards/chosen": -0.16772854328155518, "rewards/margins": 0.03099072352051735, "rewards/rejected": -0.19871927797794342, "step": 307 }, { "epoch": 0.4854215918045705, "grad_norm": 0.2139917016029358, "learning_rate": 3.7601853257103765e-06, "log_odds_chosen": 0.22644855082035065, "log_odds_ratio": -0.5928743481636047, "logits/chosen": 0.06793497502803802, "logits/rejected": -1.0903844833374023, "logps/chosen": -1.582783579826355, "logps/rejected": -1.7714552879333496, "loss": 1.6918, "nll_loss": 1.632529616355896, "rewards/accuracies": 0.875, "rewards/chosen": -0.15827836096286774, "rewards/margins": 0.018867187201976776, "rewards/rejected": -0.17714554071426392, "step": 308 }, { "epoch": 0.48699763593380613, "grad_norm": 0.21651345491409302, "learning_rate": 3.7575686464389767e-06, "log_odds_chosen": 0.3462998867034912, "log_odds_ratio": -0.5444170832633972, "logits/chosen": 0.10276569426059723, "logits/rejected": -1.1056041717529297, "logps/chosen": -1.5598326921463013, "logps/rejected": -1.8447059392929077, "loss": 1.6921, "nll_loss": 1.6376224756240845, "rewards/accuracies": 0.875, "rewards/chosen": -0.15598325431346893, "rewards/margins": 0.02848733589053154, "rewards/rejected": -0.18447057902812958, "step": 309 }, { "epoch": 0.48857368006304175, "grad_norm": 0.2089070826768875, "learning_rate": 3.7549386895759315e-06, "log_odds_chosen": 0.38229963183403015, "log_odds_ratio": -0.5227848291397095, "logits/chosen": 0.06746693700551987, "logits/rejected": -1.028963327407837, "logps/chosen": -1.5149016380310059, "logps/rejected": -1.8246971368789673, "loss": 1.6366, "nll_loss": 1.5843473672866821, "rewards/accuracies": 1.0, "rewards/chosen": -0.15149016678333282, "rewards/margins": 0.030979545786976814, "rewards/rejected": -0.1824697107076645, "step": 310 }, { "epoch": 0.49014972419227737, "grad_norm": 0.20956042408943176, "learning_rate": 3.7522954749893086e-06, "log_odds_chosen": 0.5883792042732239, "log_odds_ratio": -0.45304739475250244, "logits/chosen": -0.004524541087448597, "logits/rejected": -1.15907621383667, "logps/chosen": -1.5441385507583618, "logps/rejected": -2.0315380096435547, "loss": 1.6764, "nll_loss": 1.631089448928833, "rewards/accuracies": 1.0, "rewards/chosen": -0.1544138640165329, "rewards/margins": 0.048739951103925705, "rewards/rejected": -0.2031538188457489, "step": 311 }, { "epoch": 0.491725768321513, "grad_norm": 0.23161104321479797, "learning_rate": 3.749639022647332e-06, "log_odds_chosen": 0.35890865325927734, "log_odds_ratio": -0.5340113639831543, "logits/chosen": 0.06597714126110077, "logits/rejected": -1.0388400554656982, "logps/chosen": -1.6328809261322021, "logps/rejected": -1.932662010192871, "loss": 1.7561, "nll_loss": 1.7027454376220703, "rewards/accuracies": 1.0, "rewards/chosen": -0.16328811645507812, "rewards/margins": 0.02997809275984764, "rewards/rejected": -0.19326619803905487, "step": 312 }, { "epoch": 0.4933018124507486, "grad_norm": 0.20770005881786346, "learning_rate": 3.7469693526182304e-06, "log_odds_chosen": 0.513086199760437, "log_odds_ratio": -0.485725075006485, "logits/chosen": 0.0205635167658329, "logits/rejected": -1.4314470291137695, "logps/chosen": -1.581476092338562, "logps/rejected": -2.0070858001708984, "loss": 1.6904, "nll_loss": 1.6417973041534424, "rewards/accuracies": 0.875, "rewards/chosen": -0.15814761817455292, "rewards/margins": 0.04256095737218857, "rewards/rejected": -0.2007085680961609, "step": 313 }, { "epoch": 0.49487785657998423, "grad_norm": 0.21436423063278198, "learning_rate": 3.744286485070085e-06, "log_odds_chosen": 0.5086329579353333, "log_odds_ratio": -0.47319746017456055, "logits/chosen": 0.03913354501128197, "logits/rejected": -1.1685231924057007, "logps/chosen": -1.5282073020935059, "logps/rejected": -1.9466618299484253, "loss": 1.6421, "nll_loss": 1.594788908958435, "rewards/accuracies": 1.0, "rewards/chosen": -0.15282073616981506, "rewards/margins": 0.041845470666885376, "rewards/rejected": -0.19466620683670044, "step": 314 }, { "epoch": 0.49645390070921985, "grad_norm": 0.23891815543174744, "learning_rate": 3.7415904402706795e-06, "log_odds_chosen": 0.4192996621131897, "log_odds_ratio": -0.514187216758728, "logits/chosen": 0.06727111339569092, "logits/rejected": -1.1843537092208862, "logps/chosen": -1.7258471250534058, "logps/rejected": -2.082641363143921, "loss": 1.835, "nll_loss": 1.7836283445358276, "rewards/accuracies": 1.0, "rewards/chosen": -0.17258471250534058, "rewards/margins": 0.03567943722009659, "rewards/rejected": -0.20826414227485657, "step": 315 }, { "epoch": 0.4980299448384555, "grad_norm": 0.21085584163665771, "learning_rate": 3.7388812385873435e-06, "log_odds_chosen": 0.3644852042198181, "log_odds_ratio": -0.5296192169189453, "logits/chosen": 0.030755888670682907, "logits/rejected": -1.189257264137268, "logps/chosen": -1.5624669790267944, "logps/rejected": -1.8610371351242065, "loss": 1.6768, "nll_loss": 1.6238601207733154, "rewards/accuracies": 1.0, "rewards/chosen": -0.15624670684337616, "rewards/margins": 0.02985702082514763, "rewards/rejected": -0.1861037313938141, "step": 316 }, { "epoch": 0.4996059889676911, "grad_norm": 0.23275373876094818, "learning_rate": 3.7361589004868033e-06, "log_odds_chosen": 0.5013652443885803, "log_odds_ratio": -0.479345440864563, "logits/chosen": 0.14082355797290802, "logits/rejected": -1.2382307052612305, "logps/chosen": -1.6419178247451782, "logps/rejected": -2.0661187171936035, "loss": 1.7418, "nll_loss": 1.6938456296920776, "rewards/accuracies": 1.0, "rewards/chosen": -0.16419179737567902, "rewards/margins": 0.042420096695423126, "rewards/rejected": -0.20661188662052155, "step": 317 }, { "epoch": 0.5011820330969267, "grad_norm": 0.2070867121219635, "learning_rate": 3.733423446535022e-06, "log_odds_chosen": 0.5261310338973999, "log_odds_ratio": -0.47375503182411194, "logits/chosen": 0.12559077143669128, "logits/rejected": -1.3008148670196533, "logps/chosen": -1.634326457977295, "logps/rejected": -2.0824835300445557, "loss": 1.7359, "nll_loss": 1.688564658164978, "rewards/accuracies": 1.0, "rewards/chosen": -0.16343267261981964, "rewards/margins": 0.044815681874752045, "rewards/rejected": -0.20824836194515228, "step": 318 }, { "epoch": 0.5027580772261623, "grad_norm": 0.20222921669483185, "learning_rate": 3.7306748973970476e-06, "log_odds_chosen": 0.39474886655807495, "log_odds_ratio": -0.5250571370124817, "logits/chosen": 0.03667742758989334, "logits/rejected": -1.4291538000106812, "logps/chosen": -1.5048575401306152, "logps/rejected": -1.823891282081604, "loss": 1.6483, "nll_loss": 1.5958224534988403, "rewards/accuracies": 0.875, "rewards/chosen": -0.15048575401306152, "rewards/margins": 0.03190337494015694, "rewards/rejected": -0.18238912522792816, "step": 319 }, { "epoch": 0.5043341213553979, "grad_norm": 0.27620604634284973, "learning_rate": 3.7279132738368564e-06, "log_odds_chosen": 0.18474048376083374, "log_odds_ratio": -0.6092777252197266, "logits/chosen": 0.1035664826631546, "logits/rejected": -1.2585889101028442, "logps/chosen": -1.7019753456115723, "logps/rejected": -1.8543686866760254, "loss": 1.8124, "nll_loss": 1.7514902353286743, "rewards/accuracies": 0.75, "rewards/chosen": -0.17019754648208618, "rewards/margins": 0.015239320695400238, "rewards/rejected": -0.18543685972690582, "step": 320 }, { "epoch": 0.5059101654846335, "grad_norm": 0.21710968017578125, "learning_rate": 3.725138596717195e-06, "log_odds_chosen": 0.44385021924972534, "log_odds_ratio": -0.4991372227668762, "logits/chosen": 0.023607883602380753, "logits/rejected": -1.283747911453247, "logps/chosen": -1.5559569597244263, "logps/rejected": -1.9218320846557617, "loss": 1.6659, "nll_loss": 1.6159745454788208, "rewards/accuracies": 1.0, "rewards/chosen": -0.15559569001197815, "rewards/margins": 0.03658752888441086, "rewards/rejected": -0.1921832263469696, "step": 321 }, { "epoch": 0.5074862096138691, "grad_norm": 0.20364168286323547, "learning_rate": 3.7223508869994244e-06, "log_odds_chosen": 0.4654800295829773, "log_odds_ratio": -0.4937525987625122, "logits/chosen": 0.04612987861037254, "logits/rejected": -1.3605108261108398, "logps/chosen": -1.6008849143981934, "logps/rejected": -1.9879066944122314, "loss": 1.7137, "nll_loss": 1.6643142700195312, "rewards/accuracies": 1.0, "rewards/chosen": -0.16008850932121277, "rewards/margins": 0.038702160120010376, "rewards/rejected": -0.19879068434238434, "step": 322 }, { "epoch": 0.5090622537431048, "grad_norm": 0.23145321011543274, "learning_rate": 3.7195501657433594e-06, "log_odds_chosen": 0.39249351620674133, "log_odds_ratio": -0.5205338597297668, "logits/chosen": -0.0046775080263614655, "logits/rejected": -1.3155083656311035, "logps/chosen": -1.6052358150482178, "logps/rejected": -1.9290968179702759, "loss": 1.7238, "nll_loss": 1.6717445850372314, "rewards/accuracies": 1.0, "rewards/chosen": -0.16052357852458954, "rewards/margins": 0.03238610923290253, "rewards/rejected": -0.19290968775749207, "step": 323 }, { "epoch": 0.5106382978723404, "grad_norm": 0.19913478195667267, "learning_rate": 3.716736454107111e-06, "log_odds_chosen": 0.5085100531578064, "log_odds_ratio": -0.47588035464286804, "logits/chosen": 0.002479949500411749, "logits/rejected": -1.0518862009048462, "logps/chosen": -1.5002402067184448, "logps/rejected": -1.9133625030517578, "loss": 1.6117, "nll_loss": 1.5641216039657593, "rewards/accuracies": 1.0, "rewards/chosen": -0.15002401173114777, "rewards/margins": 0.04131225496530533, "rewards/rejected": -0.1913362741470337, "step": 324 }, { "epoch": 0.512214342001576, "grad_norm": 0.21510443091392517, "learning_rate": 3.7139097733469277e-06, "log_odds_chosen": 0.5286773443222046, "log_odds_ratio": -0.46826720237731934, "logits/chosen": 0.02427489310503006, "logits/rejected": -1.2997654676437378, "logps/chosen": -1.54619562625885, "logps/rejected": -1.982952356338501, "loss": 1.6601, "nll_loss": 1.6132692098617554, "rewards/accuracies": 1.0, "rewards/chosen": -0.15461957454681396, "rewards/margins": 0.04367566481232643, "rewards/rejected": -0.1982952207326889, "step": 325 }, { "epoch": 0.5137903861308116, "grad_norm": 0.22013559937477112, "learning_rate": 3.711070144817032e-06, "log_odds_chosen": 0.4152149558067322, "log_odds_ratio": -0.512770414352417, "logits/chosen": 0.005936339497566223, "logits/rejected": -1.122101068496704, "logps/chosen": -1.633888840675354, "logps/rejected": -1.9839468002319336, "loss": 1.754, "nll_loss": 1.7027238607406616, "rewards/accuracies": 1.0, "rewards/chosen": -0.16338886320590973, "rewards/margins": 0.03500579297542572, "rewards/rejected": -0.19839467108249664, "step": 326 }, { "epoch": 0.5153664302600472, "grad_norm": 0.24476923048496246, "learning_rate": 3.708217589969461e-06, "log_odds_chosen": 0.5117456912994385, "log_odds_ratio": -0.4778427481651306, "logits/chosen": -0.02480306476354599, "logits/rejected": -1.3842414617538452, "logps/chosen": -1.6081162691116333, "logps/rejected": -2.0358729362487793, "loss": 1.7307, "nll_loss": 1.682942509651184, "rewards/accuracies": 1.0, "rewards/chosen": -0.160811647772789, "rewards/margins": 0.042775679379701614, "rewards/rejected": -0.20358730852603912, "step": 327 }, { "epoch": 0.5169424743892829, "grad_norm": 0.2023211419582367, "learning_rate": 3.705352130353904e-06, "log_odds_chosen": 0.4642333984375, "log_odds_ratio": -0.4990030825138092, "logits/chosen": 0.04158155620098114, "logits/rejected": -1.2979916334152222, "logps/chosen": -1.5249441862106323, "logps/rejected": -1.9054869413375854, "loss": 1.6543, "nll_loss": 1.6043576002120972, "rewards/accuracies": 1.0, "rewards/chosen": -0.152494415640831, "rewards/margins": 0.038054272532463074, "rewards/rejected": -0.19054868817329407, "step": 328 }, { "epoch": 0.5185185185185185, "grad_norm": 0.2157369703054428, "learning_rate": 3.7024737876175404e-06, "log_odds_chosen": 0.4267793893814087, "log_odds_ratio": -0.5041587352752686, "logits/chosen": -0.02120812237262726, "logits/rejected": -1.1328731775283813, "logps/chosen": -1.5772178173065186, "logps/rejected": -1.9269858598709106, "loss": 1.683, "nll_loss": 1.6326076984405518, "rewards/accuracies": 1.0, "rewards/chosen": -0.15772177278995514, "rewards/margins": 0.03497680649161339, "rewards/rejected": -0.19269859790802002, "step": 329 }, { "epoch": 0.5200945626477541, "grad_norm": 0.22613677382469177, "learning_rate": 3.699582583504874e-06, "log_odds_chosen": 0.2962914705276489, "log_odds_ratio": -0.5632017254829407, "logits/chosen": 0.033639900386333466, "logits/rejected": -1.3186163902282715, "logps/chosen": -1.7170500755310059, "logps/rejected": -1.966399908065796, "loss": 1.815, "nll_loss": 1.7586567401885986, "rewards/accuracies": 0.875, "rewards/chosen": -0.17170500755310059, "rewards/margins": 0.024934988468885422, "rewards/rejected": -0.1966399997472763, "step": 330 }, { "epoch": 0.5216706067769897, "grad_norm": 0.20476558804512024, "learning_rate": 3.696678539857571e-06, "log_odds_chosen": 0.40065276622772217, "log_odds_ratio": -0.5229502320289612, "logits/chosen": -0.00485864095389843, "logits/rejected": -1.3043968677520752, "logps/chosen": -1.6962597370147705, "logps/rejected": -2.0372161865234375, "loss": 1.8021, "nll_loss": 1.7498358488082886, "rewards/accuracies": 0.875, "rewards/chosen": -0.16962596774101257, "rewards/margins": 0.034095652401447296, "rewards/rejected": -0.20372162759304047, "step": 331 }, { "epoch": 0.5232466509062254, "grad_norm": 0.20712663233280182, "learning_rate": 3.6937616786142956e-06, "log_odds_chosen": 0.5233447551727295, "log_odds_ratio": -0.47018271684646606, "logits/chosen": 0.010211546905338764, "logits/rejected": -1.0574318170547485, "logps/chosen": -1.5134122371673584, "logps/rejected": -1.9436025619506836, "loss": 1.6422, "nll_loss": 1.595203161239624, "rewards/accuracies": 1.0, "rewards/chosen": -0.15134122967720032, "rewards/margins": 0.04301903396844864, "rewards/rejected": -0.19436024129390717, "step": 332 }, { "epoch": 0.524822695035461, "grad_norm": 0.18907472491264343, "learning_rate": 3.6908320218105393e-06, "log_odds_chosen": 0.3291381895542145, "log_odds_ratio": -0.5515700578689575, "logits/chosen": -0.01022535003721714, "logits/rejected": -1.2733728885650635, "logps/chosen": -1.5107743740081787, "logps/rejected": -1.7811356782913208, "loss": 1.6236, "nll_loss": 1.5684043169021606, "rewards/accuracies": 0.875, "rewards/chosen": -0.15107741951942444, "rewards/margins": 0.027036139741539955, "rewards/rejected": -0.17811356484889984, "step": 333 }, { "epoch": 0.5263987391646966, "grad_norm": 0.19910985231399536, "learning_rate": 3.6878895915784607e-06, "log_odds_chosen": 0.48929572105407715, "log_odds_ratio": -0.483676016330719, "logits/chosen": 0.03388974070549011, "logits/rejected": -1.230672836303711, "logps/chosen": -1.4776809215545654, "logps/rejected": -1.8740699291229248, "loss": 1.5862, "nll_loss": 1.5378473997116089, "rewards/accuracies": 1.0, "rewards/chosen": -0.14776809513568878, "rewards/margins": 0.03963891416788101, "rewards/rejected": -0.187406986951828, "step": 334 }, { "epoch": 0.5279747832939322, "grad_norm": 0.19112317264080048, "learning_rate": 3.6849344101467147e-06, "log_odds_chosen": 0.3993302285671234, "log_odds_ratio": -0.5199868679046631, "logits/chosen": 0.02075035311281681, "logits/rejected": -1.4165470600128174, "logps/chosen": -1.5604709386825562, "logps/rejected": -1.890925407409668, "loss": 1.6795, "nll_loss": 1.6274938583374023, "rewards/accuracies": 0.875, "rewards/chosen": -0.15604707598686218, "rewards/margins": 0.03304546698927879, "rewards/rejected": -0.18909254670143127, "step": 335 }, { "epoch": 0.5295508274231678, "grad_norm": 0.2065410017967224, "learning_rate": 3.6819664998402857e-06, "log_odds_chosen": 0.3870427906513214, "log_odds_ratio": -0.521834671497345, "logits/chosen": 0.007402241230010986, "logits/rejected": -1.2406339645385742, "logps/chosen": -1.597013235092163, "logps/rejected": -1.9189107418060303, "loss": 1.7091, "nll_loss": 1.65691339969635, "rewards/accuracies": 1.0, "rewards/chosen": -0.15970134735107422, "rewards/margins": 0.032189756631851196, "rewards/rejected": -0.19189107418060303, "step": 336 }, { "epoch": 0.5311268715524035, "grad_norm": 0.22015894949436188, "learning_rate": 3.6789858830803186e-06, "log_odds_chosen": 0.4236854314804077, "log_odds_ratio": -0.5088356733322144, "logits/chosen": 0.009572651237249374, "logits/rejected": -1.2554011344909668, "logps/chosen": -1.649095058441162, "logps/rejected": -2.0025815963745117, "loss": 1.7505, "nll_loss": 1.6996192932128906, "rewards/accuracies": 1.0, "rewards/chosen": -0.16490954160690308, "rewards/margins": 0.035348646342754364, "rewards/rejected": -0.20025816559791565, "step": 337 }, { "epoch": 0.5327029156816391, "grad_norm": 0.20396317541599274, "learning_rate": 3.6759925823839486e-06, "log_odds_chosen": 0.3307921886444092, "log_odds_ratio": -0.5461194515228271, "logits/chosen": -0.017404936254024506, "logits/rejected": -1.1315640211105347, "logps/chosen": -1.5301204919815063, "logps/rejected": -1.7987494468688965, "loss": 1.6634, "nll_loss": 1.6087586879730225, "rewards/accuracies": 1.0, "rewards/chosen": -0.15301203727722168, "rewards/margins": 0.02686290442943573, "rewards/rejected": -0.1798749417066574, "step": 338 }, { "epoch": 0.5342789598108747, "grad_norm": 0.19404453039169312, "learning_rate": 3.672986620364134e-06, "log_odds_chosen": 0.4503750205039978, "log_odds_ratio": -0.49645406007766724, "logits/chosen": 0.02146240696310997, "logits/rejected": -1.14137601852417, "logps/chosen": -1.6056911945343018, "logps/rejected": -1.9799296855926514, "loss": 1.7114, "nll_loss": 1.6618030071258545, "rewards/accuracies": 1.0, "rewards/chosen": -0.16056913137435913, "rewards/margins": 0.03742384910583496, "rewards/rejected": -0.1979929804801941, "step": 339 }, { "epoch": 0.5358550039401103, "grad_norm": 0.19405515491962433, "learning_rate": 3.669968019729481e-06, "log_odds_chosen": 0.5453786849975586, "log_odds_ratio": -0.46696317195892334, "logits/chosen": -0.03269782289862633, "logits/rejected": -1.4044153690338135, "logps/chosen": -1.4841482639312744, "logps/rejected": -1.9323790073394775, "loss": 1.5994, "nll_loss": 1.5526580810546875, "rewards/accuracies": 1.0, "rewards/chosen": -0.14841482043266296, "rewards/margins": 0.04482308030128479, "rewards/rejected": -0.19323790073394775, "step": 340 }, { "epoch": 0.5374310480693459, "grad_norm": 0.20472969114780426, "learning_rate": 3.666936803284076e-06, "log_odds_chosen": 0.47615405917167664, "log_odds_ratio": -0.4855659306049347, "logits/chosen": 0.01596236228942871, "logits/rejected": -1.2273820638656616, "logps/chosen": -1.6264029741287231, "logps/rejected": -2.0267691612243652, "loss": 1.7196, "nll_loss": 1.6710734367370605, "rewards/accuracies": 1.0, "rewards/chosen": -0.1626402884721756, "rewards/margins": 0.0400366336107254, "rewards/rejected": -0.202676922082901, "step": 341 }, { "epoch": 0.5390070921985816, "grad_norm": 0.21434363722801208, "learning_rate": 3.663892993927312e-06, "log_odds_chosen": 0.5617402791976929, "log_odds_ratio": -0.4550952911376953, "logits/chosen": -0.059917159378528595, "logits/rejected": -1.359694004058838, "logps/chosen": -1.575050950050354, "logps/rejected": -2.043088674545288, "loss": 1.6888, "nll_loss": 1.643282175064087, "rewards/accuracies": 1.0, "rewards/chosen": -0.1575051099061966, "rewards/margins": 0.046803757548332214, "rewards/rejected": -0.20430885255336761, "step": 342 }, { "epoch": 0.5405831363278172, "grad_norm": 0.20112687349319458, "learning_rate": 3.6608366146537136e-06, "log_odds_chosen": 0.6060886383056641, "log_odds_ratio": -0.45338305830955505, "logits/chosen": -0.08892233669757843, "logits/rejected": -1.231791377067566, "logps/chosen": -1.5024844408035278, "logps/rejected": -2.009472608566284, "loss": 1.6204, "nll_loss": 1.575110912322998, "rewards/accuracies": 1.0, "rewards/chosen": -0.1502484530210495, "rewards/margins": 0.05069882422685623, "rewards/rejected": -0.20094728469848633, "step": 343 }, { "epoch": 0.5421591804570528, "grad_norm": 0.20276731252670288, "learning_rate": 3.6577676885527674e-06, "log_odds_chosen": 0.47172704339027405, "log_odds_ratio": -0.4901059567928314, "logits/chosen": -0.064692422747612, "logits/rejected": -1.493216633796692, "logps/chosen": -1.5645720958709717, "logps/rejected": -1.954676866531372, "loss": 1.664, "nll_loss": 1.6149814128875732, "rewards/accuracies": 1.0, "rewards/chosen": -0.15645720064640045, "rewards/margins": 0.03901049122214317, "rewards/rejected": -0.19546771049499512, "step": 344 }, { "epoch": 0.5437352245862884, "grad_norm": 0.19876694679260254, "learning_rate": 3.654686238808744e-06, "log_odds_chosen": 0.4601633548736572, "log_odds_ratio": -0.49714383482933044, "logits/chosen": -0.09634008258581161, "logits/rejected": -1.3992743492126465, "logps/chosen": -1.657270908355713, "logps/rejected": -2.0440926551818848, "loss": 1.7581, "nll_loss": 1.7084193229675293, "rewards/accuracies": 1.0, "rewards/chosen": -0.16572707891464233, "rewards/margins": 0.03868217021226883, "rewards/rejected": -0.20440925657749176, "step": 345 }, { "epoch": 0.545311268715524, "grad_norm": 0.18044152855873108, "learning_rate": 3.6515922887005245e-06, "log_odds_chosen": 0.6581941843032837, "log_odds_ratio": -0.42069223523139954, "logits/chosen": -0.19447211921215057, "logits/rejected": -1.4484854936599731, "logps/chosen": -1.4948885440826416, "logps/rejected": -2.034649133682251, "loss": 1.6052, "nll_loss": 1.563119888305664, "rewards/accuracies": 1.0, "rewards/chosen": -0.14948883652687073, "rewards/margins": 0.053976062685251236, "rewards/rejected": -0.20346491038799286, "step": 346 }, { "epoch": 0.5468873128447597, "grad_norm": 0.1821555495262146, "learning_rate": 3.6484858616014236e-06, "log_odds_chosen": 0.29319724440574646, "log_odds_ratio": -0.5593742728233337, "logits/chosen": -0.02489875629544258, "logits/rejected": -1.4301215410232544, "logps/chosen": -1.6183066368103027, "logps/rejected": -1.8595659732818604, "loss": 1.7166, "nll_loss": 1.6606316566467285, "rewards/accuracies": 1.0, "rewards/chosen": -0.16183066368103027, "rewards/margins": 0.024125942960381508, "rewards/rejected": -0.18595659732818604, "step": 347 }, { "epoch": 0.5484633569739953, "grad_norm": 0.20152583718299866, "learning_rate": 3.6453669809790154e-06, "log_odds_chosen": 0.34444230794906616, "log_odds_ratio": -0.5457963943481445, "logits/chosen": 0.003659643232822418, "logits/rejected": -1.1108278036117554, "logps/chosen": -1.548649549484253, "logps/rejected": -1.8318045139312744, "loss": 1.6556, "nll_loss": 1.6010490655899048, "rewards/accuracies": 1.0, "rewards/chosen": -0.15486496686935425, "rewards/margins": 0.02831549569964409, "rewards/rejected": -0.18318045139312744, "step": 348 }, { "epoch": 0.5500394011032309, "grad_norm": 0.19325992465019226, "learning_rate": 3.642235670394952e-06, "log_odds_chosen": 0.43656274676322937, "log_odds_ratio": -0.5032880902290344, "logits/chosen": -0.02493971772491932, "logits/rejected": -1.0908689498901367, "logps/chosen": -1.5585966110229492, "logps/rejected": -1.9134851694107056, "loss": 1.6744, "nll_loss": 1.6240770816802979, "rewards/accuracies": 1.0, "rewards/chosen": -0.15585967898368835, "rewards/margins": 0.035488829016685486, "rewards/rejected": -0.19134849309921265, "step": 349 }, { "epoch": 0.5516154452324665, "grad_norm": 0.19222001731395721, "learning_rate": 3.63909195350479e-06, "log_odds_chosen": 0.41014277935028076, "log_odds_ratio": -0.5183743238449097, "logits/chosen": 0.04076732322573662, "logits/rejected": -1.2510112524032593, "logps/chosen": -1.6457445621490479, "logps/rejected": -1.9860320091247559, "loss": 1.7474, "nll_loss": 1.6955746412277222, "rewards/accuracies": 0.875, "rewards/chosen": -0.16457447409629822, "rewards/margins": 0.03402874246239662, "rewards/rejected": -0.19860321283340454, "step": 350 }, { "epoch": 0.5531914893617021, "grad_norm": 0.19920876622200012, "learning_rate": 3.635935854057809e-06, "log_odds_chosen": 0.3992159962654114, "log_odds_ratio": -0.5221322774887085, "logits/chosen": -0.14892584085464478, "logits/rejected": -0.9987085461616516, "logps/chosen": -1.5974595546722412, "logps/rejected": -1.9293639659881592, "loss": 1.7044, "nll_loss": 1.6521823406219482, "rewards/accuracies": 0.875, "rewards/chosen": -0.1597459614276886, "rewards/margins": 0.03319043666124344, "rewards/rejected": -0.19293639063835144, "step": 351 }, { "epoch": 0.5547675334909378, "grad_norm": 0.19184619188308716, "learning_rate": 3.6327673958968327e-06, "log_odds_chosen": 0.5156201720237732, "log_odds_ratio": -0.4716494679450989, "logits/chosen": -0.10240821540355682, "logits/rejected": -1.2988749742507935, "logps/chosen": -1.4805288314819336, "logps/rejected": -1.8984272480010986, "loss": 1.6073, "nll_loss": 1.5601266622543335, "rewards/accuracies": 1.0, "rewards/chosen": -0.1480528861284256, "rewards/margins": 0.04178984463214874, "rewards/rejected": -0.18984271585941315, "step": 352 }, { "epoch": 0.5563435776201734, "grad_norm": 0.17695419490337372, "learning_rate": 3.6295866029580483e-06, "log_odds_chosen": 0.49165239930152893, "log_odds_ratio": -0.4805663824081421, "logits/chosen": -0.09917198866605759, "logits/rejected": -1.3583862781524658, "logps/chosen": -1.5266773700714111, "logps/rejected": -1.9281859397888184, "loss": 1.623, "nll_loss": 1.5749820470809937, "rewards/accuracies": 1.0, "rewards/chosen": -0.15266773104667664, "rewards/margins": 0.04015086218714714, "rewards/rejected": -0.19281861186027527, "step": 353 }, { "epoch": 0.557919621749409, "grad_norm": 0.18373258411884308, "learning_rate": 3.626393499270829e-06, "log_odds_chosen": 0.45253658294677734, "log_odds_ratio": -0.5001296997070312, "logits/chosen": -0.09345138818025589, "logits/rejected": -1.326229453086853, "logps/chosen": -1.4635684490203857, "logps/rejected": -1.8293596506118774, "loss": 1.5919, "nll_loss": 1.541857361793518, "rewards/accuracies": 1.0, "rewards/chosen": -0.14635683596134186, "rewards/margins": 0.03657911717891693, "rewards/rejected": -0.18293596804141998, "step": 354 }, { "epoch": 0.5594956658786446, "grad_norm": 0.19679243862628937, "learning_rate": 3.6231881089575466e-06, "log_odds_chosen": 0.6074280142784119, "log_odds_ratio": -0.43782979249954224, "logits/chosen": -0.16901959478855133, "logits/rejected": -1.4631741046905518, "logps/chosen": -1.5110644102096558, "logps/rejected": -2.0135817527770996, "loss": 1.5938, "nll_loss": 1.550012230873108, "rewards/accuracies": 1.0, "rewards/chosen": -0.15110644698143005, "rewards/margins": 0.05025171861052513, "rewards/rejected": -0.2013581544160843, "step": 355 }, { "epoch": 0.5610717100078803, "grad_norm": 0.2090955525636673, "learning_rate": 3.6199704562333945e-06, "log_odds_chosen": 0.4590110778808594, "log_odds_ratio": -0.4973413050174713, "logits/chosen": -0.13931547105312347, "logits/rejected": -1.3538612127304077, "logps/chosen": -1.4821869134902954, "logps/rejected": -1.8512073755264282, "loss": 1.5913, "nll_loss": 1.5415163040161133, "rewards/accuracies": 1.0, "rewards/chosen": -0.14821870625019073, "rewards/margins": 0.03690203279256821, "rewards/rejected": -0.18512074649333954, "step": 356 }, { "epoch": 0.5626477541371159, "grad_norm": 0.20289309322834015, "learning_rate": 3.6167405654062024e-06, "log_odds_chosen": 0.4794601798057556, "log_odds_ratio": -0.49567416310310364, "logits/chosen": -0.07716728746891022, "logits/rejected": -1.2874313592910767, "logps/chosen": -1.541725516319275, "logps/rejected": -1.9342597723007202, "loss": 1.6395, "nll_loss": 1.589914321899414, "rewards/accuracies": 1.0, "rewards/chosen": -0.15417256951332092, "rewards/margins": 0.03925342112779617, "rewards/rejected": -0.1934259682893753, "step": 357 }, { "epoch": 0.5642237982663515, "grad_norm": 0.1778980940580368, "learning_rate": 3.6134984608762515e-06, "log_odds_chosen": 0.5081315636634827, "log_odds_ratio": -0.4739688038825989, "logits/chosen": -0.08499579131603241, "logits/rejected": -1.3757938146591187, "logps/chosen": -1.4640223979949951, "logps/rejected": -1.8742594718933105, "loss": 1.5833, "nll_loss": 1.5359253883361816, "rewards/accuracies": 1.0, "rewards/chosen": -0.1464022397994995, "rewards/margins": 0.041023701429367065, "rewards/rejected": -0.18742592632770538, "step": 358 }, { "epoch": 0.5657998423955871, "grad_norm": 0.2062489539384842, "learning_rate": 3.6102441671360945e-06, "log_odds_chosen": 0.4876347780227661, "log_odds_ratio": -0.48857545852661133, "logits/chosen": -0.12350551038980484, "logits/rejected": -1.2674330472946167, "logps/chosen": -1.6090378761291504, "logps/rejected": -2.016418218612671, "loss": 1.7003, "nll_loss": 1.651473879814148, "rewards/accuracies": 1.0, "rewards/chosen": -0.16090378165245056, "rewards/margins": 0.04073803871870041, "rewards/rejected": -0.20164184272289276, "step": 359 }, { "epoch": 0.5673758865248227, "grad_norm": 0.19282633066177368, "learning_rate": 3.6069777087703654e-06, "log_odds_chosen": 0.5007855296134949, "log_odds_ratio": -0.48002350330352783, "logits/chosen": -0.10314866900444031, "logits/rejected": -1.2184945344924927, "logps/chosen": -1.533939242362976, "logps/rejected": -1.9453692436218262, "loss": 1.6501, "nll_loss": 1.602098822593689, "rewards/accuracies": 1.0, "rewards/chosen": -0.1533939242362976, "rewards/margins": 0.041142985224723816, "rewards/rejected": -0.19453692436218262, "step": 360 }, { "epoch": 0.5689519306540584, "grad_norm": 0.1999576985836029, "learning_rate": 3.6036991104555973e-06, "log_odds_chosen": 0.4542830288410187, "log_odds_ratio": -0.5044661164283752, "logits/chosen": -0.08808690309524536, "logits/rejected": -1.3358922004699707, "logps/chosen": -1.6718225479125977, "logps/rejected": -2.0570645332336426, "loss": 1.7592, "nll_loss": 1.7087852954864502, "rewards/accuracies": 0.875, "rewards/chosen": -0.16718226671218872, "rewards/margins": 0.03852420300245285, "rewards/rejected": -0.20570647716522217, "step": 361 }, { "epoch": 0.570527974783294, "grad_norm": 0.19183827936649323, "learning_rate": 3.600408396960034e-06, "log_odds_chosen": 0.5055266618728638, "log_odds_ratio": -0.4799540042877197, "logits/chosen": -0.06735289096832275, "logits/rejected": -0.9921278953552246, "logps/chosen": -1.4769415855407715, "logps/rejected": -1.8809959888458252, "loss": 1.5713, "nll_loss": 1.523301601409912, "rewards/accuracies": 1.0, "rewards/chosen": -0.1476941704750061, "rewards/margins": 0.04040544107556343, "rewards/rejected": -0.18809959292411804, "step": 362 }, { "epoch": 0.5721040189125296, "grad_norm": 0.19298037886619568, "learning_rate": 3.5971055931434447e-06, "log_odds_chosen": 0.4434927701950073, "log_odds_ratio": -0.5033714771270752, "logits/chosen": -0.04916198179125786, "logits/rejected": -1.233707308769226, "logps/chosen": -1.5274627208709717, "logps/rejected": -1.8934059143066406, "loss": 1.6346, "nll_loss": 1.5842169523239136, "rewards/accuracies": 0.875, "rewards/chosen": -0.1527462899684906, "rewards/margins": 0.03659432381391525, "rewards/rejected": -0.18934059143066406, "step": 363 }, { "epoch": 0.5736800630417652, "grad_norm": 0.1897989809513092, "learning_rate": 3.5937907239569343e-06, "log_odds_chosen": 0.47198542952537537, "log_odds_ratio": -0.4953606128692627, "logits/chosen": -0.1299748420715332, "logits/rejected": -1.3614236116409302, "logps/chosen": -1.579548954963684, "logps/rejected": -1.9681391716003418, "loss": 1.6868, "nll_loss": 1.6372454166412354, "rewards/accuracies": 0.875, "rewards/chosen": -0.15795490145683289, "rewards/margins": 0.038859013468027115, "rewards/rejected": -0.1968139261007309, "step": 364 }, { "epoch": 0.5752561071710008, "grad_norm": 0.22458185255527496, "learning_rate": 3.5904638144427572e-06, "log_odds_chosen": 0.2747001647949219, "log_odds_ratio": -0.5726035833358765, "logits/chosen": -0.09000087529420853, "logits/rejected": -1.1089969873428345, "logps/chosen": -1.6369290351867676, "logps/rejected": -1.8634740114212036, "loss": 1.7333, "nll_loss": 1.6760823726654053, "rewards/accuracies": 0.75, "rewards/chosen": -0.1636928915977478, "rewards/margins": 0.02265450730919838, "rewards/rejected": -0.18634741008281708, "step": 365 }, { "epoch": 0.5768321513002365, "grad_norm": 0.2086506485939026, "learning_rate": 3.5871248897341246e-06, "log_odds_chosen": 0.5135898590087891, "log_odds_ratio": -0.4752338230609894, "logits/chosen": -0.12942443788051605, "logits/rejected": -1.0795626640319824, "logps/chosen": -1.4760322570800781, "logps/rejected": -1.8992903232574463, "loss": 1.598, "nll_loss": 1.5504556894302368, "rewards/accuracies": 1.0, "rewards/chosen": -0.14760322868824005, "rewards/margins": 0.042325813323259354, "rewards/rejected": -0.1899290531873703, "step": 366 }, { "epoch": 0.578408195429472, "grad_norm": 0.18183429539203644, "learning_rate": 3.5837739750550182e-06, "log_odds_chosen": 0.4922761619091034, "log_odds_ratio": -0.4857975244522095, "logits/chosen": -0.15072286128997803, "logits/rejected": -1.6683162450790405, "logps/chosen": -1.5550795793533325, "logps/rejected": -1.9653193950653076, "loss": 1.6554, "nll_loss": 1.6068187952041626, "rewards/accuracies": 1.0, "rewards/chosen": -0.15550795197486877, "rewards/margins": 0.04102398827672005, "rewards/rejected": -0.19653193652629852, "step": 367 }, { "epoch": 0.5799842395587076, "grad_norm": 0.1786677986383438, "learning_rate": 3.5804110957199977e-06, "log_odds_chosen": 0.5304347276687622, "log_odds_ratio": -0.4684828817844391, "logits/chosen": -0.031741030514240265, "logits/rejected": -1.2201794385910034, "logps/chosen": -1.533761739730835, "logps/rejected": -1.97342848777771, "loss": 1.6311, "nll_loss": 1.584226369857788, "rewards/accuracies": 1.0, "rewards/chosen": -0.15337617695331573, "rewards/margins": 0.04396669566631317, "rewards/rejected": -0.1973428726196289, "step": 368 }, { "epoch": 0.5815602836879432, "grad_norm": 0.19358626008033752, "learning_rate": 3.577036277134011e-06, "log_odds_chosen": 0.6033509373664856, "log_odds_ratio": -0.44030019640922546, "logits/chosen": -0.12189745157957077, "logits/rejected": -1.4489309787750244, "logps/chosen": -1.5189951658248901, "logps/rejected": -2.0174660682678223, "loss": 1.6305, "nll_loss": 1.5864982604980469, "rewards/accuracies": 1.0, "rewards/chosen": -0.1518995314836502, "rewards/margins": 0.04984709620475769, "rewards/rejected": -0.2017466127872467, "step": 369 }, { "epoch": 0.5831363278171788, "grad_norm": 0.20722126960754395, "learning_rate": 3.5736495447922e-06, "log_odds_chosen": 0.38122087717056274, "log_odds_ratio": -0.5253958106040955, "logits/chosen": -0.11854588240385056, "logits/rejected": -1.445725440979004, "logps/chosen": -1.643795371055603, "logps/rejected": -1.960480809211731, "loss": 1.7461, "nll_loss": 1.693605899810791, "rewards/accuracies": 1.0, "rewards/chosen": -0.1643795371055603, "rewards/margins": 0.031668562442064285, "rewards/rejected": -0.19604811072349548, "step": 370 }, { "epoch": 0.5847123719464145, "grad_norm": 0.1782001107931137, "learning_rate": 3.5702509242797096e-06, "log_odds_chosen": 0.7012959718704224, "log_odds_ratio": -0.41842737793922424, "logits/chosen": -0.13191047310829163, "logits/rejected": -1.4372180700302124, "logps/chosen": -1.4734841585159302, "logps/rejected": -2.0532171726226807, "loss": 1.5859, "nll_loss": 1.5440880060195923, "rewards/accuracies": 1.0, "rewards/chosen": -0.14734841883182526, "rewards/margins": 0.05797329545021057, "rewards/rejected": -0.20532171428203583, "step": 371 }, { "epoch": 0.5862884160756501, "grad_norm": 0.189020574092865, "learning_rate": 3.566840441271495e-06, "log_odds_chosen": 0.663593053817749, "log_odds_ratio": -0.43108314275741577, "logits/chosen": -0.09653455764055252, "logits/rejected": -1.4999427795410156, "logps/chosen": -1.4899102449417114, "logps/rejected": -2.0343017578125, "loss": 1.578, "nll_loss": 1.5348646640777588, "rewards/accuracies": 1.0, "rewards/chosen": -0.14899101853370667, "rewards/margins": 0.05443914607167244, "rewards/rejected": -0.2034301608800888, "step": 372 }, { "epoch": 0.5878644602048857, "grad_norm": 0.1943143755197525, "learning_rate": 3.5634181215321265e-06, "log_odds_chosen": 0.6287661790847778, "log_odds_ratio": -0.43060097098350525, "logits/chosen": -0.07774099707603455, "logits/rejected": -1.3174422979354858, "logps/chosen": -1.5436866283416748, "logps/rejected": -2.067762613296509, "loss": 1.6544, "nll_loss": 1.6113276481628418, "rewards/accuracies": 1.0, "rewards/chosen": -0.15436868369579315, "rewards/margins": 0.052407585084438324, "rewards/rejected": -0.20677624642848969, "step": 373 }, { "epoch": 0.5894405043341213, "grad_norm": 0.1871887594461441, "learning_rate": 3.5599839909155947e-06, "log_odds_chosen": 0.49737420678138733, "log_odds_ratio": -0.4843059182167053, "logits/chosen": -0.10240314900875092, "logits/rejected": -1.391071081161499, "logps/chosen": -1.5068795680999756, "logps/rejected": -1.9151724576950073, "loss": 1.6079, "nll_loss": 1.5594788789749146, "rewards/accuracies": 0.875, "rewards/chosen": -0.15068796277046204, "rewards/margins": 0.04082927852869034, "rewards/rejected": -0.19151723384857178, "step": 374 }, { "epoch": 0.5910165484633569, "grad_norm": 0.1926075965166092, "learning_rate": 3.556538075365116e-06, "log_odds_chosen": 0.5647552013397217, "log_odds_ratio": -0.4604189991950989, "logits/chosen": -0.05626612901687622, "logits/rejected": -1.3970637321472168, "logps/chosen": -1.5002104043960571, "logps/rejected": -1.962066411972046, "loss": 1.6008, "nll_loss": 1.5547971725463867, "rewards/accuracies": 1.0, "rewards/chosen": -0.1500210165977478, "rewards/margins": 0.046185605227947235, "rewards/rejected": -0.19620664417743683, "step": 375 }, { "epoch": 0.5925925925925926, "grad_norm": 0.20022441446781158, "learning_rate": 3.5530804009129367e-06, "log_odds_chosen": 0.5052685737609863, "log_odds_ratio": -0.4745987355709076, "logits/chosen": -0.11242527514696121, "logits/rejected": -1.191476583480835, "logps/chosen": -1.5387141704559326, "logps/rejected": -1.9558299779891968, "loss": 1.6266, "nll_loss": 1.5791561603546143, "rewards/accuracies": 1.0, "rewards/chosen": -0.15387141704559326, "rewards/margins": 0.041711580008268356, "rewards/rejected": -0.19558300077915192, "step": 376 }, { "epoch": 0.5941686367218282, "grad_norm": 0.1937384456396103, "learning_rate": 3.5496109936801368e-06, "log_odds_chosen": 0.49315646290779114, "log_odds_ratio": -0.48415568470954895, "logits/chosen": -0.17054055631160736, "logits/rejected": -1.4667962789535522, "logps/chosen": -1.5606714487075806, "logps/rejected": -1.9743766784667969, "loss": 1.6571, "nll_loss": 1.6086863279342651, "rewards/accuracies": 1.0, "rewards/chosen": -0.1560671329498291, "rewards/margins": 0.04137052595615387, "rewards/rejected": -0.19743765890598297, "step": 377 }, { "epoch": 0.5957446808510638, "grad_norm": 0.20795175433158875, "learning_rate": 3.546129879876429e-06, "log_odds_chosen": 0.3628261685371399, "log_odds_ratio": -0.5408048629760742, "logits/chosen": -0.05689922347664833, "logits/rejected": -1.130873203277588, "logps/chosen": -1.6209430694580078, "logps/rejected": -1.9227240085601807, "loss": 1.7149, "nll_loss": 1.6608681678771973, "rewards/accuracies": 0.875, "rewards/chosen": -0.1620943248271942, "rewards/margins": 0.03017808124423027, "rewards/rejected": -0.19227240979671478, "step": 378 }, { "epoch": 0.5973207249802994, "grad_norm": 0.19484388828277588, "learning_rate": 3.5426370857999662e-06, "log_odds_chosen": 0.3801731467247009, "log_odds_ratio": -0.5251167416572571, "logits/chosen": -0.10485132038593292, "logits/rejected": -1.2549301385879517, "logps/chosen": -1.6024380922317505, "logps/rejected": -1.9180117845535278, "loss": 1.7071, "nll_loss": 1.6545681953430176, "rewards/accuracies": 1.0, "rewards/chosen": -0.16024382412433624, "rewards/margins": 0.03155737742781639, "rewards/rejected": -0.19180117547512054, "step": 379 }, { "epoch": 0.598896769109535, "grad_norm": 0.19403113424777985, "learning_rate": 3.53913263783714e-06, "log_odds_chosen": 0.4171554744243622, "log_odds_ratio": -0.5190439820289612, "logits/chosen": -0.12759403884410858, "logits/rejected": -1.392691731452942, "logps/chosen": -1.555301308631897, "logps/rejected": -1.904205083847046, "loss": 1.6481, "nll_loss": 1.5961991548538208, "rewards/accuracies": 0.875, "rewards/chosen": -0.15553012490272522, "rewards/margins": 0.03489039093255997, "rewards/rejected": -0.1904205083847046, "step": 380 }, { "epoch": 0.6004728132387707, "grad_norm": 0.1852397322654724, "learning_rate": 3.53561656246238e-06, "log_odds_chosen": 0.6395785808563232, "log_odds_ratio": -0.42720136046409607, "logits/chosen": -0.2312593162059784, "logits/rejected": -1.6027368307113647, "logps/chosen": -1.5752849578857422, "logps/rejected": -2.1103007793426514, "loss": 1.6683, "nll_loss": 1.6255991458892822, "rewards/accuracies": 1.0, "rewards/chosen": -0.15752847492694855, "rewards/margins": 0.05350159481167793, "rewards/rejected": -0.21103009581565857, "step": 381 }, { "epoch": 0.6020488573680063, "grad_norm": 0.18580475449562073, "learning_rate": 3.532088886237956e-06, "log_odds_chosen": 0.539408802986145, "log_odds_ratio": -0.4628356993198395, "logits/chosen": -0.14560621976852417, "logits/rejected": -1.546494483947754, "logps/chosen": -1.6014196872711182, "logps/rejected": -2.0526225566864014, "loss": 1.6812, "nll_loss": 1.634964942932129, "rewards/accuracies": 1.0, "rewards/chosen": -0.1601419895887375, "rewards/margins": 0.045120254158973694, "rewards/rejected": -0.20526225864887238, "step": 382 }, { "epoch": 0.6036249014972419, "grad_norm": 0.17321372032165527, "learning_rate": 3.528549635813778e-06, "log_odds_chosen": 0.5498687028884888, "log_odds_ratio": -0.46246442198753357, "logits/chosen": -0.18676355481147766, "logits/rejected": -1.3866394758224487, "logps/chosen": -1.4379013776779175, "logps/rejected": -1.884263277053833, "loss": 1.5412, "nll_loss": 1.494981288909912, "rewards/accuracies": 1.0, "rewards/chosen": -0.143790140748024, "rewards/margins": 0.04463617503643036, "rewards/rejected": -0.18842631578445435, "step": 383 }, { "epoch": 0.6052009456264775, "grad_norm": 0.20354455709457397, "learning_rate": 3.524998837927192e-06, "log_odds_chosen": 0.587373673915863, "log_odds_ratio": -0.4441196024417877, "logits/chosen": -0.14221185445785522, "logits/rejected": -1.3197717666625977, "logps/chosen": -1.5544791221618652, "logps/rejected": -2.0435311794281006, "loss": 1.6329, "nll_loss": 1.5884504318237305, "rewards/accuracies": 1.0, "rewards/chosen": -0.15544790029525757, "rewards/margins": 0.04890521243214607, "rewards/rejected": -0.20435310900211334, "step": 384 }, { "epoch": 0.6067769897557131, "grad_norm": 0.1994301825761795, "learning_rate": 3.5214365194027797e-06, "log_odds_chosen": 0.5964666604995728, "log_odds_ratio": -0.44221487641334534, "logits/chosen": -0.15480360388755798, "logits/rejected": -1.4440878629684448, "logps/chosen": -1.4780986309051514, "logps/rejected": -1.9680850505828857, "loss": 1.57, "nll_loss": 1.5257560014724731, "rewards/accuracies": 1.0, "rewards/chosen": -0.14780986309051514, "rewards/margins": 0.048998646438121796, "rewards/rejected": -0.19680851697921753, "step": 385 }, { "epoch": 0.6083530338849488, "grad_norm": 0.21158069372177124, "learning_rate": 3.517862707152157e-06, "log_odds_chosen": 0.45025360584259033, "log_odds_ratio": -0.5009865164756775, "logits/chosen": -0.06984852999448776, "logits/rejected": -1.1637235879898071, "logps/chosen": -1.6143461465835571, "logps/rejected": -1.9897408485412598, "loss": 1.695, "nll_loss": 1.6448723077774048, "rewards/accuracies": 1.0, "rewards/chosen": -0.1614346206188202, "rewards/margins": 0.03753947466611862, "rewards/rejected": -0.1989741027355194, "step": 386 }, { "epoch": 0.6099290780141844, "grad_norm": 0.21827860176563263, "learning_rate": 3.5142774281737674e-06, "log_odds_chosen": 0.6315152645111084, "log_odds_ratio": -0.4288613796234131, "logits/chosen": -0.15267856419086456, "logits/rejected": -1.3205287456512451, "logps/chosen": -1.6036657094955444, "logps/rejected": -2.1343271732330322, "loss": 1.6804, "nll_loss": 1.6375137567520142, "rewards/accuracies": 1.0, "rewards/chosen": -0.16036657989025116, "rewards/margins": 0.053066130727529526, "rewards/rejected": -0.21343271434307098, "step": 387 }, { "epoch": 0.61150512214342, "grad_norm": 0.17510953545570374, "learning_rate": 3.5106807095526817e-06, "log_odds_chosen": 0.6356069445610046, "log_odds_ratio": -0.4359210133552551, "logits/chosen": -0.1717139482498169, "logits/rejected": -1.565706729888916, "logps/chosen": -1.5656462907791138, "logps/rejected": -2.093924045562744, "loss": 1.6578, "nll_loss": 1.6142207384109497, "rewards/accuracies": 1.0, "rewards/chosen": -0.1565646231174469, "rewards/margins": 0.05282779037952423, "rewards/rejected": -0.20939242839813232, "step": 388 }, { "epoch": 0.6130811662726556, "grad_norm": 0.18727731704711914, "learning_rate": 3.5070725784603905e-06, "log_odds_chosen": 0.537490963935852, "log_odds_ratio": -0.4669988751411438, "logits/chosen": -0.24123258888721466, "logits/rejected": -1.2403115034103394, "logps/chosen": -1.439449429512024, "logps/rejected": -1.8756340742111206, "loss": 1.5259, "nll_loss": 1.4791667461395264, "rewards/accuracies": 1.0, "rewards/chosen": -0.14394494891166687, "rewards/margins": 0.04361846297979355, "rewards/rejected": -0.18756340444087982, "step": 389 }, { "epoch": 0.6146572104018913, "grad_norm": 0.22596481442451477, "learning_rate": 3.503453062154602e-06, "log_odds_chosen": 0.4628780484199524, "log_odds_ratio": -0.49970224499702454, "logits/chosen": -0.1650674045085907, "logits/rejected": -1.2386726140975952, "logps/chosen": -1.602417230606079, "logps/rejected": -1.9950282573699951, "loss": 1.6994, "nll_loss": 1.6494615077972412, "rewards/accuracies": 0.875, "rewards/chosen": -0.1602417379617691, "rewards/margins": 0.0392610989511013, "rewards/rejected": -0.1995028257369995, "step": 390 }, { "epoch": 0.6162332545311269, "grad_norm": 0.19225400686264038, "learning_rate": 3.499822187979032e-06, "log_odds_chosen": 0.45917797088623047, "log_odds_ratio": -0.4991348385810852, "logits/chosen": -0.09038020670413971, "logits/rejected": -1.4409539699554443, "logps/chosen": -1.5594383478164673, "logps/rejected": -1.9403319358825684, "loss": 1.6617, "nll_loss": 1.6117753982543945, "rewards/accuracies": 0.875, "rewards/chosen": -0.1559438407421112, "rewards/margins": 0.03808935359120369, "rewards/rejected": -0.1940331906080246, "step": 391 }, { "epoch": 0.6178092986603625, "grad_norm": 0.1856825351715088, "learning_rate": 3.496179983363202e-06, "log_odds_chosen": 0.41265982389450073, "log_odds_ratio": -0.5119627118110657, "logits/chosen": -0.11190656572580338, "logits/rejected": -1.3574274778366089, "logps/chosen": -1.5668977499008179, "logps/rejected": -1.9084656238555908, "loss": 1.6514, "nll_loss": 1.6002510786056519, "rewards/accuracies": 1.0, "rewards/chosen": -0.15668979287147522, "rewards/margins": 0.034156784415245056, "rewards/rejected": -0.19084656238555908, "step": 392 }, { "epoch": 0.6193853427895981, "grad_norm": 0.20805980265140533, "learning_rate": 3.4925264758222268e-06, "log_odds_chosen": 0.6294342279434204, "log_odds_ratio": -0.43072307109832764, "logits/chosen": -0.16953016817569733, "logits/rejected": -1.2341816425323486, "logps/chosen": -1.5001755952835083, "logps/rejected": -2.0215494632720947, "loss": 1.5961, "nll_loss": 1.5530593395233154, "rewards/accuracies": 1.0, "rewards/chosen": -0.15001757442951202, "rewards/margins": 0.05213739350438118, "rewards/rejected": -0.2021549493074417, "step": 393 }, { "epoch": 0.6209613869188337, "grad_norm": 0.24518869817256927, "learning_rate": 3.488861692956611e-06, "log_odds_chosen": 0.5471794009208679, "log_odds_ratio": -0.4630282521247864, "logits/chosen": -0.19086423516273499, "logits/rejected": -1.339902639389038, "logps/chosen": -1.5402213335037231, "logps/rejected": -1.9952548742294312, "loss": 1.6296, "nll_loss": 1.5832523107528687, "rewards/accuracies": 1.0, "rewards/chosen": -0.15402214229106903, "rewards/margins": 0.04550333693623543, "rewards/rejected": -0.19952546060085297, "step": 394 }, { "epoch": 0.6225374310480694, "grad_norm": 0.2510071098804474, "learning_rate": 3.4851856624520394e-06, "log_odds_chosen": 0.5228825807571411, "log_odds_ratio": -0.46843814849853516, "logits/chosen": -0.13727201521396637, "logits/rejected": -1.3861788511276245, "logps/chosen": -1.572906494140625, "logps/rejected": -2.005622386932373, "loss": 1.6575, "nll_loss": 1.6106876134872437, "rewards/accuracies": 1.0, "rewards/chosen": -0.15729066729545593, "rewards/margins": 0.04327157884836197, "rewards/rejected": -0.2005622535943985, "step": 395 }, { "epoch": 0.624113475177305, "grad_norm": 0.20526158809661865, "learning_rate": 3.4814984120791664e-06, "log_odds_chosen": 0.5153719782829285, "log_odds_ratio": -0.4714086055755615, "logits/chosen": -0.13660681247711182, "logits/rejected": -1.2547574043273926, "logps/chosen": -1.5410287380218506, "logps/rejected": -1.964991807937622, "loss": 1.636, "nll_loss": 1.5889039039611816, "rewards/accuracies": 1.0, "rewards/chosen": -0.1541028618812561, "rewards/margins": 0.04239630699157715, "rewards/rejected": -0.19649919867515564, "step": 396 }, { "epoch": 0.6256895193065406, "grad_norm": 0.18158891797065735, "learning_rate": 3.477799969693407e-06, "log_odds_chosen": 0.47999995946884155, "log_odds_ratio": -0.490679532289505, "logits/chosen": -0.09590557217597961, "logits/rejected": -1.3949567079544067, "logps/chosen": -1.541025996208191, "logps/rejected": -1.9312851428985596, "loss": 1.6301, "nll_loss": 1.5809931755065918, "rewards/accuracies": 1.0, "rewards/chosen": -0.15410259366035461, "rewards/margins": 0.039025940001010895, "rewards/rejected": -0.1931285411119461, "step": 397 }, { "epoch": 0.6272655634357762, "grad_norm": 0.2004203349351883, "learning_rate": 3.474090363234728e-06, "log_odds_chosen": 0.766968309879303, "log_odds_ratio": -0.39646148681640625, "logits/chosen": -0.2562759518623352, "logits/rejected": -1.4337137937545776, "logps/chosen": -1.4752384424209595, "logps/rejected": -2.112752914428711, "loss": 1.5586, "nll_loss": 1.5189671516418457, "rewards/accuracies": 1.0, "rewards/chosen": -0.14752383530139923, "rewards/margins": 0.0637514516711235, "rewards/rejected": -0.21127529442310333, "step": 398 }, { "epoch": 0.6288416075650118, "grad_norm": 0.19742122292518616, "learning_rate": 3.4703696207274325e-06, "log_odds_chosen": 0.5179776549339294, "log_odds_ratio": -0.474680095911026, "logits/chosen": -0.14589636027812958, "logits/rejected": -1.2598799467086792, "logps/chosen": -1.556343913078308, "logps/rejected": -1.9830336570739746, "loss": 1.6301, "nll_loss": 1.5826400518417358, "rewards/accuracies": 1.0, "rewards/chosen": -0.15563438832759857, "rewards/margins": 0.042668960988521576, "rewards/rejected": -0.19830335676670074, "step": 399 }, { "epoch": 0.6304176516942475, "grad_norm": 0.18192359805107117, "learning_rate": 3.4666377702799545e-06, "log_odds_chosen": 0.5299201011657715, "log_odds_ratio": -0.4676084518432617, "logits/chosen": -0.24987564980983734, "logits/rejected": -1.5932269096374512, "logps/chosen": -1.46696138381958, "logps/rejected": -1.8967633247375488, "loss": 1.568, "nll_loss": 1.521193265914917, "rewards/accuracies": 1.0, "rewards/chosen": -0.14669615030288696, "rewards/margins": 0.042980194091796875, "rewards/rejected": -0.18967632949352264, "step": 400 }, { "epoch": 0.6319936958234831, "grad_norm": 0.19527654349803925, "learning_rate": 3.4628948400846417e-06, "log_odds_chosen": 0.6314361691474915, "log_odds_ratio": -0.4365447759628296, "logits/chosen": -0.1739773005247116, "logits/rejected": -1.4769560098648071, "logps/chosen": -1.5479360818862915, "logps/rejected": -2.0758628845214844, "loss": 1.637, "nll_loss": 1.5933518409729004, "rewards/accuracies": 1.0, "rewards/chosen": -0.1547936052083969, "rewards/margins": 0.05279267579317093, "rewards/rejected": -0.20758628845214844, "step": 401 }, { "epoch": 0.6335697399527187, "grad_norm": 0.2035856693983078, "learning_rate": 3.4591408584175426e-06, "log_odds_chosen": 0.5278856158256531, "log_odds_ratio": -0.4733438193798065, "logits/chosen": -0.18014143407344818, "logits/rejected": -1.3621004819869995, "logps/chosen": -1.5849114656448364, "logps/rejected": -2.027892589569092, "loss": 1.6766, "nll_loss": 1.6292688846588135, "rewards/accuracies": 1.0, "rewards/chosen": -0.1584911346435547, "rewards/margins": 0.044298142194747925, "rewards/rejected": -0.2027892768383026, "step": 402 }, { "epoch": 0.6351457840819543, "grad_norm": 0.19495651125907898, "learning_rate": 3.4553758536381974e-06, "log_odds_chosen": 0.5365288853645325, "log_odds_ratio": -0.4719133973121643, "logits/chosen": -0.1622263491153717, "logits/rejected": -1.3355566263198853, "logps/chosen": -1.5399608612060547, "logps/rejected": -1.984842300415039, "loss": 1.6272, "nll_loss": 1.580039381980896, "rewards/accuracies": 1.0, "rewards/chosen": -0.153996080160141, "rewards/margins": 0.04448813945055008, "rewards/rejected": -0.19848422706127167, "step": 403 }, { "epoch": 0.6367218282111899, "grad_norm": 0.20605534315109253, "learning_rate": 3.451599854189418e-06, "log_odds_chosen": 0.5477701425552368, "log_odds_ratio": -0.46024447679519653, "logits/chosen": -0.13585253059864044, "logits/rejected": -1.0337761640548706, "logps/chosen": -1.54197359085083, "logps/rejected": -1.995017647743225, "loss": 1.6418, "nll_loss": 1.5958125591278076, "rewards/accuracies": 1.0, "rewards/chosen": -0.15419737994670868, "rewards/margins": 0.045304395258426666, "rewards/rejected": -0.19950176775455475, "step": 404 }, { "epoch": 0.6382978723404256, "grad_norm": 0.19387783110141754, "learning_rate": 3.4478128885970765e-06, "log_odds_chosen": 0.6080644130706787, "log_odds_ratio": -0.4360560178756714, "logits/chosen": -0.179177924990654, "logits/rejected": -1.3908740282058716, "logps/chosen": -1.604873776435852, "logps/rejected": -2.1161084175109863, "loss": 1.6909, "nll_loss": 1.6472656726837158, "rewards/accuracies": 1.0, "rewards/chosen": -0.16048739850521088, "rewards/margins": 0.051123470067977905, "rewards/rejected": -0.21161086857318878, "step": 405 }, { "epoch": 0.6398739164696612, "grad_norm": 0.190069779753685, "learning_rate": 3.44401498546989e-06, "log_odds_chosen": 0.42473104596138, "log_odds_ratio": -0.5118191242218018, "logits/chosen": -0.1188054233789444, "logits/rejected": -1.49931001663208, "logps/chosen": -1.5570849180221558, "logps/rejected": -1.9073197841644287, "loss": 1.6278, "nll_loss": 1.5765697956085205, "rewards/accuracies": 1.0, "rewards/chosen": -0.15570849180221558, "rewards/margins": 0.035023488104343414, "rewards/rejected": -0.1907319873571396, "step": 406 }, { "epoch": 0.6414499605988968, "grad_norm": 0.19107168912887573, "learning_rate": 3.4402061734992005e-06, "log_odds_chosen": 0.5350978374481201, "log_odds_ratio": -0.46566373109817505, "logits/chosen": -0.15249407291412354, "logits/rejected": -1.402602195739746, "logps/chosen": -1.5002124309539795, "logps/rejected": -1.9390612840652466, "loss": 1.5941, "nll_loss": 1.5475372076034546, "rewards/accuracies": 1.0, "rewards/chosen": -0.1500212401151657, "rewards/margins": 0.043884895741939545, "rewards/rejected": -0.19390614330768585, "step": 407 }, { "epoch": 0.6430260047281324, "grad_norm": 0.2040640264749527, "learning_rate": 3.4363864814587656e-06, "log_odds_chosen": 0.49293413758277893, "log_odds_ratio": -0.48022550344467163, "logits/chosen": -0.2637179493904114, "logits/rejected": -0.9641510248184204, "logps/chosen": -1.475205421447754, "logps/rejected": -1.8777835369110107, "loss": 1.5716, "nll_loss": 1.5235683917999268, "rewards/accuracies": 1.0, "rewards/chosen": -0.1475205421447754, "rewards/margins": 0.04025781527161598, "rewards/rejected": -0.18777838349342346, "step": 408 }, { "epoch": 0.644602048857368, "grad_norm": 0.20765496790409088, "learning_rate": 3.4325559382045343e-06, "log_odds_chosen": 0.4098273515701294, "log_odds_ratio": -0.5131589770317078, "logits/chosen": -0.23338492214679718, "logits/rejected": -1.4860385656356812, "logps/chosen": -1.4819140434265137, "logps/rejected": -1.8101638555526733, "loss": 1.5729, "nll_loss": 1.5215588808059692, "rewards/accuracies": 1.0, "rewards/chosen": -0.1481914222240448, "rewards/margins": 0.03282497450709343, "rewards/rejected": -0.18101638555526733, "step": 409 }, { "epoch": 0.6461780929866037, "grad_norm": 0.19961215555667877, "learning_rate": 3.4287145726744295e-06, "log_odds_chosen": 0.5392709374427795, "log_odds_ratio": -0.4628680646419525, "logits/chosen": -0.26481306552886963, "logits/rejected": -1.3813103437423706, "logps/chosen": -1.4664371013641357, "logps/rejected": -1.904196858406067, "loss": 1.5695, "nll_loss": 1.523188829421997, "rewards/accuracies": 1.0, "rewards/chosen": -0.146643728017807, "rewards/margins": 0.04377596825361252, "rewards/rejected": -0.19041968882083893, "step": 410 }, { "epoch": 0.6477541371158393, "grad_norm": 0.20400294661521912, "learning_rate": 3.4248624138881335e-06, "log_odds_chosen": 0.42437130212783813, "log_odds_ratio": -0.5254440307617188, "logits/chosen": -0.1493159532546997, "logits/rejected": -1.1513835191726685, "logps/chosen": -1.6029716730117798, "logps/rejected": -1.9682285785675049, "loss": 1.6887, "nll_loss": 1.6361618041992188, "rewards/accuracies": 0.875, "rewards/chosen": -0.16029717028141022, "rewards/margins": 0.03652569651603699, "rewards/rejected": -0.1968228816986084, "step": 411 }, { "epoch": 0.6493301812450749, "grad_norm": 0.20339736342430115, "learning_rate": 3.4209994909468672e-06, "log_odds_chosen": 0.6561870574951172, "log_odds_ratio": -0.42825421690940857, "logits/chosen": -0.28305602073669434, "logits/rejected": -1.04371976852417, "logps/chosen": -1.5072612762451172, "logps/rejected": -2.0518369674682617, "loss": 1.5933, "nll_loss": 1.5505071878433228, "rewards/accuracies": 1.0, "rewards/chosen": -0.15072615444660187, "rewards/margins": 0.054457567632198334, "rewards/rejected": -0.2051836997270584, "step": 412 }, { "epoch": 0.6509062253743105, "grad_norm": 0.19428101181983948, "learning_rate": 3.4171258330331667e-06, "log_odds_chosen": 0.43498852849006653, "log_odds_ratio": -0.5014755129814148, "logits/chosen": -0.12146922200918198, "logits/rejected": -1.1085965633392334, "logps/chosen": -1.593203067779541, "logps/rejected": -1.9516777992248535, "loss": 1.6774, "nll_loss": 1.627271056175232, "rewards/accuracies": 1.0, "rewards/chosen": -0.15932030975818634, "rewards/margins": 0.03584747388958931, "rewards/rejected": -0.19516779482364655, "step": 413 }, { "epoch": 0.6524822695035462, "grad_norm": 0.19590893387794495, "learning_rate": 3.4132414694106684e-06, "log_odds_chosen": 0.6712747812271118, "log_odds_ratio": -0.4207912087440491, "logits/chosen": -0.16028505563735962, "logits/rejected": -1.4717087745666504, "logps/chosen": -1.498020052909851, "logps/rejected": -2.0545201301574707, "loss": 1.5811, "nll_loss": 1.5389834642410278, "rewards/accuracies": 1.0, "rewards/chosen": -0.14980201423168182, "rewards/margins": 0.055649999529123306, "rewards/rejected": -0.20545199513435364, "step": 414 }, { "epoch": 0.6540583136327817, "grad_norm": 0.20174475014209747, "learning_rate": 3.409346429423884e-06, "log_odds_chosen": 0.4537242650985718, "log_odds_ratio": -0.4941532015800476, "logits/chosen": -0.010996952652931213, "logits/rejected": -1.3585155010223389, "logps/chosen": -1.5672545433044434, "logps/rejected": -1.9409823417663574, "loss": 1.6455, "nll_loss": 1.5961326360702515, "rewards/accuracies": 1.0, "rewards/chosen": -0.1567254513502121, "rewards/margins": 0.03737279772758484, "rewards/rejected": -0.19409826397895813, "step": 415 }, { "epoch": 0.6556343577620173, "grad_norm": 0.19159899652004242, "learning_rate": 3.40544074249798e-06, "log_odds_chosen": 0.6167906522750854, "log_odds_ratio": -0.4354286193847656, "logits/chosen": -0.18299099802970886, "logits/rejected": -1.46063232421875, "logps/chosen": -1.5565929412841797, "logps/rejected": -2.0718507766723633, "loss": 1.6407, "nll_loss": 1.5971307754516602, "rewards/accuracies": 1.0, "rewards/chosen": -0.15565930306911469, "rewards/margins": 0.0515257902443409, "rewards/rejected": -0.20718510448932648, "step": 416 }, { "epoch": 0.6572104018912529, "grad_norm": 0.20851117372512817, "learning_rate": 3.401524438138556e-06, "log_odds_chosen": 0.45928677916526794, "log_odds_ratio": -0.49514341354370117, "logits/chosen": -0.13402409851551056, "logits/rejected": -1.1145453453063965, "logps/chosen": -1.5616354942321777, "logps/rejected": -1.9419609308242798, "loss": 1.6266, "nll_loss": 1.5770922899246216, "rewards/accuracies": 1.0, "rewards/chosen": -0.1561635434627533, "rewards/margins": 0.03803255409002304, "rewards/rejected": -0.19419609010219574, "step": 417 }, { "epoch": 0.6587864460204885, "grad_norm": 0.19792215526103973, "learning_rate": 3.39759754593142e-06, "log_odds_chosen": 0.4961496889591217, "log_odds_ratio": -0.4824981689453125, "logits/chosen": -0.1806434839963913, "logits/rejected": -1.2893097400665283, "logps/chosen": -1.6215399503707886, "logps/rejected": -2.039522409439087, "loss": 1.6951, "nll_loss": 1.646848440170288, "rewards/accuracies": 1.0, "rewards/chosen": -0.16215398907661438, "rewards/margins": 0.04179824888706207, "rewards/rejected": -0.20395225286483765, "step": 418 }, { "epoch": 0.6603624901497241, "grad_norm": 0.2001093477010727, "learning_rate": 3.3936600955423683e-06, "log_odds_chosen": 0.7080036997795105, "log_odds_ratio": -0.4158882200717926, "logits/chosen": -0.10217966884374619, "logits/rejected": -1.1086851358413696, "logps/chosen": -1.5155251026153564, "logps/rejected": -2.1040117740631104, "loss": 1.603, "nll_loss": 1.5614415407180786, "rewards/accuracies": 1.0, "rewards/chosen": -0.15155251324176788, "rewards/margins": 0.05884869024157524, "rewards/rejected": -0.21040120720863342, "step": 419 }, { "epoch": 0.6619385342789598, "grad_norm": 0.1921870857477188, "learning_rate": 3.3897121167169573e-06, "log_odds_chosen": 0.39313969016075134, "log_odds_ratio": -0.5258656144142151, "logits/chosen": -0.22022226452827454, "logits/rejected": -1.3231024742126465, "logps/chosen": -1.4893752336502075, "logps/rejected": -1.808854341506958, "loss": 1.5882, "nll_loss": 1.535656452178955, "rewards/accuracies": 0.875, "rewards/chosen": -0.14893752336502075, "rewards/margins": 0.03194789960980415, "rewards/rejected": -0.1808854341506958, "step": 420 }, { "epoch": 0.6635145784081954, "grad_norm": 0.20021358132362366, "learning_rate": 3.38575363928028e-06, "log_odds_chosen": 0.6059412360191345, "log_odds_ratio": -0.4419128894805908, "logits/chosen": -0.14556629955768585, "logits/rejected": -1.1496225595474243, "logps/chosen": -1.4978344440460205, "logps/rejected": -1.997003197669983, "loss": 1.5713, "nll_loss": 1.52711820602417, "rewards/accuracies": 1.0, "rewards/chosen": -0.1497834473848343, "rewards/margins": 0.04991687089204788, "rewards/rejected": -0.19970029592514038, "step": 421 }, { "epoch": 0.665090622537431, "grad_norm": 0.19506679475307465, "learning_rate": 3.3817846931367452e-06, "log_odds_chosen": 0.415115624666214, "log_odds_ratio": -0.5117157101631165, "logits/chosen": -0.22510936856269836, "logits/rejected": -1.2167201042175293, "logps/chosen": -1.4807124137878418, "logps/rejected": -1.8138527870178223, "loss": 1.5701, "nll_loss": 1.5189181566238403, "rewards/accuracies": 1.0, "rewards/chosen": -0.1480712592601776, "rewards/margins": 0.033314019441604614, "rewards/rejected": -0.18138529360294342, "step": 422 }, { "epoch": 0.6666666666666666, "grad_norm": 0.19481457769870758, "learning_rate": 3.377805308269844e-06, "log_odds_chosen": 0.6872407793998718, "log_odds_ratio": -0.42233163118362427, "logits/chosen": -0.11342249810695648, "logits/rejected": -1.5072932243347168, "logps/chosen": -1.5372941493988037, "logps/rejected": -2.1134226322174072, "loss": 1.6132, "nll_loss": 1.5709176063537598, "rewards/accuracies": 1.0, "rewards/chosen": -0.15372943878173828, "rewards/margins": 0.05761285126209259, "rewards/rejected": -0.21134227514266968, "step": 423 }, { "epoch": 0.6682427107959023, "grad_norm": 0.18579819798469543, "learning_rate": 3.3738155147419275e-06, "log_odds_chosen": 0.6220219135284424, "log_odds_ratio": -0.44024914503097534, "logits/chosen": -0.235686257481575, "logits/rejected": -1.2404701709747314, "logps/chosen": -1.5340425968170166, "logps/rejected": -2.0529518127441406, "loss": 1.5954, "nll_loss": 1.5513544082641602, "rewards/accuracies": 1.0, "rewards/chosen": -0.15340426564216614, "rewards/margins": 0.05189090967178345, "rewards/rejected": -0.2052951604127884, "step": 424 }, { "epoch": 0.6698187549251379, "grad_norm": 0.19320693612098694, "learning_rate": 3.3698153426939824e-06, "log_odds_chosen": 0.7084161639213562, "log_odds_ratio": -0.4107830226421356, "logits/chosen": -0.2280699759721756, "logits/rejected": -1.0708644390106201, "logps/chosen": -1.463280439376831, "logps/rejected": -2.0446677207946777, "loss": 1.5378, "nll_loss": 1.4967448711395264, "rewards/accuracies": 1.0, "rewards/chosen": -0.14632803201675415, "rewards/margins": 0.05813872069120407, "rewards/rejected": -0.2044667750597, "step": 425 }, { "epoch": 0.6713947990543735, "grad_norm": 0.20736993849277496, "learning_rate": 3.3658048223453954e-06, "log_odds_chosen": 0.662844717502594, "log_odds_ratio": -0.4233693480491638, "logits/chosen": -0.1891903281211853, "logits/rejected": -1.40634024143219, "logps/chosen": -1.6040951013565063, "logps/rejected": -2.1638996601104736, "loss": 1.6514, "nll_loss": 1.6090670824050903, "rewards/accuracies": 1.0, "rewards/chosen": -0.16040951013565063, "rewards/margins": 0.05598045140504837, "rewards/rejected": -0.2163899689912796, "step": 426 }, { "epoch": 0.6729708431836091, "grad_norm": 0.1907954066991806, "learning_rate": 3.3617839839937337e-06, "log_odds_chosen": 0.6645872592926025, "log_odds_ratio": -0.4254325330257416, "logits/chosen": -0.11419974267482758, "logits/rejected": -1.4858274459838867, "logps/chosen": -1.5667697191238403, "logps/rejected": -2.1278765201568604, "loss": 1.6334, "nll_loss": 1.5908530950546265, "rewards/accuracies": 1.0, "rewards/chosen": -0.15667694807052612, "rewards/margins": 0.056110695004463196, "rewards/rejected": -0.2127876579761505, "step": 427 }, { "epoch": 0.6745468873128447, "grad_norm": 0.20678722858428955, "learning_rate": 3.3577528580145107e-06, "log_odds_chosen": 0.3481862545013428, "log_odds_ratio": -0.5371396541595459, "logits/chosen": -0.14534202218055725, "logits/rejected": -1.29691481590271, "logps/chosen": -1.585597276687622, "logps/rejected": -1.8742812871932983, "loss": 1.6676, "nll_loss": 1.6138746738433838, "rewards/accuracies": 1.0, "rewards/chosen": -0.15855972468852997, "rewards/margins": 0.028868405148386955, "rewards/rejected": -0.18742814660072327, "step": 428 }, { "epoch": 0.6761229314420804, "grad_norm": 0.19353458285331726, "learning_rate": 3.353711474860956e-06, "log_odds_chosen": 0.5981341600418091, "log_odds_ratio": -0.45493775606155396, "logits/chosen": -0.1791481226682663, "logits/rejected": -1.231849193572998, "logps/chosen": -1.5348036289215088, "logps/rejected": -2.0394744873046875, "loss": 1.597, "nll_loss": 1.5514580011367798, "rewards/accuracies": 1.0, "rewards/chosen": -0.15348035097122192, "rewards/margins": 0.05046708881855011, "rewards/rejected": -0.20394745469093323, "step": 429 }, { "epoch": 0.677698975571316, "grad_norm": 0.18463017046451569, "learning_rate": 3.3496598650637916e-06, "log_odds_chosen": 0.569009006023407, "log_odds_ratio": -0.45516982674598694, "logits/chosen": -0.20293018221855164, "logits/rejected": -1.1590790748596191, "logps/chosen": -1.435080647468567, "logps/rejected": -1.8941256999969482, "loss": 1.5186, "nll_loss": 1.4731093645095825, "rewards/accuracies": 1.0, "rewards/chosen": -0.14350807666778564, "rewards/margins": 0.0459044985473156, "rewards/rejected": -0.18941256403923035, "step": 430 }, { "epoch": 0.6792750197005516, "grad_norm": 0.18710088729858398, "learning_rate": 3.3455980592309923e-06, "log_odds_chosen": 0.619581401348114, "log_odds_ratio": -0.4386385977268219, "logits/chosen": -0.23650380969047546, "logits/rejected": -1.2405811548233032, "logps/chosen": -1.4119963645935059, "logps/rejected": -1.9160082340240479, "loss": 1.5042, "nll_loss": 1.460310459136963, "rewards/accuracies": 1.0, "rewards/chosen": -0.14119963347911835, "rewards/margins": 0.05040118098258972, "rewards/rejected": -0.19160079956054688, "step": 431 }, { "epoch": 0.6808510638297872, "grad_norm": 0.1992560178041458, "learning_rate": 3.341526088047562e-06, "log_odds_chosen": 0.5567487478256226, "log_odds_ratio": -0.4619826078414917, "logits/chosen": -0.17194384336471558, "logits/rejected": -1.293556571006775, "logps/chosen": -1.6019841432571411, "logps/rejected": -2.070882558822632, "loss": 1.662, "nll_loss": 1.6158294677734375, "rewards/accuracies": 1.0, "rewards/chosen": -0.1601984202861786, "rewards/margins": 0.046889837831258774, "rewards/rejected": -0.20708826184272766, "step": 432 }, { "epoch": 0.6824271079590228, "grad_norm": 0.226112961769104, "learning_rate": 3.3374439822752972e-06, "log_odds_chosen": 0.44421204924583435, "log_odds_ratio": -0.49906378984451294, "logits/chosen": -0.12680430710315704, "logits/rejected": -1.1431465148925781, "logps/chosen": -1.5594537258148193, "logps/rejected": -1.9281821250915527, "loss": 1.649, "nll_loss": 1.5991100072860718, "rewards/accuracies": 1.0, "rewards/chosen": -0.15594536066055298, "rewards/margins": 0.036872848868370056, "rewards/rejected": -0.19281822443008423, "step": 433 }, { "epoch": 0.6840031520882585, "grad_norm": 0.207797572016716, "learning_rate": 3.333351772752559e-06, "log_odds_chosen": 0.5869032144546509, "log_odds_ratio": -0.4558618366718292, "logits/chosen": -0.23912350833415985, "logits/rejected": -1.287145972251892, "logps/chosen": -1.6291857957839966, "logps/rejected": -2.120633602142334, "loss": 1.6932, "nll_loss": 1.6475987434387207, "rewards/accuracies": 1.0, "rewards/chosen": -0.1629185825586319, "rewards/margins": 0.049144770950078964, "rewards/rejected": -0.21206337213516235, "step": 434 }, { "epoch": 0.6855791962174941, "grad_norm": 0.20063838362693787, "learning_rate": 3.3292494903940338e-06, "log_odds_chosen": 0.7061201333999634, "log_odds_ratio": -0.40912342071533203, "logits/chosen": -0.1544758826494217, "logits/rejected": -1.3016716241836548, "logps/chosen": -1.5344663858413696, "logps/rejected": -2.1211295127868652, "loss": 1.6071, "nll_loss": 1.5661424398422241, "rewards/accuracies": 1.0, "rewards/chosen": -0.15344664454460144, "rewards/margins": 0.05866629630327225, "rewards/rejected": -0.2121129333972931, "step": 435 }, { "epoch": 0.6871552403467297, "grad_norm": 0.1838790476322174, "learning_rate": 3.3251371661905063e-06, "log_odds_chosen": 0.6065340638160706, "log_odds_ratio": -0.44656994938850403, "logits/chosen": -0.1865832805633545, "logits/rejected": -1.140378713607788, "logps/chosen": -1.3614064455032349, "logps/rejected": -1.8547477722167969, "loss": 1.4488, "nll_loss": 1.4041305780410767, "rewards/accuracies": 1.0, "rewards/chosen": -0.1361406445503235, "rewards/margins": 0.04933411255478859, "rewards/rejected": -0.18547475337982178, "step": 436 }, { "epoch": 0.6887312844759653, "grad_norm": 0.21491163969039917, "learning_rate": 3.321014831208622e-06, "log_odds_chosen": 0.5981911420822144, "log_odds_ratio": -0.44269564747810364, "logits/chosen": -0.19441677629947662, "logits/rejected": -1.1200268268585205, "logps/chosen": -1.4213942289352417, "logps/rejected": -1.9019947052001953, "loss": 1.5169, "nll_loss": 1.4725940227508545, "rewards/accuracies": 1.0, "rewards/chosen": -0.14213941991329193, "rewards/margins": 0.04806005209684372, "rewards/rejected": -0.19019947946071625, "step": 437 }, { "epoch": 0.6903073286052009, "grad_norm": 0.2163185477256775, "learning_rate": 3.316882516590652e-06, "log_odds_chosen": 0.6079857349395752, "log_odds_ratio": -0.43692946434020996, "logits/chosen": -0.18331696093082428, "logits/rejected": -1.3628792762756348, "logps/chosen": -1.4688149690628052, "logps/rejected": -1.966677188873291, "loss": 1.5508, "nll_loss": 1.5071358680725098, "rewards/accuracies": 1.0, "rewards/chosen": -0.14688150584697723, "rewards/margins": 0.04978622496128082, "rewards/rejected": -0.19666773080825806, "step": 438 }, { "epoch": 0.6918833727344366, "grad_norm": 0.21325580775737762, "learning_rate": 3.31274025355426e-06, "log_odds_chosen": 0.5819729566574097, "log_odds_ratio": -0.4491526484489441, "logits/chosen": -0.22322604060173035, "logits/rejected": -1.189731478691101, "logps/chosen": -1.5592323541641235, "logps/rejected": -2.043937921524048, "loss": 1.6116, "nll_loss": 1.5666409730911255, "rewards/accuracies": 1.0, "rewards/chosen": -0.15592323243618011, "rewards/margins": 0.04847054183483124, "rewards/rejected": -0.20439377427101135, "step": 439 }, { "epoch": 0.6934594168636722, "grad_norm": 0.20833250880241394, "learning_rate": 3.308588073392265e-06, "log_odds_chosen": 0.5521525144577026, "log_odds_ratio": -0.46361684799194336, "logits/chosen": -0.2142220437526703, "logits/rejected": -1.1978386640548706, "logps/chosen": -1.5905332565307617, "logps/rejected": -2.054898738861084, "loss": 1.6463, "nll_loss": 1.599968433380127, "rewards/accuracies": 1.0, "rewards/chosen": -0.15905332565307617, "rewards/margins": 0.046436551958322525, "rewards/rejected": -0.2054898738861084, "step": 440 }, { "epoch": 0.6950354609929078, "grad_norm": 0.19477160274982452, "learning_rate": 3.3044260074724035e-06, "log_odds_chosen": 0.6352304220199585, "log_odds_ratio": -0.43725699186325073, "logits/chosen": -0.20722348988056183, "logits/rejected": -1.4436430931091309, "logps/chosen": -1.5010461807250977, "logps/rejected": -2.0235297679901123, "loss": 1.5782, "nll_loss": 1.5344798564910889, "rewards/accuracies": 1.0, "rewards/chosen": -0.15010464191436768, "rewards/margins": 0.05224834755063057, "rewards/rejected": -0.20235297083854675, "step": 441 }, { "epoch": 0.6966115051221434, "grad_norm": 0.18534111976623535, "learning_rate": 3.300254087237097e-06, "log_odds_chosen": 0.5580976605415344, "log_odds_ratio": -0.457084059715271, "logits/chosen": -0.16514423489570618, "logits/rejected": -1.3200604915618896, "logps/chosen": -1.3929381370544434, "logps/rejected": -1.8415474891662598, "loss": 1.4993, "nll_loss": 1.4535483121871948, "rewards/accuracies": 1.0, "rewards/chosen": -0.1392938196659088, "rewards/margins": 0.044860921800136566, "rewards/rejected": -0.18415474891662598, "step": 442 }, { "epoch": 0.698187549251379, "grad_norm": 0.26748111844062805, "learning_rate": 3.2960723442032105e-06, "log_odds_chosen": 0.7100386619567871, "log_odds_ratio": -0.4068644642829895, "logits/chosen": -0.23274515569210052, "logits/rejected": -1.637979507446289, "logps/chosen": -1.590896725654602, "logps/rejected": -2.1930923461914062, "loss": 1.6465, "nll_loss": 1.6058528423309326, "rewards/accuracies": 1.0, "rewards/chosen": -0.15908968448638916, "rewards/margins": 0.06021953374147415, "rewards/rejected": -0.21930919587612152, "step": 443 }, { "epoch": 0.6997635933806147, "grad_norm": 0.1990683227777481, "learning_rate": 3.291880809961814e-06, "log_odds_chosen": 0.6279516220092773, "log_odds_ratio": -0.4397643506526947, "logits/chosen": -0.15238967537879944, "logits/rejected": -1.2192305326461792, "logps/chosen": -1.5531084537506104, "logps/rejected": -2.074535369873047, "loss": 1.6241, "nll_loss": 1.5800902843475342, "rewards/accuracies": 1.0, "rewards/chosen": -0.15531083941459656, "rewards/margins": 0.0521426796913147, "rewards/rejected": -0.20745351910591125, "step": 444 }, { "epoch": 0.7013396375098503, "grad_norm": 0.19820798933506012, "learning_rate": 3.2876795161779473e-06, "log_odds_chosen": 0.7250336408615112, "log_odds_ratio": -0.3975331783294678, "logits/chosen": -0.18375059962272644, "logits/rejected": -1.543222188949585, "logps/chosen": -1.5645023584365845, "logps/rejected": -2.1766436100006104, "loss": 1.6162, "nll_loss": 1.5764946937561035, "rewards/accuracies": 1.0, "rewards/chosen": -0.15645024180412292, "rewards/margins": 0.0612141489982605, "rewards/rejected": -0.21766439080238342, "step": 445 }, { "epoch": 0.7029156816390859, "grad_norm": 0.19689838588237762, "learning_rate": 3.2834684945903776e-06, "log_odds_chosen": 0.5597304105758667, "log_odds_ratio": -0.45593225955963135, "logits/chosen": -0.2338670790195465, "logits/rejected": -1.2486504316329956, "logps/chosen": -1.4915810823440552, "logps/rejected": -1.9466543197631836, "loss": 1.572, "nll_loss": 1.5264508724212646, "rewards/accuracies": 1.0, "rewards/chosen": -0.14915812015533447, "rewards/margins": 0.045507319271564484, "rewards/rejected": -0.19466543197631836, "step": 446 }, { "epoch": 0.7044917257683215, "grad_norm": 0.2056231051683426, "learning_rate": 3.2792477770113624e-06, "log_odds_chosen": 0.5060315728187561, "log_odds_ratio": -0.47748908400535583, "logits/chosen": -0.32436949014663696, "logits/rejected": -1.376452922821045, "logps/chosen": -1.5770741701126099, "logps/rejected": -1.9976955652236938, "loss": 1.6348, "nll_loss": 1.5870327949523926, "rewards/accuracies": 1.0, "rewards/chosen": -0.15770742297172546, "rewards/margins": 0.04206214100122452, "rewards/rejected": -0.19976955652236938, "step": 447 }, { "epoch": 0.7060677698975572, "grad_norm": 0.2036747932434082, "learning_rate": 3.275017395326407e-06, "log_odds_chosen": 0.4934311509132385, "log_odds_ratio": -0.48520928621292114, "logits/chosen": -0.13775332272052765, "logits/rejected": -1.1801694631576538, "logps/chosen": -1.4680533409118652, "logps/rejected": -1.8637882471084595, "loss": 1.5528, "nll_loss": 1.5042613744735718, "rewards/accuracies": 0.875, "rewards/chosen": -0.14680534601211548, "rewards/margins": 0.039573490619659424, "rewards/rejected": -0.1863788366317749, "step": 448 }, { "epoch": 0.7076438140267928, "grad_norm": 0.1903351992368698, "learning_rate": 3.2707773814940244e-06, "log_odds_chosen": 0.6554431915283203, "log_odds_ratio": -0.42446404695510864, "logits/chosen": -0.15731389820575714, "logits/rejected": -1.3429402112960815, "logps/chosen": -1.4967751502990723, "logps/rejected": -2.0305981636047363, "loss": 1.5792, "nll_loss": 1.5367555618286133, "rewards/accuracies": 1.0, "rewards/chosen": -0.14967751502990723, "rewards/margins": 0.05338229984045029, "rewards/rejected": -0.20305980741977692, "step": 449 }, { "epoch": 0.7092198581560284, "grad_norm": 0.18632066249847412, "learning_rate": 3.2665277675454935e-06, "log_odds_chosen": 0.6589217185974121, "log_odds_ratio": -0.43469709157943726, "logits/chosen": -0.22264309227466583, "logits/rejected": -1.58710515499115, "logps/chosen": -1.5806881189346313, "logps/rejected": -2.1421871185302734, "loss": 1.6344, "nll_loss": 1.590897798538208, "rewards/accuracies": 1.0, "rewards/chosen": -0.15806882083415985, "rewards/margins": 0.05614989995956421, "rewards/rejected": -0.21421872079372406, "step": 450 }, { "epoch": 0.710795902285264, "grad_norm": 0.1898472160100937, "learning_rate": 3.262268585584619e-06, "log_odds_chosen": 0.6008990406990051, "log_odds_ratio": -0.440044105052948, "logits/chosen": -0.1463158279657364, "logits/rejected": -1.3233999013900757, "logps/chosen": -1.5258179903030396, "logps/rejected": -2.0252106189727783, "loss": 1.5941, "nll_loss": 1.5501309633255005, "rewards/accuracies": 1.0, "rewards/chosen": -0.15258179605007172, "rewards/margins": 0.04993927478790283, "rewards/rejected": -0.20252105593681335, "step": 451 }, { "epoch": 0.7123719464144996, "grad_norm": 0.20325587689876556, "learning_rate": 3.2579998677874853e-06, "log_odds_chosen": 0.691182017326355, "log_odds_ratio": -0.41538918018341064, "logits/chosen": -0.23037710785865784, "logits/rejected": -1.2760132551193237, "logps/chosen": -1.5629483461380005, "logps/rejected": -2.1457886695861816, "loss": 1.6173, "nll_loss": 1.5757336616516113, "rewards/accuracies": 1.0, "rewards/chosen": -0.1562948226928711, "rewards/margins": 0.05828403681516647, "rewards/rejected": -0.21457885205745697, "step": 452 }, { "epoch": 0.7139479905437353, "grad_norm": 0.18862774968147278, "learning_rate": 3.2537216464022155e-06, "log_odds_chosen": 0.6802084445953369, "log_odds_ratio": -0.41921448707580566, "logits/chosen": -0.20463165640830994, "logits/rejected": -1.2661035060882568, "logps/chosen": -1.5623295307159424, "logps/rejected": -2.129488945007324, "loss": 1.6273, "nll_loss": 1.585338830947876, "rewards/accuracies": 1.0, "rewards/chosen": -0.15623293817043304, "rewards/margins": 0.0567159429192543, "rewards/rejected": -0.21294888854026794, "step": 453 }, { "epoch": 0.7155240346729709, "grad_norm": 0.1871948093175888, "learning_rate": 3.2494339537487314e-06, "log_odds_chosen": 0.5408557653427124, "log_odds_ratio": -0.46073442697525024, "logits/chosen": -0.23007997870445251, "logits/rejected": -1.5315394401550293, "logps/chosen": -1.5641494989395142, "logps/rejected": -2.0117013454437256, "loss": 1.6397, "nll_loss": 1.5936379432678223, "rewards/accuracies": 1.0, "rewards/chosen": -0.1564149558544159, "rewards/margins": 0.04475518316030502, "rewards/rejected": -0.2011701464653015, "step": 454 }, { "epoch": 0.7171000788022065, "grad_norm": 0.18707512319087982, "learning_rate": 3.2451368222185006e-06, "log_odds_chosen": 0.454687237739563, "log_odds_ratio": -0.4977053105831146, "logits/chosen": -0.2190241664648056, "logits/rejected": -1.1657060384750366, "logps/chosen": -1.5094616413116455, "logps/rejected": -1.8778409957885742, "loss": 1.5921, "nll_loss": 1.5422812700271606, "rewards/accuracies": 1.0, "rewards/chosen": -0.15094618499279022, "rewards/margins": 0.03683791682124138, "rewards/rejected": -0.1877840906381607, "step": 455 }, { "epoch": 0.7186761229314421, "grad_norm": 0.17670530080795288, "learning_rate": 3.2408302842743007e-06, "log_odds_chosen": 0.6599798798561096, "log_odds_ratio": -0.4245462715625763, "logits/chosen": -0.1710800975561142, "logits/rejected": -1.3067679405212402, "logps/chosen": -1.4607981443405151, "logps/rejected": -2.002277374267578, "loss": 1.5484, "nll_loss": 1.505940556526184, "rewards/accuracies": 1.0, "rewards/chosen": -0.14607983827590942, "rewards/margins": 0.05414789542555809, "rewards/rejected": -0.20022772252559662, "step": 456 }, { "epoch": 0.7202521670606777, "grad_norm": 0.18531948328018188, "learning_rate": 3.2365143724499684e-06, "log_odds_chosen": 0.590911865234375, "log_odds_ratio": -0.4425351023674011, "logits/chosen": -0.1856268346309662, "logits/rejected": -1.3595974445343018, "logps/chosen": -1.5068074464797974, "logps/rejected": -1.9935933351516724, "loss": 1.5948, "nll_loss": 1.5505702495574951, "rewards/accuracies": 1.0, "rewards/chosen": -0.1506807506084442, "rewards/margins": 0.04867858439683914, "rewards/rejected": -0.19935932755470276, "step": 457 }, { "epoch": 0.7218282111899134, "grad_norm": 0.19168265163898468, "learning_rate": 3.2321891193501564e-06, "log_odds_chosen": 0.5883606672286987, "log_odds_ratio": -0.4528968632221222, "logits/chosen": -0.16482576727867126, "logits/rejected": -0.8930314779281616, "logps/chosen": -1.510907530784607, "logps/rejected": -1.9958248138427734, "loss": 1.5783, "nll_loss": 1.532994031906128, "rewards/accuracies": 1.0, "rewards/chosen": -0.15109075605869293, "rewards/margins": 0.04849172383546829, "rewards/rejected": -0.19958247244358063, "step": 458 }, { "epoch": 0.723404255319149, "grad_norm": 0.18463407456874847, "learning_rate": 3.2278545576500858e-06, "log_odds_chosen": 0.8210570812225342, "log_odds_ratio": -0.37329068779945374, "logits/chosen": -0.08877343684434891, "logits/rejected": -1.078304648399353, "logps/chosen": -1.3643009662628174, "logps/rejected": -2.0353879928588867, "loss": 1.4512, "nll_loss": 1.4138658046722412, "rewards/accuracies": 1.0, "rewards/chosen": -0.13643008470535278, "rewards/margins": 0.06710872799158096, "rewards/rejected": -0.20353882014751434, "step": 459 }, { "epoch": 0.7249802994483846, "grad_norm": 0.19575412571430206, "learning_rate": 3.223510720095299e-06, "log_odds_chosen": 0.785904049873352, "log_odds_ratio": -0.38319867849349976, "logits/chosen": -0.2679174542427063, "logits/rejected": -1.3926105499267578, "logps/chosen": -1.5412414073944092, "logps/rejected": -2.202934980392456, "loss": 1.5979, "nll_loss": 1.5595486164093018, "rewards/accuracies": 1.0, "rewards/chosen": -0.15412414073944092, "rewards/margins": 0.0661693587899208, "rewards/rejected": -0.22029350697994232, "step": 460 }, { "epoch": 0.7265563435776202, "grad_norm": 0.18656474351882935, "learning_rate": 3.2191576395014158e-06, "log_odds_chosen": 0.7210904359817505, "log_odds_ratio": -0.3992398679256439, "logits/chosen": -0.20938719809055328, "logits/rejected": -1.283248782157898, "logps/chosen": -1.4764115810394287, "logps/rejected": -2.072150707244873, "loss": 1.5461, "nll_loss": 1.5061570405960083, "rewards/accuracies": 1.0, "rewards/chosen": -0.1476411670446396, "rewards/margins": 0.059573911130428314, "rewards/rejected": -0.2072150707244873, "step": 461 }, { "epoch": 0.7281323877068558, "grad_norm": 0.18474119901657104, "learning_rate": 3.2147953487538794e-06, "log_odds_chosen": 0.6938648223876953, "log_odds_ratio": -0.4105943739414215, "logits/chosen": -0.14119039475917816, "logits/rejected": -1.3704811334609985, "logps/chosen": -1.4421080350875854, "logps/rejected": -2.0102922916412354, "loss": 1.514, "nll_loss": 1.4729448556900024, "rewards/accuracies": 1.0, "rewards/chosen": -0.1442108154296875, "rewards/margins": 0.05681842565536499, "rewards/rejected": -0.2010292410850525, "step": 462 }, { "epoch": 0.7297084318360915, "grad_norm": 0.19482626020908356, "learning_rate": 3.2104238808077133e-06, "log_odds_chosen": 0.5768538117408752, "log_odds_ratio": -0.45441049337387085, "logits/chosen": -0.169452965259552, "logits/rejected": -1.0683661699295044, "logps/chosen": -1.4814167022705078, "logps/rejected": -1.9551403522491455, "loss": 1.5563, "nll_loss": 1.5108129978179932, "rewards/accuracies": 1.0, "rewards/chosen": -0.14814168214797974, "rewards/margins": 0.04737236350774765, "rewards/rejected": -0.1955140382051468, "step": 463 }, { "epoch": 0.731284475965327, "grad_norm": 0.18169742822647095, "learning_rate": 3.2060432686872704e-06, "log_odds_chosen": 0.8345743417739868, "log_odds_ratio": -0.3699982464313507, "logits/chosen": -0.2313491553068161, "logits/rejected": -1.2104275226593018, "logps/chosen": -1.387764811515808, "logps/rejected": -2.067291736602783, "loss": 1.4733, "nll_loss": 1.436316728591919, "rewards/accuracies": 1.0, "rewards/chosen": -0.138776496052742, "rewards/margins": 0.06795267760753632, "rewards/rejected": -0.20672915875911713, "step": 464 }, { "epoch": 0.7328605200945626, "grad_norm": 0.18539521098136902, "learning_rate": 3.201653545485982e-06, "log_odds_chosen": 0.6590836048126221, "log_odds_ratio": -0.42436298727989197, "logits/chosen": -0.14550940692424774, "logits/rejected": -1.2948503494262695, "logps/chosen": -1.5323055982589722, "logps/rejected": -2.083099365234375, "loss": 1.5889, "nll_loss": 1.5465004444122314, "rewards/accuracies": 1.0, "rewards/chosen": -0.15323056280612946, "rewards/margins": 0.0550793781876564, "rewards/rejected": -0.20830994844436646, "step": 465 }, { "epoch": 0.7344365642237982, "grad_norm": 0.2062307447195053, "learning_rate": 3.197254744366111e-06, "log_odds_chosen": 0.67624831199646, "log_odds_ratio": -0.41811689734458923, "logits/chosen": -0.1269284039735794, "logits/rejected": -1.2436720132827759, "logps/chosen": -1.442671775817871, "logps/rejected": -1.994502305984497, "loss": 1.519, "nll_loss": 1.4771640300750732, "rewards/accuracies": 1.0, "rewards/chosen": -0.14426717162132263, "rewards/margins": 0.0551830530166626, "rewards/rejected": -0.19945020973682404, "step": 466 }, { "epoch": 0.7360126083530338, "grad_norm": 0.19510377943515778, "learning_rate": 3.192846898558498e-06, "log_odds_chosen": 0.5479187369346619, "log_odds_ratio": -0.4584289491176605, "logits/chosen": -0.17342276871204376, "logits/rejected": -1.2843399047851562, "logps/chosen": -1.5753339529037476, "logps/rejected": -2.0328989028930664, "loss": 1.6463, "nll_loss": 1.600473403930664, "rewards/accuracies": 1.0, "rewards/chosen": -0.15753339231014252, "rewards/margins": 0.045756496489048004, "rewards/rejected": -0.20328989624977112, "step": 467 }, { "epoch": 0.7375886524822695, "grad_norm": 0.19615499675273895, "learning_rate": 3.188430041362313e-06, "log_odds_chosen": 0.5022854804992676, "log_odds_ratio": -0.4808230698108673, "logits/chosen": -0.1259315460920334, "logits/rejected": -1.3108328580856323, "logps/chosen": -1.5695524215698242, "logps/rejected": -1.9884154796600342, "loss": 1.6285, "nll_loss": 1.580439567565918, "rewards/accuracies": 1.0, "rewards/chosen": -0.15695525705814362, "rewards/margins": 0.04188628867268562, "rewards/rejected": -0.19884154200553894, "step": 468 }, { "epoch": 0.7391646966115051, "grad_norm": 0.20162400603294373, "learning_rate": 3.184004206144803e-06, "log_odds_chosen": 0.7329556941986084, "log_odds_ratio": -0.4027223289012909, "logits/chosen": -0.23247480392456055, "logits/rejected": -1.2680878639221191, "logps/chosen": -1.4653428792953491, "logps/rejected": -2.0662801265716553, "loss": 1.5357, "nll_loss": 1.4954301118850708, "rewards/accuracies": 1.0, "rewards/chosen": -0.1465342789888382, "rewards/margins": 0.06009373068809509, "rewards/rejected": -0.20662802457809448, "step": 469 }, { "epoch": 0.7407407407407407, "grad_norm": 0.18583819270133972, "learning_rate": 3.1795694263410386e-06, "log_odds_chosen": 0.7982565760612488, "log_odds_ratio": -0.38432639837265015, "logits/chosen": -0.19295667111873627, "logits/rejected": -1.293751835823059, "logps/chosen": -1.4557034969329834, "logps/rejected": -2.122990608215332, "loss": 1.5197, "nll_loss": 1.48124098777771, "rewards/accuracies": 1.0, "rewards/chosen": -0.14557035267353058, "rewards/margins": 0.06672872602939606, "rewards/rejected": -0.21229907870292664, "step": 470 }, { "epoch": 0.7423167848699763, "grad_norm": 0.1854601353406906, "learning_rate": 3.1751257354536634e-06, "log_odds_chosen": 0.552662193775177, "log_odds_ratio": -0.4665309488773346, "logits/chosen": -0.1528901308774948, "logits/rejected": -1.371885061264038, "logps/chosen": -1.4518961906433105, "logps/rejected": -1.9063466787338257, "loss": 1.5332, "nll_loss": 1.4865120649337769, "rewards/accuracies": 0.875, "rewards/chosen": -0.14518961310386658, "rewards/margins": 0.04544505476951599, "rewards/rejected": -0.19063468277454376, "step": 471 }, { "epoch": 0.7438928289992119, "grad_norm": 0.1899978667497635, "learning_rate": 3.1706731670526394e-06, "log_odds_chosen": 0.6217374205589294, "log_odds_ratio": -0.43196773529052734, "logits/chosen": -0.2525237500667572, "logits/rejected": -1.3043723106384277, "logps/chosen": -1.5042320489883423, "logps/rejected": -2.0150396823883057, "loss": 1.5598, "nll_loss": 1.5165841579437256, "rewards/accuracies": 1.0, "rewards/chosen": -0.15042319893836975, "rewards/margins": 0.05108076333999634, "rewards/rejected": -0.2015039622783661, "step": 472 }, { "epoch": 0.7454688731284476, "grad_norm": 0.1951638162136078, "learning_rate": 3.166211754774994e-06, "log_odds_chosen": 0.6629724502563477, "log_odds_ratio": -0.422730028629303, "logits/chosen": -0.20018966495990753, "logits/rejected": -1.4212433099746704, "logps/chosen": -1.5557457208633423, "logps/rejected": -2.1103403568267822, "loss": 1.6437, "nll_loss": 1.6013872623443604, "rewards/accuracies": 1.0, "rewards/chosen": -0.15557457506656647, "rewards/margins": 0.055459462106227875, "rewards/rejected": -0.21103402972221375, "step": 473 }, { "epoch": 0.7470449172576832, "grad_norm": 0.19168664515018463, "learning_rate": 3.1617415323245665e-06, "log_odds_chosen": 0.6726161241531372, "log_odds_ratio": -0.42290619015693665, "logits/chosen": -0.2400115728378296, "logits/rejected": -1.3783491849899292, "logps/chosen": -1.4658328294754028, "logps/rejected": -2.0105373859405518, "loss": 1.5266, "nll_loss": 1.4843122959136963, "rewards/accuracies": 1.0, "rewards/chosen": -0.14658328890800476, "rewards/margins": 0.054470453411340714, "rewards/rejected": -0.20105375349521637, "step": 474 }, { "epoch": 0.7486209613869188, "grad_norm": 0.18189279735088348, "learning_rate": 3.157262533471752e-06, "log_odds_chosen": 0.7140947580337524, "log_odds_ratio": -0.40235432982444763, "logits/chosen": -0.15209892392158508, "logits/rejected": -1.3825089931488037, "logps/chosen": -1.5169684886932373, "logps/rejected": -2.1144258975982666, "loss": 1.5855, "nll_loss": 1.54523503780365, "rewards/accuracies": 1.0, "rewards/chosen": -0.15169686079025269, "rewards/margins": 0.05974572151899338, "rewards/rejected": -0.21144257485866547, "step": 475 }, { "epoch": 0.7501970055161544, "grad_norm": 0.19609789550304413, "learning_rate": 3.1527747920532468e-06, "log_odds_chosen": 0.565768837928772, "log_odds_ratio": -0.4514068365097046, "logits/chosen": -0.16712833940982819, "logits/rejected": -1.1320550441741943, "logps/chosen": -1.4766864776611328, "logps/rejected": -1.9391474723815918, "loss": 1.5527, "nll_loss": 1.5075533390045166, "rewards/accuracies": 1.0, "rewards/chosen": -0.14766864478588104, "rewards/margins": 0.04624609276652336, "rewards/rejected": -0.1939147412776947, "step": 476 }, { "epoch": 0.75177304964539, "grad_norm": 0.19328206777572632, "learning_rate": 3.148278341971795e-06, "log_odds_chosen": 0.65244460105896, "log_odds_ratio": -0.4249870181083679, "logits/chosen": -0.22436018288135529, "logits/rejected": -1.223185420036316, "logps/chosen": -1.5077931880950928, "logps/rejected": -2.0486738681793213, "loss": 1.5739, "nll_loss": 1.5314006805419922, "rewards/accuracies": 1.0, "rewards/chosen": -0.15077932178974152, "rewards/margins": 0.054088056087493896, "rewards/rejected": -0.2048673778772354, "step": 477 }, { "epoch": 0.7533490937746257, "grad_norm": 0.20173610746860504, "learning_rate": 3.143773217195929e-06, "log_odds_chosen": 0.73219895362854, "log_odds_ratio": -0.4004945158958435, "logits/chosen": -0.2401634305715561, "logits/rejected": -1.3643122911453247, "logps/chosen": -1.5717616081237793, "logps/rejected": -2.18953800201416, "loss": 1.6288, "nll_loss": 1.5887385606765747, "rewards/accuracies": 1.0, "rewards/chosen": -0.1571761518716812, "rewards/margins": 0.06177765130996704, "rewards/rejected": -0.21895381808280945, "step": 478 }, { "epoch": 0.7549251379038613, "grad_norm": 0.19656141102313995, "learning_rate": 3.139259451759714e-06, "log_odds_chosen": 0.5744882225990295, "log_odds_ratio": -0.4510755240917206, "logits/chosen": -0.24257460236549377, "logits/rejected": -1.3083908557891846, "logps/chosen": -1.5712709426879883, "logps/rejected": -2.050100564956665, "loss": 1.644, "nll_loss": 1.5989316701889038, "rewards/accuracies": 1.0, "rewards/chosen": -0.15712709724903107, "rewards/margins": 0.047882966697216034, "rewards/rejected": -0.2050100713968277, "step": 479 }, { "epoch": 0.7565011820330969, "grad_norm": 0.18602579832077026, "learning_rate": 3.134737079762493e-06, "log_odds_chosen": 0.6003292202949524, "log_odds_ratio": -0.4424300193786621, "logits/chosen": -0.1941242814064026, "logits/rejected": -1.311800479888916, "logps/chosen": -1.501556634902954, "logps/rejected": -1.9944710731506348, "loss": 1.5673, "nll_loss": 1.5230939388275146, "rewards/accuracies": 1.0, "rewards/chosen": -0.15015564858913422, "rewards/margins": 0.04929143935441971, "rewards/rejected": -0.19944709539413452, "step": 480 }, { "epoch": 0.7580772261623325, "grad_norm": 0.20337559282779694, "learning_rate": 3.130206135368626e-06, "log_odds_chosen": 0.6041734218597412, "log_odds_ratio": -0.4474312365055084, "logits/chosen": -0.24175474047660828, "logits/rejected": -0.9721249341964722, "logps/chosen": -1.425898790359497, "logps/rejected": -1.9149250984191895, "loss": 1.5167, "nll_loss": 1.471928596496582, "rewards/accuracies": 1.0, "rewards/chosen": -0.14258988201618195, "rewards/margins": 0.04890260472893715, "rewards/rejected": -0.19149249792099, "step": 481 }, { "epoch": 0.7596532702915682, "grad_norm": 0.19193829596042633, "learning_rate": 3.1256666528072327e-06, "log_odds_chosen": 0.7867165803909302, "log_odds_ratio": -0.38077130913734436, "logits/chosen": -0.23528993129730225, "logits/rejected": -1.075020670890808, "logps/chosen": -1.472687840461731, "logps/rejected": -2.124319553375244, "loss": 1.5375, "nll_loss": 1.499396800994873, "rewards/accuracies": 1.0, "rewards/chosen": -0.14726878702640533, "rewards/margins": 0.06516318768262863, "rewards/rejected": -0.21243198215961456, "step": 482 }, { "epoch": 0.7612293144208038, "grad_norm": 0.18599654734134674, "learning_rate": 3.121118666371937e-06, "log_odds_chosen": 0.595000147819519, "log_odds_ratio": -0.4452923834323883, "logits/chosen": -0.15184305608272552, "logits/rejected": -1.538995623588562, "logps/chosen": -1.5383219718933105, "logps/rejected": -2.033515453338623, "loss": 1.5951, "nll_loss": 1.550559639930725, "rewards/accuracies": 1.0, "rewards/chosen": -0.15383221209049225, "rewards/margins": 0.049519333988428116, "rewards/rejected": -0.20335155725479126, "step": 483 }, { "epoch": 0.7628053585500394, "grad_norm": 0.21291442215442657, "learning_rate": 3.1165622104206034e-06, "log_odds_chosen": 0.770659863948822, "log_odds_ratio": -0.3848019242286682, "logits/chosen": -0.3678995370864868, "logits/rejected": -1.2183688879013062, "logps/chosen": -1.4637348651885986, "logps/rejected": -2.1029365062713623, "loss": 1.5321, "nll_loss": 1.4936531782150269, "rewards/accuracies": 1.0, "rewards/chosen": -0.14637349545955658, "rewards/margins": 0.06392017006874084, "rewards/rejected": -0.21029365062713623, "step": 484 }, { "epoch": 0.764381402679275, "grad_norm": 0.18177950382232666, "learning_rate": 3.1119973193750816e-06, "log_odds_chosen": 0.6704604029655457, "log_odds_ratio": -0.41766875982284546, "logits/chosen": -0.2663368880748749, "logits/rejected": -1.2851604223251343, "logps/chosen": -1.475534439086914, "logps/rejected": -2.027827739715576, "loss": 1.536, "nll_loss": 1.4942355155944824, "rewards/accuracies": 1.0, "rewards/chosen": -0.1475534588098526, "rewards/margins": 0.05522932484745979, "rewards/rejected": -0.2027827799320221, "step": 485 }, { "epoch": 0.7659574468085106, "grad_norm": 0.21737127006053925, "learning_rate": 3.1074240277209408e-06, "log_odds_chosen": 0.6194983124732971, "log_odds_ratio": -0.4379619061946869, "logits/chosen": -0.24437181651592255, "logits/rejected": -1.2440481185913086, "logps/chosen": -1.494814157485962, "logps/rejected": -2.00154709815979, "loss": 1.5423, "nll_loss": 1.4985466003417969, "rewards/accuracies": 1.0, "rewards/chosen": -0.1494814157485962, "rewards/margins": 0.05067329481244087, "rewards/rejected": -0.20015469193458557, "step": 486 }, { "epoch": 0.7675334909377463, "grad_norm": 0.20863457024097443, "learning_rate": 3.102842370007217e-06, "log_odds_chosen": 0.6833222508430481, "log_odds_ratio": -0.41733595728874207, "logits/chosen": -0.16676893830299377, "logits/rejected": -1.15752375125885, "logps/chosen": -1.5041608810424805, "logps/rejected": -2.0647218227386475, "loss": 1.5703, "nll_loss": 1.5285258293151855, "rewards/accuracies": 1.0, "rewards/chosen": -0.1504160761833191, "rewards/margins": 0.05605611205101013, "rewards/rejected": -0.20647220313549042, "step": 487 }, { "epoch": 0.7691095350669819, "grad_norm": 0.1944776177406311, "learning_rate": 3.0982523808461454e-06, "log_odds_chosen": 0.5529102683067322, "log_odds_ratio": -0.4605258107185364, "logits/chosen": -0.15431943535804749, "logits/rejected": -1.2589623928070068, "logps/chosen": -1.5484925508499146, "logps/rejected": -2.0021567344665527, "loss": 1.6075, "nll_loss": 1.561496376991272, "rewards/accuracies": 1.0, "rewards/chosen": -0.15484926104545593, "rewards/margins": 0.04536642134189606, "rewards/rejected": -0.2002156674861908, "step": 488 }, { "epoch": 0.7706855791962175, "grad_norm": 0.18545880913734436, "learning_rate": 3.0936540949129006e-06, "log_odds_chosen": 0.6402296423912048, "log_odds_ratio": -0.4339551031589508, "logits/chosen": -0.2207900583744049, "logits/rejected": -1.1871285438537598, "logps/chosen": -1.5241451263427734, "logps/rejected": -2.057253122329712, "loss": 1.5905, "nll_loss": 1.5471105575561523, "rewards/accuracies": 1.0, "rewards/chosen": -0.15241453051567078, "rewards/margins": 0.053310781717300415, "rewards/rejected": -0.20572529733181, "step": 489 }, { "epoch": 0.7722616233254531, "grad_norm": 0.178094744682312, "learning_rate": 3.0890475469453378e-06, "log_odds_chosen": 0.7427234053611755, "log_odds_ratio": -0.40040361881256104, "logits/chosen": -0.2081877589225769, "logits/rejected": -1.3924274444580078, "logps/chosen": -1.4031827449798584, "logps/rejected": -2.006999969482422, "loss": 1.4721, "nll_loss": 1.4321045875549316, "rewards/accuracies": 1.0, "rewards/chosen": -0.14031827449798584, "rewards/margins": 0.06038173660635948, "rewards/rejected": -0.20069998502731323, "step": 490 }, { "epoch": 0.7738376674546887, "grad_norm": 0.18201249837875366, "learning_rate": 3.0844327717437263e-06, "log_odds_chosen": 0.6974368691444397, "log_odds_ratio": -0.41516321897506714, "logits/chosen": -0.21113747358322144, "logits/rejected": -1.2565526962280273, "logps/chosen": -1.4272940158843994, "logps/rejected": -2.0010313987731934, "loss": 1.4995, "nll_loss": 1.4580097198486328, "rewards/accuracies": 1.0, "rewards/chosen": -0.14272941648960114, "rewards/margins": 0.057373758405447006, "rewards/rejected": -0.20010316371917725, "step": 491 }, { "epoch": 0.7754137115839244, "grad_norm": 0.1853957176208496, "learning_rate": 3.0798098041704892e-06, "log_odds_chosen": 0.5092182159423828, "log_odds_ratio": -0.47541776299476624, "logits/chosen": -0.15838466584682465, "logits/rejected": -1.3052377700805664, "logps/chosen": -1.361024260520935, "logps/rejected": -1.7661519050598145, "loss": 1.4535, "nll_loss": 1.405916690826416, "rewards/accuracies": 1.0, "rewards/chosen": -0.13610242307186127, "rewards/margins": 0.04051277041435242, "rewards/rejected": -0.1766151785850525, "step": 492 }, { "epoch": 0.77698975571316, "grad_norm": 0.1893599033355713, "learning_rate": 3.0751786791499368e-06, "log_odds_chosen": 0.6547857522964478, "log_odds_ratio": -0.42588385939598083, "logits/chosen": -0.17713405191898346, "logits/rejected": -1.2708368301391602, "logps/chosen": -1.5448169708251953, "logps/rejected": -2.0935890674591064, "loss": 1.6139, "nll_loss": 1.5712815523147583, "rewards/accuracies": 1.0, "rewards/chosen": -0.1544817090034485, "rewards/margins": 0.05487719178199768, "rewards/rejected": -0.20935890078544617, "step": 493 }, { "epoch": 0.7785657998423956, "grad_norm": 0.18722397089004517, "learning_rate": 3.070539431668008e-06, "log_odds_chosen": 0.6233894228935242, "log_odds_ratio": -0.43421441316604614, "logits/chosen": -0.1989862620830536, "logits/rejected": -1.2033601999282837, "logps/chosen": -1.5463478565216064, "logps/rejected": -2.0686848163604736, "loss": 1.6068, "nll_loss": 1.5633586645126343, "rewards/accuracies": 1.0, "rewards/chosen": -0.15463480353355408, "rewards/margins": 0.05223367363214493, "rewards/rejected": -0.2068684697151184, "step": 494 }, { "epoch": 0.7801418439716312, "grad_norm": 0.19007954001426697, "learning_rate": 3.0658920967720018e-06, "log_odds_chosen": 0.7926431894302368, "log_odds_ratio": -0.38211071491241455, "logits/chosen": -0.3403100073337555, "logits/rejected": -1.2223634719848633, "logps/chosen": -1.4851934909820557, "logps/rejected": -2.1482720375061035, "loss": 1.5559, "nll_loss": 1.5176681280136108, "rewards/accuracies": 1.0, "rewards/chosen": -0.148519366979599, "rewards/margins": 0.06630785018205643, "rewards/rejected": -0.21482720971107483, "step": 495 }, { "epoch": 0.7817178881008668, "grad_norm": 0.1839301884174347, "learning_rate": 3.0612367095703116e-06, "log_odds_chosen": 0.7492038607597351, "log_odds_ratio": -0.39107295870780945, "logits/chosen": -0.18261493742465973, "logits/rejected": -1.4831030368804932, "logps/chosen": -1.5249770879745483, "logps/rejected": -2.153897762298584, "loss": 1.5805, "nll_loss": 1.5413737297058105, "rewards/accuracies": 1.0, "rewards/chosen": -0.15249772369861603, "rewards/margins": 0.06289205700159073, "rewards/rejected": -0.21538978815078735, "step": 496 }, { "epoch": 0.7832939322301025, "grad_norm": 0.19066324830055237, "learning_rate": 3.056573305232167e-06, "log_odds_chosen": 0.7923998832702637, "log_odds_ratio": -0.380237877368927, "logits/chosen": -0.20253872871398926, "logits/rejected": -1.41769278049469, "logps/chosen": -1.4783601760864258, "logps/rejected": -2.1322264671325684, "loss": 1.5446, "nll_loss": 1.5065717697143555, "rewards/accuracies": 1.0, "rewards/chosen": -0.14783601462841034, "rewards/margins": 0.06538661569356918, "rewards/rejected": -0.21322263777256012, "step": 497 }, { "epoch": 0.7848699763593381, "grad_norm": 0.19244952499866486, "learning_rate": 3.051901918987359e-06, "log_odds_chosen": 0.7435587048530579, "log_odds_ratio": -0.39330264925956726, "logits/chosen": -0.33268722891807556, "logits/rejected": -1.3394025564193726, "logps/chosen": -1.4223885536193848, "logps/rejected": -2.0310678482055664, "loss": 1.4913, "nll_loss": 1.4519734382629395, "rewards/accuracies": 1.0, "rewards/chosen": -0.14223885536193848, "rewards/margins": 0.06086793541908264, "rewards/rejected": -0.20310677587985992, "step": 498 }, { "epoch": 0.7864460204885737, "grad_norm": 0.22321587800979614, "learning_rate": 3.047222586125979e-06, "log_odds_chosen": 0.7735339999198914, "log_odds_ratio": -0.3840080797672272, "logits/chosen": -0.1548488885164261, "logits/rejected": -0.9053974151611328, "logps/chosen": -1.431384801864624, "logps/rejected": -2.068115234375, "loss": 1.51, "nll_loss": 1.4715591669082642, "rewards/accuracies": 1.0, "rewards/chosen": -0.14313849806785583, "rewards/margins": 0.06367303431034088, "rewards/rejected": -0.20681151747703552, "step": 499 }, { "epoch": 0.7880220646178093, "grad_norm": 0.18041643500328064, "learning_rate": 3.042535341998152e-06, "log_odds_chosen": 0.5752748847007751, "log_odds_ratio": -0.4517236649990082, "logits/chosen": -0.07348179817199707, "logits/rejected": -1.2616822719573975, "logps/chosen": -1.578834056854248, "logps/rejected": -2.061535120010376, "loss": 1.6247, "nll_loss": 1.579504370689392, "rewards/accuracies": 1.0, "rewards/chosen": -0.1578833907842636, "rewards/margins": 0.04827011376619339, "rewards/rejected": -0.2061535120010376, "step": 500 }, { "epoch": 0.789598108747045, "grad_norm": 0.19028055667877197, "learning_rate": 3.037840222013769e-06, "log_odds_chosen": 0.6691651344299316, "log_odds_ratio": -0.42311063408851624, "logits/chosen": -0.16774022579193115, "logits/rejected": -0.9798950552940369, "logps/chosen": -1.51216459274292, "logps/rejected": -2.0689010620117188, "loss": 1.5686, "nll_loss": 1.526324987411499, "rewards/accuracies": 1.0, "rewards/chosen": -0.15121646225452423, "rewards/margins": 0.05567363277077675, "rewards/rejected": -0.20689009130001068, "step": 501 }, { "epoch": 0.7911741528762806, "grad_norm": 0.18513523042201996, "learning_rate": 3.033137261642219e-06, "log_odds_chosen": 0.8165162801742554, "log_odds_ratio": -0.3758889436721802, "logits/chosen": -0.19638784229755402, "logits/rejected": -1.2416471242904663, "logps/chosen": -1.4641826152801514, "logps/rejected": -2.142982006072998, "loss": 1.5202, "nll_loss": 1.4826549291610718, "rewards/accuracies": 1.0, "rewards/chosen": -0.1464182585477829, "rewards/margins": 0.06787993758916855, "rewards/rejected": -0.21429818868637085, "step": 502 }, { "epoch": 0.7927501970055162, "grad_norm": 0.18089807033538818, "learning_rate": 3.02842649641212e-06, "log_odds_chosen": 0.7593850493431091, "log_odds_ratio": -0.3924046754837036, "logits/chosen": -0.10785488039255142, "logits/rejected": -1.0916216373443604, "logps/chosen": -1.5069478750228882, "logps/rejected": -2.1434710025787354, "loss": 1.5697, "nll_loss": 1.5304501056671143, "rewards/accuracies": 1.0, "rewards/chosen": -0.15069478750228882, "rewards/margins": 0.06365231424570084, "rewards/rejected": -0.21434709429740906, "step": 503 }, { "epoch": 0.7943262411347518, "grad_norm": 0.18175731599330902, "learning_rate": 3.0237079619110554e-06, "log_odds_chosen": 0.8134419918060303, "log_odds_ratio": -0.37328994274139404, "logits/chosen": -0.22376924753189087, "logits/rejected": -1.4354217052459717, "logps/chosen": -1.4759913682937622, "logps/rejected": -2.146923303604126, "loss": 1.5336, "nll_loss": 1.496294617652893, "rewards/accuracies": 1.0, "rewards/chosen": -0.14759913086891174, "rewards/margins": 0.06709320843219757, "rewards/rejected": -0.21469233930110931, "step": 504 }, { "epoch": 0.7959022852639874, "grad_norm": 0.2029658555984497, "learning_rate": 3.0189816937852976e-06, "log_odds_chosen": 0.7895228266716003, "log_odds_ratio": -0.38117578625679016, "logits/chosen": -0.28800487518310547, "logits/rejected": -1.3646546602249146, "logps/chosen": -1.4555811882019043, "logps/rejected": -2.1068646907806396, "loss": 1.5037, "nll_loss": 1.465606689453125, "rewards/accuracies": 1.0, "rewards/chosen": -0.14555811882019043, "rewards/margins": 0.06512835621833801, "rewards/rejected": -0.21068646013736725, "step": 505 }, { "epoch": 0.797478329393223, "grad_norm": 0.18529628217220306, "learning_rate": 3.014247727739546e-06, "log_odds_chosen": 0.9279834628105164, "log_odds_ratio": -0.3403416872024536, "logits/chosen": -0.2272339165210724, "logits/rejected": -1.315860390663147, "logps/chosen": -1.4553430080413818, "logps/rejected": -2.2307143211364746, "loss": 1.5119, "nll_loss": 1.4778820276260376, "rewards/accuracies": 1.0, "rewards/chosen": -0.14553430676460266, "rewards/margins": 0.07753713428974152, "rewards/rejected": -0.22307144105434418, "step": 506 }, { "epoch": 0.7990543735224587, "grad_norm": 0.19259987771511078, "learning_rate": 3.009506099536653e-06, "log_odds_chosen": 0.67513507604599, "log_odds_ratio": -0.4143328070640564, "logits/chosen": -0.17503556609153748, "logits/rejected": -1.3449033498764038, "logps/chosen": -1.5120768547058105, "logps/rejected": -2.0729129314422607, "loss": 1.5515, "nll_loss": 1.510113000869751, "rewards/accuracies": 1.0, "rewards/chosen": -0.15120768547058105, "rewards/margins": 0.056083619594573975, "rewards/rejected": -0.20729129016399384, "step": 507 }, { "epoch": 0.8006304176516943, "grad_norm": 0.18156689405441284, "learning_rate": 3.0047568449973544e-06, "log_odds_chosen": 0.9034937620162964, "log_odds_ratio": -0.3491254448890686, "logits/chosen": -0.2639719247817993, "logits/rejected": -1.3573088645935059, "logps/chosen": -1.362982153892517, "logps/rejected": -2.101900339126587, "loss": 1.4246, "nll_loss": 1.3897302150726318, "rewards/accuracies": 1.0, "rewards/chosen": -0.13629822432994843, "rewards/margins": 0.07389181852340698, "rewards/rejected": -0.2101900279521942, "step": 508 }, { "epoch": 0.8022064617809299, "grad_norm": 0.19325587153434753, "learning_rate": 3e-06, "log_odds_chosen": 0.5838625431060791, "log_odds_ratio": -0.45025166869163513, "logits/chosen": -0.22336238622665405, "logits/rejected": -1.168703317642212, "logps/chosen": -1.4419538974761963, "logps/rejected": -1.9156994819641113, "loss": 1.5075, "nll_loss": 1.4625194072723389, "rewards/accuracies": 1.0, "rewards/chosen": -0.14419539272785187, "rewards/margins": 0.047374557703733444, "rewards/rejected": -0.1915699541568756, "step": 509 }, { "epoch": 0.8037825059101655, "grad_norm": 0.1798471063375473, "learning_rate": 2.9952356004802813e-06, "log_odds_chosen": 0.5265605449676514, "log_odds_ratio": -0.4680787920951843, "logits/chosen": -0.17943690717220306, "logits/rejected": -1.2646390199661255, "logps/chosen": -1.5416685342788696, "logps/rejected": -1.97737455368042, "loss": 1.5931, "nll_loss": 1.546276569366455, "rewards/accuracies": 1.0, "rewards/chosen": -0.15416686236858368, "rewards/margins": 0.04357059299945831, "rewards/rejected": -0.197737455368042, "step": 510 }, { "epoch": 0.8053585500394012, "grad_norm": 0.17970964312553406, "learning_rate": 2.9904636824309625e-06, "log_odds_chosen": 0.5488556623458862, "log_odds_ratio": -0.4573056697845459, "logits/chosen": -0.20830032229423523, "logits/rejected": -0.967231273651123, "logps/chosen": -1.4516727924346924, "logps/rejected": -1.8982791900634766, "loss": 1.517, "nll_loss": 1.4712245464324951, "rewards/accuracies": 1.0, "rewards/chosen": -0.145167276263237, "rewards/margins": 0.04466064274311066, "rewards/rejected": -0.18982790410518646, "step": 511 }, { "epoch": 0.8069345941686368, "grad_norm": 0.20201221108436584, "learning_rate": 2.985684281901603e-06, "log_odds_chosen": 0.5933498740196228, "log_odds_ratio": -0.44515395164489746, "logits/chosen": -0.1818694919347763, "logits/rejected": -1.109740972518921, "logps/chosen": -1.5751166343688965, "logps/rejected": -2.0681002140045166, "loss": 1.6217, "nll_loss": 1.5771454572677612, "rewards/accuracies": 1.0, "rewards/chosen": -0.15751168131828308, "rewards/margins": 0.04929835721850395, "rewards/rejected": -0.20681002736091614, "step": 512 }, { "epoch": 0.8085106382978723, "grad_norm": 0.19857411086559296, "learning_rate": 2.980897434998293e-06, "log_odds_chosen": 0.8362709283828735, "log_odds_ratio": -0.36665236949920654, "logits/chosen": -0.26140159368515015, "logits/rejected": -1.1785284280776978, "logps/chosen": -1.4174448251724243, "logps/rejected": -2.10254168510437, "loss": 1.4779, "nll_loss": 1.4412541389465332, "rewards/accuracies": 1.0, "rewards/chosen": -0.1417444795370102, "rewards/margins": 0.06850968301296234, "rewards/rejected": -0.21025416254997253, "step": 513 }, { "epoch": 0.8100866824271079, "grad_norm": 0.2068711668252945, "learning_rate": 2.976103177883374e-06, "log_odds_chosen": 0.6137781143188477, "log_odds_ratio": -0.44573453068733215, "logits/chosen": -0.20057058334350586, "logits/rejected": -1.0517152547836304, "logps/chosen": -1.5677953958511353, "logps/rejected": -2.0858044624328613, "loss": 1.622, "nll_loss": 1.5774500370025635, "rewards/accuracies": 1.0, "rewards/chosen": -0.15677955746650696, "rewards/margins": 0.05180090665817261, "rewards/rejected": -0.20858046412467957, "step": 514 }, { "epoch": 0.8116627265563435, "grad_norm": 0.1855117529630661, "learning_rate": 2.971301546775167e-06, "log_odds_chosen": 0.7630009651184082, "log_odds_ratio": -0.3854103684425354, "logits/chosen": -0.2538478374481201, "logits/rejected": -1.3625380992889404, "logps/chosen": -1.476394772529602, "logps/rejected": -2.106713056564331, "loss": 1.538, "nll_loss": 1.4994207620620728, "rewards/accuracies": 1.0, "rewards/chosen": -0.1476394683122635, "rewards/margins": 0.06303183734416962, "rewards/rejected": -0.2106713056564331, "step": 515 }, { "epoch": 0.8132387706855791, "grad_norm": 0.19397298991680145, "learning_rate": 2.966492577947704e-06, "log_odds_chosen": 0.6858218908309937, "log_odds_ratio": -0.411394327878952, "logits/chosen": -0.237920880317688, "logits/rejected": -1.3812801837921143, "logps/chosen": -1.584381341934204, "logps/rejected": -2.1624438762664795, "loss": 1.6503, "nll_loss": 1.6091707944869995, "rewards/accuracies": 1.0, "rewards/chosen": -0.15843814611434937, "rewards/margins": 0.05780624598264694, "rewards/rejected": -0.2162443995475769, "step": 516 }, { "epoch": 0.8148148148148148, "grad_norm": 0.184777170419693, "learning_rate": 2.9616763077304457e-06, "log_odds_chosen": 0.8034108877182007, "log_odds_ratio": -0.3745085597038269, "logits/chosen": -0.1650959551334381, "logits/rejected": -1.287904143333435, "logps/chosen": -1.512897253036499, "logps/rejected": -2.18365216255188, "loss": 1.5811, "nll_loss": 1.5436216592788696, "rewards/accuracies": 1.0, "rewards/chosen": -0.15128973126411438, "rewards/margins": 0.06707549095153809, "rewards/rejected": -0.21836523711681366, "step": 517 }, { "epoch": 0.8163908589440504, "grad_norm": 0.18303681910037994, "learning_rate": 2.956852772508014e-06, "log_odds_chosen": 0.6197808384895325, "log_odds_ratio": -0.44592535495758057, "logits/chosen": -0.17690420150756836, "logits/rejected": -0.9530738592147827, "logps/chosen": -1.4160338640213013, "logps/rejected": -1.9253976345062256, "loss": 1.477, "nll_loss": 1.4324525594711304, "rewards/accuracies": 1.0, "rewards/chosen": -0.14160338044166565, "rewards/margins": 0.050936371088027954, "rewards/rejected": -0.1925397515296936, "step": 518 }, { "epoch": 0.817966903073286, "grad_norm": 0.20301342010498047, "learning_rate": 2.952022008719914e-06, "log_odds_chosen": 0.4952373206615448, "log_odds_ratio": -0.4794991910457611, "logits/chosen": -0.17073026299476624, "logits/rejected": -0.9922412633895874, "logps/chosen": -1.5578581094741821, "logps/rejected": -1.967944622039795, "loss": 1.6136, "nll_loss": 1.5656940937042236, "rewards/accuracies": 1.0, "rewards/chosen": -0.15578582882881165, "rewards/margins": 0.041008636355400085, "rewards/rejected": -0.19679445028305054, "step": 519 }, { "epoch": 0.8195429472025216, "grad_norm": 0.18457186222076416, "learning_rate": 2.9471840528602573e-06, "log_odds_chosen": 0.6501960158348083, "log_odds_ratio": -0.4303664565086365, "logits/chosen": -0.3231818675994873, "logits/rejected": -1.0612550973892212, "logps/chosen": -1.5010353326797485, "logps/rejected": -2.0381951332092285, "loss": 1.5551, "nll_loss": 1.51203191280365, "rewards/accuracies": 1.0, "rewards/chosen": -0.15010352432727814, "rewards/margins": 0.053715985268354416, "rewards/rejected": -0.20381951332092285, "step": 520 }, { "epoch": 0.8211189913317573, "grad_norm": 0.2020522803068161, "learning_rate": 2.9423389414774914e-06, "log_odds_chosen": 0.7233219742774963, "log_odds_ratio": -0.4016837179660797, "logits/chosen": -0.2887951135635376, "logits/rejected": -1.3780571222305298, "logps/chosen": -1.4648807048797607, "logps/rejected": -2.063239812850952, "loss": 1.5239, "nll_loss": 1.4837396144866943, "rewards/accuracies": 1.0, "rewards/chosen": -0.14648807048797607, "rewards/margins": 0.05983591079711914, "rewards/rejected": -0.20632398128509521, "step": 521 }, { "epoch": 0.8226950354609929, "grad_norm": 0.1893676072359085, "learning_rate": 2.9374867111741174e-06, "log_odds_chosen": 0.7161247134208679, "log_odds_ratio": -0.40280526876449585, "logits/chosen": -0.17054280638694763, "logits/rejected": -1.3533788919448853, "logps/chosen": -1.4935866594314575, "logps/rejected": -2.0865988731384277, "loss": 1.5451, "nll_loss": 1.5048449039459229, "rewards/accuracies": 1.0, "rewards/chosen": -0.14935867488384247, "rewards/margins": 0.059301216155290604, "rewards/rejected": -0.20865988731384277, "step": 522 }, { "epoch": 0.8242710795902285, "grad_norm": 0.20184732973575592, "learning_rate": 2.9326273986064177e-06, "log_odds_chosen": 0.7819587588310242, "log_odds_ratio": -0.3818580210208893, "logits/chosen": -0.22239316999912262, "logits/rejected": -1.2218939065933228, "logps/chosen": -1.511752724647522, "logps/rejected": -2.164085865020752, "loss": 1.5754, "nll_loss": 1.5372285842895508, "rewards/accuracies": 1.0, "rewards/chosen": -0.15117527544498444, "rewards/margins": 0.06523331999778748, "rewards/rejected": -0.21640858054161072, "step": 523 }, { "epoch": 0.8258471237194641, "grad_norm": 0.20823800563812256, "learning_rate": 2.9277610404841787e-06, "log_odds_chosen": 0.6158959865570068, "log_odds_ratio": -0.43306228518486023, "logits/chosen": -0.26541417837142944, "logits/rejected": -1.056063175201416, "logps/chosen": -1.48806893825531, "logps/rejected": -1.9945056438446045, "loss": 1.5477, "nll_loss": 1.5044422149658203, "rewards/accuracies": 1.0, "rewards/chosen": -0.14880691468715668, "rewards/margins": 0.0506436824798584, "rewards/rejected": -0.19945058226585388, "step": 524 }, { "epoch": 0.8274231678486997, "grad_norm": 0.1981971710920334, "learning_rate": 2.9228876735704107e-06, "log_odds_chosen": 0.5651233196258545, "log_odds_ratio": -0.4532637894153595, "logits/chosen": -0.23406416177749634, "logits/rejected": -1.1756788492202759, "logps/chosen": -1.3835755586624146, "logps/rejected": -1.8319984674453735, "loss": 1.4524, "nll_loss": 1.40702486038208, "rewards/accuracies": 1.0, "rewards/chosen": -0.13835756480693817, "rewards/margins": 0.044842299073934555, "rewards/rejected": -0.18319985270500183, "step": 525 }, { "epoch": 0.8289992119779354, "grad_norm": 0.20701991021633148, "learning_rate": 2.9180073346810738e-06, "log_odds_chosen": 0.5971174240112305, "log_odds_ratio": -0.44226890802383423, "logits/chosen": -0.2447807490825653, "logits/rejected": -1.2080211639404297, "logps/chosen": -1.6501985788345337, "logps/rejected": -2.156846284866333, "loss": 1.6873, "nll_loss": 1.6431210041046143, "rewards/accuracies": 1.0, "rewards/chosen": -0.16501986980438232, "rewards/margins": 0.05066476762294769, "rewards/rejected": -0.21568462252616882, "step": 526 }, { "epoch": 0.830575256107171, "grad_norm": 0.1888459324836731, "learning_rate": 2.9131200606847957e-06, "log_odds_chosen": 0.6569997668266296, "log_odds_ratio": -0.4262959361076355, "logits/chosen": -0.1883625090122223, "logits/rejected": -1.2298457622528076, "logps/chosen": -1.518366813659668, "logps/rejected": -2.064444065093994, "loss": 1.5756, "nll_loss": 1.5329397916793823, "rewards/accuracies": 1.0, "rewards/chosen": -0.15183669328689575, "rewards/margins": 0.054607708007097244, "rewards/rejected": -0.2064443975687027, "step": 527 }, { "epoch": 0.8321513002364066, "grad_norm": 0.19664834439754486, "learning_rate": 2.9082258885025995e-06, "log_odds_chosen": 0.7270923256874084, "log_odds_ratio": -0.4016813337802887, "logits/chosen": -0.34029343724250793, "logits/rejected": -1.2684504985809326, "logps/chosen": -1.5683590173721313, "logps/rejected": -2.1794798374176025, "loss": 1.6142, "nll_loss": 1.5739948749542236, "rewards/accuracies": 1.0, "rewards/chosen": -0.1568359136581421, "rewards/margins": 0.06111207604408264, "rewards/rejected": -0.21794798970222473, "step": 528 }, { "epoch": 0.8337273443656422, "grad_norm": 0.2009236365556717, "learning_rate": 2.9033248551076167e-06, "log_odds_chosen": 0.5915142893791199, "log_odds_ratio": -0.4430904686450958, "logits/chosen": -0.19656020402908325, "logits/rejected": -1.0940089225769043, "logps/chosen": -1.6268205642700195, "logps/rejected": -2.125175714492798, "loss": 1.6809, "nll_loss": 1.6366134881973267, "rewards/accuracies": 1.0, "rewards/chosen": -0.16268205642700195, "rewards/margins": 0.04983552545309067, "rewards/rejected": -0.21251758933067322, "step": 529 }, { "epoch": 0.8353033884948778, "grad_norm": 0.19963550567626953, "learning_rate": 2.8984169975248138e-06, "log_odds_chosen": 0.7648955583572388, "log_odds_ratio": -0.38624370098114014, "logits/chosen": -0.23580212891101837, "logits/rejected": -1.1444816589355469, "logps/chosen": -1.4914474487304688, "logps/rejected": -2.1245110034942627, "loss": 1.558, "nll_loss": 1.5194083452224731, "rewards/accuracies": 1.0, "rewards/chosen": -0.1491447538137436, "rewards/margins": 0.06330635398626328, "rewards/rejected": -0.21245113015174866, "step": 530 }, { "epoch": 0.8368794326241135, "grad_norm": 0.18562045693397522, "learning_rate": 2.893502352830712e-06, "log_odds_chosen": 0.9332537055015564, "log_odds_ratio": -0.33736011385917664, "logits/chosen": -0.26882943511009216, "logits/rejected": -1.3374062776565552, "logps/chosen": -1.3956873416900635, "logps/rejected": -2.1667256355285645, "loss": 1.4433, "nll_loss": 1.409551739692688, "rewards/accuracies": 1.0, "rewards/chosen": -0.1395687609910965, "rewards/margins": 0.07710380852222443, "rewards/rejected": -0.21667256951332092, "step": 531 }, { "epoch": 0.8384554767533491, "grad_norm": 0.18261106312274933, "learning_rate": 2.888580958153103e-06, "log_odds_chosen": 0.7341670989990234, "log_odds_ratio": -0.39641058444976807, "logits/chosen": -0.27277103066444397, "logits/rejected": -1.2492622137069702, "logps/chosen": -1.444199562072754, "logps/rejected": -2.0478129386901855, "loss": 1.5137, "nll_loss": 1.4740546941757202, "rewards/accuracies": 1.0, "rewards/chosen": -0.14441995322704315, "rewards/margins": 0.06036132946610451, "rewards/rejected": -0.20478127896785736, "step": 532 }, { "epoch": 0.8400315208825847, "grad_norm": 0.21797138452529907, "learning_rate": 2.8836528506707733e-06, "log_odds_chosen": 0.8146068453788757, "log_odds_ratio": -0.37162038683891296, "logits/chosen": -0.2771569490432739, "logits/rejected": -1.206723928451538, "logps/chosen": -1.5148285627365112, "logps/rejected": -2.1994688510894775, "loss": 1.5648, "nll_loss": 1.5275968313217163, "rewards/accuracies": 1.0, "rewards/chosen": -0.15148288011550903, "rewards/margins": 0.06846403330564499, "rewards/rejected": -0.21994687616825104, "step": 533 }, { "epoch": 0.8416075650118203, "grad_norm": 0.18425217270851135, "learning_rate": 2.878718067613222e-06, "log_odds_chosen": 0.6143471598625183, "log_odds_ratio": -0.43677818775177, "logits/chosen": -0.22780869901180267, "logits/rejected": -1.186183214187622, "logps/chosen": -1.566484808921814, "logps/rejected": -2.0800623893737793, "loss": 1.6194, "nll_loss": 1.5757222175598145, "rewards/accuracies": 1.0, "rewards/chosen": -0.15664850175380707, "rewards/margins": 0.05135776102542877, "rewards/rejected": -0.20800624787807465, "step": 534 }, { "epoch": 0.843183609141056, "grad_norm": 0.20346811413764954, "learning_rate": 2.8737766462603763e-06, "log_odds_chosen": 0.757498562335968, "log_odds_ratio": -0.39962002635002136, "logits/chosen": -0.2183956801891327, "logits/rejected": -1.0292649269104004, "logps/chosen": -1.5032674074172974, "logps/rejected": -2.140925407409668, "loss": 1.5535, "nll_loss": 1.5135215520858765, "rewards/accuracies": 1.0, "rewards/chosen": -0.15032674372196198, "rewards/margins": 0.06376579403877258, "rewards/rejected": -0.21409253776073456, "step": 535 }, { "epoch": 0.8447596532702916, "grad_norm": 0.18760064244270325, "learning_rate": 2.8688286239423167e-06, "log_odds_chosen": 0.6436713337898254, "log_odds_ratio": -0.4325043261051178, "logits/chosen": -0.21478521823883057, "logits/rejected": -1.2076549530029297, "logps/chosen": -1.544159173965454, "logps/rejected": -2.0843026638031006, "loss": 1.5881, "nll_loss": 1.544856071472168, "rewards/accuracies": 1.0, "rewards/chosen": -0.15441590547561646, "rewards/margins": 0.05401436612010002, "rewards/rejected": -0.20843026041984558, "step": 536 }, { "epoch": 0.8463356973995272, "grad_norm": 0.18854977190494537, "learning_rate": 2.8638740380389862e-06, "log_odds_chosen": 0.7302975058555603, "log_odds_ratio": -0.39930465817451477, "logits/chosen": -0.2619031071662903, "logits/rejected": -1.3238346576690674, "logps/chosen": -1.4519808292388916, "logps/rejected": -2.0523629188537598, "loss": 1.5152, "nll_loss": 1.4752285480499268, "rewards/accuracies": 1.0, "rewards/chosen": -0.14519809186458588, "rewards/margins": 0.06003819406032562, "rewards/rejected": -0.2052362859249115, "step": 537 }, { "epoch": 0.8479117415287628, "grad_norm": 0.20675887167453766, "learning_rate": 2.8589129259799164e-06, "log_odds_chosen": 0.8192201852798462, "log_odds_ratio": -0.3788171708583832, "logits/chosen": -0.2638776898384094, "logits/rejected": -1.114193320274353, "logps/chosen": -1.5262513160705566, "logps/rejected": -2.216820240020752, "loss": 1.5718, "nll_loss": 1.5339343547821045, "rewards/accuracies": 1.0, "rewards/chosen": -0.15262514352798462, "rewards/margins": 0.0690569132566452, "rewards/rejected": -0.22168205678462982, "step": 538 }, { "epoch": 0.8494877856579984, "grad_norm": 0.1934932917356491, "learning_rate": 2.853945325243938e-06, "log_odds_chosen": 0.6721277832984924, "log_odds_ratio": -0.4139913022518158, "logits/chosen": -0.26754871010780334, "logits/rejected": -1.1163392066955566, "logps/chosen": -1.5401967763900757, "logps/rejected": -2.0996975898742676, "loss": 1.587, "nll_loss": 1.5455737113952637, "rewards/accuracies": 1.0, "rewards/chosen": -0.15401966869831085, "rewards/margins": 0.055950067937374115, "rewards/rejected": -0.20996975898742676, "step": 539 }, { "epoch": 0.851063829787234, "grad_norm": 1.8241883516311646, "learning_rate": 2.848971273358903e-06, "log_odds_chosen": 0.8321257829666138, "log_odds_ratio": -0.3642383813858032, "logits/chosen": -0.3249572813510895, "logits/rejected": -1.1298644542694092, "logps/chosen": -1.4748423099517822, "logps/rejected": -2.1679956912994385, "loss": 1.5084, "nll_loss": 1.472002387046814, "rewards/accuracies": 1.0, "rewards/chosen": -0.14748422801494598, "rewards/margins": 0.0693153589963913, "rewards/rejected": -0.2167995721101761, "step": 540 }, { "epoch": 0.8526398739164697, "grad_norm": 0.20980137586593628, "learning_rate": 2.843990807901397e-06, "log_odds_chosen": 0.6258372068405151, "log_odds_ratio": -0.4339146912097931, "logits/chosen": -0.2298620641231537, "logits/rejected": -0.9028375744819641, "logps/chosen": -1.5608817338943481, "logps/rejected": -2.083118438720703, "loss": 1.6158, "nll_loss": 1.5724146366119385, "rewards/accuracies": 1.0, "rewards/chosen": -0.15608817338943481, "rewards/margins": 0.052223674952983856, "rewards/rejected": -0.20831184089183807, "step": 541 }, { "epoch": 0.8542159180457053, "grad_norm": 0.20256595313549042, "learning_rate": 2.839003966496458e-06, "log_odds_chosen": 0.8089483976364136, "log_odds_ratio": -0.38441747426986694, "logits/chosen": -0.14752182364463806, "logits/rejected": -1.2424575090408325, "logps/chosen": -1.526769757270813, "logps/rejected": -2.205688953399658, "loss": 1.5839, "nll_loss": 1.5454857349395752, "rewards/accuracies": 1.0, "rewards/chosen": -0.15267698466777802, "rewards/margins": 0.067891925573349, "rewards/rejected": -0.22056889533996582, "step": 542 }, { "epoch": 0.8557919621749409, "grad_norm": 0.18522503972053528, "learning_rate": 2.8340107868172905e-06, "log_odds_chosen": 0.7727735638618469, "log_odds_ratio": -0.3861379623413086, "logits/chosen": -0.26055678725242615, "logits/rejected": -1.236580491065979, "logps/chosen": -1.369378685951233, "logps/rejected": -1.999671459197998, "loss": 1.428, "nll_loss": 1.3893834352493286, "rewards/accuracies": 1.0, "rewards/chosen": -0.13693787157535553, "rewards/margins": 0.06302928924560547, "rewards/rejected": -0.1999671459197998, "step": 543 }, { "epoch": 0.8573680063041765, "grad_norm": 0.18243786692619324, "learning_rate": 2.8290113065849826e-06, "log_odds_chosen": 0.8464156985282898, "log_odds_ratio": -0.36339235305786133, "logits/chosen": -0.22719234228134155, "logits/rejected": -1.3813166618347168, "logps/chosen": -1.4748934507369995, "logps/rejected": -2.179795742034912, "loss": 1.5313, "nll_loss": 1.4949686527252197, "rewards/accuracies": 1.0, "rewards/chosen": -0.14748935401439667, "rewards/margins": 0.07049023360013962, "rewards/rejected": -0.2179795652627945, "step": 544 }, { "epoch": 0.8589440504334122, "grad_norm": 0.20985384285449982, "learning_rate": 2.8240055635682193e-06, "log_odds_chosen": 0.971196174621582, "log_odds_ratio": -0.33422982692718506, "logits/chosen": -0.2657662630081177, "logits/rejected": -1.2640752792358398, "logps/chosen": -1.4483425617218018, "logps/rejected": -2.2618367671966553, "loss": 1.4861, "nll_loss": 1.4526761770248413, "rewards/accuracies": 1.0, "rewards/chosen": -0.1448342651128769, "rewards/margins": 0.08134942501783371, "rewards/rejected": -0.2261836677789688, "step": 545 }, { "epoch": 0.8605200945626478, "grad_norm": 0.19999882578849792, "learning_rate": 2.8189935955829973e-06, "log_odds_chosen": 0.8773307204246521, "log_odds_ratio": -0.35459497570991516, "logits/chosen": -0.25905877351760864, "logits/rejected": -1.07989501953125, "logps/chosen": -1.5043911933898926, "logps/rejected": -2.2429165840148926, "loss": 1.5494, "nll_loss": 1.5139833688735962, "rewards/accuracies": 1.0, "rewards/chosen": -0.15043911337852478, "rewards/margins": 0.0738525539636612, "rewards/rejected": -0.22429165244102478, "step": 546 }, { "epoch": 0.8620961386918834, "grad_norm": 0.19614462554454803, "learning_rate": 2.813975440492342e-06, "log_odds_chosen": 0.7755805253982544, "log_odds_ratio": -0.387691855430603, "logits/chosen": -0.28992438316345215, "logits/rejected": -1.2587565183639526, "logps/chosen": -1.501508116722107, "logps/rejected": -2.146627902984619, "loss": 1.5625, "nll_loss": 1.5237247943878174, "rewards/accuracies": 1.0, "rewards/chosen": -0.1501508206129074, "rewards/margins": 0.0645119845867157, "rewards/rejected": -0.2146628051996231, "step": 547 }, { "epoch": 0.863672182821119, "grad_norm": 0.22099299728870392, "learning_rate": 2.8089511362060182e-06, "log_odds_chosen": 0.6048762798309326, "log_odds_ratio": -0.43883612751960754, "logits/chosen": -0.3003285229206085, "logits/rejected": -1.1491910219192505, "logps/chosen": -1.6390552520751953, "logps/rejected": -2.1528773307800293, "loss": 1.6681, "nll_loss": 1.624230980873108, "rewards/accuracies": 1.0, "rewards/chosen": -0.163905531167984, "rewards/margins": 0.05138222128152847, "rewards/rejected": -0.21528775990009308, "step": 548 }, { "epoch": 0.8652482269503546, "grad_norm": 0.21315144002437592, "learning_rate": 2.803920720680244e-06, "log_odds_chosen": 0.7596380710601807, "log_odds_ratio": -0.3877980411052704, "logits/chosen": -0.2508217394351959, "logits/rejected": -1.285441279411316, "logps/chosen": -1.497770071029663, "logps/rejected": -2.128038167953491, "loss": 1.5514, "nll_loss": 1.5126042366027832, "rewards/accuracies": 1.0, "rewards/chosen": -0.14977701008319855, "rewards/margins": 0.0630268082022667, "rewards/rejected": -0.21280381083488464, "step": 549 }, { "epoch": 0.8668242710795903, "grad_norm": 0.20469704270362854, "learning_rate": 2.7988842319174075e-06, "log_odds_chosen": 0.8056274056434631, "log_odds_ratio": -0.37798529863357544, "logits/chosen": -0.3726116418838501, "logits/rejected": -1.3260782957077026, "logps/chosen": -1.5661325454711914, "logps/rejected": -2.247863292694092, "loss": 1.603, "nll_loss": 1.5651633739471436, "rewards/accuracies": 1.0, "rewards/chosen": -0.15661326050758362, "rewards/margins": 0.06817308068275452, "rewards/rejected": -0.22478632628917694, "step": 550 }, { "epoch": 0.8684003152088259, "grad_norm": 0.1801159828901291, "learning_rate": 2.7938417079657743e-06, "log_odds_chosen": 0.8063299655914307, "log_odds_ratio": -0.3746653199195862, "logits/chosen": -0.3093138635158539, "logits/rejected": -1.38007652759552, "logps/chosen": -1.4094195365905762, "logps/rejected": -2.07232403755188, "loss": 1.4644, "nll_loss": 1.426937222480774, "rewards/accuracies": 1.0, "rewards/chosen": -0.14094194769859314, "rewards/margins": 0.06629044562578201, "rewards/rejected": -0.20723240077495575, "step": 551 }, { "epoch": 0.8699763593380615, "grad_norm": 0.18272462487220764, "learning_rate": 2.7887931869192047e-06, "log_odds_chosen": 0.7081438302993774, "log_odds_ratio": -0.40384554862976074, "logits/chosen": -0.3084084391593933, "logits/rejected": -1.2090731859207153, "logps/chosen": -1.4903384447097778, "logps/rejected": -2.0772554874420166, "loss": 1.5403, "nll_loss": 1.4998891353607178, "rewards/accuracies": 1.0, "rewards/chosen": -0.14903384447097778, "rewards/margins": 0.05869169905781746, "rewards/rejected": -0.20772555470466614, "step": 552 }, { "epoch": 0.8715524034672971, "grad_norm": 0.19639180600643158, "learning_rate": 2.783738706916865e-06, "log_odds_chosen": 0.847165584564209, "log_odds_ratio": -0.3647855520248413, "logits/chosen": -0.28008660674095154, "logits/rejected": -1.3739715814590454, "logps/chosen": -1.526125431060791, "logps/rejected": -2.23525333404541, "loss": 1.566, "nll_loss": 1.5295474529266357, "rewards/accuracies": 1.0, "rewards/chosen": -0.152612566947937, "rewards/margins": 0.07091278582811356, "rewards/rejected": -0.22352533042430878, "step": 553 }, { "epoch": 0.8731284475965327, "grad_norm": 0.19625112414360046, "learning_rate": 2.7786783061429356e-06, "log_odds_chosen": 0.6983753442764282, "log_odds_ratio": -0.40515345335006714, "logits/chosen": -0.34866851568222046, "logits/rejected": -1.0440998077392578, "logps/chosen": -1.4631657600402832, "logps/rejected": -2.0370101928710938, "loss": 1.4995, "nll_loss": 1.459031105041504, "rewards/accuracies": 1.0, "rewards/chosen": -0.14631657302379608, "rewards/margins": 0.05738444626331329, "rewards/rejected": -0.20370101928710938, "step": 554 }, { "epoch": 0.8747044917257684, "grad_norm": 0.19040443003177643, "learning_rate": 2.7736120228263287e-06, "log_odds_chosen": 0.8665981292724609, "log_odds_ratio": -0.35975712537765503, "logits/chosen": -0.26248475909233093, "logits/rejected": -1.2065774202346802, "logps/chosen": -1.4502075910568237, "logps/rejected": -2.1716394424438477, "loss": 1.5014, "nll_loss": 1.4654600620269775, "rewards/accuracies": 1.0, "rewards/chosen": -0.1450207531452179, "rewards/margins": 0.07214318215847015, "rewards/rejected": -0.21716395020484924, "step": 555 }, { "epoch": 0.876280535855004, "grad_norm": 0.17877773940563202, "learning_rate": 2.768539895240394e-06, "log_odds_chosen": 0.7663182616233826, "log_odds_ratio": -0.3947910964488983, "logits/chosen": -0.2686600387096405, "logits/rejected": -1.2789851427078247, "logps/chosen": -1.4189510345458984, "logps/rejected": -2.044057607650757, "loss": 1.4688, "nll_loss": 1.4293001890182495, "rewards/accuracies": 1.0, "rewards/chosen": -0.1418951004743576, "rewards/margins": 0.0625106617808342, "rewards/rejected": -0.2044057548046112, "step": 556 }, { "epoch": 0.8778565799842396, "grad_norm": 0.19811755418777466, "learning_rate": 2.763461961702633e-06, "log_odds_chosen": 0.796636164188385, "log_odds_ratio": -0.3811954855918884, "logits/chosen": -0.30146193504333496, "logits/rejected": -1.3820403814315796, "logps/chosen": -1.5293595790863037, "logps/rejected": -2.1986844539642334, "loss": 1.5534, "nll_loss": 1.515283465385437, "rewards/accuracies": 1.0, "rewards/chosen": -0.1529359519481659, "rewards/margins": 0.06693252176046371, "rewards/rejected": -0.219868466258049, "step": 557 }, { "epoch": 0.8794326241134752, "grad_norm": 0.19135232269763947, "learning_rate": 2.758378260574409e-06, "log_odds_chosen": 0.721428632736206, "log_odds_ratio": -0.39849963784217834, "logits/chosen": -0.28630053997039795, "logits/rejected": -1.2492754459381104, "logps/chosen": -1.5141559839248657, "logps/rejected": -2.1138787269592285, "loss": 1.5738, "nll_loss": 1.533995509147644, "rewards/accuracies": 1.0, "rewards/chosen": -0.15141557157039642, "rewards/margins": 0.05997228994965553, "rewards/rejected": -0.21138787269592285, "step": 558 }, { "epoch": 0.8810086682427108, "grad_norm": 0.18617720901966095, "learning_rate": 2.753288830260655e-06, "log_odds_chosen": 0.9730425477027893, "log_odds_ratio": -0.3354828953742981, "logits/chosen": -0.271095335483551, "logits/rejected": -1.4628548622131348, "logps/chosen": -1.5113807916641235, "logps/rejected": -2.336665391921997, "loss": 1.5499, "nll_loss": 1.516315221786499, "rewards/accuracies": 1.0, "rewards/chosen": -0.1511380821466446, "rewards/margins": 0.08252845704555511, "rewards/rejected": -0.2336665242910385, "step": 559 }, { "epoch": 0.8825847123719465, "grad_norm": 0.19468240439891815, "learning_rate": 2.7481937092095866e-06, "log_odds_chosen": 0.5405460596084595, "log_odds_ratio": -0.46741983294487, "logits/chosen": -0.22300955653190613, "logits/rejected": -1.1993566751480103, "logps/chosen": -1.5644901990890503, "logps/rejected": -2.020087957382202, "loss": 1.5992, "nll_loss": 1.5524640083312988, "rewards/accuracies": 1.0, "rewards/chosen": -0.15644903481006622, "rewards/margins": 0.045559756457805634, "rewards/rejected": -0.20200878381729126, "step": 560 }, { "epoch": 0.8841607565011821, "grad_norm": 0.1859883815050125, "learning_rate": 2.7430929359124086e-06, "log_odds_chosen": 0.6600494980812073, "log_odds_ratio": -0.42028987407684326, "logits/chosen": -0.2863801121711731, "logits/rejected": -0.9506353139877319, "logps/chosen": -1.410943627357483, "logps/rejected": -1.9470175504684448, "loss": 1.4807, "nll_loss": 1.4386284351348877, "rewards/accuracies": 1.0, "rewards/chosen": -0.141094371676445, "rewards/margins": 0.053607381880283356, "rewards/rejected": -0.19470174610614777, "step": 561 }, { "epoch": 0.8857368006304176, "grad_norm": 0.20268099009990692, "learning_rate": 2.737986548903029e-06, "log_odds_chosen": 1.0620503425598145, "log_odds_ratio": -0.3001374900341034, "logits/chosen": -0.4660804271697998, "logits/rejected": -1.3468518257141113, "logps/chosen": -1.3900612592697144, "logps/rejected": -2.2740349769592285, "loss": 1.448, "nll_loss": 1.4180266857147217, "rewards/accuracies": 1.0, "rewards/chosen": -0.1390061229467392, "rewards/margins": 0.08839737623929977, "rewards/rejected": -0.22740350663661957, "step": 562 }, { "epoch": 0.8873128447596532, "grad_norm": 0.19392478466033936, "learning_rate": 2.7328745867577604e-06, "log_odds_chosen": 0.6651920080184937, "log_odds_ratio": -0.4168819785118103, "logits/chosen": -0.2711959779262543, "logits/rejected": -1.2294808626174927, "logps/chosen": -1.545215368270874, "logps/rejected": -2.1002511978149414, "loss": 1.6007, "nll_loss": 1.5590217113494873, "rewards/accuracies": 1.0, "rewards/chosen": -0.15452155470848083, "rewards/margins": 0.055503591895103455, "rewards/rejected": -0.2100251317024231, "step": 563 }, { "epoch": 0.8888888888888888, "grad_norm": 0.20134912431240082, "learning_rate": 2.727757088095037e-06, "log_odds_chosen": 0.8869103193283081, "log_odds_ratio": -0.3482133150100708, "logits/chosen": -0.27464380860328674, "logits/rejected": -1.2576364278793335, "logps/chosen": -1.535430908203125, "logps/rejected": -2.284431219100952, "loss": 1.5755, "nll_loss": 1.5406620502471924, "rewards/accuracies": 1.0, "rewards/chosen": -0.15354308485984802, "rewards/margins": 0.07490003854036331, "rewards/rejected": -0.22844311594963074, "step": 564 }, { "epoch": 0.8904649330181245, "grad_norm": 0.19171306490898132, "learning_rate": 2.7226340915751156e-06, "log_odds_chosen": 0.7525812387466431, "log_odds_ratio": -0.3937699794769287, "logits/chosen": -0.26140308380126953, "logits/rejected": -1.071201205253601, "logps/chosen": -1.5400971174240112, "logps/rejected": -2.1722888946533203, "loss": 1.5952, "nll_loss": 1.5558509826660156, "rewards/accuracies": 1.0, "rewards/chosen": -0.15400969982147217, "rewards/margins": 0.06321915239095688, "rewards/rejected": -0.21722885966300964, "step": 565 }, { "epoch": 0.8920409771473601, "grad_norm": 0.1916339099407196, "learning_rate": 2.7175056358997887e-06, "log_odds_chosen": 0.6478245258331299, "log_odds_ratio": -0.43004921078681946, "logits/chosen": -0.18337209522724152, "logits/rejected": -1.3477016687393188, "logps/chosen": -1.5133692026138306, "logps/rejected": -2.0519278049468994, "loss": 1.5588, "nll_loss": 1.5158239603042603, "rewards/accuracies": 1.0, "rewards/chosen": -0.1513369381427765, "rewards/margins": 0.05385584756731987, "rewards/rejected": -0.20519277453422546, "step": 566 }, { "epoch": 0.8936170212765957, "grad_norm": 0.19772540032863617, "learning_rate": 2.7123717598120892e-06, "log_odds_chosen": 0.8925088047981262, "log_odds_ratio": -0.35744139552116394, "logits/chosen": -0.3828020691871643, "logits/rejected": -1.2839354276657104, "logps/chosen": -1.4594718217849731, "logps/rejected": -2.210188627243042, "loss": 1.5056, "nll_loss": 1.4698803424835205, "rewards/accuracies": 1.0, "rewards/chosen": -0.1459471881389618, "rewards/margins": 0.07507168501615524, "rewards/rejected": -0.22101886570453644, "step": 567 }, { "epoch": 0.8951930654058313, "grad_norm": 0.18732528388500214, "learning_rate": 2.7072325020959985e-06, "log_odds_chosen": 0.8665810227394104, "log_odds_ratio": -0.35743066668510437, "logits/chosen": -0.20690904557704926, "logits/rejected": -1.0224376916885376, "logps/chosen": -1.361433744430542, "logps/rejected": -2.068009376525879, "loss": 1.4292, "nll_loss": 1.3934705257415771, "rewards/accuracies": 1.0, "rewards/chosen": -0.13614337146282196, "rewards/margins": 0.07065757364034653, "rewards/rejected": -0.2068009376525879, "step": 568 }, { "epoch": 0.8967691095350669, "grad_norm": 0.22601142525672913, "learning_rate": 2.702087901576155e-06, "log_odds_chosen": 0.8955207467079163, "log_odds_ratio": -0.345583975315094, "logits/chosen": -0.16073869168758392, "logits/rejected": -1.188117265701294, "logps/chosen": -1.5008628368377686, "logps/rejected": -2.2542965412139893, "loss": 1.5314, "nll_loss": 1.4968734979629517, "rewards/accuracies": 1.0, "rewards/chosen": -0.15008629858493805, "rewards/margins": 0.07534339278936386, "rewards/rejected": -0.2254296839237213, "step": 569 }, { "epoch": 0.8983451536643026, "grad_norm": 0.1993560940027237, "learning_rate": 2.6969379971175576e-06, "log_odds_chosen": 0.84205162525177, "log_odds_ratio": -0.3631736934185028, "logits/chosen": -0.2777113616466522, "logits/rejected": -1.3776825666427612, "logps/chosen": -1.5447388887405396, "logps/rejected": -2.2568342685699463, "loss": 1.5938, "nll_loss": 1.5574393272399902, "rewards/accuracies": 1.0, "rewards/chosen": -0.1544739156961441, "rewards/margins": 0.07120953500270844, "rewards/rejected": -0.22568343579769135, "step": 570 }, { "epoch": 0.8999211977935382, "grad_norm": 0.20275285840034485, "learning_rate": 2.6917828276252745e-06, "log_odds_chosen": 0.9280831217765808, "log_odds_ratio": -0.3389042019844055, "logits/chosen": -0.2606070041656494, "logits/rejected": -1.232492208480835, "logps/chosen": -1.456214427947998, "logps/rejected": -2.229267120361328, "loss": 1.5004, "nll_loss": 1.4664775133132935, "rewards/accuracies": 1.0, "rewards/chosen": -0.1456214338541031, "rewards/margins": 0.07730529457330704, "rewards/rejected": -0.22292673587799072, "step": 571 }, { "epoch": 0.9014972419227738, "grad_norm": 0.1964682787656784, "learning_rate": 2.686622432044149e-06, "log_odds_chosen": 0.7510740756988525, "log_odds_ratio": -0.39093631505966187, "logits/chosen": -0.2591177225112915, "logits/rejected": -1.2564524412155151, "logps/chosen": -1.4025276899337769, "logps/rejected": -2.0166800022125244, "loss": 1.463, "nll_loss": 1.4239420890808105, "rewards/accuracies": 1.0, "rewards/chosen": -0.14025276899337769, "rewards/margins": 0.061415232717990875, "rewards/rejected": -0.20166799426078796, "step": 572 }, { "epoch": 0.9030732860520094, "grad_norm": 0.1793847680091858, "learning_rate": 2.681456849358505e-06, "log_odds_chosen": 0.8104532957077026, "log_odds_ratio": -0.3840184509754181, "logits/chosen": -0.34616154432296753, "logits/rejected": -1.3318803310394287, "logps/chosen": -1.4472362995147705, "logps/rejected": -2.120614528656006, "loss": 1.4976, "nll_loss": 1.4592398405075073, "rewards/accuracies": 1.0, "rewards/chosen": -0.14472362399101257, "rewards/margins": 0.06733782589435577, "rewards/rejected": -0.21206147968769073, "step": 573 }, { "epoch": 0.904649330181245, "grad_norm": 0.18348178267478943, "learning_rate": 2.6762861185918528e-06, "log_odds_chosen": 1.182490348815918, "log_odds_ratio": -0.29787686467170715, "logits/chosen": -0.3689255714416504, "logits/rejected": -1.3221290111541748, "logps/chosen": -1.3581138849258423, "logps/rejected": -2.352473735809326, "loss": 1.4083, "nll_loss": 1.378502368927002, "rewards/accuracies": 1.0, "rewards/chosen": -0.13581138849258423, "rewards/margins": 0.09943599998950958, "rewards/rejected": -0.235247403383255, "step": 574 }, { "epoch": 0.9062253743104807, "grad_norm": 0.20440001785755157, "learning_rate": 2.6711102788065934e-06, "log_odds_chosen": 0.9215613603591919, "log_odds_ratio": -0.34810373187065125, "logits/chosen": -0.2908223867416382, "logits/rejected": -1.2476472854614258, "logps/chosen": -1.5398906469345093, "logps/rejected": -2.327841281890869, "loss": 1.5889, "nll_loss": 1.5541001558303833, "rewards/accuracies": 1.0, "rewards/chosen": -0.1539890617132187, "rewards/margins": 0.07879504561424255, "rewards/rejected": -0.23278410732746124, "step": 575 }, { "epoch": 0.9078014184397163, "grad_norm": 0.210665762424469, "learning_rate": 2.665929369103724e-06, "log_odds_chosen": 0.6061529517173767, "log_odds_ratio": -0.44262316823005676, "logits/chosen": -0.2395247519016266, "logits/rejected": -0.9772445559501648, "logps/chosen": -1.5880591869354248, "logps/rejected": -2.0922789573669434, "loss": 1.6249, "nll_loss": 1.580668330192566, "rewards/accuracies": 1.0, "rewards/chosen": -0.1588059365749359, "rewards/margins": 0.050421975553035736, "rewards/rejected": -0.20922791957855225, "step": 576 }, { "epoch": 0.9093774625689519, "grad_norm": 0.19389891624450684, "learning_rate": 2.6607434286225427e-06, "log_odds_chosen": 0.8256452083587646, "log_odds_ratio": -0.3714563846588135, "logits/chosen": -0.2425473928451538, "logits/rejected": -1.2359596490859985, "logps/chosen": -1.4768764972686768, "logps/rejected": -2.1682870388031006, "loss": 1.5302, "nll_loss": 1.493094801902771, "rewards/accuracies": 1.0, "rewards/chosen": -0.1476876437664032, "rewards/margins": 0.06914106011390686, "rewards/rejected": -0.21682868897914886, "step": 577 }, { "epoch": 0.9109535066981875, "grad_norm": 0.21000936627388, "learning_rate": 2.6555524965403533e-06, "log_odds_chosen": 1.0111608505249023, "log_odds_ratio": -0.31302177906036377, "logits/chosen": -0.3497047424316406, "logits/rejected": -1.2955108880996704, "logps/chosen": -1.4033231735229492, "logps/rejected": -2.244431734085083, "loss": 1.462, "nll_loss": 1.4307024478912354, "rewards/accuracies": 1.0, "rewards/chosen": -0.14033232629299164, "rewards/margins": 0.08411087095737457, "rewards/rejected": -0.22444318234920502, "step": 578 }, { "epoch": 0.9125295508274232, "grad_norm": 0.18803854286670685, "learning_rate": 2.6503566120721683e-06, "log_odds_chosen": 0.982942521572113, "log_odds_ratio": -0.3207738697528839, "logits/chosen": -0.28768619894981384, "logits/rejected": -1.302692174911499, "logps/chosen": -1.4879767894744873, "logps/rejected": -2.316041946411133, "loss": 1.5368, "nll_loss": 1.5047372579574585, "rewards/accuracies": 1.0, "rewards/chosen": -0.1487976759672165, "rewards/margins": 0.0828065350651741, "rewards/rejected": -0.2316042184829712, "step": 579 }, { "epoch": 0.9141055949566588, "grad_norm": 0.18727664649486542, "learning_rate": 2.6451558144704126e-06, "log_odds_chosen": 0.9223566055297852, "log_odds_ratio": -0.3405495584011078, "logits/chosen": -0.40365201234817505, "logits/rejected": -1.2617590427398682, "logps/chosen": -1.4504380226135254, "logps/rejected": -2.2192819118499756, "loss": 1.4889, "nll_loss": 1.4548324346542358, "rewards/accuracies": 1.0, "rewards/chosen": -0.14504380524158478, "rewards/margins": 0.07688435912132263, "rewards/rejected": -0.2219281643629074, "step": 580 }, { "epoch": 0.9156816390858944, "grad_norm": 0.1898116171360016, "learning_rate": 2.6399501430246286e-06, "log_odds_chosen": 0.8261522650718689, "log_odds_ratio": -0.36379167437553406, "logits/chosen": -0.2834393084049225, "logits/rejected": -1.21560537815094, "logps/chosen": -1.4255337715148926, "logps/rejected": -2.1033647060394287, "loss": 1.469, "nll_loss": 1.4326211214065552, "rewards/accuracies": 1.0, "rewards/chosen": -0.14255337417125702, "rewards/margins": 0.06778310239315033, "rewards/rejected": -0.21033647656440735, "step": 581 }, { "epoch": 0.91725768321513, "grad_norm": 0.19628842175006866, "learning_rate": 2.634739637061177e-06, "log_odds_chosen": 0.6971665620803833, "log_odds_ratio": -0.40822935104370117, "logits/chosen": -0.30961042642593384, "logits/rejected": -1.1811354160308838, "logps/chosen": -1.4803777933120728, "logps/rejected": -2.0561718940734863, "loss": 1.5319, "nll_loss": 1.4910557270050049, "rewards/accuracies": 1.0, "rewards/chosen": -0.14803776144981384, "rewards/margins": 0.05757942050695419, "rewards/rejected": -0.20561718940734863, "step": 582 }, { "epoch": 0.9188337273443656, "grad_norm": 0.19777609407901764, "learning_rate": 2.6295243359429423e-06, "log_odds_chosen": 0.9527621269226074, "log_odds_ratio": -0.33162370324134827, "logits/chosen": -0.2951660752296448, "logits/rejected": -1.293708086013794, "logps/chosen": -1.4976961612701416, "logps/rejected": -2.3028030395507812, "loss": 1.5405, "nll_loss": 1.5073657035827637, "rewards/accuracies": 1.0, "rewards/chosen": -0.1497696191072464, "rewards/margins": 0.0805106908082962, "rewards/rejected": -0.2302803099155426, "step": 583 }, { "epoch": 0.9204097714736013, "grad_norm": 0.19010689854621887, "learning_rate": 2.624304279069033e-06, "log_odds_chosen": 0.954330563545227, "log_odds_ratio": -0.3304038941860199, "logits/chosen": -0.262991726398468, "logits/rejected": -1.220430612564087, "logps/chosen": -1.4275193214416504, "logps/rejected": -2.21905517578125, "loss": 1.4945, "nll_loss": 1.461504578590393, "rewards/accuracies": 1.0, "rewards/chosen": -0.14275193214416504, "rewards/margins": 0.07915358990430832, "rewards/rejected": -0.22190551459789276, "step": 584 }, { "epoch": 0.9219858156028369, "grad_norm": 0.23942263424396515, "learning_rate": 2.6190795058744854e-06, "log_odds_chosen": 0.9014804363250732, "log_odds_ratio": -0.35250112414360046, "logits/chosen": -0.32867133617401123, "logits/rejected": -1.2367173433303833, "logps/chosen": -1.51943838596344, "logps/rejected": -2.2841804027557373, "loss": 1.5637, "nll_loss": 1.5284459590911865, "rewards/accuracies": 1.0, "rewards/chosen": -0.1519438624382019, "rewards/margins": 0.07647417485713959, "rewards/rejected": -0.2284180372953415, "step": 585 }, { "epoch": 0.9235618597320725, "grad_norm": 0.21306301653385162, "learning_rate": 2.6138500558299664e-06, "log_odds_chosen": 0.7792106866836548, "log_odds_ratio": -0.3837544918060303, "logits/chosen": -0.30056121945381165, "logits/rejected": -1.131338357925415, "logps/chosen": -1.4999313354492188, "logps/rejected": -2.1518428325653076, "loss": 1.5432, "nll_loss": 1.504861831665039, "rewards/accuracies": 1.0, "rewards/chosen": -0.1499931514263153, "rewards/margins": 0.06519114971160889, "rewards/rejected": -0.215184286236763, "step": 586 }, { "epoch": 0.9251379038613081, "grad_norm": 0.20388370752334595, "learning_rate": 2.608615968441472e-06, "log_odds_chosen": 0.8730366230010986, "log_odds_ratio": -0.35827386379241943, "logits/chosen": -0.3451239764690399, "logits/rejected": -1.2428234815597534, "logps/chosen": -1.541178584098816, "logps/rejected": -2.2841219902038574, "loss": 1.5874, "nll_loss": 1.551594614982605, "rewards/accuracies": 1.0, "rewards/chosen": -0.1541178673505783, "rewards/margins": 0.0742943286895752, "rewards/rejected": -0.2284121960401535, "step": 587 }, { "epoch": 0.9267139479905437, "grad_norm": 0.19729666411876678, "learning_rate": 2.6033772832500333e-06, "log_odds_chosen": 0.8212614059448242, "log_odds_ratio": -0.3690330982208252, "logits/chosen": -0.3706430196762085, "logits/rejected": -1.1535141468048096, "logps/chosen": -1.5374579429626465, "logps/rejected": -2.2309741973876953, "loss": 1.5836, "nll_loss": 1.5466666221618652, "rewards/accuracies": 1.0, "rewards/chosen": -0.15374578535556793, "rewards/margins": 0.06935164332389832, "rewards/rejected": -0.22309744358062744, "step": 588 }, { "epoch": 0.9282899921197794, "grad_norm": 0.18803627789020538, "learning_rate": 2.5981340398314146e-06, "log_odds_chosen": 0.8717142343521118, "log_odds_ratio": -0.35949403047561646, "logits/chosen": -0.3949166536331177, "logits/rejected": -1.278796672821045, "logps/chosen": -1.4226505756378174, "logps/rejected": -2.1424102783203125, "loss": 1.4741, "nll_loss": 1.4381307363510132, "rewards/accuracies": 1.0, "rewards/chosen": -0.14226505160331726, "rewards/margins": 0.07197597622871399, "rewards/rejected": -0.21424104273319244, "step": 589 }, { "epoch": 0.929866036249015, "grad_norm": 0.200876384973526, "learning_rate": 2.592886277795815e-06, "log_odds_chosen": 0.8502908945083618, "log_odds_ratio": -0.36463305354118347, "logits/chosen": -0.34665432572364807, "logits/rejected": -1.574115514755249, "logps/chosen": -1.545432209968567, "logps/rejected": -2.2671260833740234, "loss": 1.6, "nll_loss": 1.5635088682174683, "rewards/accuracies": 1.0, "rewards/chosen": -0.1545432060956955, "rewards/margins": 0.07216940075159073, "rewards/rejected": -0.22671261429786682, "step": 590 }, { "epoch": 0.9314420803782506, "grad_norm": 0.20108841359615326, "learning_rate": 2.5876340367875706e-06, "log_odds_chosen": 1.0153826475143433, "log_odds_ratio": -0.31579023599624634, "logits/chosen": -0.34771737456321716, "logits/rejected": -1.486975908279419, "logps/chosen": -1.4971017837524414, "logps/rejected": -2.3579812049865723, "loss": 1.5475, "nll_loss": 1.5159598588943481, "rewards/accuracies": 1.0, "rewards/chosen": -0.14971019327640533, "rewards/margins": 0.08608793467283249, "rewards/rejected": -0.23579810559749603, "step": 591 }, { "epoch": 0.9330181245074862, "grad_norm": 0.18956692516803741, "learning_rate": 2.582377356484853e-06, "log_odds_chosen": 0.837839663028717, "log_odds_ratio": -0.3628246784210205, "logits/chosen": -0.37122786045074463, "logits/rejected": -1.2203317880630493, "logps/chosen": -1.3939440250396729, "logps/rejected": -2.0772705078125, "loss": 1.4546, "nll_loss": 1.4183294773101807, "rewards/accuracies": 1.0, "rewards/chosen": -0.13939440250396729, "rewards/margins": 0.06833265721797943, "rewards/rejected": -0.20772705972194672, "step": 592 }, { "epoch": 0.9345941686367218, "grad_norm": 0.19498895108699799, "learning_rate": 2.577116276599373e-06, "log_odds_chosen": 0.7960597276687622, "log_odds_ratio": -0.37910643219947815, "logits/chosen": -0.34351596236228943, "logits/rejected": -1.0250897407531738, "logps/chosen": -1.4233973026275635, "logps/rejected": -2.075965404510498, "loss": 1.4954, "nll_loss": 1.4574451446533203, "rewards/accuracies": 1.0, "rewards/chosen": -0.14233973622322083, "rewards/margins": 0.06525681167840958, "rewards/rejected": -0.2075965404510498, "step": 593 }, { "epoch": 0.9361702127659575, "grad_norm": 0.19851456582546234, "learning_rate": 2.5718508368760737e-06, "log_odds_chosen": 0.8462440371513367, "log_odds_ratio": -0.3784284293651581, "logits/chosen": -0.3738991916179657, "logits/rejected": -1.237477421760559, "logps/chosen": -1.4328763484954834, "logps/rejected": -2.136536121368408, "loss": 1.4951, "nll_loss": 1.4572863578796387, "rewards/accuracies": 1.0, "rewards/chosen": -0.14328764379024506, "rewards/margins": 0.07036596536636353, "rewards/rejected": -0.21365360915660858, "step": 594 }, { "epoch": 0.9377462568951931, "grad_norm": 0.18769197165966034, "learning_rate": 2.5665810770928386e-06, "log_odds_chosen": 0.9634343385696411, "log_odds_ratio": -0.3288772702217102, "logits/chosen": -0.3851248621940613, "logits/rejected": -1.3970948457717896, "logps/chosen": -1.4221051931381226, "logps/rejected": -2.2232911586761475, "loss": 1.4897, "nll_loss": 1.4568034410476685, "rewards/accuracies": 1.0, "rewards/chosen": -0.14221051335334778, "rewards/margins": 0.08011861145496368, "rewards/rejected": -0.22232912480831146, "step": 595 }, { "epoch": 0.9393223010244287, "grad_norm": 0.20603147149085999, "learning_rate": 2.5613070370601863e-06, "log_odds_chosen": 0.9289364218711853, "log_odds_ratio": -0.335774302482605, "logits/chosen": -0.31814393401145935, "logits/rejected": -1.2499024868011475, "logps/chosen": -1.4785757064819336, "logps/rejected": -2.2590699195861816, "loss": 1.5156, "nll_loss": 1.4820557832717896, "rewards/accuracies": 1.0, "rewards/chosen": -0.14785756170749664, "rewards/margins": 0.07804940640926361, "rewards/rejected": -0.22590698301792145, "step": 596 }, { "epoch": 0.9408983451536643, "grad_norm": 0.20211490988731384, "learning_rate": 2.556028756620969e-06, "log_odds_chosen": 0.856348991394043, "log_odds_ratio": -0.3590443730354309, "logits/chosen": -0.33854496479034424, "logits/rejected": -1.3129234313964844, "logps/chosen": -1.5026987791061401, "logps/rejected": -2.2209367752075195, "loss": 1.5329, "nll_loss": 1.4969648122787476, "rewards/accuracies": 1.0, "rewards/chosen": -0.15026986598968506, "rewards/margins": 0.07182382047176361, "rewards/rejected": -0.22209370136260986, "step": 597 }, { "epoch": 0.9424743892829, "grad_norm": 0.19991546869277954, "learning_rate": 2.5507462756500747e-06, "log_odds_chosen": 0.9981376528739929, "log_odds_ratio": -0.33000436425209045, "logits/chosen": -0.23800677061080933, "logits/rejected": -1.2048419713974, "logps/chosen": -1.5049721002578735, "logps/rejected": -2.351780652999878, "loss": 1.5433, "nll_loss": 1.5103166103363037, "rewards/accuracies": 1.0, "rewards/chosen": -0.1504971981048584, "rewards/margins": 0.08468086272478104, "rewards/rejected": -0.23517806828022003, "step": 598 }, { "epoch": 0.9440504334121356, "grad_norm": 0.2179379016160965, "learning_rate": 2.5454596340541245e-06, "log_odds_chosen": 0.8718122839927673, "log_odds_ratio": -0.3630530834197998, "logits/chosen": -0.4387681186199188, "logits/rejected": -1.3172277212142944, "logps/chosen": -1.5390511751174927, "logps/rejected": -2.2787020206451416, "loss": 1.5792, "nll_loss": 1.5428839921951294, "rewards/accuracies": 1.0, "rewards/chosen": -0.15390512347221375, "rewards/margins": 0.07396508753299713, "rewards/rejected": -0.22787019610404968, "step": 599 }, { "epoch": 0.9456264775413712, "grad_norm": 0.19863368570804596, "learning_rate": 2.5401688717711702e-06, "log_odds_chosen": 0.7998033165931702, "log_odds_ratio": -0.3759251832962036, "logits/chosen": -0.3329557776451111, "logits/rejected": -1.2800992727279663, "logps/chosen": -1.4736813306808472, "logps/rejected": -2.1393368244171143, "loss": 1.5281, "nll_loss": 1.4904803037643433, "rewards/accuracies": 1.0, "rewards/chosen": -0.14736813306808472, "rewards/margins": 0.06656555086374283, "rewards/rejected": -0.21393369138240814, "step": 600 }, { "epoch": 0.9472025216706068, "grad_norm": 0.19201233983039856, "learning_rate": 2.5348740287703937e-06, "log_odds_chosen": 1.0029901266098022, "log_odds_ratio": -0.3156769871711731, "logits/chosen": -0.34973573684692383, "logits/rejected": -1.1656208038330078, "logps/chosen": -1.3755903244018555, "logps/rejected": -2.201836347579956, "loss": 1.4415, "nll_loss": 1.4099783897399902, "rewards/accuracies": 1.0, "rewards/chosen": -0.13755902647972107, "rewards/margins": 0.08262462913990021, "rewards/rejected": -0.22018365561962128, "step": 601 }, { "epoch": 0.9487785657998424, "grad_norm": 0.2115398347377777, "learning_rate": 2.529575145051805e-06, "log_odds_chosen": 0.9399322867393494, "log_odds_ratio": -0.3423665761947632, "logits/chosen": -0.37769484519958496, "logits/rejected": -1.224244236946106, "logps/chosen": -1.4493341445922852, "logps/rejected": -2.236774444580078, "loss": 1.5193, "nll_loss": 1.485029697418213, "rewards/accuracies": 1.0, "rewards/chosen": -0.14493340253829956, "rewards/margins": 0.07874403148889542, "rewards/rejected": -0.22367745637893677, "step": 602 }, { "epoch": 0.950354609929078, "grad_norm": 0.19805483520030975, "learning_rate": 2.52427226064594e-06, "log_odds_chosen": 0.987168550491333, "log_odds_ratio": -0.3237678110599518, "logits/chosen": -0.44117000699043274, "logits/rejected": -1.3285408020019531, "logps/chosen": -1.4890186786651611, "logps/rejected": -2.3211069107055664, "loss": 1.5198, "nll_loss": 1.487433910369873, "rewards/accuracies": 1.0, "rewards/chosen": -0.14890186488628387, "rewards/margins": 0.0832088366150856, "rewards/rejected": -0.23211072385311127, "step": 603 }, { "epoch": 0.9519306540583137, "grad_norm": 0.18601743876934052, "learning_rate": 2.518965415613557e-06, "log_odds_chosen": 1.137446641921997, "log_odds_ratio": -0.2949294149875641, "logits/chosen": -0.37900564074516296, "logits/rejected": -1.2488760948181152, "logps/chosen": -1.3822598457336426, "logps/rejected": -2.332451105117798, "loss": 1.4432, "nll_loss": 1.4137518405914307, "rewards/accuracies": 1.0, "rewards/chosen": -0.1382259875535965, "rewards/margins": 0.0950191393494606, "rewards/rejected": -0.2332451194524765, "step": 604 }, { "epoch": 0.9535066981875493, "grad_norm": 0.19979554414749146, "learning_rate": 2.513654650045336e-06, "log_odds_chosen": 0.9225171208381653, "log_odds_ratio": -0.3389977216720581, "logits/chosen": -0.38119933009147644, "logits/rejected": -1.5180516242980957, "logps/chosen": -1.4174445867538452, "logps/rejected": -2.1828620433807373, "loss": 1.4656, "nll_loss": 1.4316853284835815, "rewards/accuracies": 1.0, "rewards/chosen": -0.1417444497346878, "rewards/margins": 0.07654174417257309, "rewards/rejected": -0.2182862013578415, "step": 605 }, { "epoch": 0.9550827423167849, "grad_norm": 0.1927998960018158, "learning_rate": 2.508340004061574e-06, "log_odds_chosen": 1.0575662851333618, "log_odds_ratio": -0.3081508278846741, "logits/chosen": -0.3997096121311188, "logits/rejected": -1.0927016735076904, "logps/chosen": -1.4310603141784668, "logps/rejected": -2.318833112716675, "loss": 1.4849, "nll_loss": 1.454042673110962, "rewards/accuracies": 1.0, "rewards/chosen": -0.14310602843761444, "rewards/margins": 0.08877727389335632, "rewards/rejected": -0.23188331723213196, "step": 606 }, { "epoch": 0.9566587864460205, "grad_norm": 0.19888868927955627, "learning_rate": 2.503021517811882e-06, "log_odds_chosen": 0.9682137370109558, "log_odds_ratio": -0.3253340423107147, "logits/chosen": -0.4364151358604431, "logits/rejected": -1.3818237781524658, "logps/chosen": -1.5068976879119873, "logps/rejected": -2.3259003162384033, "loss": 1.5557, "nll_loss": 1.5231314897537231, "rewards/accuracies": 1.0, "rewards/chosen": -0.1506897658109665, "rewards/margins": 0.08190026879310608, "rewards/rejected": -0.23259004950523376, "step": 607 }, { "epoch": 0.9582348305752562, "grad_norm": 0.19276244938373566, "learning_rate": 2.497699231474885e-06, "log_odds_chosen": 1.0809459686279297, "log_odds_ratio": -0.3042202889919281, "logits/chosen": -0.4018801152706146, "logits/rejected": -1.3968017101287842, "logps/chosen": -1.400070071220398, "logps/rejected": -2.3094584941864014, "loss": 1.4403, "nll_loss": 1.4098955392837524, "rewards/accuracies": 1.0, "rewards/chosen": -0.14000701904296875, "rewards/margins": 0.09093883633613586, "rewards/rejected": -0.23094584047794342, "step": 608 }, { "epoch": 0.9598108747044918, "grad_norm": 0.20758643746376038, "learning_rate": 2.4923731852579127e-06, "log_odds_chosen": 1.003240704536438, "log_odds_ratio": -0.32600492238998413, "logits/chosen": -0.41749560832977295, "logits/rejected": -1.0546338558197021, "logps/chosen": -1.4264339208602905, "logps/rejected": -2.2706849575042725, "loss": 1.4808, "nll_loss": 1.448164701461792, "rewards/accuracies": 1.0, "rewards/chosen": -0.14264339208602905, "rewards/margins": 0.08442509919404984, "rewards/rejected": -0.22706851363182068, "step": 609 }, { "epoch": 0.9613869188337274, "grad_norm": 0.19250842928886414, "learning_rate": 2.4870434193967017e-06, "log_odds_chosen": 1.1638853549957275, "log_odds_ratio": -0.28101596236228943, "logits/chosen": -0.3525513708591461, "logits/rejected": -1.4325730800628662, "logps/chosen": -1.4275925159454346, "logps/rejected": -2.4097812175750732, "loss": 1.457, "nll_loss": 1.4288923740386963, "rewards/accuracies": 1.0, "rewards/chosen": -0.1427592635154724, "rewards/margins": 0.0982188731431961, "rewards/rejected": -0.24097813665866852, "step": 610 }, { "epoch": 0.9629629629629629, "grad_norm": 0.21151085197925568, "learning_rate": 2.481709974155086e-06, "log_odds_chosen": 1.1808512210845947, "log_odds_ratio": -0.28556308150291443, "logits/chosen": -0.3375462591648102, "logits/rejected": -1.2200756072998047, "logps/chosen": -1.4453498125076294, "logps/rejected": -2.448586940765381, "loss": 1.4846, "nll_loss": 1.456089973449707, "rewards/accuracies": 1.0, "rewards/chosen": -0.14453496038913727, "rewards/margins": 0.10032372176647186, "rewards/rejected": -0.24485869705677032, "step": 611 }, { "epoch": 0.9645390070921985, "grad_norm": 0.22557130455970764, "learning_rate": 2.4763728898246983e-06, "log_odds_chosen": 1.2929182052612305, "log_odds_ratio": -0.251591295003891, "logits/chosen": -0.23864808678627014, "logits/rejected": -1.2782857418060303, "logps/chosen": -1.3319588899612427, "logps/rejected": -2.397937774658203, "loss": 1.3652, "nll_loss": 1.340023398399353, "rewards/accuracies": 1.0, "rewards/chosen": -0.1331958919763565, "rewards/margins": 0.1065979078412056, "rewards/rejected": -0.2397937923669815, "step": 612 }, { "epoch": 0.9661150512214342, "grad_norm": 0.18006299436092377, "learning_rate": 2.4710322067246607e-06, "log_odds_chosen": 0.9720537066459656, "log_odds_ratio": -0.3259618282318115, "logits/chosen": -0.30720773339271545, "logits/rejected": -1.2487545013427734, "logps/chosen": -1.461777687072754, "logps/rejected": -2.275256395339966, "loss": 1.4946, "nll_loss": 1.4619730710983276, "rewards/accuracies": 1.0, "rewards/chosen": -0.1461777687072754, "rewards/margins": 0.08134786784648895, "rewards/rejected": -0.22752563655376434, "step": 613 }, { "epoch": 0.9676910953506698, "grad_norm": 0.17892460525035858, "learning_rate": 2.465687965201283e-06, "log_odds_chosen": 1.0376200675964355, "log_odds_ratio": -0.31072306632995605, "logits/chosen": -0.35413774847984314, "logits/rejected": -1.2665860652923584, "logps/chosen": -1.3643009662628174, "logps/rejected": -2.222691535949707, "loss": 1.4241, "nll_loss": 1.3930423259735107, "rewards/accuracies": 1.0, "rewards/chosen": -0.13643009960651398, "rewards/margins": 0.08583903312683105, "rewards/rejected": -0.22226913273334503, "step": 614 }, { "epoch": 0.9692671394799054, "grad_norm": 0.19563743472099304, "learning_rate": 2.4603402056277577e-06, "log_odds_chosen": 0.9878734350204468, "log_odds_ratio": -0.32292407751083374, "logits/chosen": -0.4882684051990509, "logits/rejected": -1.2250369787216187, "logps/chosen": -1.4257678985595703, "logps/rejected": -2.247316837310791, "loss": 1.4822, "nll_loss": 1.4498778581619263, "rewards/accuracies": 1.0, "rewards/chosen": -0.14257679879665375, "rewards/margins": 0.08215488493442535, "rewards/rejected": -0.2247316688299179, "step": 615 }, { "epoch": 0.970843183609141, "grad_norm": 0.22117209434509277, "learning_rate": 2.454988968403854e-06, "log_odds_chosen": 0.7999638319015503, "log_odds_ratio": -0.3741395175457001, "logits/chosen": -0.3372759222984314, "logits/rejected": -1.3164011240005493, "logps/chosen": -1.479479193687439, "logps/rejected": -2.144162654876709, "loss": 1.5122, "nll_loss": 1.4747817516326904, "rewards/accuracies": 1.0, "rewards/chosen": -0.14794793725013733, "rewards/margins": 0.06646835803985596, "rewards/rejected": -0.2144162803888321, "step": 616 }, { "epoch": 0.9724192277383766, "grad_norm": 0.195278599858284, "learning_rate": 2.4496342939556133e-06, "log_odds_chosen": 1.010886549949646, "log_odds_ratio": -0.31771498918533325, "logits/chosen": -0.2681885361671448, "logits/rejected": -1.2319934368133545, "logps/chosen": -1.4390006065368652, "logps/rejected": -2.285721778869629, "loss": 1.4874, "nll_loss": 1.4556019306182861, "rewards/accuracies": 1.0, "rewards/chosen": -0.143900066614151, "rewards/margins": 0.08467209339141846, "rewards/rejected": -0.22857216000556946, "step": 617 }, { "epoch": 0.9739952718676123, "grad_norm": 0.19526216387748718, "learning_rate": 2.444276222735043e-06, "log_odds_chosen": 1.008836030960083, "log_odds_ratio": -0.3135777711868286, "logits/chosen": -0.4565156102180481, "logits/rejected": -1.3782358169555664, "logps/chosen": -1.3998832702636719, "logps/rejected": -2.2375426292419434, "loss": 1.4704, "nll_loss": 1.4390738010406494, "rewards/accuracies": 1.0, "rewards/chosen": -0.13998833298683167, "rewards/margins": 0.0837659239768982, "rewards/rejected": -0.22375425696372986, "step": 618 }, { "epoch": 0.9755713159968479, "grad_norm": 0.19684012234210968, "learning_rate": 2.4389147952198127e-06, "log_odds_chosen": 1.0080350637435913, "log_odds_ratio": -0.315501868724823, "logits/chosen": -0.46619752049446106, "logits/rejected": -1.2587003707885742, "logps/chosen": -1.443904161453247, "logps/rejected": -2.2875938415527344, "loss": 1.4711, "nll_loss": 1.4395172595977783, "rewards/accuracies": 1.0, "rewards/chosen": -0.14439040422439575, "rewards/margins": 0.08436896651983261, "rewards/rejected": -0.22875937819480896, "step": 619 }, { "epoch": 0.9771473601260835, "grad_norm": 0.18595977127552032, "learning_rate": 2.433550051912946e-06, "log_odds_chosen": 1.1093815565109253, "log_odds_ratio": -0.29099801182746887, "logits/chosen": -0.4006117284297943, "logits/rejected": -1.4136881828308105, "logps/chosen": -1.3357937335968018, "logps/rejected": -2.2475531101226807, "loss": 1.3835, "nll_loss": 1.3544049263000488, "rewards/accuracies": 1.0, "rewards/chosen": -0.13357935845851898, "rewards/margins": 0.09117594361305237, "rewards/rejected": -0.22475531697273254, "step": 620 }, { "epoch": 0.9787234042553191, "grad_norm": 0.21153992414474487, "learning_rate": 2.4281820333425167e-06, "log_odds_chosen": 1.0128014087677002, "log_odds_ratio": -0.3216671049594879, "logits/chosen": -0.39767417311668396, "logits/rejected": -1.2548097372055054, "logps/chosen": -1.5447418689727783, "logps/rejected": -2.411409854888916, "loss": 1.5688, "nll_loss": 1.5366218090057373, "rewards/accuracies": 1.0, "rewards/chosen": -0.1544741690158844, "rewards/margins": 0.08666681498289108, "rewards/rejected": -0.24114099144935608, "step": 621 }, { "epoch": 0.9802994483845547, "grad_norm": 0.20115748047828674, "learning_rate": 2.42281078006134e-06, "log_odds_chosen": 1.2284798622131348, "log_odds_ratio": -0.25919806957244873, "logits/chosen": -0.4328358769416809, "logits/rejected": -1.545608639717102, "logps/chosen": -1.5295909643173218, "logps/rejected": -2.5866165161132812, "loss": 1.5635, "nll_loss": 1.5375945568084717, "rewards/accuracies": 1.0, "rewards/chosen": -0.15295909345149994, "rewards/margins": 0.10570257902145386, "rewards/rejected": -0.2586616575717926, "step": 622 }, { "epoch": 0.9818754925137904, "grad_norm": 0.2268103063106537, "learning_rate": 2.4174363326466703e-06, "log_odds_chosen": 0.9978740811347961, "log_odds_ratio": -0.3261033892631531, "logits/chosen": -0.43390950560569763, "logits/rejected": -1.5303535461425781, "logps/chosen": -1.4596524238586426, "logps/rejected": -2.306164503097534, "loss": 1.5012, "nll_loss": 1.4685872793197632, "rewards/accuracies": 1.0, "rewards/chosen": -0.14596523344516754, "rewards/margins": 0.08465121686458588, "rewards/rejected": -0.23061645030975342, "step": 623 }, { "epoch": 0.983451536643026, "grad_norm": 0.2076517939567566, "learning_rate": 2.41205873169989e-06, "log_odds_chosen": 0.9155547618865967, "log_odds_ratio": -0.3434157967567444, "logits/chosen": -0.3254348635673523, "logits/rejected": -1.0746028423309326, "logps/chosen": -1.4498534202575684, "logps/rejected": -2.212179660797119, "loss": 1.4989, "nll_loss": 1.4645111560821533, "rewards/accuracies": 1.0, "rewards/chosen": -0.1449853479862213, "rewards/margins": 0.07623264193534851, "rewards/rejected": -0.22121797502040863, "step": 624 }, { "epoch": 0.9850275807722616, "grad_norm": 0.18755526840686798, "learning_rate": 2.4066780178462058e-06, "log_odds_chosen": 1.093489646911621, "log_odds_ratio": -0.30363929271698, "logits/chosen": -0.3011537492275238, "logits/rejected": -1.2025160789489746, "logps/chosen": -1.4793384075164795, "logps/rejected": -2.4127535820007324, "loss": 1.5154, "nll_loss": 1.4850668907165527, "rewards/accuracies": 1.0, "rewards/chosen": -0.14793384075164795, "rewards/margins": 0.09334155172109604, "rewards/rejected": -0.2412753701210022, "step": 625 }, { "epoch": 0.9866036249014972, "grad_norm": 0.19269263744354248, "learning_rate": 2.40129423173434e-06, "log_odds_chosen": 1.0034220218658447, "log_odds_ratio": -0.3211997449398041, "logits/chosen": -0.35841816663742065, "logits/rejected": -1.3340610265731812, "logps/chosen": -1.4337854385375977, "logps/rejected": -2.2714056968688965, "loss": 1.4894, "nll_loss": 1.4572951793670654, "rewards/accuracies": 1.0, "rewards/chosen": -0.14337855577468872, "rewards/margins": 0.08376200497150421, "rewards/rejected": -0.22714056074619293, "step": 626 }, { "epoch": 0.9881796690307328, "grad_norm": 0.20670926570892334, "learning_rate": 2.3959074140362274e-06, "log_odds_chosen": 1.0268993377685547, "log_odds_ratio": -0.3118314743041992, "logits/chosen": -0.4891017973423004, "logits/rejected": -1.2871733903884888, "logps/chosen": -1.4102925062179565, "logps/rejected": -2.26517391204834, "loss": 1.4519, "nll_loss": 1.4207662343978882, "rewards/accuracies": 1.0, "rewards/chosen": -0.1410292536020279, "rewards/margins": 0.08548815548419952, "rewards/rejected": -0.22651740908622742, "step": 627 }, { "epoch": 0.9897557131599685, "grad_norm": 0.22095555067062378, "learning_rate": 2.3905176054467007e-06, "log_odds_chosen": 1.2333874702453613, "log_odds_ratio": -0.2664095163345337, "logits/chosen": -0.47534334659576416, "logits/rejected": -1.3395469188690186, "logps/chosen": -1.39500892162323, "logps/rejected": -2.438161611557007, "loss": 1.4493, "nll_loss": 1.4226487874984741, "rewards/accuracies": 1.0, "rewards/chosen": -0.13950088620185852, "rewards/margins": 0.10431528836488724, "rewards/rejected": -0.24381616711616516, "step": 628 }, { "epoch": 0.9913317572892041, "grad_norm": 0.21743904054164886, "learning_rate": 2.3851248466831905e-06, "log_odds_chosen": 1.0874625444412231, "log_odds_ratio": -0.3082122206687927, "logits/chosen": -0.3590807020664215, "logits/rejected": -1.2268338203430176, "logps/chosen": -1.397624135017395, "logps/rejected": -2.311945676803589, "loss": 1.4329, "nll_loss": 1.4020652770996094, "rewards/accuracies": 1.0, "rewards/chosen": -0.13976241648197174, "rewards/margins": 0.09143215417861938, "rewards/rejected": -0.23119457066059113, "step": 629 }, { "epoch": 0.9929078014184397, "grad_norm": 0.2329930067062378, "learning_rate": 2.379729178485412e-06, "log_odds_chosen": 0.9880236387252808, "log_odds_ratio": -0.3203542232513428, "logits/chosen": -0.39962947368621826, "logits/rejected": -1.357712984085083, "logps/chosen": -1.5019173622131348, "logps/rejected": -2.340007781982422, "loss": 1.5417, "nll_loss": 1.5096969604492188, "rewards/accuracies": 1.0, "rewards/chosen": -0.1501917541027069, "rewards/margins": 0.08380904793739319, "rewards/rejected": -0.2340007871389389, "step": 630 }, { "epoch": 0.9944838455476753, "grad_norm": 0.2334175556898117, "learning_rate": 2.3743306416150636e-06, "log_odds_chosen": 1.0434774160385132, "log_odds_ratio": -0.3047768473625183, "logits/chosen": -0.3637744188308716, "logits/rejected": -1.3631768226623535, "logps/chosen": -1.4570072889328003, "logps/rejected": -2.332566738128662, "loss": 1.4981, "nll_loss": 1.467656135559082, "rewards/accuracies": 1.0, "rewards/chosen": -0.14570073783397675, "rewards/margins": 0.08755593746900558, "rewards/rejected": -0.23325668275356293, "step": 631 }, { "epoch": 0.996059889676911, "grad_norm": 0.22371363639831543, "learning_rate": 2.368929276855512e-06, "log_odds_chosen": 0.8800406455993652, "log_odds_ratio": -0.348661869764328, "logits/chosen": -0.3670199513435364, "logits/rejected": -1.2765319347381592, "logps/chosen": -1.5614867210388184, "logps/rejected": -2.3103034496307373, "loss": 1.5793, "nll_loss": 1.5443942546844482, "rewards/accuracies": 1.0, "rewards/chosen": -0.15614868700504303, "rewards/margins": 0.0748816654086113, "rewards/rejected": -0.23103034496307373, "step": 632 }, { "epoch": 0.9976359338061466, "grad_norm": 0.20589956641197205, "learning_rate": 2.363525125011487e-06, "log_odds_chosen": 0.9788938164710999, "log_odds_ratio": -0.33115172386169434, "logits/chosen": -0.4048473834991455, "logits/rejected": -1.3237719535827637, "logps/chosen": -1.5760952234268188, "logps/rejected": -2.4175729751586914, "loss": 1.6168, "nll_loss": 1.5837026834487915, "rewards/accuracies": 1.0, "rewards/chosen": -0.15760952234268188, "rewards/margins": 0.08414778858423233, "rewards/rejected": -0.24175730347633362, "step": 633 }, { "epoch": 0.9992119779353822, "grad_norm": 0.19094868004322052, "learning_rate": 2.3581182269087755e-06, "log_odds_chosen": 1.389491081237793, "log_odds_ratio": -0.26652011275291443, "logits/chosen": -0.37861159443855286, "logits/rejected": -1.4625403881072998, "logps/chosen": -1.385037899017334, "logps/rejected": -2.5498814582824707, "loss": 1.4362, "nll_loss": 1.4095302820205688, "rewards/accuracies": 1.0, "rewards/chosen": -0.1385038048028946, "rewards/margins": 0.11648434400558472, "rewards/rejected": -0.2549881637096405, "step": 634 }, { "epoch": 1.0, "grad_norm": 0.24677427113056183, "learning_rate": 2.3527086233939097e-06, "log_odds_chosen": 1.143390417098999, "log_odds_ratio": -0.28132104873657227, "logits/chosen": -0.38930320739746094, "logits/rejected": -1.5918021202087402, "logps/chosen": -1.4692234992980957, "logps/rejected": -2.4415197372436523, "loss": 1.5151, "nll_loss": 1.4869499206542969, "rewards/accuracies": 1.0, "rewards/chosen": -0.14692234992980957, "rewards/margins": 0.09722961485385895, "rewards/rejected": -0.2441519796848297, "step": 635 }, { "epoch": 1.0015760441292356, "grad_norm": 0.22635005414485931, "learning_rate": 2.347296355333861e-06, "log_odds_chosen": 1.0286297798156738, "log_odds_ratio": -0.30935704708099365, "logits/chosen": -0.48233625292778015, "logits/rejected": -1.2280995845794678, "logps/chosen": -1.4753402471542358, "logps/rejected": -2.3454596996307373, "loss": 1.5161, "nll_loss": 1.4851717948913574, "rewards/accuracies": 1.0, "rewards/chosen": -0.14753402769565582, "rewards/margins": 0.08701195567846298, "rewards/rejected": -0.2345459908246994, "step": 636 }, { "epoch": 1.0031520882584712, "grad_norm": 0.20084941387176514, "learning_rate": 2.3418814636157283e-06, "log_odds_chosen": 0.9146707653999329, "log_odds_ratio": -0.3415667712688446, "logits/chosen": -0.4143337607383728, "logits/rejected": -1.3947221040725708, "logps/chosen": -1.5152298212051392, "logps/rejected": -2.2845675945281982, "loss": 1.5458, "nll_loss": 1.5116223096847534, "rewards/accuracies": 1.0, "rewards/chosen": -0.15152299404144287, "rewards/margins": 0.07693378627300262, "rewards/rejected": -0.2284567654132843, "step": 637 }, { "epoch": 1.0047281323877069, "grad_norm": 0.1994769275188446, "learning_rate": 2.336463989146434e-06, "log_odds_chosen": 1.0365430116653442, "log_odds_ratio": -0.311045378446579, "logits/chosen": -0.4344879686832428, "logits/rejected": -1.1822190284729004, "logps/chosen": -1.4108353853225708, "logps/rejected": -2.2783117294311523, "loss": 1.4477, "nll_loss": 1.4165457487106323, "rewards/accuracies": 1.0, "rewards/chosen": -0.14108355343341827, "rewards/margins": 0.08674763143062592, "rewards/rejected": -0.2278311848640442, "step": 638 }, { "epoch": 1.0063041765169425, "grad_norm": 0.19928711652755737, "learning_rate": 2.3310439728524074e-06, "log_odds_chosen": 1.0843204259872437, "log_odds_ratio": -0.2937708795070648, "logits/chosen": -0.45674261450767517, "logits/rejected": -1.2861055135726929, "logps/chosen": -1.4331440925598145, "logps/rejected": -2.343839168548584, "loss": 1.4823, "nll_loss": 1.4529598951339722, "rewards/accuracies": 1.0, "rewards/chosen": -0.1433144211769104, "rewards/margins": 0.09106951951980591, "rewards/rejected": -0.23438391089439392, "step": 639 }, { "epoch": 1.007880220646178, "grad_norm": 0.19886772334575653, "learning_rate": 2.325621455679286e-06, "log_odds_chosen": 1.2139259576797485, "log_odds_ratio": -0.2702604830265045, "logits/chosen": -0.37458765506744385, "logits/rejected": -1.2667701244354248, "logps/chosen": -1.41457998752594, "logps/rejected": -2.4413113594055176, "loss": 1.449, "nll_loss": 1.4219905138015747, "rewards/accuracies": 1.0, "rewards/chosen": -0.14145800471305847, "rewards/margins": 0.10267314314842224, "rewards/rejected": -0.2441311478614807, "step": 640 }, { "epoch": 1.0094562647754137, "grad_norm": 0.19491668045520782, "learning_rate": 2.3201964785915953e-06, "log_odds_chosen": 0.919166088104248, "log_odds_ratio": -0.33753031492233276, "logits/chosen": -0.4605422019958496, "logits/rejected": -1.1579219102859497, "logps/chosen": -1.4780750274658203, "logps/rejected": -2.248640298843384, "loss": 1.5201, "nll_loss": 1.486314058303833, "rewards/accuracies": 1.0, "rewards/chosen": -0.1478075087070465, "rewards/margins": 0.07705653458833694, "rewards/rejected": -0.22486402094364166, "step": 641 }, { "epoch": 1.0110323089046493, "grad_norm": 0.21980389952659607, "learning_rate": 2.3147690825724457e-06, "log_odds_chosen": 0.859315037727356, "log_odds_ratio": -0.35994282364845276, "logits/chosen": -0.4084959626197815, "logits/rejected": -1.2679862976074219, "logps/chosen": -1.5238345861434937, "logps/rejected": -2.2519776821136475, "loss": 1.5578, "nll_loss": 1.5217998027801514, "rewards/accuracies": 1.0, "rewards/chosen": -0.1523834466934204, "rewards/margins": 0.07281431555747986, "rewards/rejected": -0.22519777715206146, "step": 642 }, { "epoch": 1.012608353033885, "grad_norm": 0.19822460412979126, "learning_rate": 2.3093393086232226e-06, "log_odds_chosen": 1.149411916732788, "log_odds_ratio": -0.27911868691444397, "logits/chosen": -0.3707679808139801, "logits/rejected": -1.2237002849578857, "logps/chosen": -1.3857028484344482, "logps/rejected": -2.3470818996429443, "loss": 1.4383, "nll_loss": 1.4103491306304932, "rewards/accuracies": 1.0, "rewards/chosen": -0.13857027888298035, "rewards/margins": 0.09613790363073349, "rewards/rejected": -0.23470818996429443, "step": 643 }, { "epoch": 1.0141843971631206, "grad_norm": 0.18972162902355194, "learning_rate": 2.3039071977632748e-06, "log_odds_chosen": 1.1466765403747559, "log_odds_ratio": -0.2902736961841583, "logits/chosen": -0.46120280027389526, "logits/rejected": -1.5394573211669922, "logps/chosen": -1.4458996057510376, "logps/rejected": -2.4237475395202637, "loss": 1.4804, "nll_loss": 1.4513590335845947, "rewards/accuracies": 1.0, "rewards/chosen": -0.14458994567394257, "rewards/margins": 0.09778478741645813, "rewards/rejected": -0.2423747479915619, "step": 644 }, { "epoch": 1.0157604412923562, "grad_norm": 0.1946789175271988, "learning_rate": 2.2984727910296044e-06, "log_odds_chosen": 0.935196042060852, "log_odds_ratio": -0.336031049489975, "logits/chosen": -0.46645069122314453, "logits/rejected": -1.354770302772522, "logps/chosen": -1.506697177886963, "logps/rejected": -2.299145221710205, "loss": 1.5601, "nll_loss": 1.5265041589736938, "rewards/accuracies": 1.0, "rewards/chosen": -0.15066972374916077, "rewards/margins": 0.07924479246139526, "rewards/rejected": -0.22991451621055603, "step": 645 }, { "epoch": 1.0173364854215918, "grad_norm": 0.23612742125988007, "learning_rate": 2.2930361294765594e-06, "log_odds_chosen": 1.0463613271713257, "log_odds_ratio": -0.3042789101600647, "logits/chosen": -0.3468957543373108, "logits/rejected": -1.4972220659255981, "logps/chosen": -1.5089102983474731, "logps/rejected": -2.399858236312866, "loss": 1.5605, "nll_loss": 1.5300790071487427, "rewards/accuracies": 1.0, "rewards/chosen": -0.15089105069637299, "rewards/margins": 0.08909478038549423, "rewards/rejected": -0.23998583853244781, "step": 646 }, { "epoch": 1.0189125295508275, "grad_norm": 0.19889263808727264, "learning_rate": 2.287597254175521e-06, "log_odds_chosen": 1.190199375152588, "log_odds_ratio": -0.28450313210487366, "logits/chosen": -0.40667012333869934, "logits/rejected": -1.3844438791275024, "logps/chosen": -1.3774466514587402, "logps/rejected": -2.3757009506225586, "loss": 1.4397, "nll_loss": 1.4112755060195923, "rewards/accuracies": 1.0, "rewards/chosen": -0.13774468004703522, "rewards/margins": 0.0998254269361496, "rewards/rejected": -0.23757009208202362, "step": 647 }, { "epoch": 1.020488573680063, "grad_norm": 0.21212461590766907, "learning_rate": 2.2821562062145938e-06, "log_odds_chosen": 1.2260313034057617, "log_odds_ratio": -0.27425557374954224, "logits/chosen": -0.4070795774459839, "logits/rejected": -1.502755045890808, "logps/chosen": -1.529916763305664, "logps/rejected": -2.596616506576538, "loss": 1.543, "nll_loss": 1.5155991315841675, "rewards/accuracies": 1.0, "rewards/chosen": -0.1529916673898697, "rewards/margins": 0.10667000710964203, "rewards/rejected": -0.2596616744995117, "step": 648 }, { "epoch": 1.0220646178092987, "grad_norm": 0.22124163806438446, "learning_rate": 2.2767130266982967e-06, "log_odds_chosen": 0.9073204398155212, "log_odds_ratio": -0.35024213790893555, "logits/chosen": -0.47316858172416687, "logits/rejected": -1.1234474182128906, "logps/chosen": -1.443469762802124, "logps/rejected": -2.2021484375, "loss": 1.4911, "nll_loss": 1.4561150074005127, "rewards/accuracies": 1.0, "rewards/chosen": -0.14434699714183807, "rewards/margins": 0.07586785405874252, "rewards/rejected": -0.22021484375, "step": 649 }, { "epoch": 1.0236406619385343, "grad_norm": 0.20071031153202057, "learning_rate": 2.271267756747251e-06, "log_odds_chosen": 1.093822956085205, "log_odds_ratio": -0.2981938123703003, "logits/chosen": -0.3957046866416931, "logits/rejected": -1.3798214197158813, "logps/chosen": -1.4157538414001465, "logps/rejected": -2.3325469493865967, "loss": 1.4483, "nll_loss": 1.4184434413909912, "rewards/accuracies": 1.0, "rewards/chosen": -0.1415753960609436, "rewards/margins": 0.09167931228876114, "rewards/rejected": -0.23325468599796295, "step": 650 }, { "epoch": 1.02521670606777, "grad_norm": 0.2593681514263153, "learning_rate": 2.265820437497871e-06, "log_odds_chosen": 0.9342566728591919, "log_odds_ratio": -0.34289321303367615, "logits/chosen": -0.3919522166252136, "logits/rejected": -1.1586806774139404, "logps/chosen": -1.410290241241455, "logps/rejected": -2.187276840209961, "loss": 1.4708, "nll_loss": 1.4364949464797974, "rewards/accuracies": 1.0, "rewards/chosen": -0.14102903008460999, "rewards/margins": 0.07769866287708282, "rewards/rejected": -0.2187276929616928, "step": 651 }, { "epoch": 1.0267927501970056, "grad_norm": 0.23078316450119019, "learning_rate": 2.26037111010205e-06, "log_odds_chosen": 0.9340367317199707, "log_odds_ratio": -0.3341951370239258, "logits/chosen": -0.4020829200744629, "logits/rejected": -1.2824766635894775, "logps/chosen": -1.405263900756836, "logps/rejected": -2.176396369934082, "loss": 1.4516, "nll_loss": 1.4181625843048096, "rewards/accuracies": 1.0, "rewards/chosen": -0.1405263990163803, "rewards/margins": 0.07711324095726013, "rewards/rejected": -0.21763963997364044, "step": 652 }, { "epoch": 1.0283687943262412, "grad_norm": 0.20919527113437653, "learning_rate": 2.254919815726856e-06, "log_odds_chosen": 0.9500594735145569, "log_odds_ratio": -0.33337053656578064, "logits/chosen": -0.45734572410583496, "logits/rejected": -1.2711719274520874, "logps/chosen": -1.4963735342025757, "logps/rejected": -2.2994837760925293, "loss": 1.5504, "nll_loss": 1.5170687437057495, "rewards/accuracies": 1.0, "rewards/chosen": -0.1496373564004898, "rewards/margins": 0.08031102269887924, "rewards/rejected": -0.22994837164878845, "step": 653 }, { "epoch": 1.0299448384554768, "grad_norm": 0.20019294321537018, "learning_rate": 2.2494665955542127e-06, "log_odds_chosen": 1.2779302597045898, "log_odds_ratio": -0.2577243149280548, "logits/chosen": -0.5304352045059204, "logits/rejected": -1.4261908531188965, "logps/chosen": -1.406901240348816, "logps/rejected": -2.486321210861206, "loss": 1.4461, "nll_loss": 1.4202921390533447, "rewards/accuracies": 1.0, "rewards/chosen": -0.14069011807441711, "rewards/margins": 0.10794198513031006, "rewards/rejected": -0.24863210320472717, "step": 654 }, { "epoch": 1.0315208825847124, "grad_norm": 0.22600282728672028, "learning_rate": 2.2440114907805942e-06, "log_odds_chosen": 1.0152065753936768, "log_odds_ratio": -0.3147715628147125, "logits/chosen": -0.5199546217918396, "logits/rejected": -1.3554050922393799, "logps/chosen": -1.538907527923584, "logps/rejected": -2.4036970138549805, "loss": 1.5585, "nll_loss": 1.527039885520935, "rewards/accuracies": 1.0, "rewards/chosen": -0.15389074385166168, "rewards/margins": 0.08647895604372025, "rewards/rejected": -0.24036970734596252, "step": 655 }, { "epoch": 1.033096926713948, "grad_norm": 0.21135546267032623, "learning_rate": 2.2385545426167112e-06, "log_odds_chosen": 0.9998137354850769, "log_odds_ratio": -0.3207527697086334, "logits/chosen": -0.34665676951408386, "logits/rejected": -1.5091569423675537, "logps/chosen": -1.5195560455322266, "logps/rejected": -2.3657279014587402, "loss": 1.5643, "nll_loss": 1.532178282737732, "rewards/accuracies": 1.0, "rewards/chosen": -0.15195560455322266, "rewards/margins": 0.08461718261241913, "rewards/rejected": -0.2365727722644806, "step": 656 }, { "epoch": 1.0346729708431837, "grad_norm": 0.1999482959508896, "learning_rate": 2.2330957922872016e-06, "log_odds_chosen": 1.3584487438201904, "log_odds_ratio": -0.24272942543029785, "logits/chosen": -0.5136032104492188, "logits/rejected": -1.5450217723846436, "logps/chosen": -1.4356147050857544, "logps/rejected": -2.6008501052856445, "loss": 1.4744, "nll_loss": 1.4501111507415771, "rewards/accuracies": 1.0, "rewards/chosen": -0.1435614824295044, "rewards/margins": 0.11652353405952454, "rewards/rejected": -0.26008501648902893, "step": 657 }, { "epoch": 1.0362490149724193, "grad_norm": 0.19058655202388763, "learning_rate": 2.2276352810303166e-06, "log_odds_chosen": 1.2888612747192383, "log_odds_ratio": -0.25243476033210754, "logits/chosen": -0.43797439336776733, "logits/rejected": -1.427096962928772, "logps/chosen": -1.3446942567825317, "logps/rejected": -2.425525426864624, "loss": 1.3929, "nll_loss": 1.367623209953308, "rewards/accuracies": 1.0, "rewards/chosen": -0.1344694197177887, "rewards/margins": 0.10808312147855759, "rewards/rejected": -0.24255254864692688, "step": 658 }, { "epoch": 1.037825059101655, "grad_norm": 0.20395569503307343, "learning_rate": 2.222173050097609e-06, "log_odds_chosen": 1.0824915170669556, "log_odds_ratio": -0.2948096692562103, "logits/chosen": -0.4529469609260559, "logits/rejected": -1.4607760906219482, "logps/chosen": -1.4559506177902222, "logps/rejected": -2.369581699371338, "loss": 1.5011, "nll_loss": 1.471666932106018, "rewards/accuracies": 1.0, "rewards/chosen": -0.14559505879878998, "rewards/margins": 0.09136311709880829, "rewards/rejected": -0.23695819079875946, "step": 659 }, { "epoch": 1.0394011032308905, "grad_norm": 0.21224889159202576, "learning_rate": 2.2167091407536272e-06, "log_odds_chosen": 1.037617564201355, "log_odds_ratio": -0.30816492438316345, "logits/chosen": -0.4756828248500824, "logits/rejected": -1.2282439470291138, "logps/chosen": -1.4746648073196411, "logps/rejected": -2.348268508911133, "loss": 1.5271, "nll_loss": 1.4963159561157227, "rewards/accuracies": 1.0, "rewards/chosen": -0.1474664807319641, "rewards/margins": 0.08736037462949753, "rewards/rejected": -0.23482683300971985, "step": 660 }, { "epoch": 1.0409771473601261, "grad_norm": 0.20707234740257263, "learning_rate": 2.211243594275594e-06, "log_odds_chosen": 1.0012534856796265, "log_odds_ratio": -0.3229444622993469, "logits/chosen": -0.47621166706085205, "logits/rejected": -1.1613657474517822, "logps/chosen": -1.496647596359253, "logps/rejected": -2.3439135551452637, "loss": 1.5196, "nll_loss": 1.4873143434524536, "rewards/accuracies": 1.0, "rewards/chosen": -0.1496647596359253, "rewards/margins": 0.08472661674022675, "rewards/rejected": -0.23439136147499084, "step": 661 }, { "epoch": 1.0425531914893618, "grad_norm": 0.2119607776403427, "learning_rate": 2.2057764519531034e-06, "log_odds_chosen": 1.0933870077133179, "log_odds_ratio": -0.2962178587913513, "logits/chosen": -0.4307108223438263, "logits/rejected": -1.3217809200286865, "logps/chosen": -1.4943647384643555, "logps/rejected": -2.426520347595215, "loss": 1.5165, "nll_loss": 1.4868603944778442, "rewards/accuracies": 1.0, "rewards/chosen": -0.14943647384643555, "rewards/margins": 0.09321555495262146, "rewards/rejected": -0.242652028799057, "step": 662 }, { "epoch": 1.0441292356185974, "grad_norm": 0.2170783281326294, "learning_rate": 2.2003077550878053e-06, "log_odds_chosen": 1.1991064548492432, "log_odds_ratio": -0.26581087708473206, "logits/chosen": -0.44469496607780457, "logits/rejected": -1.3662856817245483, "logps/chosen": -1.4822015762329102, "logps/rejected": -2.5079472064971924, "loss": 1.5001, "nll_loss": 1.473563313484192, "rewards/accuracies": 1.0, "rewards/chosen": -0.14822015166282654, "rewards/margins": 0.10257457941770554, "rewards/rejected": -0.25079473853111267, "step": 663 }, { "epoch": 1.045705279747833, "grad_norm": 0.21621710062026978, "learning_rate": 2.1948375449930915e-06, "log_odds_chosen": 1.0592302083969116, "log_odds_ratio": -0.3070540130138397, "logits/chosen": -0.4349663257598877, "logits/rejected": -1.4020671844482422, "logps/chosen": -1.5268006324768066, "logps/rejected": -2.436373233795166, "loss": 1.5509, "nll_loss": 1.5201623439788818, "rewards/accuracies": 1.0, "rewards/chosen": -0.15268008410930634, "rewards/margins": 0.09095728397369385, "rewards/rejected": -0.24363736808300018, "step": 664 }, { "epoch": 1.0472813238770686, "grad_norm": 0.20191088318824768, "learning_rate": 2.189365862993787e-06, "log_odds_chosen": 1.1319061517715454, "log_odds_ratio": -0.28574445843696594, "logits/chosen": -0.3920745551586151, "logits/rejected": -1.3230290412902832, "logps/chosen": -1.5087662935256958, "logps/rejected": -2.4781603813171387, "loss": 1.5468, "nll_loss": 1.5181792974472046, "rewards/accuracies": 1.0, "rewards/chosen": -0.15087662637233734, "rewards/margins": 0.09693944454193115, "rewards/rejected": -0.2478160858154297, "step": 665 }, { "epoch": 1.0488573680063042, "grad_norm": 0.2054862082004547, "learning_rate": 2.1838927504258354e-06, "log_odds_chosen": 1.120408296585083, "log_odds_ratio": -0.28678375482559204, "logits/chosen": -0.4047078788280487, "logits/rejected": -1.3636611700057983, "logps/chosen": -1.4629921913146973, "logps/rejected": -2.413411855697632, "loss": 1.495, "nll_loss": 1.4663009643554688, "rewards/accuracies": 1.0, "rewards/chosen": -0.14629922807216644, "rewards/margins": 0.09504196047782898, "rewards/rejected": -0.24134118854999542, "step": 666 }, { "epoch": 1.0504334121355399, "grad_norm": 0.22820371389389038, "learning_rate": 2.178418248635988e-06, "log_odds_chosen": 1.0012179613113403, "log_odds_ratio": -0.3233652412891388, "logits/chosen": -0.4469676613807678, "logits/rejected": -1.095099687576294, "logps/chosen": -1.534761667251587, "logps/rejected": -2.3932883739471436, "loss": 1.5999, "nll_loss": 1.5675679445266724, "rewards/accuracies": 1.0, "rewards/chosen": -0.15347616374492645, "rewards/margins": 0.08585266023874283, "rewards/rejected": -0.23932884633541107, "step": 667 }, { "epoch": 1.0520094562647755, "grad_norm": 0.20692212879657745, "learning_rate": 2.1729423989814912e-06, "log_odds_chosen": 0.9865133762359619, "log_odds_ratio": -0.323873370885849, "logits/chosen": -0.4800672233104706, "logits/rejected": -1.2725329399108887, "logps/chosen": -1.4586519002914429, "logps/rejected": -2.289828300476074, "loss": 1.4836, "nll_loss": 1.4511914253234863, "rewards/accuracies": 1.0, "rewards/chosen": -0.14586518704891205, "rewards/margins": 0.08311763405799866, "rewards/rejected": -0.2289828509092331, "step": 668 }, { "epoch": 1.053585500394011, "grad_norm": 0.2024935632944107, "learning_rate": 2.167465242829774e-06, "log_odds_chosen": 1.0498971939086914, "log_odds_ratio": -0.3074461817741394, "logits/chosen": -0.49104076623916626, "logits/rejected": -1.2772282361984253, "logps/chosen": -1.5097532272338867, "logps/rejected": -2.407884120941162, "loss": 1.5443, "nll_loss": 1.5135592222213745, "rewards/accuracies": 1.0, "rewards/chosen": -0.1509753167629242, "rewards/margins": 0.08981308341026306, "rewards/rejected": -0.24078840017318726, "step": 669 }, { "epoch": 1.0551615445232467, "grad_norm": 0.20358148217201233, "learning_rate": 2.1619868215581343e-06, "log_odds_chosen": 1.1426955461502075, "log_odds_ratio": -0.2820016145706177, "logits/chosen": -0.4818193316459656, "logits/rejected": -1.3105734586715698, "logps/chosen": -1.4363994598388672, "logps/rejected": -2.3980278968811035, "loss": 1.4916, "nll_loss": 1.4634190797805786, "rewards/accuracies": 1.0, "rewards/chosen": -0.1436399668455124, "rewards/margins": 0.0961628332734108, "rewards/rejected": -0.2398027926683426, "step": 670 }, { "epoch": 1.0567375886524824, "grad_norm": 0.20384307205677032, "learning_rate": 2.1565071765534287e-06, "log_odds_chosen": 1.0277100801467896, "log_odds_ratio": -0.31216877698898315, "logits/chosen": -0.4087562561035156, "logits/rejected": -1.33811616897583, "logps/chosen": -1.5160971879959106, "logps/rejected": -2.3919973373413086, "loss": 1.5464, "nll_loss": 1.5151844024658203, "rewards/accuracies": 1.0, "rewards/chosen": -0.15160971879959106, "rewards/margins": 0.08759000897407532, "rewards/rejected": -0.2391997128725052, "step": 671 }, { "epoch": 1.058313632781718, "grad_norm": 0.20280694961547852, "learning_rate": 2.1510263492117574e-06, "log_odds_chosen": 0.8706097602844238, "log_odds_ratio": -0.35547569394111633, "logits/chosen": -0.4099719822406769, "logits/rejected": -1.165898084640503, "logps/chosen": -1.5018154382705688, "logps/rejected": -2.2361526489257812, "loss": 1.5544, "nll_loss": 1.5188325643539429, "rewards/accuracies": 1.0, "rewards/chosen": -0.15018156170845032, "rewards/margins": 0.07343369722366333, "rewards/rejected": -0.22361525893211365, "step": 672 }, { "epoch": 1.0598896769109536, "grad_norm": 0.24474608898162842, "learning_rate": 2.1455443809381535e-06, "log_odds_chosen": 1.22650146484375, "log_odds_ratio": -0.26890575885772705, "logits/chosen": -0.49158066511154175, "logits/rejected": -1.5457218885421753, "logps/chosen": -1.4304301738739014, "logps/rejected": -2.4712085723876953, "loss": 1.4672, "nll_loss": 1.4402765035629272, "rewards/accuracies": 1.0, "rewards/chosen": -0.14304301142692566, "rewards/margins": 0.10407783836126328, "rewards/rejected": -0.24712085723876953, "step": 673 }, { "epoch": 1.0614657210401892, "grad_norm": 0.21208275854587555, "learning_rate": 2.1400613131462697e-06, "log_odds_chosen": 1.1482954025268555, "log_odds_ratio": -0.281281441450119, "logits/chosen": -0.41779255867004395, "logits/rejected": -1.416609764099121, "logps/chosen": -1.5766055583953857, "logps/rejected": -2.5737593173980713, "loss": 1.5968, "nll_loss": 1.5687103271484375, "rewards/accuracies": 1.0, "rewards/chosen": -0.157660573720932, "rewards/margins": 0.09971538186073303, "rewards/rejected": -0.25737592577934265, "step": 674 }, { "epoch": 1.0630417651694248, "grad_norm": 0.19848236441612244, "learning_rate": 2.1345771872580628e-06, "log_odds_chosen": 1.1715642213821411, "log_odds_ratio": -0.2934086322784424, "logits/chosen": -0.45807644724845886, "logits/rejected": -1.4059028625488281, "logps/chosen": -1.484837532043457, "logps/rejected": -2.4954147338867188, "loss": 1.5094, "nll_loss": 1.4800400733947754, "rewards/accuracies": 1.0, "rewards/chosen": -0.1484837383031845, "rewards/margins": 0.1010577380657196, "rewards/rejected": -0.2495414763689041, "step": 675 }, { "epoch": 1.0646178092986605, "grad_norm": 0.19303160905838013, "learning_rate": 2.1290920447034846e-06, "log_odds_chosen": 1.0953112840652466, "log_odds_ratio": -0.2924834191799164, "logits/chosen": -0.4072478115558624, "logits/rejected": -1.357849359512329, "logps/chosen": -1.461911916732788, "logps/rejected": -2.386472225189209, "loss": 1.4992, "nll_loss": 1.4699064493179321, "rewards/accuracies": 1.0, "rewards/chosen": -0.14619119465351105, "rewards/margins": 0.09245604276657104, "rewards/rejected": -0.2386472523212433, "step": 676 }, { "epoch": 1.066193853427896, "grad_norm": 0.22716405987739563, "learning_rate": 2.1236059269201683e-06, "log_odds_chosen": 0.9468636512756348, "log_odds_ratio": -0.334963858127594, "logits/chosen": -0.5059394240379333, "logits/rejected": -1.2632321119308472, "logps/chosen": -1.4986002445220947, "logps/rejected": -2.3012125492095947, "loss": 1.5213, "nll_loss": 1.4878305196762085, "rewards/accuracies": 1.0, "rewards/chosen": -0.14986002445220947, "rewards/margins": 0.0802612453699112, "rewards/rejected": -0.23012126982212067, "step": 677 }, { "epoch": 1.0677698975571317, "grad_norm": 0.25305354595184326, "learning_rate": 2.1181188753531124e-06, "log_odds_chosen": 1.0967220067977905, "log_odds_ratio": -0.29285427927970886, "logits/chosen": -0.5359491109848022, "logits/rejected": -1.3216474056243896, "logps/chosen": -1.632015585899353, "logps/rejected": -2.5898711681365967, "loss": 1.6498, "nll_loss": 1.6205224990844727, "rewards/accuracies": 1.0, "rewards/chosen": -0.16320157051086426, "rewards/margins": 0.09578555822372437, "rewards/rejected": -0.2589871287345886, "step": 678 }, { "epoch": 1.0693459416863673, "grad_norm": 0.20264583826065063, "learning_rate": 2.112630931454371e-06, "log_odds_chosen": 1.1878125667572021, "log_odds_ratio": -0.272763192653656, "logits/chosen": -0.4376254975795746, "logits/rejected": -1.2670501470565796, "logps/chosen": -1.4005696773529053, "logps/rejected": -2.396745204925537, "loss": 1.4444, "nll_loss": 1.417091965675354, "rewards/accuracies": 1.0, "rewards/chosen": -0.14005698263645172, "rewards/margins": 0.09961752593517303, "rewards/rejected": -0.23967449367046356, "step": 679 }, { "epoch": 1.070921985815603, "grad_norm": 0.19328324496746063, "learning_rate": 2.10714213668274e-06, "log_odds_chosen": 1.0389231443405151, "log_odds_ratio": -0.3068993389606476, "logits/chosen": -0.4587363302707672, "logits/rejected": -1.2263991832733154, "logps/chosen": -1.448409914970398, "logps/rejected": -2.3234221935272217, "loss": 1.4967, "nll_loss": 1.466004490852356, "rewards/accuracies": 1.0, "rewards/chosen": -0.14484098553657532, "rewards/margins": 0.08750123530626297, "rewards/rejected": -0.23234222829341888, "step": 680 }, { "epoch": 1.0724980299448386, "grad_norm": 0.21751874685287476, "learning_rate": 2.1016525325034403e-06, "log_odds_chosen": 1.3174195289611816, "log_odds_ratio": -0.2651524543762207, "logits/chosen": -0.45115169882774353, "logits/rejected": -1.3691192865371704, "logps/chosen": -1.4566643238067627, "logps/rejected": -2.591353416442871, "loss": 1.4862, "nll_loss": 1.4596649408340454, "rewards/accuracies": 1.0, "rewards/chosen": -0.1456664353609085, "rewards/margins": 0.11346893012523651, "rewards/rejected": -0.259135365486145, "step": 681 }, { "epoch": 1.074074074074074, "grad_norm": 0.21977226436138153, "learning_rate": 2.096162160387811e-06, "log_odds_chosen": 0.9342979192733765, "log_odds_ratio": -0.33705762028694153, "logits/chosen": -0.4070799648761749, "logits/rejected": -1.302762746810913, "logps/chosen": -1.393695592880249, "logps/rejected": -2.1565520763397217, "loss": 1.4537, "nll_loss": 1.4199846982955933, "rewards/accuracies": 1.0, "rewards/chosen": -0.13936956226825714, "rewards/margins": 0.07628564536571503, "rewards/rejected": -0.21565520763397217, "step": 682 }, { "epoch": 1.0756501182033098, "grad_norm": 0.18787585198879242, "learning_rate": 2.09067106181299e-06, "log_odds_chosen": 1.1954203844070435, "log_odds_ratio": -0.2733722925186157, "logits/chosen": -0.4470306634902954, "logits/rejected": -1.3488725423812866, "logps/chosen": -1.3385844230651855, "logps/rejected": -2.3301005363464355, "loss": 1.384, "nll_loss": 1.3566768169403076, "rewards/accuracies": 1.0, "rewards/chosen": -0.13385844230651855, "rewards/margins": 0.0991516038775444, "rewards/rejected": -0.23301005363464355, "step": 683 }, { "epoch": 1.0772261623325452, "grad_norm": 0.21121583878993988, "learning_rate": 2.0851792782616052e-06, "log_odds_chosen": 1.3500101566314697, "log_odds_ratio": -0.24266520142555237, "logits/chosen": -0.3705444931983948, "logits/rejected": -1.5028289556503296, "logps/chosen": -1.4343652725219727, "logps/rejected": -2.5927631855010986, "loss": 1.4733, "nll_loss": 1.4490647315979004, "rewards/accuracies": 1.0, "rewards/chosen": -0.14343653619289398, "rewards/margins": 0.11583980917930603, "rewards/rejected": -0.2592763304710388, "step": 684 }, { "epoch": 1.078802206461781, "grad_norm": 0.20326411724090576, "learning_rate": 2.0796868512214576e-06, "log_odds_chosen": 1.1894053220748901, "log_odds_ratio": -0.2836572229862213, "logits/chosen": -0.5265865325927734, "logits/rejected": -1.2781651020050049, "logps/chosen": -1.4270437955856323, "logps/rejected": -2.43418288230896, "loss": 1.4809, "nll_loss": 1.4525479078292847, "rewards/accuracies": 1.0, "rewards/chosen": -0.14270438253879547, "rewards/margins": 0.10071390867233276, "rewards/rejected": -0.24341829121112823, "step": 685 }, { "epoch": 1.0803782505910164, "grad_norm": 0.1927499771118164, "learning_rate": 2.0741938221852103e-06, "log_odds_chosen": 1.1493513584136963, "log_odds_ratio": -0.28375622630119324, "logits/chosen": -0.4246770143508911, "logits/rejected": -1.3402469158172607, "logps/chosen": -1.4302904605865479, "logps/rejected": -2.40423583984375, "loss": 1.4764, "nll_loss": 1.4480384588241577, "rewards/accuracies": 1.0, "rewards/chosen": -0.14302903413772583, "rewards/margins": 0.09739455580711365, "rewards/rejected": -0.24042358994483948, "step": 686 }, { "epoch": 1.081954294720252, "grad_norm": 0.2168681025505066, "learning_rate": 2.0687002326500743e-06, "log_odds_chosen": 1.2200334072113037, "log_odds_ratio": -0.26408666372299194, "logits/chosen": -0.5207791328430176, "logits/rejected": -1.4260272979736328, "logps/chosen": -1.4295238256454468, "logps/rejected": -2.4644484519958496, "loss": 1.4635, "nll_loss": 1.437109112739563, "rewards/accuracies": 1.0, "rewards/chosen": -0.14295236766338348, "rewards/margins": 0.10349246859550476, "rewards/rejected": -0.24644485116004944, "step": 687 }, { "epoch": 1.0835303388494877, "grad_norm": 0.2626137137413025, "learning_rate": 2.0632061241174942e-06, "log_odds_chosen": 1.1877446174621582, "log_odds_ratio": -0.28063517808914185, "logits/chosen": -0.5086207389831543, "logits/rejected": -1.3838472366333008, "logps/chosen": -1.4812184572219849, "logps/rejected": -2.4984750747680664, "loss": 1.5082, "nll_loss": 1.48015296459198, "rewards/accuracies": 1.0, "rewards/chosen": -0.14812184870243073, "rewards/margins": 0.10172563791275024, "rewards/rejected": -0.24984750151634216, "step": 688 }, { "epoch": 1.0851063829787233, "grad_norm": 0.2078801542520523, "learning_rate": 2.0577115380928364e-06, "log_odds_chosen": 1.1658936738967896, "log_odds_ratio": -0.28187406063079834, "logits/chosen": -0.46922481060028076, "logits/rejected": -1.3566447496414185, "logps/chosen": -1.4518340826034546, "logps/rejected": -2.446504831314087, "loss": 1.4766, "nll_loss": 1.448391318321228, "rewards/accuracies": 1.0, "rewards/chosen": -0.14518341422080994, "rewards/margins": 0.09946707636117935, "rewards/rejected": -0.24465049803256989, "step": 689 }, { "epoch": 1.086682427107959, "grad_norm": 0.2070070058107376, "learning_rate": 2.052216516085073e-06, "log_odds_chosen": 0.9943616390228271, "log_odds_ratio": -0.33048465847969055, "logits/chosen": -0.4671243727207184, "logits/rejected": -1.3567644357681274, "logps/chosen": -1.4065361022949219, "logps/rejected": -2.240896224975586, "loss": 1.4587, "nll_loss": 1.4256466627120972, "rewards/accuracies": 1.0, "rewards/chosen": -0.1406536102294922, "rewards/margins": 0.083436019718647, "rewards/rejected": -0.2240896373987198, "step": 690 }, { "epoch": 1.0882584712371945, "grad_norm": 0.2293894737958908, "learning_rate": 2.0467210996064707e-06, "log_odds_chosen": 0.9658500552177429, "log_odds_ratio": -0.3337486684322357, "logits/chosen": -0.5552599430084229, "logits/rejected": -0.977747917175293, "logps/chosen": -1.462015151977539, "logps/rejected": -2.276854991912842, "loss": 1.4938, "nll_loss": 1.4604203701019287, "rewards/accuracies": 1.0, "rewards/chosen": -0.14620152115821838, "rewards/margins": 0.08148398995399475, "rewards/rejected": -0.22768549621105194, "step": 691 }, { "epoch": 1.0898345153664302, "grad_norm": 0.1888820230960846, "learning_rate": 2.0412253301722774e-06, "log_odds_chosen": 1.2150609493255615, "log_odds_ratio": -0.2623293995857239, "logits/chosen": -0.43161195516586304, "logits/rejected": -1.4845757484436035, "logps/chosen": -1.512966275215149, "logps/rejected": -2.5592939853668213, "loss": 1.5458, "nll_loss": 1.519562840461731, "rewards/accuracies": 1.0, "rewards/chosen": -0.15129663050174713, "rewards/margins": 0.10463276505470276, "rewards/rejected": -0.2559293806552887, "step": 692 }, { "epoch": 1.0914105594956658, "grad_norm": 0.21559658646583557, "learning_rate": 2.0357292493004044e-06, "log_odds_chosen": 1.1779356002807617, "log_odds_ratio": -0.2732158601284027, "logits/chosen": -0.4328814446926117, "logits/rejected": -1.3747599124908447, "logps/chosen": -1.4345402717590332, "logps/rejected": -2.4330968856811523, "loss": 1.456, "nll_loss": 1.4286876916885376, "rewards/accuracies": 1.0, "rewards/chosen": -0.14345404505729675, "rewards/margins": 0.09985566139221191, "rewards/rejected": -0.24330970644950867, "step": 693 }, { "epoch": 1.0929866036249014, "grad_norm": 0.19508136808872223, "learning_rate": 2.0302328985111193e-06, "log_odds_chosen": 1.24747633934021, "log_odds_ratio": -0.27322325110435486, "logits/chosen": -0.37207263708114624, "logits/rejected": -1.3356413841247559, "logps/chosen": -1.3938806056976318, "logps/rejected": -2.4547274112701416, "loss": 1.4437, "nll_loss": 1.4163326025009155, "rewards/accuracies": 1.0, "rewards/chosen": -0.1393880695104599, "rewards/margins": 0.1060846745967865, "rewards/rejected": -0.2454727590084076, "step": 694 }, { "epoch": 1.094562647754137, "grad_norm": 0.22678373754024506, "learning_rate": 2.0247363193267256e-06, "log_odds_chosen": 1.120569109916687, "log_odds_ratio": -0.28781750798225403, "logits/chosen": -0.38653212785720825, "logits/rejected": -1.3823848962783813, "logps/chosen": -1.4960999488830566, "logps/rejected": -2.4544363021850586, "loss": 1.5224, "nll_loss": 1.4936158657073975, "rewards/accuracies": 1.0, "rewards/chosen": -0.1496100127696991, "rewards/margins": 0.0958336591720581, "rewards/rejected": -0.2454436719417572, "step": 695 }, { "epoch": 1.0961386918833727, "grad_norm": 0.22228041291236877, "learning_rate": 2.019239553271255e-06, "log_odds_chosen": 1.2738690376281738, "log_odds_ratio": -0.2555113732814789, "logits/chosen": -0.46568235754966736, "logits/rejected": -1.5240552425384521, "logps/chosen": -1.44100821018219, "logps/rejected": -2.527820587158203, "loss": 1.4779, "nll_loss": 1.4523212909698486, "rewards/accuracies": 1.0, "rewards/chosen": -0.1441008299589157, "rewards/margins": 0.10868123918771744, "rewards/rejected": -0.25278207659721375, "step": 696 }, { "epoch": 1.0977147360126083, "grad_norm": 0.20070266723632812, "learning_rate": 2.0137426418701488e-06, "log_odds_chosen": 1.274893045425415, "log_odds_ratio": -0.25972452759742737, "logits/chosen": -0.42226606607437134, "logits/rejected": -1.3668261766433716, "logps/chosen": -1.4452632665634155, "logps/rejected": -2.54134202003479, "loss": 1.4818, "nll_loss": 1.4557902812957764, "rewards/accuracies": 1.0, "rewards/chosen": -0.14452631771564484, "rewards/margins": 0.10960787534713745, "rewards/rejected": -0.2541341781616211, "step": 697 }, { "epoch": 1.099290780141844, "grad_norm": 0.19114573299884796, "learning_rate": 2.008245626649947e-06, "log_odds_chosen": 1.2128827571868896, "log_odds_ratio": -0.28301626443862915, "logits/chosen": -0.4753851592540741, "logits/rejected": -1.6074962615966797, "logps/chosen": -1.3890125751495361, "logps/rejected": -2.417355537414551, "loss": 1.4411, "nll_loss": 1.412771463394165, "rewards/accuracies": 1.0, "rewards/chosen": -0.1389012634754181, "rewards/margins": 0.10283426940441132, "rewards/rejected": -0.2417355477809906, "step": 698 }, { "epoch": 1.1008668242710795, "grad_norm": 0.24698396027088165, "learning_rate": 2.0027485491379746e-06, "log_odds_chosen": 1.299547791481018, "log_odds_ratio": -0.252640038728714, "logits/chosen": -0.6217135190963745, "logits/rejected": -1.2584782838821411, "logps/chosen": -1.5652302503585815, "logps/rejected": -2.6947779655456543, "loss": 1.5874, "nll_loss": 1.5620999336242676, "rewards/accuracies": 1.0, "rewards/chosen": -0.15652303397655487, "rewards/margins": 0.1129547655582428, "rewards/rejected": -0.2694777846336365, "step": 699 }, { "epoch": 1.1024428684003151, "grad_norm": 0.22090387344360352, "learning_rate": 1.9972514508620256e-06, "log_odds_chosen": 1.1659311056137085, "log_odds_ratio": -0.28398028016090393, "logits/chosen": -0.45424994826316833, "logits/rejected": -1.3774477243423462, "logps/chosen": -1.4243271350860596, "logps/rejected": -2.4104866981506348, "loss": 1.4658, "nll_loss": 1.4373921155929565, "rewards/accuracies": 1.0, "rewards/chosen": -0.14243271946907043, "rewards/margins": 0.09861597418785095, "rewards/rejected": -0.241048663854599, "step": 700 }, { "epoch": 1.1040189125295508, "grad_norm": 0.20541685819625854, "learning_rate": 1.991754373350053e-06, "log_odds_chosen": 0.9334115386009216, "log_odds_ratio": -0.340568870306015, "logits/chosen": -0.4905482828617096, "logits/rejected": -1.374817967414856, "logps/chosen": -1.484605312347412, "logps/rejected": -2.271920680999756, "loss": 1.529, "nll_loss": 1.4949533939361572, "rewards/accuracies": 1.0, "rewards/chosen": -0.14846055209636688, "rewards/margins": 0.07873153686523438, "rewards/rejected": -0.22719207406044006, "step": 701 }, { "epoch": 1.1055949566587864, "grad_norm": 0.19397854804992676, "learning_rate": 1.986257358129852e-06, "log_odds_chosen": 1.1067744493484497, "log_odds_ratio": -0.2919045388698578, "logits/chosen": -0.47195306420326233, "logits/rejected": -1.3318650722503662, "logps/chosen": -1.382297396659851, "logps/rejected": -2.2992775440216064, "loss": 1.4258, "nll_loss": 1.3965654373168945, "rewards/accuracies": 1.0, "rewards/chosen": -0.13822975754737854, "rewards/margins": 0.09169799834489822, "rewards/rejected": -0.22992774844169617, "step": 702 }, { "epoch": 1.107171000788022, "grad_norm": 0.2922585606575012, "learning_rate": 1.9807604467287453e-06, "log_odds_chosen": 1.085465908050537, "log_odds_ratio": -0.2986772060394287, "logits/chosen": -0.5647338628768921, "logits/rejected": -1.22553551197052, "logps/chosen": -1.4989755153656006, "logps/rejected": -2.421286106109619, "loss": 1.5251, "nll_loss": 1.4952261447906494, "rewards/accuracies": 1.0, "rewards/chosen": -0.14989756047725677, "rewards/margins": 0.09223109483718872, "rewards/rejected": -0.2421286404132843, "step": 703 }, { "epoch": 1.1087470449172576, "grad_norm": 0.23126116394996643, "learning_rate": 1.9752636806732742e-06, "log_odds_chosen": 1.1335054636001587, "log_odds_ratio": -0.28281736373901367, "logits/chosen": -0.41041299700737, "logits/rejected": -1.4300845861434937, "logps/chosen": -1.422570824623108, "logps/rejected": -2.3780839443206787, "loss": 1.4513, "nll_loss": 1.4230315685272217, "rewards/accuracies": 1.0, "rewards/chosen": -0.14225709438323975, "rewards/margins": 0.09555128961801529, "rewards/rejected": -0.23780837655067444, "step": 704 }, { "epoch": 1.1103230890464932, "grad_norm": 0.21834450960159302, "learning_rate": 1.9697671014888805e-06, "log_odds_chosen": 1.173021912574768, "log_odds_ratio": -0.2767985463142395, "logits/chosen": -0.5870257019996643, "logits/rejected": -1.2767181396484375, "logps/chosen": -1.4964110851287842, "logps/rejected": -2.500709056854248, "loss": 1.5257, "nll_loss": 1.497986912727356, "rewards/accuracies": 1.0, "rewards/chosen": -0.14964111149311066, "rewards/margins": 0.10042980313301086, "rewards/rejected": -0.2500708997249603, "step": 705 }, { "epoch": 1.1118991331757289, "grad_norm": 0.20281967520713806, "learning_rate": 1.9642707506995954e-06, "log_odds_chosen": 1.3563827276229858, "log_odds_ratio": -0.23615798354148865, "logits/chosen": -0.509278416633606, "logits/rejected": -1.42978835105896, "logps/chosen": -1.4364010095596313, "logps/rejected": -2.599912166595459, "loss": 1.4685, "nll_loss": 1.4448447227478027, "rewards/accuracies": 1.0, "rewards/chosen": -0.14364011585712433, "rewards/margins": 0.11635109782218933, "rewards/rejected": -0.25999119877815247, "step": 706 }, { "epoch": 1.1134751773049645, "grad_norm": 0.19448623061180115, "learning_rate": 1.9587746698277232e-06, "log_odds_chosen": 1.0534982681274414, "log_odds_ratio": -0.3130910098552704, "logits/chosen": -0.5047029852867126, "logits/rejected": -1.230925440788269, "logps/chosen": -1.4441640377044678, "logps/rejected": -2.335139274597168, "loss": 1.4782, "nll_loss": 1.4469194412231445, "rewards/accuracies": 1.0, "rewards/chosen": -0.1444164216518402, "rewards/margins": 0.0890975221991539, "rewards/rejected": -0.23351392149925232, "step": 707 }, { "epoch": 1.1150512214342, "grad_norm": 0.23463737964630127, "learning_rate": 1.953278900393529e-06, "log_odds_chosen": 1.1329559087753296, "log_odds_ratio": -0.294190376996994, "logits/chosen": -0.5475265979766846, "logits/rejected": -1.2021700143814087, "logps/chosen": -1.487652063369751, "logps/rejected": -2.456540584564209, "loss": 1.4983, "nll_loss": 1.4689185619354248, "rewards/accuracies": 1.0, "rewards/chosen": -0.1487652063369751, "rewards/margins": 0.09688883274793625, "rewards/rejected": -0.24565403163433075, "step": 708 }, { "epoch": 1.1166272655634357, "grad_norm": 0.20823369920253754, "learning_rate": 1.9477834839149274e-06, "log_odds_chosen": 1.1705896854400635, "log_odds_ratio": -0.27801841497421265, "logits/chosen": -0.5280231237411499, "logits/rejected": -1.339627742767334, "logps/chosen": -1.3890221118927002, "logps/rejected": -2.372847557067871, "loss": 1.4468, "nll_loss": 1.4190031290054321, "rewards/accuracies": 1.0, "rewards/chosen": -0.1389022022485733, "rewards/margins": 0.09838256239891052, "rewards/rejected": -0.23728476464748383, "step": 709 }, { "epoch": 1.1182033096926713, "grad_norm": 0.2220274657011032, "learning_rate": 1.942288461907164e-06, "log_odds_chosen": 1.2102402448654175, "log_odds_ratio": -0.27753397822380066, "logits/chosen": -0.5286645889282227, "logits/rejected": -1.386061429977417, "logps/chosen": -1.3907581567764282, "logps/rejected": -2.4172701835632324, "loss": 1.4442, "nll_loss": 1.4164468050003052, "rewards/accuracies": 1.0, "rewards/chosen": -0.13907580077648163, "rewards/margins": 0.1026512086391449, "rewards/rejected": -0.24172700941562653, "step": 710 }, { "epoch": 1.119779353821907, "grad_norm": 0.21669377386569977, "learning_rate": 1.936793875882505e-06, "log_odds_chosen": 1.1954429149627686, "log_odds_ratio": -0.2695174217224121, "logits/chosen": -0.34124839305877686, "logits/rejected": -1.3979601860046387, "logps/chosen": -1.5485085248947144, "logps/rejected": -2.5840556621551514, "loss": 1.5938, "nll_loss": 1.5668448209762573, "rewards/accuracies": 1.0, "rewards/chosen": -0.15485085546970367, "rewards/margins": 0.10355471074581146, "rewards/rejected": -0.25840556621551514, "step": 711 }, { "epoch": 1.1213553979511426, "grad_norm": 0.21096524596214294, "learning_rate": 1.931299767349926e-06, "log_odds_chosen": 1.2031110525131226, "log_odds_ratio": -0.27347713708877563, "logits/chosen": -0.5242701172828674, "logits/rejected": -1.194074034690857, "logps/chosen": -1.378991723060608, "logps/rejected": -2.381162643432617, "loss": 1.4227, "nll_loss": 1.3953834772109985, "rewards/accuracies": 1.0, "rewards/chosen": -0.13789916038513184, "rewards/margins": 0.10021708905696869, "rewards/rejected": -0.23811623454093933, "step": 712 }, { "epoch": 1.1229314420803782, "grad_norm": 0.23077386617660522, "learning_rate": 1.9258061778147895e-06, "log_odds_chosen": 1.1153610944747925, "log_odds_ratio": -0.29129940271377563, "logits/chosen": -0.5413553714752197, "logits/rejected": -1.4865033626556396, "logps/chosen": -1.4882041215896606, "logps/rejected": -2.4407083988189697, "loss": 1.5247, "nll_loss": 1.4956085681915283, "rewards/accuracies": 1.0, "rewards/chosen": -0.1488204151391983, "rewards/margins": 0.09525042027235031, "rewards/rejected": -0.24407082796096802, "step": 713 }, { "epoch": 1.1245074862096138, "grad_norm": 0.2107950747013092, "learning_rate": 1.9203131487785426e-06, "log_odds_chosen": 1.3729069232940674, "log_odds_ratio": -0.23002268373966217, "logits/chosen": -0.6425501108169556, "logits/rejected": -1.2580373287200928, "logps/chosen": -1.3680059909820557, "logps/rejected": -2.529069423675537, "loss": 1.4156, "nll_loss": 1.392630696296692, "rewards/accuracies": 1.0, "rewards/chosen": -0.1368006020784378, "rewards/margins": 0.11610635370016098, "rewards/rejected": -0.2529069483280182, "step": 714 }, { "epoch": 1.1260835303388494, "grad_norm": 0.21586111187934875, "learning_rate": 1.9148207217383946e-06, "log_odds_chosen": 1.5254570245742798, "log_odds_ratio": -0.2026120126247406, "logits/chosen": -0.4808170795440674, "logits/rejected": -1.5110218524932861, "logps/chosen": -1.4799153804779053, "logps/rejected": -2.810546398162842, "loss": 1.4902, "nll_loss": 1.469893217086792, "rewards/accuracies": 1.0, "rewards/chosen": -0.14799155294895172, "rewards/margins": 0.13306309282779694, "rewards/rejected": -0.28105467557907104, "step": 715 }, { "epoch": 1.127659574468085, "grad_norm": 0.2213343381881714, "learning_rate": 1.9093289381870094e-06, "log_odds_chosen": 1.044459581375122, "log_odds_ratio": -0.307096391916275, "logits/chosen": -0.5094771385192871, "logits/rejected": -1.4036896228790283, "logps/chosen": -1.473573088645935, "logps/rejected": -2.359524726867676, "loss": 1.5123, "nll_loss": 1.4815442562103271, "rewards/accuracies": 1.0, "rewards/chosen": -0.14735731482505798, "rewards/margins": 0.08859515190124512, "rewards/rejected": -0.2359524816274643, "step": 716 }, { "epoch": 1.1292356185973207, "grad_norm": 0.21230585873126984, "learning_rate": 1.9038378396121895e-06, "log_odds_chosen": 1.2633765935897827, "log_odds_ratio": -0.25486746430397034, "logits/chosen": -0.3245435953140259, "logits/rejected": -1.5540175437927246, "logps/chosen": -1.5153406858444214, "logps/rejected": -2.610530138015747, "loss": 1.5438, "nll_loss": 1.518311858177185, "rewards/accuracies": 1.0, "rewards/chosen": -0.1515340805053711, "rewards/margins": 0.10951897501945496, "rewards/rejected": -0.26105305552482605, "step": 717 }, { "epoch": 1.1308116627265563, "grad_norm": 0.1942850798368454, "learning_rate": 1.8983474674965597e-06, "log_odds_chosen": 1.1623995304107666, "log_odds_ratio": -0.28777241706848145, "logits/chosen": -0.49636614322662354, "logits/rejected": -1.5351665019989014, "logps/chosen": -1.449415683746338, "logps/rejected": -2.436007499694824, "loss": 1.4854, "nll_loss": 1.4566153287887573, "rewards/accuracies": 1.0, "rewards/chosen": -0.1449415683746338, "rewards/margins": 0.09865917265415192, "rewards/rejected": -0.2436007559299469, "step": 718 }, { "epoch": 1.132387706855792, "grad_norm": 0.21141377091407776, "learning_rate": 1.8928578633172603e-06, "log_odds_chosen": 1.3506557941436768, "log_odds_ratio": -0.2339305281639099, "logits/chosen": -0.4822527766227722, "logits/rejected": -1.5536284446716309, "logps/chosen": -1.4843451976776123, "logps/rejected": -2.6501717567443848, "loss": 1.5204, "nll_loss": 1.4970332384109497, "rewards/accuracies": 1.0, "rewards/chosen": -0.14843453466892242, "rewards/margins": 0.11658263951539993, "rewards/rejected": -0.26501715183258057, "step": 719 }, { "epoch": 1.1339637509850276, "grad_norm": 0.20757867395877838, "learning_rate": 1.8873690685456283e-06, "log_odds_chosen": 1.2760004997253418, "log_odds_ratio": -0.25464388728141785, "logits/chosen": -0.5191957950592041, "logits/rejected": -1.3725414276123047, "logps/chosen": -1.4265276193618774, "logps/rejected": -2.5132930278778076, "loss": 1.4593, "nll_loss": 1.433866262435913, "rewards/accuracies": 1.0, "rewards/chosen": -0.14265277981758118, "rewards/margins": 0.10867653042078018, "rewards/rejected": -0.25132930278778076, "step": 720 }, { "epoch": 1.1355397951142632, "grad_norm": 0.2010820060968399, "learning_rate": 1.8818811246468872e-06, "log_odds_chosen": 1.4654037952423096, "log_odds_ratio": -0.22392913699150085, "logits/chosen": -0.503913938999176, "logits/rejected": -1.5154976844787598, "logps/chosen": -1.415074348449707, "logps/rejected": -2.6756412982940674, "loss": 1.4484, "nll_loss": 1.425994634628296, "rewards/accuracies": 1.0, "rewards/chosen": -0.14150743186473846, "rewards/margins": 0.12605668604373932, "rewards/rejected": -0.2675641179084778, "step": 721 }, { "epoch": 1.1371158392434988, "grad_norm": 0.22116105258464813, "learning_rate": 1.876394073079832e-06, "log_odds_chosen": 1.3847888708114624, "log_odds_ratio": -0.23863618075847626, "logits/chosen": -0.5928460359573364, "logits/rejected": -1.412048578262329, "logps/chosen": -1.5432716608047485, "logps/rejected": -2.754733085632324, "loss": 1.5645, "nll_loss": 1.5405950546264648, "rewards/accuracies": 1.0, "rewards/chosen": -0.1543271839618683, "rewards/margins": 0.12114612758159637, "rewards/rejected": -0.27547329664230347, "step": 722 }, { "epoch": 1.1386918833727344, "grad_norm": 0.20633108913898468, "learning_rate": 1.8709079552965152e-06, "log_odds_chosen": 1.2970001697540283, "log_odds_ratio": -0.2590065002441406, "logits/chosen": -0.4998897612094879, "logits/rejected": -1.3825888633728027, "logps/chosen": -1.4237260818481445, "logps/rejected": -2.5296103954315186, "loss": 1.458, "nll_loss": 1.4320578575134277, "rewards/accuracies": 1.0, "rewards/chosen": -0.14237259328365326, "rewards/margins": 0.1105884537100792, "rewards/rejected": -0.25296103954315186, "step": 723 }, { "epoch": 1.14026792750197, "grad_norm": 0.21756547689437866, "learning_rate": 1.8654228127419375e-06, "log_odds_chosen": 1.1096466779708862, "log_odds_ratio": -0.29027462005615234, "logits/chosen": -0.5779076814651489, "logits/rejected": -1.3305450677871704, "logps/chosen": -1.437713623046875, "logps/rejected": -2.3763113021850586, "loss": 1.4718, "nll_loss": 1.4427886009216309, "rewards/accuracies": 1.0, "rewards/chosen": -0.14377135038375854, "rewards/margins": 0.09385980665683746, "rewards/rejected": -0.237631157040596, "step": 724 }, { "epoch": 1.1418439716312057, "grad_norm": 0.19929639995098114, "learning_rate": 1.8599386868537306e-06, "log_odds_chosen": 1.3706843852996826, "log_odds_ratio": -0.22976359724998474, "logits/chosen": -0.5125320553779602, "logits/rejected": -1.487987756729126, "logps/chosen": -1.3186641931533813, "logps/rejected": -2.464041233062744, "loss": 1.3672, "nll_loss": 1.344267725944519, "rewards/accuracies": 1.0, "rewards/chosen": -0.1318664252758026, "rewards/margins": 0.11453770846128464, "rewards/rejected": -0.24640414118766785, "step": 725 }, { "epoch": 1.1434200157604413, "grad_norm": 0.21918931603431702, "learning_rate": 1.8544556190618464e-06, "log_odds_chosen": 1.2842426300048828, "log_odds_ratio": -0.24738024175167084, "logits/chosen": -0.5883026719093323, "logits/rejected": -1.3024299144744873, "logps/chosen": -1.5094125270843506, "logps/rejected": -2.6171648502349854, "loss": 1.5315, "nll_loss": 1.5067555904388428, "rewards/accuracies": 1.0, "rewards/chosen": -0.15094126760959625, "rewards/margins": 0.11077524721622467, "rewards/rejected": -0.2617165148258209, "step": 726 }, { "epoch": 1.144996059889677, "grad_norm": 0.2221018373966217, "learning_rate": 1.8489736507882424e-06, "log_odds_chosen": 1.594733715057373, "log_odds_ratio": -0.20838479697704315, "logits/chosen": -0.5265178680419922, "logits/rejected": -1.4495673179626465, "logps/chosen": -1.4184130430221558, "logps/rejected": -2.80631685256958, "loss": 1.4437, "nll_loss": 1.4228373765945435, "rewards/accuracies": 1.0, "rewards/chosen": -0.14184130728244781, "rewards/margins": 0.13879039883613586, "rewards/rejected": -0.2806317210197449, "step": 727 }, { "epoch": 1.1465721040189125, "grad_norm": 0.1980506330728531, "learning_rate": 1.8434928234465716e-06, "log_odds_chosen": 1.3545522689819336, "log_odds_ratio": -0.23778237402439117, "logits/chosen": -0.6014202237129211, "logits/rejected": -1.3518621921539307, "logps/chosen": -1.3901243209838867, "logps/rejected": -2.542637825012207, "loss": 1.4309, "nll_loss": 1.4071658849716187, "rewards/accuracies": 1.0, "rewards/chosen": -0.1390124261379242, "rewards/margins": 0.11525137722492218, "rewards/rejected": -0.2542637884616852, "step": 728 }, { "epoch": 1.1481481481481481, "grad_norm": 0.22705380618572235, "learning_rate": 1.8380131784418657e-06, "log_odds_chosen": 1.3542594909667969, "log_odds_ratio": -0.24852502346038818, "logits/chosen": -0.46507954597473145, "logits/rejected": -1.4865875244140625, "logps/chosen": -1.4420115947723389, "logps/rejected": -2.6088292598724365, "loss": 1.4827, "nll_loss": 1.4578158855438232, "rewards/accuracies": 1.0, "rewards/chosen": -0.1442011594772339, "rewards/margins": 0.116681769490242, "rewards/rejected": -0.2608829140663147, "step": 729 }, { "epoch": 1.1497241922773838, "grad_norm": 0.2376445233821869, "learning_rate": 1.8325347571702259e-06, "log_odds_chosen": 1.357885479927063, "log_odds_ratio": -0.23886026442050934, "logits/chosen": -0.5753093957901001, "logits/rejected": -1.4901759624481201, "logps/chosen": -1.4975333213806152, "logps/rejected": -2.6757655143737793, "loss": 1.5096, "nll_loss": 1.4857057332992554, "rewards/accuracies": 1.0, "rewards/chosen": -0.14975333213806152, "rewards/margins": 0.11782322824001312, "rewards/rejected": -0.26757654547691345, "step": 730 }, { "epoch": 1.1513002364066194, "grad_norm": 0.22765523195266724, "learning_rate": 1.827057601018509e-06, "log_odds_chosen": 1.399768352508545, "log_odds_ratio": -0.23107658326625824, "logits/chosen": -0.6271060109138489, "logits/rejected": -1.4669194221496582, "logps/chosen": -1.3811513185501099, "logps/rejected": -2.567168951034546, "loss": 1.4307, "nll_loss": 1.4075775146484375, "rewards/accuracies": 1.0, "rewards/chosen": -0.13811513781547546, "rewards/margins": 0.11860179901123047, "rewards/rejected": -0.25671693682670593, "step": 731 }, { "epoch": 1.152876280535855, "grad_norm": 0.23158101737499237, "learning_rate": 1.8215817513640122e-06, "log_odds_chosen": 1.396857500076294, "log_odds_ratio": -0.23694464564323425, "logits/chosen": -0.5242437720298767, "logits/rejected": -1.471669316291809, "logps/chosen": -1.4370704889297485, "logps/rejected": -2.635478973388672, "loss": 1.4605, "nll_loss": 1.4368555545806885, "rewards/accuracies": 1.0, "rewards/chosen": -0.1437070667743683, "rewards/margins": 0.1198408305644989, "rewards/rejected": -0.2635478675365448, "step": 732 }, { "epoch": 1.1544523246650906, "grad_norm": 0.21417687833309174, "learning_rate": 1.8161072495741647e-06, "log_odds_chosen": 1.3921798467636108, "log_odds_ratio": -0.22892777621746063, "logits/chosen": -0.5019906759262085, "logits/rejected": -1.516732096672058, "logps/chosen": -1.3953202962875366, "logps/rejected": -2.57902455329895, "loss": 1.4339, "nll_loss": 1.4110405445098877, "rewards/accuracies": 1.0, "rewards/chosen": -0.13953202962875366, "rewards/margins": 0.1183704137802124, "rewards/rejected": -0.25790247321128845, "step": 733 }, { "epoch": 1.1560283687943262, "grad_norm": 0.22115546464920044, "learning_rate": 1.810634137006213e-06, "log_odds_chosen": 1.0537023544311523, "log_odds_ratio": -0.30371353030204773, "logits/chosen": -0.5922952890396118, "logits/rejected": -1.1409456729888916, "logps/chosen": -1.4845179319381714, "logps/rejected": -2.3795814514160156, "loss": 1.526, "nll_loss": 1.4956529140472412, "rewards/accuracies": 1.0, "rewards/chosen": -0.1484518051147461, "rewards/margins": 0.0895063579082489, "rewards/rejected": -0.237958163022995, "step": 734 }, { "epoch": 1.1576044129235619, "grad_norm": 0.21111159026622772, "learning_rate": 1.805162455006908e-06, "log_odds_chosen": 1.2015193700790405, "log_odds_ratio": -0.2717169523239136, "logits/chosen": -0.5222191214561462, "logits/rejected": -1.27971351146698, "logps/chosen": -1.4912288188934326, "logps/rejected": -2.5226693153381348, "loss": 1.5047, "nll_loss": 1.4774832725524902, "rewards/accuracies": 1.0, "rewards/chosen": -0.14912287890911102, "rewards/margins": 0.10314405709505081, "rewards/rejected": -0.25226691365242004, "step": 735 }, { "epoch": 1.1591804570527975, "grad_norm": 0.21160832047462463, "learning_rate": 1.799692244912195e-06, "log_odds_chosen": 1.1387715339660645, "log_odds_ratio": -0.3090120255947113, "logits/chosen": -0.5213903188705444, "logits/rejected": -1.130449891090393, "logps/chosen": -1.4878404140472412, "logps/rejected": -2.469958543777466, "loss": 1.5314, "nll_loss": 1.5005372762680054, "rewards/accuracies": 1.0, "rewards/chosen": -0.14878404140472412, "rewards/margins": 0.09821182489395142, "rewards/rejected": -0.24699586629867554, "step": 736 }, { "epoch": 1.160756501182033, "grad_norm": 0.21755388379096985, "learning_rate": 1.7942235480468964e-06, "log_odds_chosen": 1.2541836500167847, "log_odds_ratio": -0.26203587651252747, "logits/chosen": -0.560463011264801, "logits/rejected": -1.3185467720031738, "logps/chosen": -1.3833380937576294, "logps/rejected": -2.4418272972106934, "loss": 1.4227, "nll_loss": 1.3965139389038086, "rewards/accuracies": 1.0, "rewards/chosen": -0.13833379745483398, "rewards/margins": 0.10584891587495804, "rewards/rejected": -0.24418272078037262, "step": 737 }, { "epoch": 1.1623325453112687, "grad_norm": 0.20753788948059082, "learning_rate": 1.7887564057244062e-06, "log_odds_chosen": 1.2912025451660156, "log_odds_ratio": -0.2690528631210327, "logits/chosen": -0.5081381797790527, "logits/rejected": -1.401484727859497, "logps/chosen": -1.3404988050460815, "logps/rejected": -2.4288179874420166, "loss": 1.3837, "nll_loss": 1.3567984104156494, "rewards/accuracies": 1.0, "rewards/chosen": -0.1340498924255371, "rewards/margins": 0.10883191972970963, "rewards/rejected": -0.24288181960582733, "step": 738 }, { "epoch": 1.1639085894405043, "grad_norm": 0.22165168821811676, "learning_rate": 1.7832908592463732e-06, "log_odds_chosen": 1.221674919128418, "log_odds_ratio": -0.26628822088241577, "logits/chosen": -0.6422454714775085, "logits/rejected": -1.4249920845031738, "logps/chosen": -1.4700878858566284, "logps/rejected": -2.504058837890625, "loss": 1.5102, "nll_loss": 1.4835454225540161, "rewards/accuracies": 1.0, "rewards/chosen": -0.14700879156589508, "rewards/margins": 0.10339709371328354, "rewards/rejected": -0.250405877828598, "step": 739 }, { "epoch": 1.16548463356974, "grad_norm": 0.23696185648441315, "learning_rate": 1.7778269499023908e-06, "log_odds_chosen": 1.337630271911621, "log_odds_ratio": -0.24248188734054565, "logits/chosen": -0.5418239831924438, "logits/rejected": -1.4796379804611206, "logps/chosen": -1.507027506828308, "logps/rejected": -2.668131113052368, "loss": 1.5347, "nll_loss": 1.5104109048843384, "rewards/accuracies": 1.0, "rewards/chosen": -0.15070275962352753, "rewards/margins": 0.11611035466194153, "rewards/rejected": -0.26681309938430786, "step": 740 }, { "epoch": 1.1670606776989756, "grad_norm": 0.24330352246761322, "learning_rate": 1.7723647189696843e-06, "log_odds_chosen": 1.2329293489456177, "log_odds_ratio": -0.26677531003952026, "logits/chosen": -0.5035621523857117, "logits/rejected": -1.101192831993103, "logps/chosen": -1.393128514289856, "logps/rejected": -2.431424140930176, "loss": 1.4447, "nll_loss": 1.4179928302764893, "rewards/accuracies": 1.0, "rewards/chosen": -0.13931286334991455, "rewards/margins": 0.10382957756519318, "rewards/rejected": -0.24314244091510773, "step": 741 }, { "epoch": 1.1686367218282112, "grad_norm": 0.22313028573989868, "learning_rate": 1.7669042077127982e-06, "log_odds_chosen": 1.422914981842041, "log_odds_ratio": -0.23566770553588867, "logits/chosen": -0.5073877573013306, "logits/rejected": -1.4369897842407227, "logps/chosen": -1.465867280960083, "logps/rejected": -2.6980741024017334, "loss": 1.4886, "nll_loss": 1.4650605916976929, "rewards/accuracies": 1.0, "rewards/chosen": -0.14658673107624054, "rewards/margins": 0.12322070449590683, "rewards/rejected": -0.2698074281215668, "step": 742 }, { "epoch": 1.1702127659574468, "grad_norm": 0.20472803711891174, "learning_rate": 1.7614454573832886e-06, "log_odds_chosen": 1.4325921535491943, "log_odds_ratio": -0.21896687150001526, "logits/chosen": -0.6333247423171997, "logits/rejected": -1.4073162078857422, "logps/chosen": -1.3432472944259644, "logps/rejected": -2.550940990447998, "loss": 1.3907, "nll_loss": 1.3687745332717896, "rewards/accuracies": 1.0, "rewards/chosen": -0.13432474434375763, "rewards/margins": 0.12076936662197113, "rewards/rejected": -0.25509411096572876, "step": 743 }, { "epoch": 1.1717888100866825, "grad_norm": 0.2390693724155426, "learning_rate": 1.7559885092194058e-06, "log_odds_chosen": 1.0897796154022217, "log_odds_ratio": -0.2942361533641815, "logits/chosen": -0.5000759363174438, "logits/rejected": -1.45731782913208, "logps/chosen": -1.4935035705566406, "logps/rejected": -2.421919822692871, "loss": 1.5341, "nll_loss": 1.5046358108520508, "rewards/accuracies": 1.0, "rewards/chosen": -0.1493503749370575, "rewards/margins": 0.09284161031246185, "rewards/rejected": -0.24219197034835815, "step": 744 }, { "epoch": 1.173364854215918, "grad_norm": 0.23357681930065155, "learning_rate": 1.7505334044457871e-06, "log_odds_chosen": 1.3224742412567139, "log_odds_ratio": -0.239582359790802, "logits/chosen": -0.5304093360900879, "logits/rejected": -1.427680253982544, "logps/chosen": -1.3786001205444336, "logps/rejected": -2.4917540550231934, "loss": 1.4094, "nll_loss": 1.3854491710662842, "rewards/accuracies": 1.0, "rewards/chosen": -0.1378600001335144, "rewards/margins": 0.11131538450717926, "rewards/rejected": -0.24917539954185486, "step": 745 }, { "epoch": 1.1749408983451537, "grad_norm": 0.19663824141025543, "learning_rate": 1.7450801842731443e-06, "log_odds_chosen": 1.3291776180267334, "log_odds_ratio": -0.2390061318874359, "logits/chosen": -0.46576210856437683, "logits/rejected": -1.3635282516479492, "logps/chosen": -1.4895015954971313, "logps/rejected": -2.638615846633911, "loss": 1.5181, "nll_loss": 1.494153618812561, "rewards/accuracies": 1.0, "rewards/chosen": -0.14895015954971313, "rewards/margins": 0.11491142213344574, "rewards/rejected": -0.2638615667819977, "step": 746 }, { "epoch": 1.1765169424743893, "grad_norm": 0.21254904568195343, "learning_rate": 1.7396288898979497e-06, "log_odds_chosen": 1.4820916652679443, "log_odds_ratio": -0.21064743399620056, "logits/chosen": -0.5675363540649414, "logits/rejected": -1.4828413724899292, "logps/chosen": -1.3795688152313232, "logps/rejected": -2.646085739135742, "loss": 1.4106, "nll_loss": 1.3895608186721802, "rewards/accuracies": 1.0, "rewards/chosen": -0.1379568874835968, "rewards/margins": 0.12665165960788727, "rewards/rejected": -0.26460856199264526, "step": 747 }, { "epoch": 1.178092986603625, "grad_norm": 0.22074899077415466, "learning_rate": 1.7341795625021292e-06, "log_odds_chosen": 1.424844741821289, "log_odds_ratio": -0.22192633152008057, "logits/chosen": -0.5427080392837524, "logits/rejected": -1.6632615327835083, "logps/chosen": -1.4167243242263794, "logps/rejected": -2.6381478309631348, "loss": 1.4551, "nll_loss": 1.4328675270080566, "rewards/accuracies": 1.0, "rewards/chosen": -0.14167243242263794, "rewards/margins": 0.12214237451553345, "rewards/rejected": -0.2638148069381714, "step": 748 }, { "epoch": 1.1796690307328606, "grad_norm": 0.21415913105010986, "learning_rate": 1.7287322432527485e-06, "log_odds_chosen": 1.3086893558502197, "log_odds_ratio": -0.2472424954175949, "logits/chosen": -0.58354651927948, "logits/rejected": -1.566764235496521, "logps/chosen": -1.5024094581604004, "logps/rejected": -2.634434700012207, "loss": 1.5273, "nll_loss": 1.5025382041931152, "rewards/accuracies": 1.0, "rewards/chosen": -0.1502409428358078, "rewards/margins": 0.11320249736309052, "rewards/rejected": -0.2634434401988983, "step": 749 }, { "epoch": 1.1812450748620962, "grad_norm": 0.22282186150550842, "learning_rate": 1.7232869733017038e-06, "log_odds_chosen": 1.291513442993164, "log_odds_ratio": -0.25042369961738586, "logits/chosen": -0.5659542083740234, "logits/rejected": -1.5032849311828613, "logps/chosen": -1.4535952806472778, "logps/rejected": -2.5596208572387695, "loss": 1.4805, "nll_loss": 1.455505132675171, "rewards/accuracies": 1.0, "rewards/chosen": -0.14535953104496002, "rewards/margins": 0.11060254275798798, "rewards/rejected": -0.255962073802948, "step": 750 }, { "epoch": 1.1828211189913318, "grad_norm": 0.22748686373233795, "learning_rate": 1.7178437937854065e-06, "log_odds_chosen": 1.658095359802246, "log_odds_ratio": -0.17879220843315125, "logits/chosen": -0.6560875773429871, "logits/rejected": -1.786608338356018, "logps/chosen": -1.492921233177185, "logps/rejected": -2.9493677616119385, "loss": 1.5139, "nll_loss": 1.4960277080535889, "rewards/accuracies": 1.0, "rewards/chosen": -0.14929211139678955, "rewards/margins": 0.1456446647644043, "rewards/rejected": -0.29493677616119385, "step": 751 }, { "epoch": 1.1843971631205674, "grad_norm": 0.25552818179130554, "learning_rate": 1.7124027458244794e-06, "log_odds_chosen": 1.3929933309555054, "log_odds_ratio": -0.2336614727973938, "logits/chosen": -0.5347802042961121, "logits/rejected": -1.3094077110290527, "logps/chosen": -1.4163881540298462, "logps/rejected": -2.6132845878601074, "loss": 1.4573, "nll_loss": 1.4338853359222412, "rewards/accuracies": 1.0, "rewards/chosen": -0.1416388303041458, "rewards/margins": 0.11968961358070374, "rewards/rejected": -0.26132845878601074, "step": 752 }, { "epoch": 1.185973207249803, "grad_norm": 0.20070816576480865, "learning_rate": 1.7069638705234407e-06, "log_odds_chosen": 1.0706877708435059, "log_odds_ratio": -0.3014824092388153, "logits/chosen": -0.43309080600738525, "logits/rejected": -1.2945914268493652, "logps/chosen": -1.4858583211898804, "logps/rejected": -2.396928071975708, "loss": 1.5283, "nll_loss": 1.4981718063354492, "rewards/accuracies": 1.0, "rewards/chosen": -0.14858584105968475, "rewards/margins": 0.09110698103904724, "rewards/rejected": -0.2396928071975708, "step": 753 }, { "epoch": 1.1875492513790387, "grad_norm": 0.21234571933746338, "learning_rate": 1.7015272089703954e-06, "log_odds_chosen": 1.2921807765960693, "log_odds_ratio": -0.2518993616104126, "logits/chosen": -0.5934471487998962, "logits/rejected": -1.5490331649780273, "logps/chosen": -1.4556087255477905, "logps/rejected": -2.555610179901123, "loss": 1.4785, "nll_loss": 1.453262209892273, "rewards/accuracies": 1.0, "rewards/chosen": -0.14556089043617249, "rewards/margins": 0.1100001335144043, "rewards/rejected": -0.2555610239505768, "step": 754 }, { "epoch": 1.1891252955082743, "grad_norm": 0.21953803300857544, "learning_rate": 1.6960928022367261e-06, "log_odds_chosen": 1.2742233276367188, "log_odds_ratio": -0.26117175817489624, "logits/chosen": -0.5909973978996277, "logits/rejected": -1.424904704093933, "logps/chosen": -1.4837849140167236, "logps/rejected": -2.586916446685791, "loss": 1.5123, "nll_loss": 1.4862273931503296, "rewards/accuracies": 1.0, "rewards/chosen": -0.14837850630283356, "rewards/margins": 0.11031313240528107, "rewards/rejected": -0.2586916387081146, "step": 755 }, { "epoch": 1.19070133963751, "grad_norm": 0.2024756819009781, "learning_rate": 1.6906606913767776e-06, "log_odds_chosen": 1.249240517616272, "log_odds_ratio": -0.26217758655548096, "logits/chosen": -0.5883452892303467, "logits/rejected": -1.4659888744354248, "logps/chosen": -1.3932411670684814, "logps/rejected": -2.4510996341705322, "loss": 1.4315, "nll_loss": 1.4052568674087524, "rewards/accuracies": 1.0, "rewards/chosen": -0.1393241137266159, "rewards/margins": 0.10578584671020508, "rewards/rejected": -0.24510996043682098, "step": 756 }, { "epoch": 1.1922773837667455, "grad_norm": 0.22757992148399353, "learning_rate": 1.6852309174275543e-06, "log_odds_chosen": 1.2733908891677856, "log_odds_ratio": -0.2527121305465698, "logits/chosen": -0.6954429745674133, "logits/rejected": -1.4367382526397705, "logps/chosen": -1.3156039714813232, "logps/rejected": -2.373481512069702, "loss": 1.3517, "nll_loss": 1.3264782428741455, "rewards/accuracies": 1.0, "rewards/chosen": -0.13156040012836456, "rewards/margins": 0.10578775405883789, "rewards/rejected": -0.23734815418720245, "step": 757 }, { "epoch": 1.1938534278959811, "grad_norm": 0.2215537428855896, "learning_rate": 1.6798035214084047e-06, "log_odds_chosen": 1.345110297203064, "log_odds_ratio": -0.23759454488754272, "logits/chosen": -0.6308282017707825, "logits/rejected": -1.6211813688278198, "logps/chosen": -1.4159858226776123, "logps/rejected": -2.564734697341919, "loss": 1.4532, "nll_loss": 1.4294607639312744, "rewards/accuracies": 1.0, "rewards/chosen": -0.14159858226776123, "rewards/margins": 0.11487489938735962, "rewards/rejected": -0.25647348165512085, "step": 758 }, { "epoch": 1.1954294720252168, "grad_norm": 0.22554029524326324, "learning_rate": 1.674378544320714e-06, "log_odds_chosen": 1.590653419494629, "log_odds_ratio": -0.19607162475585938, "logits/chosen": -0.5363159775733948, "logits/rejected": -1.542907476425171, "logps/chosen": -1.3737821578979492, "logps/rejected": -2.734147787094116, "loss": 1.4116, "nll_loss": 1.3920382261276245, "rewards/accuracies": 1.0, "rewards/chosen": -0.13737823069095612, "rewards/margins": 0.13603655993938446, "rewards/rejected": -0.2734147906303406, "step": 759 }, { "epoch": 1.1970055161544524, "grad_norm": 0.20527444779872894, "learning_rate": 1.6689560271475922e-06, "log_odds_chosen": 1.46551513671875, "log_odds_ratio": -0.21081432700157166, "logits/chosen": -0.5736262798309326, "logits/rejected": -1.462472677230835, "logps/chosen": -1.3918555974960327, "logps/rejected": -2.643242359161377, "loss": 1.4212, "nll_loss": 1.4000998735427856, "rewards/accuracies": 1.0, "rewards/chosen": -0.1391855627298355, "rewards/margins": 0.12513871490955353, "rewards/rejected": -0.26432427763938904, "step": 760 }, { "epoch": 1.198581560283688, "grad_norm": 0.22382031381130219, "learning_rate": 1.6635360108535665e-06, "log_odds_chosen": 1.4534813165664673, "log_odds_ratio": -0.2228448987007141, "logits/chosen": -0.5872243046760559, "logits/rejected": -1.4932541847229004, "logps/chosen": -1.34547758102417, "logps/rejected": -2.580556869506836, "loss": 1.3918, "nll_loss": 1.3695387840270996, "rewards/accuracies": 1.0, "rewards/chosen": -0.13454777002334595, "rewards/margins": 0.12350792437791824, "rewards/rejected": -0.2580556869506836, "step": 761 }, { "epoch": 1.2001576044129236, "grad_norm": 0.2164224088191986, "learning_rate": 1.6581185363842717e-06, "log_odds_chosen": 1.4758771657943726, "log_odds_ratio": -0.22314564883708954, "logits/chosen": -0.648838996887207, "logits/rejected": -1.5060044527053833, "logps/chosen": -1.391494870185852, "logps/rejected": -2.658966541290283, "loss": 1.4153, "nll_loss": 1.393001914024353, "rewards/accuracies": 1.0, "rewards/chosen": -0.1391494870185852, "rewards/margins": 0.12674716114997864, "rewards/rejected": -0.26589664816856384, "step": 762 }, { "epoch": 1.2017336485421592, "grad_norm": 0.2243705540895462, "learning_rate": 1.6527036446661393e-06, "log_odds_chosen": 1.4079028367996216, "log_odds_ratio": -0.23095834255218506, "logits/chosen": -0.5770010948181152, "logits/rejected": -1.523849368095398, "logps/chosen": -1.4122446775436401, "logps/rejected": -2.6187679767608643, "loss": 1.4419, "nll_loss": 1.4188222885131836, "rewards/accuracies": 1.0, "rewards/chosen": -0.1412244737148285, "rewards/margins": 0.12065234035253525, "rewards/rejected": -0.26187682151794434, "step": 763 }, { "epoch": 1.2033096926713949, "grad_norm": 0.21240510046482086, "learning_rate": 1.6472913766060901e-06, "log_odds_chosen": 1.5549542903900146, "log_odds_ratio": -0.1982915848493576, "logits/chosen": -0.7087709903717041, "logits/rejected": -1.4476606845855713, "logps/chosen": -1.3446422815322876, "logps/rejected": -2.662215232849121, "loss": 1.3825, "nll_loss": 1.3626868724822998, "rewards/accuracies": 1.0, "rewards/chosen": -0.13446423411369324, "rewards/margins": 0.13175728917121887, "rewards/rejected": -0.2662215232849121, "step": 764 }, { "epoch": 1.2048857368006305, "grad_norm": 0.206766277551651, "learning_rate": 1.6418817730912252e-06, "log_odds_chosen": 1.28862726688385, "log_odds_ratio": -0.26395970582962036, "logits/chosen": -0.6465386152267456, "logits/rejected": -1.2131530046463013, "logps/chosen": -1.4487866163253784, "logps/rejected": -2.554746627807617, "loss": 1.4801, "nll_loss": 1.4536832571029663, "rewards/accuracies": 1.0, "rewards/chosen": -0.14487865567207336, "rewards/margins": 0.11059600114822388, "rewards/rejected": -0.25547468662261963, "step": 765 }, { "epoch": 1.2064617809298661, "grad_norm": 0.21988733112812042, "learning_rate": 1.6364748749885133e-06, "log_odds_chosen": 1.0742143392562866, "log_odds_ratio": -0.2990318238735199, "logits/chosen": -0.623568594455719, "logits/rejected": -1.2595133781433105, "logps/chosen": -1.4102067947387695, "logps/rejected": -2.311211347579956, "loss": 1.4432, "nll_loss": 1.4133214950561523, "rewards/accuracies": 1.0, "rewards/chosen": -0.14102068543434143, "rewards/margins": 0.09010044485330582, "rewards/rejected": -0.23112112283706665, "step": 766 }, { "epoch": 1.2080378250591017, "grad_norm": 0.23654837906360626, "learning_rate": 1.6310707231444883e-06, "log_odds_chosen": 1.3700404167175293, "log_odds_ratio": -0.2373034507036209, "logits/chosen": -0.6271353960037231, "logits/rejected": -1.3166310787200928, "logps/chosen": -1.4159679412841797, "logps/rejected": -2.5892958641052246, "loss": 1.4419, "nll_loss": 1.418202519416809, "rewards/accuracies": 1.0, "rewards/chosen": -0.14159680902957916, "rewards/margins": 0.11733277887105942, "rewards/rejected": -0.258929580450058, "step": 767 }, { "epoch": 1.2096138691883374, "grad_norm": 0.23533912003040314, "learning_rate": 1.625669358384936e-06, "log_odds_chosen": 1.1100581884384155, "log_odds_ratio": -0.3063144087791443, "logits/chosen": -0.5259105563163757, "logits/rejected": -1.1709703207015991, "logps/chosen": -1.4853869676589966, "logps/rejected": -2.4346115589141846, "loss": 1.5203, "nll_loss": 1.489625334739685, "rewards/accuracies": 1.0, "rewards/chosen": -0.14853869378566742, "rewards/margins": 0.09492245316505432, "rewards/rejected": -0.24346116185188293, "step": 768 }, { "epoch": 1.211189913317573, "grad_norm": 0.22458776831626892, "learning_rate": 1.620270821514587e-06, "log_odds_chosen": 1.5604948997497559, "log_odds_ratio": -0.21382805705070496, "logits/chosen": -0.49775218963623047, "logits/rejected": -1.4751949310302734, "logps/chosen": -1.500352382659912, "logps/rejected": -2.876207113265991, "loss": 1.5172, "nll_loss": 1.495776653289795, "rewards/accuracies": 1.0, "rewards/chosen": -0.15003523230552673, "rewards/margins": 0.13758549094200134, "rewards/rejected": -0.2876207232475281, "step": 769 }, { "epoch": 1.2127659574468086, "grad_norm": 0.24959568679332733, "learning_rate": 1.6148751533168104e-06, "log_odds_chosen": 1.7171392440795898, "log_odds_ratio": -0.18354278802871704, "logits/chosen": -0.6221411228179932, "logits/rejected": -1.692143440246582, "logps/chosen": -1.4705101251602173, "logps/rejected": -2.9811625480651855, "loss": 1.4828, "nll_loss": 1.4644263982772827, "rewards/accuracies": 1.0, "rewards/chosen": -0.14705102145671844, "rewards/margins": 0.15106526017189026, "rewards/rejected": -0.2981162667274475, "step": 770 }, { "epoch": 1.2143420015760442, "grad_norm": 0.221751868724823, "learning_rate": 1.6094823945532996e-06, "log_odds_chosen": 1.3184762001037598, "log_odds_ratio": -0.256094366312027, "logits/chosen": -0.6579760909080505, "logits/rejected": -1.5384408235549927, "logps/chosen": -1.424337387084961, "logps/rejected": -2.5556325912475586, "loss": 1.4483, "nll_loss": 1.4226460456848145, "rewards/accuracies": 1.0, "rewards/chosen": -0.1424337476491928, "rewards/margins": 0.11312951892614365, "rewards/rejected": -0.25556325912475586, "step": 771 }, { "epoch": 1.2159180457052798, "grad_norm": 0.24769911170005798, "learning_rate": 1.6040925859637728e-06, "log_odds_chosen": 1.3353594541549683, "log_odds_ratio": -0.24589507281780243, "logits/chosen": -0.6162819266319275, "logits/rejected": -1.3712636232376099, "logps/chosen": -1.5013405084609985, "logps/rejected": -2.6573030948638916, "loss": 1.5274, "nll_loss": 1.5027711391448975, "rewards/accuracies": 1.0, "rewards/chosen": -0.15013404190540314, "rewards/margins": 0.11559627950191498, "rewards/rejected": -0.2657303214073181, "step": 772 }, { "epoch": 1.2174940898345155, "grad_norm": 0.25154909491539, "learning_rate": 1.5987057682656596e-06, "log_odds_chosen": 1.273819088935852, "log_odds_ratio": -0.2507978081703186, "logits/chosen": -0.5333799123764038, "logits/rejected": -1.2989473342895508, "logps/chosen": -1.544511079788208, "logps/rejected": -2.6521706581115723, "loss": 1.5747, "nll_loss": 1.5496604442596436, "rewards/accuracies": 1.0, "rewards/chosen": -0.15445111691951752, "rewards/margins": 0.1107659637928009, "rewards/rejected": -0.2652170658111572, "step": 773 }, { "epoch": 1.219070133963751, "grad_norm": 0.24077482521533966, "learning_rate": 1.593321982153795e-06, "log_odds_chosen": 1.2832716703414917, "log_odds_ratio": -0.2492670714855194, "logits/chosen": -0.6236528158187866, "logits/rejected": -1.5098700523376465, "logps/chosen": -1.5666507482528687, "logps/rejected": -2.6859042644500732, "loss": 1.581, "nll_loss": 1.5561209917068481, "rewards/accuracies": 1.0, "rewards/chosen": -0.15666507184505463, "rewards/margins": 0.11192534118890762, "rewards/rejected": -0.26859042048454285, "step": 774 }, { "epoch": 1.2206461780929867, "grad_norm": 0.24258244037628174, "learning_rate": 1.5879412683001106e-06, "log_odds_chosen": 1.2435587644577026, "log_odds_ratio": -0.2599763870239258, "logits/chosen": -0.6440672874450684, "logits/rejected": -1.2146906852722168, "logps/chosen": -1.3805272579193115, "logps/rejected": -2.4296422004699707, "loss": 1.4255, "nll_loss": 1.3995163440704346, "rewards/accuracies": 1.0, "rewards/chosen": -0.13805273175239563, "rewards/margins": 0.10491149127483368, "rewards/rejected": -0.24296420812606812, "step": 775 }, { "epoch": 1.2222222222222223, "grad_norm": 0.2312171459197998, "learning_rate": 1.5825636673533298e-06, "log_odds_chosen": 1.7001415491104126, "log_odds_ratio": -0.17940793931484222, "logits/chosen": -0.6745871305465698, "logits/rejected": -1.7740318775177002, "logps/chosen": -1.4513566493988037, "logps/rejected": -2.9394679069519043, "loss": 1.457, "nll_loss": 1.4390698671340942, "rewards/accuracies": 1.0, "rewards/chosen": -0.14513565599918365, "rewards/margins": 0.14881114661693573, "rewards/rejected": -0.2939468026161194, "step": 776 }, { "epoch": 1.2237982663514577, "grad_norm": 0.22569940984249115, "learning_rate": 1.5771892199386598e-06, "log_odds_chosen": 1.2924063205718994, "log_odds_ratio": -0.25367188453674316, "logits/chosen": -0.5892406105995178, "logits/rejected": -1.359096884727478, "logps/chosen": -1.4653481245040894, "logps/rejected": -2.5802369117736816, "loss": 1.5082, "nll_loss": 1.4827901124954224, "rewards/accuracies": 1.0, "rewards/chosen": -0.1465347856283188, "rewards/margins": 0.11148889362812042, "rewards/rejected": -0.2580237090587616, "step": 777 }, { "epoch": 1.2253743104806936, "grad_norm": 0.20317105948925018, "learning_rate": 1.5718179666574834e-06, "log_odds_chosen": 1.671058177947998, "log_odds_ratio": -0.18040546774864197, "logits/chosen": -0.5229330658912659, "logits/rejected": -1.6276131868362427, "logps/chosen": -1.4343481063842773, "logps/rejected": -2.8913261890411377, "loss": 1.4658, "nll_loss": 1.4477105140686035, "rewards/accuracies": 1.0, "rewards/chosen": -0.1434348076581955, "rewards/margins": 0.14569780230522156, "rewards/rejected": -0.28913259506225586, "step": 778 }, { "epoch": 1.226950354609929, "grad_norm": 0.23067185282707214, "learning_rate": 1.5664499480870539e-06, "log_odds_chosen": 1.5963189601898193, "log_odds_ratio": -0.19177693128585815, "logits/chosen": -0.5456362366676331, "logits/rejected": -1.5545574426651, "logps/chosen": -1.4592418670654297, "logps/rejected": -2.8497893810272217, "loss": 1.4949, "nll_loss": 1.475722312927246, "rewards/accuracies": 1.0, "rewards/chosen": -0.14592419564723969, "rewards/margins": 0.13905476033687592, "rewards/rejected": -0.2849789559841156, "step": 779 }, { "epoch": 1.2285263987391648, "grad_norm": 0.2926139831542969, "learning_rate": 1.5610852047801875e-06, "log_odds_chosen": 1.197067141532898, "log_odds_ratio": -0.2708202302455902, "logits/chosen": -0.6311848759651184, "logits/rejected": -1.3294005393981934, "logps/chosen": -1.3937289714813232, "logps/rejected": -2.4043586254119873, "loss": 1.4177, "nll_loss": 1.39057195186615, "rewards/accuracies": 1.0, "rewards/chosen": -0.13937290012836456, "rewards/margins": 0.10106298327445984, "rewards/rejected": -0.2404358983039856, "step": 780 }, { "epoch": 1.2301024428684002, "grad_norm": 0.2347894310951233, "learning_rate": 1.5557237772649567e-06, "log_odds_chosen": 1.203992247581482, "log_odds_ratio": -0.2692109942436218, "logits/chosen": -0.5334935188293457, "logits/rejected": -1.4999908208847046, "logps/chosen": -1.4735887050628662, "logps/rejected": -2.5025579929351807, "loss": 1.5064, "nll_loss": 1.479496955871582, "rewards/accuracies": 1.0, "rewards/chosen": -0.14735886454582214, "rewards/margins": 0.10289692133665085, "rewards/rejected": -0.2502557933330536, "step": 781 }, { "epoch": 1.231678486997636, "grad_norm": 0.20368658006191254, "learning_rate": 1.5503657060443866e-06, "log_odds_chosen": 1.3019431829452515, "log_odds_ratio": -0.2502024471759796, "logits/chosen": -0.5618449449539185, "logits/rejected": -1.432936191558838, "logps/chosen": -1.395858883857727, "logps/rejected": -2.497097969055176, "loss": 1.4394, "nll_loss": 1.4143925905227661, "rewards/accuracies": 1.0, "rewards/chosen": -0.13958589732646942, "rewards/margins": 0.1101238951086998, "rewards/rejected": -0.24970978498458862, "step": 782 }, { "epoch": 1.2332545311268714, "grad_norm": 0.2099921554327011, "learning_rate": 1.5450110315961457e-06, "log_odds_chosen": 1.5822702646255493, "log_odds_ratio": -0.20346100628376007, "logits/chosen": -0.6009377241134644, "logits/rejected": -1.6171350479125977, "logps/chosen": -1.4313298463821411, "logps/rejected": -2.80722975730896, "loss": 1.4718, "nll_loss": 1.451432228088379, "rewards/accuracies": 1.0, "rewards/chosen": -0.1431329846382141, "rewards/margins": 0.13759002089500427, "rewards/rejected": -0.280722975730896, "step": 783 }, { "epoch": 1.2348305752561073, "grad_norm": 0.22688640654087067, "learning_rate": 1.539659794372243e-06, "log_odds_chosen": 1.5025097131729126, "log_odds_ratio": -0.2302016317844391, "logits/chosen": -0.6282855272293091, "logits/rejected": -1.626317024230957, "logps/chosen": -1.4437496662139893, "logps/rejected": -2.7493932247161865, "loss": 1.4742, "nll_loss": 1.4511845111846924, "rewards/accuracies": 1.0, "rewards/chosen": -0.14437496662139893, "rewards/margins": 0.1305643618106842, "rewards/rejected": -0.27493932843208313, "step": 784 }, { "epoch": 1.2364066193853427, "grad_norm": 0.21591053903102875, "learning_rate": 1.5343120347987172e-06, "log_odds_chosen": 1.5057079792022705, "log_odds_ratio": -0.20444804430007935, "logits/chosen": -0.588503360748291, "logits/rejected": -1.5970463752746582, "logps/chosen": -1.4286227226257324, "logps/rejected": -2.728306293487549, "loss": 1.4729, "nll_loss": 1.4524708986282349, "rewards/accuracies": 1.0, "rewards/chosen": -0.14286227524280548, "rewards/margins": 0.12996836006641388, "rewards/rejected": -0.27283063530921936, "step": 785 }, { "epoch": 1.2379826635145785, "grad_norm": 0.2277948409318924, "learning_rate": 1.5289677932753398e-06, "log_odds_chosen": 1.2474923133850098, "log_odds_ratio": -0.2646195888519287, "logits/chosen": -0.607757031917572, "logits/rejected": -1.372312307357788, "logps/chosen": -1.463487982749939, "logps/rejected": -2.5320003032684326, "loss": 1.4958, "nll_loss": 1.4693405628204346, "rewards/accuracies": 1.0, "rewards/chosen": -0.14634880423545837, "rewards/margins": 0.10685122013092041, "rewards/rejected": -0.2532000243663788, "step": 786 }, { "epoch": 1.239558707643814, "grad_norm": 0.21466998755931854, "learning_rate": 1.5236271101753017e-06, "log_odds_chosen": 1.440157175064087, "log_odds_ratio": -0.21480616927146912, "logits/chosen": -0.6203504800796509, "logits/rejected": -1.5559945106506348, "logps/chosen": -1.4606151580810547, "logps/rejected": -2.70353364944458, "loss": 1.4769, "nll_loss": 1.4553958177566528, "rewards/accuracies": 1.0, "rewards/chosen": -0.14606152474880219, "rewards/margins": 0.12429183721542358, "rewards/rejected": -0.27035337686538696, "step": 787 }, { "epoch": 1.2411347517730495, "grad_norm": 0.21912704408168793, "learning_rate": 1.5182900258449135e-06, "log_odds_chosen": 1.60334312915802, "log_odds_ratio": -0.1867779791355133, "logits/chosen": -0.5966047048568726, "logits/rejected": -1.5762933492660522, "logps/chosen": -1.44041907787323, "logps/rejected": -2.8344244956970215, "loss": 1.4694, "nll_loss": 1.4507081508636475, "rewards/accuracies": 1.0, "rewards/chosen": -0.14404189586639404, "rewards/margins": 0.13940054178237915, "rewards/rejected": -0.2834424674510956, "step": 788 }, { "epoch": 1.2427107959022852, "grad_norm": 0.22293169796466827, "learning_rate": 1.5129565806032986e-06, "log_odds_chosen": 1.5123443603515625, "log_odds_ratio": -0.21256795525550842, "logits/chosen": -0.588271975517273, "logits/rejected": -1.4032602310180664, "logps/chosen": -1.330634593963623, "logps/rejected": -2.614960193634033, "loss": 1.3776, "nll_loss": 1.3562999963760376, "rewards/accuracies": 1.0, "rewards/chosen": -0.1330634504556656, "rewards/margins": 0.12843254208564758, "rewards/rejected": -0.261495977640152, "step": 789 }, { "epoch": 1.2442868400315208, "grad_norm": 0.2574654221534729, "learning_rate": 1.507626814742087e-06, "log_odds_chosen": 1.3430910110473633, "log_odds_ratio": -0.23965327441692352, "logits/chosen": -0.634746253490448, "logits/rejected": -1.3998106718063354, "logps/chosen": -1.5132931470870972, "logps/rejected": -2.677356004714966, "loss": 1.5481, "nll_loss": 1.52411687374115, "rewards/accuracies": 1.0, "rewards/chosen": -0.15132930874824524, "rewards/margins": 0.11640629172325134, "rewards/rejected": -0.2677356004714966, "step": 790 }, { "epoch": 1.2458628841607564, "grad_norm": 0.21352773904800415, "learning_rate": 1.502300768525115e-06, "log_odds_chosen": 1.5173344612121582, "log_odds_ratio": -0.2205386757850647, "logits/chosen": -0.5914537310600281, "logits/rejected": -1.5284650325775146, "logps/chosen": -1.5250039100646973, "logps/rejected": -2.858093023300171, "loss": 1.5251, "nll_loss": 1.503089189529419, "rewards/accuracies": 1.0, "rewards/chosen": -0.15250039100646973, "rewards/margins": 0.13330891728401184, "rewards/rejected": -0.28580930829048157, "step": 791 }, { "epoch": 1.247438928289992, "grad_norm": 0.25724470615386963, "learning_rate": 1.4969784821881177e-06, "log_odds_chosen": 1.2892694473266602, "log_odds_ratio": -0.25421732664108276, "logits/chosen": -0.6022913455963135, "logits/rejected": -1.2409374713897705, "logps/chosen": -1.3959331512451172, "logps/rejected": -2.485353469848633, "loss": 1.448, "nll_loss": 1.4225430488586426, "rewards/accuracies": 1.0, "rewards/chosen": -0.13959333300590515, "rewards/margins": 0.10894200205802917, "rewards/rejected": -0.24853533506393433, "step": 792 }, { "epoch": 1.2490149724192277, "grad_norm": 0.2178412228822708, "learning_rate": 1.4916599959384262e-06, "log_odds_chosen": 1.2507131099700928, "log_odds_ratio": -0.2574303448200226, "logits/chosen": -0.6103950142860413, "logits/rejected": -1.4853626489639282, "logps/chosen": -1.475551962852478, "logps/rejected": -2.548299789428711, "loss": 1.4999, "nll_loss": 1.474159598350525, "rewards/accuracies": 1.0, "rewards/chosen": -0.14755521714687347, "rewards/margins": 0.10727477073669434, "rewards/rejected": -0.254830002784729, "step": 793 }, { "epoch": 1.2505910165484633, "grad_norm": 0.30536407232284546, "learning_rate": 1.4863453499546643e-06, "log_odds_chosen": 1.4956989288330078, "log_odds_ratio": -0.21353504061698914, "logits/chosen": -0.5590574741363525, "logits/rejected": -1.4953241348266602, "logps/chosen": -1.406250238418579, "logps/rejected": -2.6932482719421387, "loss": 1.4367, "nll_loss": 1.4153844118118286, "rewards/accuracies": 1.0, "rewards/chosen": -0.1406250298023224, "rewards/margins": 0.12869982421398163, "rewards/rejected": -0.2693248391151428, "step": 794 }, { "epoch": 1.252167060677699, "grad_norm": 0.26826876401901245, "learning_rate": 1.4810345843864427e-06, "log_odds_chosen": 1.3864645957946777, "log_odds_ratio": -0.2319655865430832, "logits/chosen": -0.6440175175666809, "logits/rejected": -1.3618885278701782, "logps/chosen": -1.4762009382247925, "logps/rejected": -2.6720967292785645, "loss": 1.5074, "nll_loss": 1.4842313528060913, "rewards/accuracies": 1.0, "rewards/chosen": -0.1476200968027115, "rewards/margins": 0.11958958208560944, "rewards/rejected": -0.2672096788883209, "step": 795 }, { "epoch": 1.2537431048069345, "grad_norm": 0.30943307280540466, "learning_rate": 1.4757277393540598e-06, "log_odds_chosen": 1.3894554376602173, "log_odds_ratio": -0.23852145671844482, "logits/chosen": -0.5769093632698059, "logits/rejected": -1.2110780477523804, "logps/chosen": -1.4527041912078857, "logps/rejected": -2.6519722938537598, "loss": 1.493, "nll_loss": 1.4691100120544434, "rewards/accuracies": 1.0, "rewards/chosen": -0.1452704221010208, "rewards/margins": 0.1199268251657486, "rewards/rejected": -0.2651972472667694, "step": 796 }, { "epoch": 1.2553191489361701, "grad_norm": 0.21650095283985138, "learning_rate": 1.4704248549481946e-06, "log_odds_chosen": 1.3199766874313354, "log_odds_ratio": -0.2460341453552246, "logits/chosen": -0.6074656248092651, "logits/rejected": -1.613268256187439, "logps/chosen": -1.5171059370040894, "logps/rejected": -2.6643447875976562, "loss": 1.5401, "nll_loss": 1.515483021736145, "rewards/accuracies": 1.0, "rewards/chosen": -0.1517105996608734, "rewards/margins": 0.11472390592098236, "rewards/rejected": -0.26643452048301697, "step": 797 }, { "epoch": 1.2568951930654058, "grad_norm": 0.21090464293956757, "learning_rate": 1.4651259712296063e-06, "log_odds_chosen": 1.5943585634231567, "log_odds_ratio": -0.19176922738552094, "logits/chosen": -0.517490029335022, "logits/rejected": -1.4596930742263794, "logps/chosen": -1.4921587705612183, "logps/rejected": -2.8879101276397705, "loss": 1.5124, "nll_loss": 1.493196725845337, "rewards/accuracies": 1.0, "rewards/chosen": -0.14921587705612183, "rewards/margins": 0.13957512378692627, "rewards/rejected": -0.2887910008430481, "step": 798 }, { "epoch": 1.2584712371946414, "grad_norm": 0.24253034591674805, "learning_rate": 1.45983112822883e-06, "log_odds_chosen": 1.3575317859649658, "log_odds_ratio": -0.23305505514144897, "logits/chosen": -0.49398794770240784, "logits/rejected": -1.3266679048538208, "logps/chosen": -1.4118586778640747, "logps/rejected": -2.570868730545044, "loss": 1.4468, "nll_loss": 1.4234497547149658, "rewards/accuracies": 1.0, "rewards/chosen": -0.14118586480617523, "rewards/margins": 0.11590103805065155, "rewards/rejected": -0.2570869028568268, "step": 799 }, { "epoch": 1.260047281323877, "grad_norm": 0.22545106709003448, "learning_rate": 1.4545403659458756e-06, "log_odds_chosen": 1.4175035953521729, "log_odds_ratio": -0.2259032428264618, "logits/chosen": -0.6052595973014832, "logits/rejected": -1.311805248260498, "logps/chosen": -1.4253277778625488, "logps/rejected": -2.6440634727478027, "loss": 1.4589, "nll_loss": 1.4363291263580322, "rewards/accuracies": 1.0, "rewards/chosen": -0.14253278076648712, "rewards/margins": 0.12187359482049942, "rewards/rejected": -0.26440635323524475, "step": 800 }, { "epoch": 1.2616233254531126, "grad_norm": 0.22660937905311584, "learning_rate": 1.4492537243499253e-06, "log_odds_chosen": 1.4647465944290161, "log_odds_ratio": -0.21327193081378937, "logits/chosen": -0.6022266149520874, "logits/rejected": -1.414176344871521, "logps/chosen": -1.4155055284500122, "logps/rejected": -2.6666698455810547, "loss": 1.4326, "nll_loss": 1.4112662076950073, "rewards/accuracies": 1.0, "rewards/chosen": -0.14155057072639465, "rewards/margins": 0.12511645257472992, "rewards/rejected": -0.2666670083999634, "step": 801 }, { "epoch": 1.2631993695823482, "grad_norm": 0.20875735580921173, "learning_rate": 1.443971243379031e-06, "log_odds_chosen": 1.481856346130371, "log_odds_ratio": -0.2232595980167389, "logits/chosen": -0.5322192907333374, "logits/rejected": -1.3940941095352173, "logps/chosen": -1.4499024152755737, "logps/rejected": -2.7311134338378906, "loss": 1.4739, "nll_loss": 1.451573133468628, "rewards/accuracies": 1.0, "rewards/chosen": -0.1449902355670929, "rewards/margins": 0.12812109291553497, "rewards/rejected": -0.27311137318611145, "step": 802 }, { "epoch": 1.2647754137115839, "grad_norm": 0.23457227647304535, "learning_rate": 1.4386929629398144e-06, "log_odds_chosen": 1.6659356355667114, "log_odds_ratio": -0.1785147786140442, "logits/chosen": -0.5916958451271057, "logits/rejected": -1.5709147453308105, "logps/chosen": -1.4835273027420044, "logps/rejected": -2.9457387924194336, "loss": 1.5032, "nll_loss": 1.4853252172470093, "rewards/accuracies": 1.0, "rewards/chosen": -0.1483527421951294, "rewards/margins": 0.14622116088867188, "rewards/rejected": -0.2945738732814789, "step": 803 }, { "epoch": 1.2663514578408195, "grad_norm": 0.22985726594924927, "learning_rate": 1.4334189229071614e-06, "log_odds_chosen": 1.3873188495635986, "log_odds_ratio": -0.23752443492412567, "logits/chosen": -0.5912019610404968, "logits/rejected": -1.3441188335418701, "logps/chosen": -1.381171703338623, "logps/rejected": -2.564377546310425, "loss": 1.411, "nll_loss": 1.3872419595718384, "rewards/accuracies": 1.0, "rewards/chosen": -0.13811716437339783, "rewards/margins": 0.11832059174776077, "rewards/rejected": -0.256437748670578, "step": 804 }, { "epoch": 1.267927501970055, "grad_norm": 0.24306786060333252, "learning_rate": 1.4281491631239263e-06, "log_odds_chosen": 1.1761853694915771, "log_odds_ratio": -0.27694106101989746, "logits/chosen": -0.6338315606117249, "logits/rejected": -1.3575412034988403, "logps/chosen": -1.5164538621902466, "logps/rejected": -2.5306150913238525, "loss": 1.532, "nll_loss": 1.504274606704712, "rewards/accuracies": 1.0, "rewards/chosen": -0.15164539217948914, "rewards/margins": 0.10141611844301224, "rewards/rejected": -0.25306153297424316, "step": 805 }, { "epoch": 1.2695035460992907, "grad_norm": 0.22970065474510193, "learning_rate": 1.4228837234006272e-06, "log_odds_chosen": 1.6077741384506226, "log_odds_ratio": -0.19113853573799133, "logits/chosen": -0.6385869979858398, "logits/rejected": -1.403347134590149, "logps/chosen": -1.3846330642700195, "logps/rejected": -2.773245334625244, "loss": 1.436, "nll_loss": 1.4168992042541504, "rewards/accuracies": 1.0, "rewards/chosen": -0.1384633183479309, "rewards/margins": 0.13886120915412903, "rewards/rejected": -0.27732449769973755, "step": 806 }, { "epoch": 1.2710795902285263, "grad_norm": 0.2602978050708771, "learning_rate": 1.4176226435151462e-06, "log_odds_chosen": 1.4350380897521973, "log_odds_ratio": -0.22818563878536224, "logits/chosen": -0.6012192964553833, "logits/rejected": -1.6245769262313843, "logps/chosen": -1.4842414855957031, "logps/rejected": -2.735145330429077, "loss": 1.4962, "nll_loss": 1.473372220993042, "rewards/accuracies": 1.0, "rewards/chosen": -0.1484241634607315, "rewards/margins": 0.12509039044380188, "rewards/rejected": -0.2735145390033722, "step": 807 }, { "epoch": 1.272655634357762, "grad_norm": 0.20345047116279602, "learning_rate": 1.4123659632124298e-06, "log_odds_chosen": 1.564432978630066, "log_odds_ratio": -0.20057973265647888, "logits/chosen": -0.6357970237731934, "logits/rejected": -1.5508131980895996, "logps/chosen": -1.4488463401794434, "logps/rejected": -2.8090689182281494, "loss": 1.4804, "nll_loss": 1.4603270292282104, "rewards/accuracies": 1.0, "rewards/chosen": -0.1448846310377121, "rewards/margins": 0.13602225482463837, "rewards/rejected": -0.28090691566467285, "step": 808 }, { "epoch": 1.2742316784869976, "grad_norm": 0.21275083720684052, "learning_rate": 1.4071137222041852e-06, "log_odds_chosen": 1.5876526832580566, "log_odds_ratio": -0.1935243159532547, "logits/chosen": -0.5202667713165283, "logits/rejected": -1.6951507329940796, "logps/chosen": -1.4972716569900513, "logps/rejected": -2.887930154800415, "loss": 1.5175, "nll_loss": 1.4981794357299805, "rewards/accuracies": 1.0, "rewards/chosen": -0.14972718060016632, "rewards/margins": 0.13906586170196533, "rewards/rejected": -0.28879302740097046, "step": 809 }, { "epoch": 1.2758077226162332, "grad_norm": 0.25385233759880066, "learning_rate": 1.4018659601685857e-06, "log_odds_chosen": 1.38628089427948, "log_odds_ratio": -0.23076358437538147, "logits/chosen": -0.5455945730209351, "logits/rejected": -1.5138773918151855, "logps/chosen": -1.4868183135986328, "logps/rejected": -2.6896021366119385, "loss": 1.5078, "nll_loss": 1.4847323894500732, "rewards/accuracies": 1.0, "rewards/chosen": -0.14868183434009552, "rewards/margins": 0.12027836591005325, "rewards/rejected": -0.26896020770072937, "step": 810 }, { "epoch": 1.2773837667454688, "grad_norm": 0.25446489453315735, "learning_rate": 1.3966227167499667e-06, "log_odds_chosen": 1.307693600654602, "log_odds_ratio": -0.24345320463180542, "logits/chosen": -0.6371269822120667, "logits/rejected": -1.3773181438446045, "logps/chosen": -1.527349829673767, "logps/rejected": -2.6613223552703857, "loss": 1.545, "nll_loss": 1.520634651184082, "rewards/accuracies": 1.0, "rewards/chosen": -0.15273499488830566, "rewards/margins": 0.1133972555398941, "rewards/rejected": -0.2661322355270386, "step": 811 }, { "epoch": 1.2789598108747045, "grad_norm": 0.2630695104598999, "learning_rate": 1.3913840315585277e-06, "log_odds_chosen": 1.3261871337890625, "log_odds_ratio": -0.2366020679473877, "logits/chosen": -0.5820093750953674, "logits/rejected": -1.5055376291275024, "logps/chosen": -1.4547984600067139, "logps/rejected": -2.5924019813537598, "loss": 1.4846, "nll_loss": 1.4609538316726685, "rewards/accuracies": 1.0, "rewards/chosen": -0.14547984302043915, "rewards/margins": 0.11376036703586578, "rewards/rejected": -0.25924021005630493, "step": 812 }, { "epoch": 1.28053585500394, "grad_norm": 0.22853800654411316, "learning_rate": 1.3861499441700337e-06, "log_odds_chosen": 1.6726137399673462, "log_odds_ratio": -0.18094965815544128, "logits/chosen": -0.72444087266922, "logits/rejected": -1.5712106227874756, "logps/chosen": -1.5373544692993164, "logps/rejected": -3.019139289855957, "loss": 1.5554, "nll_loss": 1.537285566329956, "rewards/accuracies": 1.0, "rewards/chosen": -0.1537354439496994, "rewards/margins": 0.14817848801612854, "rewards/rejected": -0.30191394686698914, "step": 813 }, { "epoch": 1.2821118991331757, "grad_norm": 0.25108060240745544, "learning_rate": 1.3809204941255144e-06, "log_odds_chosen": 1.6380887031555176, "log_odds_ratio": -0.18578845262527466, "logits/chosen": -0.6443689465522766, "logits/rejected": -1.5698541402816772, "logps/chosen": -1.41604745388031, "logps/rejected": -2.836592674255371, "loss": 1.4439, "nll_loss": 1.425299048423767, "rewards/accuracies": 1.0, "rewards/chosen": -0.1416047364473343, "rewards/margins": 0.1420545130968094, "rewards/rejected": -0.28365927934646606, "step": 814 }, { "epoch": 1.2836879432624113, "grad_norm": 0.23532946407794952, "learning_rate": 1.3756957209309667e-06, "log_odds_chosen": 1.7262568473815918, "log_odds_ratio": -0.17713306844234467, "logits/chosen": -0.6322847604751587, "logits/rejected": -1.7657585144042969, "logps/chosen": -1.4623332023620605, "logps/rejected": -2.976186752319336, "loss": 1.4912, "nll_loss": 1.4735324382781982, "rewards/accuracies": 1.0, "rewards/chosen": -0.14623333513736725, "rewards/margins": 0.1513853520154953, "rewards/rejected": -0.29761865735054016, "step": 815 }, { "epoch": 1.285263987391647, "grad_norm": 0.2441030591726303, "learning_rate": 1.3704756640570575e-06, "log_odds_chosen": 1.7832515239715576, "log_odds_ratio": -0.17854107916355133, "logits/chosen": -0.663288414478302, "logits/rejected": -1.5476438999176025, "logps/chosen": -1.42481529712677, "logps/rejected": -2.986271619796753, "loss": 1.4527, "nll_loss": 1.434854507446289, "rewards/accuracies": 1.0, "rewards/chosen": -0.14248153567314148, "rewards/margins": 0.1561456173658371, "rewards/rejected": -0.2986271381378174, "step": 816 }, { "epoch": 1.2868400315208826, "grad_norm": 0.3268474042415619, "learning_rate": 1.3652603629388224e-06, "log_odds_chosen": 1.5470060110092163, "log_odds_ratio": -0.21487179398536682, "logits/chosen": -0.596767008304596, "logits/rejected": -1.5159244537353516, "logps/chosen": -1.3974632024765015, "logps/rejected": -2.725958824157715, "loss": 1.4215, "nll_loss": 1.3999779224395752, "rewards/accuracies": 1.0, "rewards/chosen": -0.13974632322788239, "rewards/margins": 0.1328495293855667, "rewards/rejected": -0.2725958526134491, "step": 817 }, { "epoch": 1.2884160756501182, "grad_norm": 0.23389191925525665, "learning_rate": 1.3600498569753715e-06, "log_odds_chosen": 1.4132609367370605, "log_odds_ratio": -0.2203066498041153, "logits/chosen": -0.45069289207458496, "logits/rejected": -1.3142499923706055, "logps/chosen": -1.4453165531158447, "logps/rejected": -2.6591105461120605, "loss": 1.4794, "nll_loss": 1.4573687314987183, "rewards/accuracies": 1.0, "rewards/chosen": -0.14453165233135223, "rewards/margins": 0.12137939780950546, "rewards/rejected": -0.2659110426902771, "step": 818 }, { "epoch": 1.2899921197793538, "grad_norm": 0.39004889130592346, "learning_rate": 1.3548441855295872e-06, "log_odds_chosen": 1.4098448753356934, "log_odds_ratio": -0.22153350710868835, "logits/chosen": -0.5996136665344238, "logits/rejected": -1.6268856525421143, "logps/chosen": -1.523671269416809, "logps/rejected": -2.753201723098755, "loss": 1.5461, "nll_loss": 1.5239500999450684, "rewards/accuracies": 1.0, "rewards/chosen": -0.15236711502075195, "rewards/margins": 0.12295305728912354, "rewards/rejected": -0.2753202021121979, "step": 819 }, { "epoch": 1.2915681639085894, "grad_norm": 0.23572242259979248, "learning_rate": 1.3496433879278315e-06, "log_odds_chosen": 1.4967519044876099, "log_odds_ratio": -0.22356414794921875, "logits/chosen": -0.5787345767021179, "logits/rejected": -1.5085010528564453, "logps/chosen": -1.5622023344039917, "logps/rejected": -2.886157989501953, "loss": 1.5728, "nll_loss": 1.5504934787750244, "rewards/accuracies": 1.0, "rewards/chosen": -0.15622025728225708, "rewards/margins": 0.13239558041095734, "rewards/rejected": -0.2886158227920532, "step": 820 }, { "epoch": 1.293144208037825, "grad_norm": 0.5434833765029907, "learning_rate": 1.3444475034596463e-06, "log_odds_chosen": 1.7494049072265625, "log_odds_ratio": -0.1667623519897461, "logits/chosen": -0.6446298360824585, "logits/rejected": -1.5591685771942139, "logps/chosen": -1.4353530406951904, "logps/rejected": -2.967869758605957, "loss": 1.4562, "nll_loss": 1.439555287361145, "rewards/accuracies": 1.0, "rewards/chosen": -0.1435353010892868, "rewards/margins": 0.15325166285037994, "rewards/rejected": -0.29678699374198914, "step": 821 }, { "epoch": 1.2947202521670607, "grad_norm": 0.24766425788402557, "learning_rate": 1.3392565713774575e-06, "log_odds_chosen": 1.8081917762756348, "log_odds_ratio": -0.15730388462543488, "logits/chosen": -0.6573777198791504, "logits/rejected": -1.6533654928207397, "logps/chosen": -1.4503732919692993, "logps/rejected": -3.0400443077087402, "loss": 1.4829, "nll_loss": 1.4671614170074463, "rewards/accuracies": 1.0, "rewards/chosen": -0.14503732323646545, "rewards/margins": 0.15896710753440857, "rewards/rejected": -0.3040044605731964, "step": 822 }, { "epoch": 1.2962962962962963, "grad_norm": 0.275490939617157, "learning_rate": 1.3340706308962763e-06, "log_odds_chosen": 1.485947608947754, "log_odds_ratio": -0.21844175457954407, "logits/chosen": -0.6285545825958252, "logits/rejected": -1.2534183263778687, "logps/chosen": -1.4148553609848022, "logps/rejected": -2.692004442214966, "loss": 1.4459, "nll_loss": 1.4240984916687012, "rewards/accuracies": 1.0, "rewards/chosen": -0.1414855271577835, "rewards/margins": 0.12771493196487427, "rewards/rejected": -0.2692004442214966, "step": 823 }, { "epoch": 1.297872340425532, "grad_norm": 0.3741922378540039, "learning_rate": 1.3288897211934066e-06, "log_odds_chosen": 1.4314900636672974, "log_odds_ratio": -0.22909072041511536, "logits/chosen": -0.601670503616333, "logits/rejected": -1.247763752937317, "logps/chosen": -1.4129483699798584, "logps/rejected": -2.6406874656677246, "loss": 1.4388, "nll_loss": 1.415844440460205, "rewards/accuracies": 1.0, "rewards/chosen": -0.14129483699798584, "rewards/margins": 0.12277393043041229, "rewards/rejected": -0.26406875252723694, "step": 824 }, { "epoch": 1.2994483845547675, "grad_norm": 0.2457880675792694, "learning_rate": 1.323713881408147e-06, "log_odds_chosen": 1.3239305019378662, "log_odds_ratio": -0.2442399561405182, "logits/chosen": -0.5879520773887634, "logits/rejected": -1.5013256072998047, "logps/chosen": -1.465965747833252, "logps/rejected": -2.60659122467041, "loss": 1.4891, "nll_loss": 1.4646568298339844, "rewards/accuracies": 1.0, "rewards/chosen": -0.14659658074378967, "rewards/margins": 0.11406257003545761, "rewards/rejected": -0.2606591582298279, "step": 825 }, { "epoch": 1.3010244286840031, "grad_norm": 0.23192963004112244, "learning_rate": 1.3185431506414943e-06, "log_odds_chosen": 1.5322251319885254, "log_odds_ratio": -0.2040068507194519, "logits/chosen": -0.6286110877990723, "logits/rejected": -1.5630531311035156, "logps/chosen": -1.3600577116012573, "logps/rejected": -2.671229839324951, "loss": 1.3889, "nll_loss": 1.3684895038604736, "rewards/accuracies": 1.0, "rewards/chosen": -0.13600577414035797, "rewards/margins": 0.13111719489097595, "rewards/rejected": -0.26712295413017273, "step": 826 }, { "epoch": 1.3026004728132388, "grad_norm": 0.2781152129173279, "learning_rate": 1.313377567955851e-06, "log_odds_chosen": 1.226921796798706, "log_odds_ratio": -0.27446916699409485, "logits/chosen": -0.5895642638206482, "logits/rejected": -1.3028472661972046, "logps/chosen": -1.4906383752822876, "logps/rejected": -2.549145460128784, "loss": 1.5115, "nll_loss": 1.4840279817581177, "rewards/accuracies": 1.0, "rewards/chosen": -0.1490638256072998, "rewards/margins": 0.10585072636604309, "rewards/rejected": -0.2549145519733429, "step": 827 }, { "epoch": 1.3041765169424744, "grad_norm": 0.24023838341236115, "learning_rate": 1.3082171723747257e-06, "log_odds_chosen": 1.3791821002960205, "log_odds_ratio": -0.2413456290960312, "logits/chosen": -0.7238616943359375, "logits/rejected": -1.4020274877548218, "logps/chosen": -1.4439319372177124, "logps/rejected": -2.6299452781677246, "loss": 1.4706, "nll_loss": 1.4465088844299316, "rewards/accuracies": 1.0, "rewards/chosen": -0.1443932056427002, "rewards/margins": 0.11860135197639465, "rewards/rejected": -0.26299452781677246, "step": 828 }, { "epoch": 1.30575256107171, "grad_norm": 0.2640969753265381, "learning_rate": 1.3030620028824424e-06, "log_odds_chosen": 1.3337830305099487, "log_odds_ratio": -0.24221113324165344, "logits/chosen": -0.6235900521278381, "logits/rejected": -1.3800705671310425, "logps/chosen": -1.458464503288269, "logps/rejected": -2.602323055267334, "loss": 1.4903, "nll_loss": 1.4661128520965576, "rewards/accuracies": 1.0, "rewards/chosen": -0.14584645628929138, "rewards/margins": 0.11438586562871933, "rewards/rejected": -0.2602323293685913, "step": 829 }, { "epoch": 1.3073286052009456, "grad_norm": 0.22373828291893005, "learning_rate": 1.2979120984238449e-06, "log_odds_chosen": 1.3962290287017822, "log_odds_ratio": -0.2311771810054779, "logits/chosen": -0.6370054483413696, "logits/rejected": -1.3749843835830688, "logps/chosen": -1.5016121864318848, "logps/rejected": -2.71468448638916, "loss": 1.5249, "nll_loss": 1.5017694234848022, "rewards/accuracies": 1.0, "rewards/chosen": -0.15016120672225952, "rewards/margins": 0.12130723893642426, "rewards/rejected": -0.27146846055984497, "step": 830 }, { "epoch": 1.3089046493301812, "grad_norm": 0.259819358587265, "learning_rate": 1.2927674979040009e-06, "log_odds_chosen": 1.3944642543792725, "log_odds_ratio": -0.22724983096122742, "logits/chosen": -0.5305383205413818, "logits/rejected": -1.2928234338760376, "logps/chosen": -1.3926821947097778, "logps/rejected": -2.578974962234497, "loss": 1.4403, "nll_loss": 1.4176024198532104, "rewards/accuracies": 1.0, "rewards/chosen": -0.13926823437213898, "rewards/margins": 0.11862929165363312, "rewards/rejected": -0.2578974962234497, "step": 831 }, { "epoch": 1.3104806934594169, "grad_norm": 0.2155289202928543, "learning_rate": 1.2876282401879106e-06, "log_odds_chosen": 1.5762255191802979, "log_odds_ratio": -0.19331632554531097, "logits/chosen": -0.5675473213195801, "logits/rejected": -1.423137903213501, "logps/chosen": -1.4213179349899292, "logps/rejected": -2.7864999771118164, "loss": 1.4538, "nll_loss": 1.4345142841339111, "rewards/accuracies": 1.0, "rewards/chosen": -0.14213180541992188, "rewards/margins": 0.136518195271492, "rewards/rejected": -0.2786499857902527, "step": 832 }, { "epoch": 1.3120567375886525, "grad_norm": 0.24451856315135956, "learning_rate": 1.2824943641002115e-06, "log_odds_chosen": 1.6182150840759277, "log_odds_ratio": -0.18711890280246735, "logits/chosen": -0.4954943060874939, "logits/rejected": -1.6108092069625854, "logps/chosen": -1.4183343648910522, "logps/rejected": -2.8229732513427734, "loss": 1.4494, "nll_loss": 1.4307172298431396, "rewards/accuracies": 1.0, "rewards/chosen": -0.14183342456817627, "rewards/margins": 0.1404639184474945, "rewards/rejected": -0.2822973430156708, "step": 833 }, { "epoch": 1.313632781717888, "grad_norm": 0.25867733359336853, "learning_rate": 1.2773659084248845e-06, "log_odds_chosen": 1.852787733078003, "log_odds_ratio": -0.17198441922664642, "logits/chosen": -0.7245616316795349, "logits/rejected": -1.652019739151001, "logps/chosen": -1.370871663093567, "logps/rejected": -2.991672992706299, "loss": 1.3967, "nll_loss": 1.3794749975204468, "rewards/accuracies": 1.0, "rewards/chosen": -0.1370871663093567, "rewards/margins": 0.16208013892173767, "rewards/rejected": -0.29916730523109436, "step": 834 }, { "epoch": 1.3152088258471237, "grad_norm": 0.23653243482112885, "learning_rate": 1.2722429119049632e-06, "log_odds_chosen": 1.2768532037734985, "log_odds_ratio": -0.2633589506149292, "logits/chosen": -0.5338461995124817, "logits/rejected": -1.3585484027862549, "logps/chosen": -1.503070592880249, "logps/rejected": -2.6095571517944336, "loss": 1.536, "nll_loss": 1.5097079277038574, "rewards/accuracies": 1.0, "rewards/chosen": -0.1503070592880249, "rewards/margins": 0.11064866185188293, "rewards/rejected": -0.26095569133758545, "step": 835 }, { "epoch": 1.3167848699763594, "grad_norm": 0.22052763402462006, "learning_rate": 1.267125413242239e-06, "log_odds_chosen": 1.5005533695220947, "log_odds_ratio": -0.21490009129047394, "logits/chosen": -0.6354590654373169, "logits/rejected": -1.3583699464797974, "logps/chosen": -1.3766456842422485, "logps/rejected": -2.661776065826416, "loss": 1.4172, "nll_loss": 1.3957130908966064, "rewards/accuracies": 1.0, "rewards/chosen": -0.1376645565032959, "rewards/margins": 0.12851302325725555, "rewards/rejected": -0.26617759466171265, "step": 836 }, { "epoch": 1.318360914105595, "grad_norm": 0.4790278673171997, "learning_rate": 1.2620134510969719e-06, "log_odds_chosen": 1.4586212635040283, "log_odds_ratio": -0.224677175283432, "logits/chosen": -0.6979159116744995, "logits/rejected": -1.2649545669555664, "logps/chosen": -1.3683842420578003, "logps/rejected": -2.615034341812134, "loss": 1.4085, "nll_loss": 1.386067509651184, "rewards/accuracies": 1.0, "rewards/chosen": -0.1368384212255478, "rewards/margins": 0.1246650293469429, "rewards/rejected": -0.2615034580230713, "step": 837 }, { "epoch": 1.3199369582348306, "grad_norm": 0.2725180685520172, "learning_rate": 1.2569070640875912e-06, "log_odds_chosen": 1.6334424018859863, "log_odds_ratio": -0.20868034660816193, "logits/chosen": -0.5379756689071655, "logits/rejected": -1.443952202796936, "logps/chosen": -1.346409797668457, "logps/rejected": -2.7176432609558105, "loss": 1.3902, "nll_loss": 1.369292974472046, "rewards/accuracies": 1.0, "rewards/chosen": -0.13464096188545227, "rewards/margins": 0.13712337613105774, "rewards/rejected": -0.27176433801651, "step": 838 }, { "epoch": 1.3215130023640662, "grad_norm": 0.42804601788520813, "learning_rate": 1.2518062907904136e-06, "log_odds_chosen": 1.711381435394287, "log_odds_ratio": -0.18208424746990204, "logits/chosen": -0.6807981729507446, "logits/rejected": -1.630470871925354, "logps/chosen": -1.4140769243240356, "logps/rejected": -2.9069175720214844, "loss": 1.4442, "nll_loss": 1.4259682893753052, "rewards/accuracies": 1.0, "rewards/chosen": -0.14140769839286804, "rewards/margins": 0.14928403496742249, "rewards/rejected": -0.2906917333602905, "step": 839 }, { "epoch": 1.3230890464933018, "grad_norm": 0.24280807375907898, "learning_rate": 1.2467111697393446e-06, "log_odds_chosen": 1.4815362691879272, "log_odds_ratio": -0.21086756885051727, "logits/chosen": -0.6065419912338257, "logits/rejected": -1.6878957748413086, "logps/chosen": -1.5209952592849731, "logps/rejected": -2.8176212310791016, "loss": 1.5441, "nll_loss": 1.5230549573898315, "rewards/accuracies": 1.0, "rewards/chosen": -0.15209950506687164, "rewards/margins": 0.12966260313987732, "rewards/rejected": -0.28176212310791016, "step": 840 }, { "epoch": 1.3246650906225375, "grad_norm": 0.23113702237606049, "learning_rate": 1.2416217394255905e-06, "log_odds_chosen": 1.3655986785888672, "log_odds_ratio": -0.23284479975700378, "logits/chosen": -0.5418767929077148, "logits/rejected": -1.2516670227050781, "logps/chosen": -1.3566389083862305, "logps/rejected": -2.5103282928466797, "loss": 1.4175, "nll_loss": 1.394230842590332, "rewards/accuracies": 1.0, "rewards/chosen": -0.13566389679908752, "rewards/margins": 0.11536893248558044, "rewards/rejected": -0.25103282928466797, "step": 841 }, { "epoch": 1.326241134751773, "grad_norm": 0.27183687686920166, "learning_rate": 1.2365380382973669e-06, "log_odds_chosen": 1.2834677696228027, "log_odds_ratio": -0.2571195662021637, "logits/chosen": -0.6271907091140747, "logits/rejected": -1.198774814605713, "logps/chosen": -1.5170949697494507, "logps/rejected": -2.6317522525787354, "loss": 1.5319, "nll_loss": 1.5062181949615479, "rewards/accuracies": 1.0, "rewards/chosen": -0.15170949697494507, "rewards/margins": 0.11146571487188339, "rewards/rejected": -0.26317524909973145, "step": 842 }, { "epoch": 1.3278171788810087, "grad_norm": 0.24289065599441528, "learning_rate": 1.2314601047596061e-06, "log_odds_chosen": 1.746811032295227, "log_odds_ratio": -0.1659754067659378, "logits/chosen": -0.5585183501243591, "logits/rejected": -1.6068834066390991, "logps/chosen": -1.4367531538009644, "logps/rejected": -2.9650981426239014, "loss": 1.4522, "nll_loss": 1.4355918169021606, "rewards/accuracies": 1.0, "rewards/chosen": -0.1436753123998642, "rewards/margins": 0.15283450484275818, "rewards/rejected": -0.2965098023414612, "step": 843 }, { "epoch": 1.3293932230102443, "grad_norm": 0.42870867252349854, "learning_rate": 1.2263879771736713e-06, "log_odds_chosen": 1.3868666887283325, "log_odds_ratio": -0.23231098055839539, "logits/chosen": -0.6453801989555359, "logits/rejected": -1.5813323259353638, "logps/chosen": -1.4578145742416382, "logps/rejected": -2.6517040729522705, "loss": 1.4862, "nll_loss": 1.462958812713623, "rewards/accuracies": 1.0, "rewards/chosen": -0.145781472325325, "rewards/margins": 0.11938894540071487, "rewards/rejected": -0.2651704251766205, "step": 844 }, { "epoch": 1.33096926713948, "grad_norm": 0.24256369471549988, "learning_rate": 1.2213216938570642e-06, "log_odds_chosen": 1.5715909004211426, "log_odds_ratio": -0.21311572194099426, "logits/chosen": -0.6242514848709106, "logits/rejected": -1.4461240768432617, "logps/chosen": -1.3803417682647705, "logps/rejected": -2.7272305488586426, "loss": 1.4229, "nll_loss": 1.401560664176941, "rewards/accuracies": 1.0, "rewards/chosen": -0.1380341649055481, "rewards/margins": 0.13468888401985168, "rewards/rejected": -0.27272307872772217, "step": 845 }, { "epoch": 1.3325453112687156, "grad_norm": 0.23952677845954895, "learning_rate": 1.2162612930831354e-06, "log_odds_chosen": 1.8564397096633911, "log_odds_ratio": -0.16202743351459503, "logits/chosen": -0.6841865181922913, "logits/rejected": -1.628495454788208, "logps/chosen": -1.2810691595077515, "logps/rejected": -2.8716635704040527, "loss": 1.3222, "nll_loss": 1.306032419204712, "rewards/accuracies": 1.0, "rewards/chosen": -0.12810692191123962, "rewards/margins": 0.15905943512916565, "rewards/rejected": -0.2871663570404053, "step": 846 }, { "epoch": 1.3341213553979512, "grad_norm": 0.2226683795452118, "learning_rate": 1.2112068130807949e-06, "log_odds_chosen": 1.9628236293792725, "log_odds_ratio": -0.13985173404216766, "logits/chosen": -0.6319723725318909, "logits/rejected": -1.795454978942871, "logps/chosen": -1.4285566806793213, "logps/rejected": -3.163897752761841, "loss": 1.4383, "nll_loss": 1.4242912530899048, "rewards/accuracies": 1.0, "rewards/chosen": -0.1428556740283966, "rewards/margins": 0.173534095287323, "rewards/rejected": -0.3163897693157196, "step": 847 }, { "epoch": 1.3356973995271868, "grad_norm": 0.26131734251976013, "learning_rate": 1.206158292034226e-06, "log_odds_chosen": 1.6567684412002563, "log_odds_ratio": -0.18120090663433075, "logits/chosen": -0.6492166519165039, "logits/rejected": -1.5648795366287231, "logps/chosen": -1.4466544389724731, "logps/rejected": -2.8918793201446533, "loss": 1.4714, "nll_loss": 1.4533123970031738, "rewards/accuracies": 1.0, "rewards/chosen": -0.1446654498577118, "rewards/margins": 0.1445225179195404, "rewards/rejected": -0.2891879677772522, "step": 848 }, { "epoch": 1.3372734436564224, "grad_norm": 0.253273069858551, "learning_rate": 1.2011157680825928e-06, "log_odds_chosen": 1.6794590950012207, "log_odds_ratio": -0.18548327684402466, "logits/chosen": -0.5395171046257019, "logits/rejected": -1.617524266242981, "logps/chosen": -1.3760827779769897, "logps/rejected": -2.8302364349365234, "loss": 1.4092, "nll_loss": 1.3906164169311523, "rewards/accuracies": 1.0, "rewards/chosen": -0.13760828971862793, "rewards/margins": 0.14541538059711456, "rewards/rejected": -0.2830236554145813, "step": 849 }, { "epoch": 1.338849487785658, "grad_norm": 0.24384582042694092, "learning_rate": 1.1960792793197553e-06, "log_odds_chosen": 1.6050926446914673, "log_odds_ratio": -0.1991470605134964, "logits/chosen": -0.707046389579773, "logits/rejected": -1.533729076385498, "logps/chosen": -1.427878975868225, "logps/rejected": -2.820812225341797, "loss": 1.4542, "nll_loss": 1.43429696559906, "rewards/accuracies": 1.0, "rewards/chosen": -0.14278791844844818, "rewards/margins": 0.13929331302642822, "rewards/rejected": -0.2820812165737152, "step": 850 }, { "epoch": 1.3404255319148937, "grad_norm": 0.24895353615283966, "learning_rate": 1.1910488637939824e-06, "log_odds_chosen": 1.9199038743972778, "log_odds_ratio": -0.1412464827299118, "logits/chosen": -0.7576161623001099, "logits/rejected": -1.6525764465332031, "logps/chosen": -1.4692214727401733, "logps/rejected": -3.17101788520813, "loss": 1.4715, "nll_loss": 1.4573686122894287, "rewards/accuracies": 1.0, "rewards/chosen": -0.14692214131355286, "rewards/margins": 0.17017965018749237, "rewards/rejected": -0.31710177659988403, "step": 851 }, { "epoch": 1.3420015760441293, "grad_norm": 0.26812073588371277, "learning_rate": 1.1860245595076582e-06, "log_odds_chosen": 1.313482642173767, "log_odds_ratio": -0.24426017701625824, "logits/chosen": -0.52881920337677, "logits/rejected": -1.493924617767334, "logps/chosen": -1.5094387531280518, "logps/rejected": -2.6473355293273926, "loss": 1.514, "nll_loss": 1.489540696144104, "rewards/accuracies": 1.0, "rewards/chosen": -0.15094387531280518, "rewards/margins": 0.11378967761993408, "rewards/rejected": -0.26473355293273926, "step": 852 }, { "epoch": 1.343577620173365, "grad_norm": 0.2410273551940918, "learning_rate": 1.1810064044170027e-06, "log_odds_chosen": 1.636589765548706, "log_odds_ratio": -0.19249330461025238, "logits/chosen": -0.6974166035652161, "logits/rejected": -1.5706920623779297, "logps/chosen": -1.39455246925354, "logps/rejected": -2.8113789558410645, "loss": 1.4253, "nll_loss": 1.4060957431793213, "rewards/accuracies": 1.0, "rewards/chosen": -0.13945524394512177, "rewards/margins": 0.1416826695203781, "rewards/rejected": -0.2811379134654999, "step": 853 }, { "epoch": 1.3451536643026005, "grad_norm": 0.2574176490306854, "learning_rate": 1.1759944364317812e-06, "log_odds_chosen": 1.603165626525879, "log_odds_ratio": -0.19906370341777802, "logits/chosen": -0.6916913986206055, "logits/rejected": -1.5008288621902466, "logps/chosen": -1.4492762088775635, "logps/rejected": -2.8470959663391113, "loss": 1.4688, "nll_loss": 1.4488977193832397, "rewards/accuracies": 1.0, "rewards/chosen": -0.14492763578891754, "rewards/margins": 0.13978195190429688, "rewards/rejected": -0.2847095727920532, "step": 854 }, { "epoch": 1.3467297084318361, "grad_norm": 0.2300931215286255, "learning_rate": 1.1709886934150172e-06, "log_odds_chosen": 1.4698625802993774, "log_odds_ratio": -0.21434006094932556, "logits/chosen": -0.566104531288147, "logits/rejected": -1.3819411993026733, "logps/chosen": -1.471221923828125, "logps/rejected": -2.7428500652313232, "loss": 1.5042, "nll_loss": 1.4827244281768799, "rewards/accuracies": 1.0, "rewards/chosen": -0.14712218940258026, "rewards/margins": 0.12716282904148102, "rewards/rejected": -0.2742850184440613, "step": 855 }, { "epoch": 1.3483057525610718, "grad_norm": 0.25239959359169006, "learning_rate": 1.1659892131827097e-06, "log_odds_chosen": 1.6053402423858643, "log_odds_ratio": -0.1971941441297531, "logits/chosen": -0.6730937361717224, "logits/rejected": -1.7223103046417236, "logps/chosen": -1.4930871725082397, "logps/rejected": -2.903635025024414, "loss": 1.5094, "nll_loss": 1.489640474319458, "rewards/accuracies": 1.0, "rewards/chosen": -0.1493087261915207, "rewards/margins": 0.14105476438999176, "rewards/rejected": -0.29036349058151245, "step": 856 }, { "epoch": 1.3498817966903074, "grad_norm": 0.21717876195907593, "learning_rate": 1.1609960335035423e-06, "log_odds_chosen": 2.0900535583496094, "log_odds_ratio": -0.12933163344860077, "logits/chosen": -0.642082691192627, "logits/rejected": -1.740064263343811, "logps/chosen": -1.4793895483016968, "logps/rejected": -3.3437135219573975, "loss": 1.4977, "nll_loss": 1.4847984313964844, "rewards/accuracies": 1.0, "rewards/chosen": -0.14793896675109863, "rewards/margins": 0.1864323616027832, "rewards/rejected": -0.33437132835388184, "step": 857 }, { "epoch": 1.351457840819543, "grad_norm": 0.24051296710968018, "learning_rate": 1.1560091920986028e-06, "log_odds_chosen": 1.7658697366714478, "log_odds_ratio": -0.1659710705280304, "logits/chosen": -0.642181932926178, "logits/rejected": -1.6060584783554077, "logps/chosen": -1.3994590044021606, "logps/rejected": -2.937842607498169, "loss": 1.4321, "nll_loss": 1.4154534339904785, "rewards/accuracies": 1.0, "rewards/chosen": -0.13994590938091278, "rewards/margins": 0.1538383513689041, "rewards/rejected": -0.2937842607498169, "step": 858 }, { "epoch": 1.3530338849487786, "grad_norm": 0.23283112049102783, "learning_rate": 1.1510287266410967e-06, "log_odds_chosen": 1.4090423583984375, "log_odds_ratio": -0.2285042703151703, "logits/chosen": -0.6504819989204407, "logits/rejected": -1.4080336093902588, "logps/chosen": -1.5133273601531982, "logps/rejected": -2.743722915649414, "loss": 1.5328, "nll_loss": 1.5099424123764038, "rewards/accuracies": 1.0, "rewards/chosen": -0.15133275091648102, "rewards/margins": 0.12303955107927322, "rewards/rejected": -0.27437227964401245, "step": 859 }, { "epoch": 1.3546099290780143, "grad_norm": 0.2225169539451599, "learning_rate": 1.1460546747560616e-06, "log_odds_chosen": 1.8164125680923462, "log_odds_ratio": -0.15852658450603485, "logits/chosen": -0.6664764285087585, "logits/rejected": -1.714491367340088, "logps/chosen": -1.4432940483093262, "logps/rejected": -3.0411689281463623, "loss": 1.4546, "nll_loss": 1.438779592514038, "rewards/accuracies": 1.0, "rewards/chosen": -0.14432939887046814, "rewards/margins": 0.15978752076625824, "rewards/rejected": -0.3041169047355652, "step": 860 }, { "epoch": 1.3561859732072499, "grad_norm": 0.2316436767578125, "learning_rate": 1.1410870740200839e-06, "log_odds_chosen": 1.617644190788269, "log_odds_ratio": -0.20058351755142212, "logits/chosen": -0.6289576292037964, "logits/rejected": -1.5740225315093994, "logps/chosen": -1.493541955947876, "logps/rejected": -2.917174816131592, "loss": 1.5095, "nll_loss": 1.489485263824463, "rewards/accuracies": 1.0, "rewards/chosen": -0.14935418963432312, "rewards/margins": 0.1423633098602295, "rewards/rejected": -0.2917174994945526, "step": 861 }, { "epoch": 1.3577620173364855, "grad_norm": 0.24486534297466278, "learning_rate": 1.1361259619610138e-06, "log_odds_chosen": 1.665561318397522, "log_odds_ratio": -0.1871521770954132, "logits/chosen": -0.6418501734733582, "logits/rejected": -1.6335361003875732, "logps/chosen": -1.431355595588684, "logps/rejected": -2.87868332862854, "loss": 1.4629, "nll_loss": 1.4441808462142944, "rewards/accuracies": 1.0, "rewards/chosen": -0.14313554763793945, "rewards/margins": 0.14473280310630798, "rewards/rejected": -0.28786835074424744, "step": 862 }, { "epoch": 1.3593380614657211, "grad_norm": 0.7537965178489685, "learning_rate": 1.1311713760576834e-06, "log_odds_chosen": 1.709722638130188, "log_odds_ratio": -0.1851150393486023, "logits/chosen": -0.7632265686988831, "logits/rejected": -1.3779895305633545, "logps/chosen": -1.4145159721374512, "logps/rejected": -2.9023544788360596, "loss": 1.4244, "nll_loss": 1.4058459997177124, "rewards/accuracies": 1.0, "rewards/chosen": -0.14145159721374512, "rewards/margins": 0.1487838327884674, "rewards/rejected": -0.2902354598045349, "step": 863 }, { "epoch": 1.3609141055949567, "grad_norm": 0.24928341805934906, "learning_rate": 1.1262233537396228e-06, "log_odds_chosen": 1.9565744400024414, "log_odds_ratio": -0.1643376350402832, "logits/chosen": -0.5985521078109741, "logits/rejected": -1.5526808500289917, "logps/chosen": -1.390761375427246, "logps/rejected": -3.1119015216827393, "loss": 1.4031, "nll_loss": 1.3867006301879883, "rewards/accuracies": 1.0, "rewards/chosen": -0.1390761286020279, "rewards/margins": 0.1721140295267105, "rewards/rejected": -0.3111901879310608, "step": 864 }, { "epoch": 1.3624901497241924, "grad_norm": 0.22759051620960236, "learning_rate": 1.1212819323867778e-06, "log_odds_chosen": 1.6447187662124634, "log_odds_ratio": -0.19922199845314026, "logits/chosen": -0.6582179665565491, "logits/rejected": -1.5183279514312744, "logps/chosen": -1.3644665479660034, "logps/rejected": -2.7891225814819336, "loss": 1.4077, "nll_loss": 1.387730598449707, "rewards/accuracies": 1.0, "rewards/chosen": -0.13644665479660034, "rewards/margins": 0.14246559143066406, "rewards/rejected": -0.2789122462272644, "step": 865 }, { "epoch": 1.364066193853428, "grad_norm": 0.24368128180503845, "learning_rate": 1.1163471493292267e-06, "log_odds_chosen": 1.6180881261825562, "log_odds_ratio": -0.19148674607276917, "logits/chosen": -0.6676050424575806, "logits/rejected": -1.5187058448791504, "logps/chosen": -1.446605920791626, "logps/rejected": -2.8559176921844482, "loss": 1.4644, "nll_loss": 1.4452784061431885, "rewards/accuracies": 1.0, "rewards/chosen": -0.1446605622768402, "rewards/margins": 0.14093118906021118, "rewards/rejected": -0.2855917513370514, "step": 866 }, { "epoch": 1.3656422379826636, "grad_norm": 0.21349558234214783, "learning_rate": 1.1114190418468972e-06, "log_odds_chosen": 1.581590175628662, "log_odds_ratio": -0.20506787300109863, "logits/chosen": -0.5727652311325073, "logits/rejected": -1.5056498050689697, "logps/chosen": -1.466938853263855, "logps/rejected": -2.853933811187744, "loss": 1.5026, "nll_loss": 1.4820456504821777, "rewards/accuracies": 1.0, "rewards/chosen": -0.1466939002275467, "rewards/margins": 0.138699471950531, "rewards/rejected": -0.2853933870792389, "step": 867 }, { "epoch": 1.367218282111899, "grad_norm": 0.23098094761371613, "learning_rate": 1.106497647169288e-06, "log_odds_chosen": 1.7774879932403564, "log_odds_ratio": -0.1619657725095749, "logits/chosen": -0.5783388018608093, "logits/rejected": -1.6011881828308105, "logps/chosen": -1.392938494682312, "logps/rejected": -2.9401845932006836, "loss": 1.4095, "nll_loss": 1.3932610750198364, "rewards/accuracies": 1.0, "rewards/chosen": -0.1392938792705536, "rewards/margins": 0.15472456812858582, "rewards/rejected": -0.2940184473991394, "step": 868 }, { "epoch": 1.3687943262411348, "grad_norm": 0.22277240455150604, "learning_rate": 1.1015830024751854e-06, "log_odds_chosen": 1.5932520627975464, "log_odds_ratio": -0.19965462386608124, "logits/chosen": -0.7604570388793945, "logits/rejected": -1.3756550550460815, "logps/chosen": -1.3519713878631592, "logps/rejected": -2.720393180847168, "loss": 1.3854, "nll_loss": 1.3654124736785889, "rewards/accuracies": 1.0, "rewards/chosen": -0.13519714772701263, "rewards/margins": 0.13684219121932983, "rewards/rejected": -0.2720393240451813, "step": 869 }, { "epoch": 1.3703703703703702, "grad_norm": 0.22521238029003143, "learning_rate": 1.0966751448923834e-06, "log_odds_chosen": 2.0099873542785645, "log_odds_ratio": -0.1357879936695099, "logits/chosen": -0.5918890833854675, "logits/rejected": -1.5549322366714478, "logps/chosen": -1.3542336225509644, "logps/rejected": -3.1139070987701416, "loss": 1.3828, "nll_loss": 1.3691737651824951, "rewards/accuracies": 1.0, "rewards/chosen": -0.13542336225509644, "rewards/margins": 0.17596739530563354, "rewards/rejected": -0.31139075756073, "step": 870 }, { "epoch": 1.371946414499606, "grad_norm": 0.24158324301242828, "learning_rate": 1.0917741114974007e-06, "log_odds_chosen": 1.520578145980835, "log_odds_ratio": -0.2033848613500595, "logits/chosen": -0.5592161417007446, "logits/rejected": -1.544834017753601, "logps/chosen": -1.4935863018035889, "logps/rejected": -2.8185956478118896, "loss": 1.5161, "nll_loss": 1.4957417249679565, "rewards/accuracies": 1.0, "rewards/chosen": -0.1493586301803589, "rewards/margins": 0.13250091671943665, "rewards/rejected": -0.28185954689979553, "step": 871 }, { "epoch": 1.3735224586288415, "grad_norm": 0.2237880378961563, "learning_rate": 1.0868799393152035e-06, "log_odds_chosen": 1.8863072395324707, "log_odds_ratio": -0.1634805053472519, "logits/chosen": -0.7058467268943787, "logits/rejected": -1.620200276374817, "logps/chosen": -1.3770359754562378, "logps/rejected": -3.026153087615967, "loss": 1.4051, "nll_loss": 1.3887983560562134, "rewards/accuracies": 1.0, "rewards/chosen": -0.13770359754562378, "rewards/margins": 0.16491171717643738, "rewards/rejected": -0.30261531472206116, "step": 872 }, { "epoch": 1.3750985027580773, "grad_norm": 0.28563395142555237, "learning_rate": 1.0819926653189271e-06, "log_odds_chosen": 1.5606904029846191, "log_odds_ratio": -0.2044568657875061, "logits/chosen": -0.6395079493522644, "logits/rejected": -1.3664402961730957, "logps/chosen": -1.4640522003173828, "logps/rejected": -2.8246538639068604, "loss": 1.4915, "nll_loss": 1.471075177192688, "rewards/accuracies": 1.0, "rewards/chosen": -0.14640523493289948, "rewards/margins": 0.13606014847755432, "rewards/rejected": -0.282465398311615, "step": 873 }, { "epoch": 1.3766745468873127, "grad_norm": 0.21589653193950653, "learning_rate": 1.0771123264295895e-06, "log_odds_chosen": 1.5921666622161865, "log_odds_ratio": -0.19474714994430542, "logits/chosen": -0.6634510159492493, "logits/rejected": -1.6877684593200684, "logps/chosen": -1.3579119443893433, "logps/rejected": -2.720470905303955, "loss": 1.3885, "nll_loss": 1.3689954280853271, "rewards/accuracies": 1.0, "rewards/chosen": -0.13579118251800537, "rewards/margins": 0.1362559199333191, "rewards/rejected": -0.27204710245132446, "step": 874 }, { "epoch": 1.3782505910165486, "grad_norm": 0.22386837005615234, "learning_rate": 1.0722389595158215e-06, "log_odds_chosen": 1.8254528045654297, "log_odds_ratio": -0.16639356315135956, "logits/chosen": -0.5494032502174377, "logits/rejected": -1.5416202545166016, "logps/chosen": -1.3461651802062988, "logps/rejected": -2.931638717651367, "loss": 1.382, "nll_loss": 1.3653497695922852, "rewards/accuracies": 1.0, "rewards/chosen": -0.13461652398109436, "rewards/margins": 0.15854734182357788, "rewards/rejected": -0.29316386580467224, "step": 875 }, { "epoch": 1.379826635145784, "grad_norm": 0.2681967318058014, "learning_rate": 1.0673726013935827e-06, "log_odds_chosen": 1.7635384798049927, "log_odds_ratio": -0.19242730736732483, "logits/chosen": -0.5731449723243713, "logits/rejected": -1.4597039222717285, "logps/chosen": -1.3603630065917969, "logps/rejected": -2.890061140060425, "loss": 1.3968, "nll_loss": 1.3775389194488525, "rewards/accuracies": 1.0, "rewards/chosen": -0.13603630661964417, "rewards/margins": 0.1529698371887207, "rewards/rejected": -0.28900614380836487, "step": 876 }, { "epoch": 1.3814026792750198, "grad_norm": 0.22812511026859283, "learning_rate": 1.0625132888258833e-06, "log_odds_chosen": 1.6586129665374756, "log_odds_ratio": -0.19311630725860596, "logits/chosen": -0.701583206653595, "logits/rejected": -1.421006679534912, "logps/chosen": -1.4265589714050293, "logps/rejected": -2.8771860599517822, "loss": 1.4502, "nll_loss": 1.4308946132659912, "rewards/accuracies": 1.0, "rewards/chosen": -0.1426558941602707, "rewards/margins": 0.14506272971630096, "rewards/rejected": -0.28771862387657166, "step": 877 }, { "epoch": 1.3829787234042552, "grad_norm": 0.2343727946281433, "learning_rate": 1.057661058522509e-06, "log_odds_chosen": 1.8158915042877197, "log_odds_ratio": -0.15713225305080414, "logits/chosen": -0.6596369743347168, "logits/rejected": -1.681816816329956, "logps/chosen": -1.4494779109954834, "logps/rejected": -3.045167922973633, "loss": 1.4767, "nll_loss": 1.4609739780426025, "rewards/accuracies": 1.0, "rewards/chosen": -0.144947811961174, "rewards/margins": 0.15956899523735046, "rewards/rejected": -0.3045167922973633, "step": 878 }, { "epoch": 1.384554767533491, "grad_norm": 0.5640384554862976, "learning_rate": 1.0528159471397425e-06, "log_odds_chosen": 1.5712882280349731, "log_odds_ratio": -0.20260006189346313, "logits/chosen": -0.626277506351471, "logits/rejected": -1.5943480730056763, "logps/chosen": -1.3989372253417969, "logps/rejected": -2.756653070449829, "loss": 1.4339, "nll_loss": 1.4136770963668823, "rewards/accuracies": 1.0, "rewards/chosen": -0.13989374041557312, "rewards/margins": 0.13577157258987427, "rewards/rejected": -0.275665283203125, "step": 879 }, { "epoch": 1.3861308116627264, "grad_norm": 0.2508867383003235, "learning_rate": 1.0479779912800868e-06, "log_odds_chosen": 1.6103177070617676, "log_odds_ratio": -0.19111379981040955, "logits/chosen": -0.758451521396637, "logits/rejected": -1.5467413663864136, "logps/chosen": -1.3675017356872559, "logps/rejected": -2.7500529289245605, "loss": 1.4019, "nll_loss": 1.382838249206543, "rewards/accuracies": 1.0, "rewards/chosen": -0.13675017654895782, "rewards/margins": 0.13825511932373047, "rewards/rejected": -0.2750052809715271, "step": 880 }, { "epoch": 1.3877068557919623, "grad_norm": 0.22995373606681824, "learning_rate": 1.0431472274919863e-06, "log_odds_chosen": 1.8982397317886353, "log_odds_ratio": -0.1501951813697815, "logits/chosen": -0.547713041305542, "logits/rejected": -1.669274926185608, "logps/chosen": -1.3867900371551514, "logps/rejected": -3.0491251945495605, "loss": 1.4107, "nll_loss": 1.395638108253479, "rewards/accuracies": 1.0, "rewards/chosen": -0.13867899775505066, "rewards/margins": 0.16623355448246002, "rewards/rejected": -0.3049125671386719, "step": 881 }, { "epoch": 1.3892828999211977, "grad_norm": 0.24104370176792145, "learning_rate": 1.0383236922695543e-06, "log_odds_chosen": 1.910417914390564, "log_odds_ratio": -0.1467703878879547, "logits/chosen": -0.4995293915271759, "logits/rejected": -1.5275884866714478, "logps/chosen": -1.2323276996612549, "logps/rejected": -2.8423500061035156, "loss": 1.2803, "nll_loss": 1.2656131982803345, "rewards/accuracies": 1.0, "rewards/chosen": -0.12323278933763504, "rewards/margins": 0.16100221872329712, "rewards/rejected": -0.28423500061035156, "step": 882 }, { "epoch": 1.3908589440504335, "grad_norm": 0.25414207577705383, "learning_rate": 1.0335074220522962e-06, "log_odds_chosen": 1.8656892776489258, "log_odds_ratio": -0.16491052508354187, "logits/chosen": -0.6115697026252747, "logits/rejected": -1.4091947078704834, "logps/chosen": -1.4603948593139648, "logps/rejected": -3.113933563232422, "loss": 1.4856, "nll_loss": 1.4691309928894043, "rewards/accuracies": 1.0, "rewards/chosen": -0.14603950083255768, "rewards/margins": 0.16535384953022003, "rewards/rejected": -0.3113933503627777, "step": 883 }, { "epoch": 1.392434988179669, "grad_norm": 0.26081377267837524, "learning_rate": 1.0286984532248326e-06, "log_odds_chosen": 1.367045521736145, "log_odds_ratio": -0.24221490323543549, "logits/chosen": -0.5516694188117981, "logits/rejected": -1.562090277671814, "logps/chosen": -1.4935163259506226, "logps/rejected": -2.6765871047973633, "loss": 1.5151, "nll_loss": 1.4908676147460938, "rewards/accuracies": 1.0, "rewards/chosen": -0.14935162663459778, "rewards/margins": 0.11830709874629974, "rewards/rejected": -0.2676587402820587, "step": 884 }, { "epoch": 1.3940110323089048, "grad_norm": 0.2518186867237091, "learning_rate": 1.0238968221166269e-06, "log_odds_chosen": 1.606108546257019, "log_odds_ratio": -0.20955884456634521, "logits/chosen": -0.6107924580574036, "logits/rejected": -1.5653332471847534, "logps/chosen": -1.451974868774414, "logps/rejected": -2.859508991241455, "loss": 1.4846, "nll_loss": 1.4636868238449097, "rewards/accuracies": 1.0, "rewards/chosen": -0.14519749581813812, "rewards/margins": 0.1407533884048462, "rewards/rejected": -0.2859508693218231, "step": 885 }, { "epoch": 1.3955870764381402, "grad_norm": 0.23638294637203217, "learning_rate": 1.019102565001707e-06, "log_odds_chosen": 1.4686362743377686, "log_odds_ratio": -0.22250008583068848, "logits/chosen": -0.585566520690918, "logits/rejected": -1.3059039115905762, "logps/chosen": -1.4826006889343262, "logps/rejected": -2.76189923286438, "loss": 1.507, "nll_loss": 1.484725832939148, "rewards/accuracies": 1.0, "rewards/chosen": -0.14826007187366486, "rewards/margins": 0.12792986631393433, "rewards/rejected": -0.276189923286438, "step": 886 }, { "epoch": 1.397163120567376, "grad_norm": 0.2579563856124878, "learning_rate": 1.0143157180983965e-06, "log_odds_chosen": 1.5000890493392944, "log_odds_ratio": -0.2140539139509201, "logits/chosen": -0.6484724283218384, "logits/rejected": -1.4389346837997437, "logps/chosen": -1.4671251773834229, "logps/rejected": -2.7712459564208984, "loss": 1.4666, "nll_loss": 1.4452418088912964, "rewards/accuracies": 1.0, "rewards/chosen": -0.1467125117778778, "rewards/margins": 0.13041208684444427, "rewards/rejected": -0.2771245837211609, "step": 887 }, { "epoch": 1.3987391646966114, "grad_norm": 0.3018769919872284, "learning_rate": 1.0095363175690375e-06, "log_odds_chosen": 1.5418834686279297, "log_odds_ratio": -0.20728465914726257, "logits/chosen": -0.6682940125465393, "logits/rejected": -1.2616838216781616, "logps/chosen": -1.4611940383911133, "logps/rejected": -2.8027215003967285, "loss": 1.4726, "nll_loss": 1.4518331289291382, "rewards/accuracies": 1.0, "rewards/chosen": -0.1461194008588791, "rewards/margins": 0.13415274024009705, "rewards/rejected": -0.28027215600013733, "step": 888 }, { "epoch": 1.4003152088258473, "grad_norm": 0.2607669234275818, "learning_rate": 1.0047643995197178e-06, "log_odds_chosen": 1.623878836631775, "log_odds_ratio": -0.1956598311662674, "logits/chosen": -0.5829888582229614, "logits/rejected": -1.5397287607192993, "logps/chosen": -1.3773345947265625, "logps/rejected": -2.7731235027313232, "loss": 1.403, "nll_loss": 1.383469581604004, "rewards/accuracies": 1.0, "rewards/chosen": -0.13773347437381744, "rewards/margins": 0.1395788937807083, "rewards/rejected": -0.27731236815452576, "step": 889 }, { "epoch": 1.4018912529550827, "grad_norm": 0.25077128410339355, "learning_rate": 1.0000000000000004e-06, "log_odds_chosen": 1.5483626127243042, "log_odds_ratio": -0.20669405162334442, "logits/chosen": -0.5734342336654663, "logits/rejected": -1.4856351613998413, "logps/chosen": -1.424758791923523, "logps/rejected": -2.760406255722046, "loss": 1.4639, "nll_loss": 1.4432692527770996, "rewards/accuracies": 1.0, "rewards/chosen": -0.142475888133049, "rewards/margins": 0.13356474041938782, "rewards/rejected": -0.27604061365127563, "step": 890 }, { "epoch": 1.4034672970843185, "grad_norm": 0.22610604763031006, "learning_rate": 9.952431550026459e-07, "log_odds_chosen": 1.6889674663543701, "log_odds_ratio": -0.18497657775878906, "logits/chosen": -0.7005455493927002, "logits/rejected": -1.4289085865020752, "logps/chosen": -1.4461010694503784, "logps/rejected": -2.9273979663848877, "loss": 1.4737, "nll_loss": 1.455183744430542, "rewards/accuracies": 1.0, "rewards/chosen": -0.14461010694503784, "rewards/margins": 0.14812970161437988, "rewards/rejected": -0.2927398085594177, "step": 891 }, { "epoch": 1.405043341213554, "grad_norm": 0.24834203720092773, "learning_rate": 9.904939004633471e-07, "log_odds_chosen": 1.790747046470642, "log_odds_ratio": -0.1575406938791275, "logits/chosen": -0.7561191916465759, "logits/rejected": -1.595555305480957, "logps/chosen": -1.5042771100997925, "logps/rejected": -3.087547540664673, "loss": 1.5081, "nll_loss": 1.4923908710479736, "rewards/accuracies": 1.0, "rewards/chosen": -0.1504276990890503, "rewards/margins": 0.15832704305648804, "rewards/rejected": -0.30875474214553833, "step": 892 }, { "epoch": 1.4066193853427895, "grad_norm": 0.2530110776424408, "learning_rate": 9.857522722604536e-07, "log_odds_chosen": 1.5290027856826782, "log_odds_ratio": -0.22384284436702728, "logits/chosen": -0.6061379909515381, "logits/rejected": -1.227806806564331, "logps/chosen": -1.3785853385925293, "logps/rejected": -2.6948788166046143, "loss": 1.424, "nll_loss": 1.4015873670578003, "rewards/accuracies": 1.0, "rewards/chosen": -0.1378585398197174, "rewards/margins": 0.1316293627023697, "rewards/rejected": -0.2694878876209259, "step": 893 }, { "epoch": 1.4081954294720251, "grad_norm": 0.2474088817834854, "learning_rate": 9.81018306214702e-07, "log_odds_chosen": 1.502267599105835, "log_odds_ratio": -0.2099406123161316, "logits/chosen": -0.6570034027099609, "logits/rejected": -1.3393932580947876, "logps/chosen": -1.3909960985183716, "logps/rejected": -2.6762640476226807, "loss": 1.4395, "nll_loss": 1.4185012578964233, "rewards/accuracies": 1.0, "rewards/chosen": -0.1390996128320694, "rewards/margins": 0.12852680683135986, "rewards/rejected": -0.26762640476226807, "step": 894 }, { "epoch": 1.4097714736012608, "grad_norm": 0.33020880818367004, "learning_rate": 9.76292038088945e-07, "log_odds_chosen": 1.5085315704345703, "log_odds_ratio": -0.20608478784561157, "logits/chosen": -0.5883653163909912, "logits/rejected": -1.3349723815917969, "logps/chosen": -1.4683583974838257, "logps/rejected": -2.7802469730377197, "loss": 1.4866, "nll_loss": 1.4659663438796997, "rewards/accuracies": 1.0, "rewards/chosen": -0.14683584868907928, "rewards/margins": 0.13118883967399597, "rewards/rejected": -0.27802467346191406, "step": 895 }, { "epoch": 1.4113475177304964, "grad_norm": 0.44886964559555054, "learning_rate": 9.715735035878799e-07, "log_odds_chosen": 1.7419822216033936, "log_odds_ratio": -0.17037026584148407, "logits/chosen": -0.6762113571166992, "logits/rejected": -1.5708619356155396, "logps/chosen": -1.3584445714950562, "logps/rejected": -2.8568472862243652, "loss": 1.3838, "nll_loss": 1.3667407035827637, "rewards/accuracies": 1.0, "rewards/chosen": -0.13584445416927338, "rewards/margins": 0.14984026551246643, "rewards/rejected": -0.285684734582901, "step": 896 }, { "epoch": 1.412923561859732, "grad_norm": 0.23925159871578217, "learning_rate": 9.668627383577812e-07, "log_odds_chosen": 1.632826805114746, "log_odds_ratio": -0.21078652143478394, "logits/chosen": -0.6347646117210388, "logits/rejected": -1.6543445587158203, "logps/chosen": -1.4457504749298096, "logps/rejected": -2.8781142234802246, "loss": 1.4757, "nll_loss": 1.454664945602417, "rewards/accuracies": 1.0, "rewards/chosen": -0.1445750594139099, "rewards/margins": 0.14323639869689941, "rewards/rejected": -0.2878114581108093, "step": 897 }, { "epoch": 1.4144996059889676, "grad_norm": 0.26766297221183777, "learning_rate": 9.621597779862307e-07, "log_odds_chosen": 1.3921657800674438, "log_odds_ratio": -0.23004251718521118, "logits/chosen": -0.708504319190979, "logits/rejected": -1.4522309303283691, "logps/chosen": -1.4511884450912476, "logps/rejected": -2.647890329360962, "loss": 1.4749, "nll_loss": 1.4518711566925049, "rewards/accuracies": 1.0, "rewards/chosen": -0.1451188623905182, "rewards/margins": 0.11967018246650696, "rewards/rejected": -0.26478904485702515, "step": 898 }, { "epoch": 1.4160756501182032, "grad_norm": 0.2688276171684265, "learning_rate": 9.57464658001848e-07, "log_odds_chosen": 1.5197861194610596, "log_odds_ratio": -0.20404323935508728, "logits/chosen": -0.6154606938362122, "logits/rejected": -1.4863282442092896, "logps/chosen": -1.457461953163147, "logps/rejected": -2.777696371078491, "loss": 1.4822, "nll_loss": 1.4618126153945923, "rewards/accuracies": 1.0, "rewards/chosen": -0.14574620127677917, "rewards/margins": 0.13202345371246338, "rewards/rejected": -0.27776965498924255, "step": 899 }, { "epoch": 1.4176516942474389, "grad_norm": 0.2558063268661499, "learning_rate": 9.527774138740212e-07, "log_odds_chosen": 1.5778982639312744, "log_odds_ratio": -0.19274799525737762, "logits/chosen": -0.7722602486610413, "logits/rejected": -1.5500413179397583, "logps/chosen": -1.417495608329773, "logps/rejected": -2.7712903022766113, "loss": 1.4513, "nll_loss": 1.4320013523101807, "rewards/accuracies": 1.0, "rewards/chosen": -0.1417495608329773, "rewards/margins": 0.13537943363189697, "rewards/rejected": -0.27712899446487427, "step": 900 }, { "epoch": 1.4192277383766745, "grad_norm": 0.28994813561439514, "learning_rate": 9.480980810126411e-07, "log_odds_chosen": 1.7850899696350098, "log_odds_ratio": -0.19235913455486298, "logits/chosen": -0.7060902118682861, "logits/rejected": -1.4891314506530762, "logps/chosen": -1.3143898248672485, "logps/rejected": -2.8652384281158447, "loss": 1.3578, "nll_loss": 1.338534951210022, "rewards/accuracies": 1.0, "rewards/chosen": -0.1314389854669571, "rewards/margins": 0.15508489310741425, "rewards/rejected": -0.28652387857437134, "step": 901 }, { "epoch": 1.42080378250591, "grad_norm": 0.2505437433719635, "learning_rate": 9.434266947678324e-07, "log_odds_chosen": 1.6816164255142212, "log_odds_ratio": -0.19430740177631378, "logits/chosen": -0.6904462575912476, "logits/rejected": -1.5032645463943481, "logps/chosen": -1.4055613279342651, "logps/rejected": -2.8726818561553955, "loss": 1.4249, "nll_loss": 1.4054228067398071, "rewards/accuracies": 1.0, "rewards/chosen": -0.14055614173412323, "rewards/margins": 0.1467120200395584, "rewards/rejected": -0.28726816177368164, "step": 902 }, { "epoch": 1.4223798266351457, "grad_norm": 0.2679803669452667, "learning_rate": 9.387632904296872e-07, "log_odds_chosen": 2.116358518600464, "log_odds_ratio": -0.1409800499677658, "logits/chosen": -0.6455526351928711, "logits/rejected": -1.6516026258468628, "logps/chosen": -1.4351006746292114, "logps/rejected": -3.3209948539733887, "loss": 1.4499, "nll_loss": 1.4358484745025635, "rewards/accuracies": 1.0, "rewards/chosen": -0.14351005852222443, "rewards/margins": 0.188589408993721, "rewards/rejected": -0.3320994973182678, "step": 903 }, { "epoch": 1.4239558707643813, "grad_norm": 0.37360966205596924, "learning_rate": 9.341079032279986e-07, "log_odds_chosen": 1.7366825342178345, "log_odds_ratio": -0.1803555190563202, "logits/chosen": -0.589972972869873, "logits/rejected": -1.415221929550171, "logps/chosen": -1.393925428390503, "logps/rejected": -2.9021759033203125, "loss": 1.4264, "nll_loss": 1.408334732055664, "rewards/accuracies": 1.0, "rewards/chosen": -0.13939253985881805, "rewards/margins": 0.15082502365112305, "rewards/rejected": -0.2902175784111023, "step": 904 }, { "epoch": 1.425531914893617, "grad_norm": 0.2487158626317978, "learning_rate": 9.294605683319919e-07, "log_odds_chosen": 1.7900364398956299, "log_odds_ratio": -0.17004141211509705, "logits/chosen": -0.6846928596496582, "logits/rejected": -1.5274299383163452, "logps/chosen": -1.2867729663848877, "logps/rejected": -2.8194665908813477, "loss": 1.318, "nll_loss": 1.3010035753250122, "rewards/accuracies": 1.0, "rewards/chosen": -0.12867729365825653, "rewards/margins": 0.15326935052871704, "rewards/rejected": -0.28194665908813477, "step": 905 }, { "epoch": 1.4271079590228526, "grad_norm": 0.24308228492736816, "learning_rate": 9.248213208500629e-07, "log_odds_chosen": 1.6273454427719116, "log_odds_ratio": -0.19989073276519775, "logits/chosen": -0.7513257265090942, "logits/rejected": -1.5772173404693604, "logps/chosen": -1.467084527015686, "logps/rejected": -2.891197443008423, "loss": 1.4786, "nll_loss": 1.4586395025253296, "rewards/accuracies": 1.0, "rewards/chosen": -0.14670845866203308, "rewards/margins": 0.14241132140159607, "rewards/rejected": -0.28911978006362915, "step": 906 }, { "epoch": 1.4286840031520882, "grad_norm": 0.24337664246559143, "learning_rate": 9.201901958295115e-07, "log_odds_chosen": 1.9387871026992798, "log_odds_ratio": -0.15630395710468292, "logits/chosen": -0.6508644819259644, "logits/rejected": -1.4973303079605103, "logps/chosen": -1.4235007762908936, "logps/rejected": -3.137270450592041, "loss": 1.4616, "nll_loss": 1.4459247589111328, "rewards/accuracies": 1.0, "rewards/chosen": -0.14235009253025055, "rewards/margins": 0.17137697339057922, "rewards/rejected": -0.3137270510196686, "step": 907 }, { "epoch": 1.4302600472813238, "grad_norm": 0.2549786865711212, "learning_rate": 9.155672282562736e-07, "log_odds_chosen": 1.659327745437622, "log_odds_ratio": -0.18966315686702728, "logits/chosen": -0.7559006810188293, "logits/rejected": -1.5569121837615967, "logps/chosen": -1.4450880289077759, "logps/rejected": -2.8973772525787354, "loss": 1.4747, "nll_loss": 1.455707311630249, "rewards/accuracies": 1.0, "rewards/chosen": -0.14450879395008087, "rewards/margins": 0.14522895216941833, "rewards/rejected": -0.289737731218338, "step": 908 }, { "epoch": 1.4318360914105595, "grad_norm": 0.25416597723960876, "learning_rate": 9.109524530546622e-07, "log_odds_chosen": 1.5111150741577148, "log_odds_ratio": -0.20951895415782928, "logits/chosen": -0.7180823683738708, "logits/rejected": -1.4877595901489258, "logps/chosen": -1.4433443546295166, "logps/rejected": -2.7477176189422607, "loss": 1.4756, "nll_loss": 1.4546022415161133, "rewards/accuracies": 1.0, "rewards/chosen": -0.14433442056179047, "rewards/margins": 0.13043732941150665, "rewards/rejected": -0.2747717499732971, "step": 909 }, { "epoch": 1.433412135539795, "grad_norm": 0.29453134536743164, "learning_rate": 9.063459050871001e-07, "log_odds_chosen": 1.5925884246826172, "log_odds_ratio": -0.1985904574394226, "logits/chosen": -0.7714993953704834, "logits/rejected": -1.2546613216400146, "logps/chosen": -1.410155177116394, "logps/rejected": -2.788203001022339, "loss": 1.428, "nll_loss": 1.408141016960144, "rewards/accuracies": 1.0, "rewards/chosen": -0.14101552963256836, "rewards/margins": 0.13780477643013, "rewards/rejected": -0.27882030606269836, "step": 910 }, { "epoch": 1.4349881796690307, "grad_norm": 0.24730490148067474, "learning_rate": 9.017476191538555e-07, "log_odds_chosen": 1.8357172012329102, "log_odds_ratio": -0.17157533764839172, "logits/chosen": -0.7459608912467957, "logits/rejected": -1.3676772117614746, "logps/chosen": -1.3629581928253174, "logps/rejected": -2.9562366008758545, "loss": 1.4015, "nll_loss": 1.38435697555542, "rewards/accuracies": 1.0, "rewards/chosen": -0.13629582524299622, "rewards/margins": 0.15932784974575043, "rewards/rejected": -0.29562368988990784, "step": 911 }, { "epoch": 1.4365642237982663, "grad_norm": 0.22700850665569305, "learning_rate": 8.971576299927832e-07, "log_odds_chosen": 1.86017906665802, "log_odds_ratio": -0.15699787437915802, "logits/chosen": -0.6873756647109985, "logits/rejected": -1.701200246810913, "logps/chosen": -1.449873447418213, "logps/rejected": -3.0903689861297607, "loss": 1.4724, "nll_loss": 1.4567129611968994, "rewards/accuracies": 1.0, "rewards/chosen": -0.1449873447418213, "rewards/margins": 0.16404958069324493, "rewards/rejected": -0.3090369403362274, "step": 912 }, { "epoch": 1.438140267927502, "grad_norm": 0.28582850098609924, "learning_rate": 8.925759722790591e-07, "log_odds_chosen": 1.6427593231201172, "log_odds_ratio": -0.18476685881614685, "logits/chosen": -0.6677998304367065, "logits/rejected": -1.5819510221481323, "logps/chosen": -1.421278953552246, "logps/rejected": -2.847679615020752, "loss": 1.449, "nll_loss": 1.4305495023727417, "rewards/accuracies": 1.0, "rewards/chosen": -0.1421278864145279, "rewards/margins": 0.14264006912708282, "rewards/rejected": -0.2847679853439331, "step": 913 }, { "epoch": 1.4397163120567376, "grad_norm": 0.2541874051094055, "learning_rate": 8.880026806249192e-07, "log_odds_chosen": 1.9597915410995483, "log_odds_ratio": -0.13926208019256592, "logits/chosen": -0.5797353982925415, "logits/rejected": -1.6747076511383057, "logps/chosen": -1.4759434461593628, "logps/rejected": -3.2159194946289062, "loss": 1.4919, "nll_loss": 1.4780035018920898, "rewards/accuracies": 1.0, "rewards/chosen": -0.14759434759616852, "rewards/margins": 0.17399759590625763, "rewards/rejected": -0.32159194350242615, "step": 914 }, { "epoch": 1.4412923561859732, "grad_norm": 0.2716968357563019, "learning_rate": 8.834377895793965e-07, "log_odds_chosen": 1.6315258741378784, "log_odds_ratio": -0.2065974473953247, "logits/chosen": -0.6029942631721497, "logits/rejected": -1.4257211685180664, "logps/chosen": -1.5510449409484863, "logps/rejected": -2.998202323913574, "loss": 1.5693, "nll_loss": 1.5486116409301758, "rewards/accuracies": 1.0, "rewards/chosen": -0.15510451793670654, "rewards/margins": 0.14471575617790222, "rewards/rejected": -0.29982027411460876, "step": 915 }, { "epoch": 1.4428684003152088, "grad_norm": 0.23609581589698792, "learning_rate": 8.78881333628063e-07, "log_odds_chosen": 1.6295254230499268, "log_odds_ratio": -0.1823047250509262, "logits/chosen": -0.674763023853302, "logits/rejected": -1.675118088722229, "logps/chosen": -1.4699325561523438, "logps/rejected": -2.8940014839172363, "loss": 1.4839, "nll_loss": 1.4656920433044434, "rewards/accuracies": 1.0, "rewards/chosen": -0.1469932496547699, "rewards/margins": 0.1424068808555603, "rewards/rejected": -0.2894001603126526, "step": 916 }, { "epoch": 1.4444444444444444, "grad_norm": 0.24852561950683594, "learning_rate": 8.743333471927672e-07, "log_odds_chosen": 1.8598127365112305, "log_odds_ratio": -0.15133577585220337, "logits/chosen": -0.7297662496566772, "logits/rejected": -1.6425448656082153, "logps/chosen": -1.4743287563323975, "logps/rejected": -3.1196818351745605, "loss": 1.4979, "nll_loss": 1.4827524423599243, "rewards/accuracies": 1.0, "rewards/chosen": -0.14743287861347198, "rewards/margins": 0.1645352989435196, "rewards/rejected": -0.31196820735931396, "step": 917 }, { "epoch": 1.44602048857368, "grad_norm": 0.3501012623310089, "learning_rate": 8.697938646313742e-07, "log_odds_chosen": 1.7917332649230957, "log_odds_ratio": -0.1569167822599411, "logits/chosen": -0.733870267868042, "logits/rejected": -1.4836645126342773, "logps/chosen": -1.3811607360839844, "logps/rejected": -2.9354400634765625, "loss": 1.4034, "nll_loss": 1.3876993656158447, "rewards/accuracies": 1.0, "rewards/chosen": -0.13811607658863068, "rewards/margins": 0.1554279625415802, "rewards/rejected": -0.2935440242290497, "step": 918 }, { "epoch": 1.4475965327029157, "grad_norm": 0.29661837220191956, "learning_rate": 8.652629202375075e-07, "log_odds_chosen": 1.5683361291885376, "log_odds_ratio": -0.20053435862064362, "logits/chosen": -0.5060651898384094, "logits/rejected": -1.541445016860962, "logps/chosen": -1.397658348083496, "logps/rejected": -2.7494089603424072, "loss": 1.4367, "nll_loss": 1.4166853427886963, "rewards/accuracies": 1.0, "rewards/chosen": -0.13976582884788513, "rewards/margins": 0.13517507910728455, "rewards/rejected": -0.2749409079551697, "step": 919 }, { "epoch": 1.4491725768321513, "grad_norm": 0.2720679044723511, "learning_rate": 8.607405482402861e-07, "log_odds_chosen": 1.9913804531097412, "log_odds_ratio": -0.1532764434814453, "logits/chosen": -0.7506877779960632, "logits/rejected": -1.6161788702011108, "logps/chosen": -1.2819592952728271, "logps/rejected": -3.0097174644470215, "loss": 1.3169, "nll_loss": 1.3015979528427124, "rewards/accuracies": 1.0, "rewards/chosen": -0.12819592654705048, "rewards/margins": 0.17277583479881287, "rewards/rejected": -0.30097177624702454, "step": 920 }, { "epoch": 1.450748620961387, "grad_norm": 0.22886787354946136, "learning_rate": 8.562267828040712e-07, "log_odds_chosen": 1.8573884963989258, "log_odds_ratio": -0.15454642474651337, "logits/chosen": -0.7215290665626526, "logits/rejected": -1.881230354309082, "logps/chosen": -1.4047327041625977, "logps/rejected": -3.031428337097168, "loss": 1.4239, "nll_loss": 1.408409595489502, "rewards/accuracies": 1.0, "rewards/chosen": -0.14047329127788544, "rewards/margins": 0.1626695692539215, "rewards/rejected": -0.30314287543296814, "step": 921 }, { "epoch": 1.4523246650906225, "grad_norm": 0.2560023069381714, "learning_rate": 8.517216580282048e-07, "log_odds_chosen": 1.6058309078216553, "log_odds_ratio": -0.19469963014125824, "logits/chosen": -0.5811789631843567, "logits/rejected": -1.6048799753189087, "logps/chosen": -1.4780242443084717, "logps/rejected": -2.8814961910247803, "loss": 1.5187, "nll_loss": 1.4992363452911377, "rewards/accuracies": 1.0, "rewards/chosen": -0.1478024423122406, "rewards/margins": 0.1403472125530243, "rewards/rejected": -0.2881496250629425, "step": 922 }, { "epoch": 1.4539007092198581, "grad_norm": 0.2533600330352783, "learning_rate": 8.472252079467535e-07, "log_odds_chosen": 1.6077029705047607, "log_odds_ratio": -0.18846000730991364, "logits/chosen": -0.6689071655273438, "logits/rejected": -1.556738257408142, "logps/chosen": -1.4537690877914429, "logps/rejected": -2.8543403148651123, "loss": 1.4782, "nll_loss": 1.4593662023544312, "rewards/accuracies": 1.0, "rewards/chosen": -0.14537690579891205, "rewards/margins": 0.14005713164806366, "rewards/rejected": -0.2854340374469757, "step": 923 }, { "epoch": 1.4554767533490938, "grad_norm": 0.32297271490097046, "learning_rate": 8.427374665282488e-07, "log_odds_chosen": 1.466186761856079, "log_odds_ratio": -0.21466611325740814, "logits/chosen": -0.5589500069618225, "logits/rejected": -1.2779663801193237, "logps/chosen": -1.3692665100097656, "logps/rejected": -2.6176676750183105, "loss": 1.4064, "nll_loss": 1.3848856687545776, "rewards/accuracies": 1.0, "rewards/chosen": -0.13692665100097656, "rewards/margins": 0.1248401328921318, "rewards/rejected": -0.2617667615413666, "step": 924 }, { "epoch": 1.4570527974783294, "grad_norm": 0.4079399108886719, "learning_rate": 8.382584676754336e-07, "log_odds_chosen": 1.702414870262146, "log_odds_ratio": -0.1787688285112381, "logits/chosen": -0.7131695747375488, "logits/rejected": -1.474623203277588, "logps/chosen": -1.4565544128417969, "logps/rejected": -2.950603723526001, "loss": 1.4802, "nll_loss": 1.4623353481292725, "rewards/accuracies": 1.0, "rewards/chosen": -0.14565543830394745, "rewards/margins": 0.14940495789051056, "rewards/rejected": -0.295060396194458, "step": 925 }, { "epoch": 1.458628841607565, "grad_norm": 0.2525225877761841, "learning_rate": 8.337882452250058e-07, "log_odds_chosen": 2.172259569168091, "log_odds_ratio": -0.13398639857769012, "logits/chosen": -0.7223413586616516, "logits/rejected": -1.7857404947280884, "logps/chosen": -1.4725472927093506, "logps/rejected": -3.4278881549835205, "loss": 1.4867, "nll_loss": 1.4733326435089111, "rewards/accuracies": 1.0, "rewards/chosen": -0.14725472033023834, "rewards/margins": 0.1955341100692749, "rewards/rejected": -0.34278884530067444, "step": 926 }, { "epoch": 1.4602048857368006, "grad_norm": 0.2317046821117401, "learning_rate": 8.293268329473602e-07, "log_odds_chosen": 2.142016887664795, "log_odds_ratio": -0.12687087059020996, "logits/chosen": -0.7637448310852051, "logits/rejected": -1.6723371744155884, "logps/chosen": -1.4088551998138428, "logps/rejected": -3.313737392425537, "loss": 1.4323, "nll_loss": 1.419594407081604, "rewards/accuracies": 1.0, "rewards/chosen": -0.14088551700115204, "rewards/margins": 0.19048823416233063, "rewards/rejected": -0.33137375116348267, "step": 927 }, { "epoch": 1.4617809298660362, "grad_norm": 0.286592036485672, "learning_rate": 8.248742645463367e-07, "log_odds_chosen": 1.9152957201004028, "log_odds_ratio": -0.15829113125801086, "logits/chosen": -0.6550620794296265, "logits/rejected": -1.551281452178955, "logps/chosen": -1.4756174087524414, "logps/rejected": -3.178788900375366, "loss": 1.4828, "nll_loss": 1.4670010805130005, "rewards/accuracies": 1.0, "rewards/chosen": -0.14756174385547638, "rewards/margins": 0.1703171581029892, "rewards/rejected": -0.3178789019584656, "step": 928 }, { "epoch": 1.4633569739952719, "grad_norm": 0.23163653910160065, "learning_rate": 8.204305736589612e-07, "log_odds_chosen": 1.832330584526062, "log_odds_ratio": -0.16355212032794952, "logits/chosen": -0.6462885141372681, "logits/rejected": -1.4336743354797363, "logps/chosen": -1.4303230047225952, "logps/rejected": -3.0414857864379883, "loss": 1.4481, "nll_loss": 1.4317905902862549, "rewards/accuracies": 1.0, "rewards/chosen": -0.14303229749202728, "rewards/margins": 0.16111627221107483, "rewards/rejected": -0.3041485846042633, "step": 929 }, { "epoch": 1.4649330181245075, "grad_norm": 0.23074650764465332, "learning_rate": 8.159957938551966e-07, "log_odds_chosen": 1.7751812934875488, "log_odds_ratio": -0.17015007138252258, "logits/chosen": -0.6098726391792297, "logits/rejected": -1.5128607749938965, "logps/chosen": -1.4367517232894897, "logps/rejected": -2.985832929611206, "loss": 1.464, "nll_loss": 1.4469714164733887, "rewards/accuracies": 1.0, "rewards/chosen": -0.14367519319057465, "rewards/margins": 0.15490810573101044, "rewards/rejected": -0.2985832691192627, "step": 930 }, { "epoch": 1.4665090622537431, "grad_norm": 0.23624230921268463, "learning_rate": 8.115699586376865e-07, "log_odds_chosen": 1.5871126651763916, "log_odds_ratio": -0.1996283382177353, "logits/chosen": -0.7303210496902466, "logits/rejected": -1.4325451850891113, "logps/chosen": -1.480905532836914, "logps/rejected": -2.8737592697143555, "loss": 1.5001, "nll_loss": 1.480181336402893, "rewards/accuracies": 1.0, "rewards/chosen": -0.14809054136276245, "rewards/margins": 0.13928541541099548, "rewards/rejected": -0.28737592697143555, "step": 931 }, { "epoch": 1.4680851063829787, "grad_norm": 0.25147050619125366, "learning_rate": 8.071531014415018e-07, "log_odds_chosen": 2.015183448791504, "log_odds_ratio": -0.14438295364379883, "logits/chosen": -0.683853805065155, "logits/rejected": -1.5350584983825684, "logps/chosen": -1.4378182888031006, "logps/rejected": -3.22896146774292, "loss": 1.4412, "nll_loss": 1.4267596006393433, "rewards/accuracies": 1.0, "rewards/chosen": -0.14378182590007782, "rewards/margins": 0.17911432683467865, "rewards/rejected": -0.32289615273475647, "step": 932 }, { "epoch": 1.4696611505122144, "grad_norm": 0.21603430807590485, "learning_rate": 8.027452556338894e-07, "log_odds_chosen": 1.8661415576934814, "log_odds_ratio": -0.14683236181735992, "logits/chosen": -0.6550284624099731, "logits/rejected": -1.7531710863113403, "logps/chosen": -1.4416742324829102, "logps/rejected": -3.0784685611724854, "loss": 1.4616, "nll_loss": 1.4468977451324463, "rewards/accuracies": 1.0, "rewards/chosen": -0.14416742324829102, "rewards/margins": 0.16367945075035095, "rewards/rejected": -0.30784687399864197, "step": 933 }, { "epoch": 1.47123719464145, "grad_norm": 0.26335662603378296, "learning_rate": 7.983464545140179e-07, "log_odds_chosen": 1.9164276123046875, "log_odds_ratio": -0.1412404477596283, "logits/chosen": -0.7396938800811768, "logits/rejected": -1.5333837270736694, "logps/chosen": -1.3823633193969727, "logps/rejected": -3.0580759048461914, "loss": 1.3968, "nll_loss": 1.3826264142990112, "rewards/accuracies": 1.0, "rewards/chosen": -0.13823631405830383, "rewards/margins": 0.1675712913274765, "rewards/rejected": -0.30580762028694153, "step": 934 }, { "epoch": 1.4728132387706856, "grad_norm": 0.25700458884239197, "learning_rate": 7.939567313127295e-07, "log_odds_chosen": 1.8454346656799316, "log_odds_ratio": -0.15232446789741516, "logits/chosen": -0.6836897730827332, "logits/rejected": -1.4595261812210083, "logps/chosen": -1.446830153465271, "logps/rejected": -3.071077346801758, "loss": 1.4705, "nll_loss": 1.4552760124206543, "rewards/accuracies": 1.0, "rewards/chosen": -0.14468303322792053, "rewards/margins": 0.1624247133731842, "rewards/rejected": -0.30710774660110474, "step": 935 }, { "epoch": 1.4743892828999212, "grad_norm": 0.24883194267749786, "learning_rate": 7.89576119192286e-07, "log_odds_chosen": 2.0168988704681396, "log_odds_ratio": -0.13523153960704803, "logits/chosen": -0.6411904692649841, "logits/rejected": -1.6028637886047363, "logps/chosen": -1.3860918283462524, "logps/rejected": -3.1596243381500244, "loss": 1.3974, "nll_loss": 1.3838660717010498, "rewards/accuracies": 1.0, "rewards/chosen": -0.13860918581485748, "rewards/margins": 0.17735326290130615, "rewards/rejected": -0.31596243381500244, "step": 936 }, { "epoch": 1.4759653270291568, "grad_norm": 0.23368825018405914, "learning_rate": 7.852046512461201e-07, "log_odds_chosen": 1.8750478029251099, "log_odds_ratio": -0.1503620147705078, "logits/chosen": -0.6881729960441589, "logits/rejected": -1.6647746562957764, "logps/chosen": -1.4039406776428223, "logps/rejected": -3.042243003845215, "loss": 1.4253, "nll_loss": 1.410217046737671, "rewards/accuracies": 1.0, "rewards/chosen": -0.14039407670497894, "rewards/margins": 0.1638302356004715, "rewards/rejected": -0.30422431230545044, "step": 937 }, { "epoch": 1.4775413711583925, "grad_norm": 0.23154808580875397, "learning_rate": 7.808423604985843e-07, "log_odds_chosen": 1.8925951719284058, "log_odds_ratio": -0.15831367671489716, "logits/chosen": -0.6020694375038147, "logits/rejected": -1.4571864604949951, "logps/chosen": -1.4015604257583618, "logps/rejected": -3.0591988563537598, "loss": 1.4503, "nll_loss": 1.4344905614852905, "rewards/accuracies": 1.0, "rewards/chosen": -0.14015603065490723, "rewards/margins": 0.16576388478279114, "rewards/rejected": -0.30591991543769836, "step": 938 }, { "epoch": 1.479117415287628, "grad_norm": 0.2233363389968872, "learning_rate": 7.764892799047005e-07, "log_odds_chosen": 1.9696848392486572, "log_odds_ratio": -0.1418209969997406, "logits/chosen": -0.6627472043037415, "logits/rejected": -1.6807780265808105, "logps/chosen": -1.4396893978118896, "logps/rejected": -3.1809778213500977, "loss": 1.4603, "nll_loss": 1.4460759162902832, "rewards/accuracies": 1.0, "rewards/chosen": -0.14396893978118896, "rewards/margins": 0.17412887513637543, "rewards/rejected": -0.3180978000164032, "step": 939 }, { "epoch": 1.4806934594168637, "grad_norm": 0.24284207820892334, "learning_rate": 7.721454423499143e-07, "log_odds_chosen": 2.046962261199951, "log_odds_ratio": -0.13433170318603516, "logits/chosen": -0.7078008055686951, "logits/rejected": -1.7467715740203857, "logps/chosen": -1.4217815399169922, "logps/rejected": -3.2299644947052, "loss": 1.4417, "nll_loss": 1.428259015083313, "rewards/accuracies": 1.0, "rewards/chosen": -0.14217817783355713, "rewards/margins": 0.18081827461719513, "rewards/rejected": -0.32299643754959106, "step": 940 }, { "epoch": 1.4822695035460993, "grad_norm": 0.2638827860355377, "learning_rate": 7.678108806498441e-07, "log_odds_chosen": 2.0251903533935547, "log_odds_ratio": -0.14184138178825378, "logits/chosen": -0.636859655380249, "logits/rejected": -1.5217381715774536, "logps/chosen": -1.3980457782745361, "logps/rejected": -3.1872143745422363, "loss": 1.4285, "nll_loss": 1.414327621459961, "rewards/accuracies": 1.0, "rewards/chosen": -0.13980457186698914, "rewards/margins": 0.17891687154769897, "rewards/rejected": -0.3187214434146881, "step": 941 }, { "epoch": 1.483845547675335, "grad_norm": 0.270585298538208, "learning_rate": 7.634856275500315e-07, "log_odds_chosen": 1.4698821306228638, "log_odds_ratio": -0.22061733901500702, "logits/chosen": -0.650209367275238, "logits/rejected": -1.3704414367675781, "logps/chosen": -1.5313105583190918, "logps/rejected": -2.823129892349243, "loss": 1.554, "nll_loss": 1.5318888425827026, "rewards/accuracies": 1.0, "rewards/chosen": -0.15313105285167694, "rewards/margins": 0.12918195128440857, "rewards/rejected": -0.2823129892349243, "step": 942 }, { "epoch": 1.4854215918045706, "grad_norm": 0.21558013558387756, "learning_rate": 7.591697157256991e-07, "log_odds_chosen": 1.9673588275909424, "log_odds_ratio": -0.14971627295017242, "logits/chosen": -0.6312054991722107, "logits/rejected": -1.532444715499878, "logps/chosen": -1.3634440898895264, "logps/rejected": -3.0835490226745605, "loss": 1.3868, "nll_loss": 1.3718693256378174, "rewards/accuracies": 1.0, "rewards/chosen": -0.13634440302848816, "rewards/margins": 0.17201051115989685, "rewards/rejected": -0.308354914188385, "step": 943 }, { "epoch": 1.4869976359338062, "grad_norm": 0.304609090089798, "learning_rate": 7.548631777814995e-07, "log_odds_chosen": 2.017549753189087, "log_odds_ratio": -0.14845839142799377, "logits/chosen": -0.652974009513855, "logits/rejected": -1.509749174118042, "logps/chosen": -1.4153010845184326, "logps/rejected": -3.203185558319092, "loss": 1.4445, "nll_loss": 1.4296934604644775, "rewards/accuracies": 1.0, "rewards/chosen": -0.1415301263332367, "rewards/margins": 0.1787884533405304, "rewards/rejected": -0.3203185796737671, "step": 944 }, { "epoch": 1.4885736800630418, "grad_norm": 0.24522024393081665, "learning_rate": 7.505660462512689e-07, "log_odds_chosen": 1.6701476573944092, "log_odds_ratio": -0.18431419134140015, "logits/chosen": -0.7164368629455566, "logits/rejected": -1.4171056747436523, "logps/chosen": -1.33930504322052, "logps/rejected": -2.765798330307007, "loss": 1.3831, "nll_loss": 1.3646361827850342, "rewards/accuracies": 1.0, "rewards/chosen": -0.133930504322052, "rewards/margins": 0.1426493227481842, "rewards/rejected": -0.2765798568725586, "step": 945 }, { "epoch": 1.4901497241922774, "grad_norm": 0.2594417333602905, "learning_rate": 7.462783535977842e-07, "log_odds_chosen": 1.9742743968963623, "log_odds_ratio": -0.13439792394638062, "logits/chosen": -0.691044807434082, "logits/rejected": -1.5234267711639404, "logps/chosen": -1.3967187404632568, "logps/rejected": -3.130528211593628, "loss": 1.4195, "nll_loss": 1.4061055183410645, "rewards/accuracies": 1.0, "rewards/chosen": -0.13967187702655792, "rewards/margins": 0.17338094115257263, "rewards/rejected": -0.31305280327796936, "step": 946 }, { "epoch": 1.491725768321513, "grad_norm": 0.2217145413160324, "learning_rate": 7.420001322125156e-07, "log_odds_chosen": 1.2451107501983643, "log_odds_ratio": -0.2715088427066803, "logits/chosen": -0.7807783484458923, "logits/rejected": -1.3123338222503662, "logps/chosen": -1.3806712627410889, "logps/rejected": -2.438361644744873, "loss": 1.414, "nll_loss": 1.3868746757507324, "rewards/accuracies": 1.0, "rewards/chosen": -0.1380671262741089, "rewards/margins": 0.10576906055212021, "rewards/rejected": -0.2438361942768097, "step": 947 }, { "epoch": 1.4933018124507487, "grad_norm": 0.23894211649894714, "learning_rate": 7.377314144153814e-07, "log_odds_chosen": 1.5071243047714233, "log_odds_ratio": -0.20632603764533997, "logits/chosen": -0.7174784541130066, "logits/rejected": -1.4949740171432495, "logps/chosen": -1.4536449909210205, "logps/rejected": -2.7604901790618896, "loss": 1.4843, "nll_loss": 1.4636691808700562, "rewards/accuracies": 1.0, "rewards/chosen": -0.14536450803279877, "rewards/margins": 0.130684494972229, "rewards/rejected": -0.27604901790618896, "step": 948 }, { "epoch": 1.4948778565799843, "grad_norm": 0.23438452184200287, "learning_rate": 7.334722324545064e-07, "log_odds_chosen": 1.9249755144119263, "log_odds_ratio": -0.14309164881706238, "logits/chosen": -0.6152241230010986, "logits/rejected": -1.7680805921554565, "logps/chosen": -1.4081079959869385, "logps/rejected": -3.0983264446258545, "loss": 1.4138, "nll_loss": 1.3994516134262085, "rewards/accuracies": 1.0, "rewards/chosen": -0.1408107876777649, "rewards/margins": 0.169021874666214, "rewards/rejected": -0.30983269214630127, "step": 949 }, { "epoch": 1.49645390070922, "grad_norm": 0.24368099868297577, "learning_rate": 7.292226185059756e-07, "log_odds_chosen": 1.7671029567718506, "log_odds_ratio": -0.17132027447223663, "logits/chosen": -0.6248744130134583, "logits/rejected": -1.4099971055984497, "logps/chosen": -1.4691294431686401, "logps/rejected": -3.029083251953125, "loss": 1.5011, "nll_loss": 1.483997106552124, "rewards/accuracies": 1.0, "rewards/chosen": -0.14691296219825745, "rewards/margins": 0.15599539875984192, "rewards/rejected": -0.30290836095809937, "step": 950 }, { "epoch": 1.4980299448384555, "grad_norm": 0.21915243566036224, "learning_rate": 7.249826046735927e-07, "log_odds_chosen": 1.8139275312423706, "log_odds_ratio": -0.1609688103199005, "logits/chosen": -0.6943358778953552, "logits/rejected": -1.5638314485549927, "logps/chosen": -1.4063531160354614, "logps/rejected": -2.99365496635437, "loss": 1.4284, "nll_loss": 1.412264347076416, "rewards/accuracies": 1.0, "rewards/chosen": -0.14063531160354614, "rewards/margins": 0.1587301790714264, "rewards/rejected": -0.2993655204772949, "step": 951 }, { "epoch": 1.4996059889676912, "grad_norm": 0.2773244082927704, "learning_rate": 7.207522229886379e-07, "log_odds_chosen": 1.9699525833129883, "log_odds_ratio": -0.14488418400287628, "logits/chosen": -0.713561475276947, "logits/rejected": -1.392665982246399, "logps/chosen": -1.332568645477295, "logps/rejected": -3.0436148643493652, "loss": 1.362, "nll_loss": 1.3474806547164917, "rewards/accuracies": 1.0, "rewards/chosen": -0.13325685262680054, "rewards/margins": 0.17110465466976166, "rewards/rejected": -0.3043615221977234, "step": 952 }, { "epoch": 1.5011820330969265, "grad_norm": 0.2424214631319046, "learning_rate": 7.165315054096228e-07, "log_odds_chosen": 1.8157105445861816, "log_odds_ratio": -0.17059148848056793, "logits/chosen": -0.6951842904090881, "logits/rejected": -1.5444356203079224, "logps/chosen": -1.4868419170379639, "logps/rejected": -3.096999168395996, "loss": 1.4947, "nll_loss": 1.4776148796081543, "rewards/accuracies": 1.0, "rewards/chosen": -0.14868420362472534, "rewards/margins": 0.16101573407649994, "rewards/rejected": -0.3096999228000641, "step": 953 }, { "epoch": 1.5027580772261624, "grad_norm": 0.21515390276908875, "learning_rate": 7.123204838220533e-07, "log_odds_chosen": 2.155853033065796, "log_odds_ratio": -0.13583272695541382, "logits/chosen": -0.7590830326080322, "logits/rejected": -1.6461933851242065, "logps/chosen": -1.365823745727539, "logps/rejected": -3.2752747535705566, "loss": 1.3884, "nll_loss": 1.3747961521148682, "rewards/accuracies": 1.0, "rewards/chosen": -0.1365823745727539, "rewards/margins": 0.1909450888633728, "rewards/rejected": -0.3275274634361267, "step": 954 }, { "epoch": 1.5043341213553978, "grad_norm": 0.2391800582408905, "learning_rate": 7.081191900381862e-07, "log_odds_chosen": 1.9468826055526733, "log_odds_ratio": -0.1482704132795334, "logits/chosen": -0.6943378448486328, "logits/rejected": -1.5789308547973633, "logps/chosen": -1.4783952236175537, "logps/rejected": -3.210561990737915, "loss": 1.5009, "nll_loss": 1.4860384464263916, "rewards/accuracies": 1.0, "rewards/chosen": -0.1478395164012909, "rewards/margins": 0.17321667075157166, "rewards/rejected": -0.32105618715286255, "step": 955 }, { "epoch": 1.5059101654846336, "grad_norm": 0.22466237843036652, "learning_rate": 7.039276557967895e-07, "log_odds_chosen": 2.2401113510131836, "log_odds_ratio": -0.11504784226417542, "logits/chosen": -0.6450394988059998, "logits/rejected": -1.8218344449996948, "logps/chosen": -1.361754298210144, "logps/rejected": -3.338789701461792, "loss": 1.3929, "nll_loss": 1.3813717365264893, "rewards/accuracies": 1.0, "rewards/chosen": -0.13617542386054993, "rewards/margins": 0.1977035403251648, "rewards/rejected": -0.3338789641857147, "step": 956 }, { "epoch": 1.507486209613869, "grad_norm": 0.22904148697853088, "learning_rate": 6.997459127629028e-07, "log_odds_chosen": 1.5114691257476807, "log_odds_ratio": -0.21334531903266907, "logits/chosen": -0.6755697727203369, "logits/rejected": -1.4697928428649902, "logps/chosen": -1.4383251667022705, "logps/rejected": -2.7475852966308594, "loss": 1.4786, "nll_loss": 1.457273244857788, "rewards/accuracies": 1.0, "rewards/chosen": -0.14383253455162048, "rewards/margins": 0.13092602789402008, "rewards/rejected": -0.27475854754447937, "step": 957 }, { "epoch": 1.5090622537431049, "grad_norm": 0.23240822553634644, "learning_rate": 6.955739925275963e-07, "log_odds_chosen": 2.2500858306884766, "log_odds_ratio": -0.11607085168361664, "logits/chosen": -0.7263058423995972, "logits/rejected": -1.6341439485549927, "logps/chosen": -1.357983946800232, "logps/rejected": -3.345592737197876, "loss": 1.3866, "nll_loss": 1.3749525547027588, "rewards/accuracies": 1.0, "rewards/chosen": -0.1357983946800232, "rewards/margins": 0.19876088201999664, "rewards/rejected": -0.33455926179885864, "step": 958 }, { "epoch": 1.5106382978723403, "grad_norm": 0.2334897220134735, "learning_rate": 6.914119266077354e-07, "log_odds_chosen": 2.0655581951141357, "log_odds_ratio": -0.12655213475227356, "logits/chosen": -0.5946290493011475, "logits/rejected": -1.7702455520629883, "logps/chosen": -1.485993504524231, "logps/rejected": -3.332597017288208, "loss": 1.5112, "nll_loss": 1.498542070388794, "rewards/accuracies": 1.0, "rewards/chosen": -0.14859935641288757, "rewards/margins": 0.18466034531593323, "rewards/rejected": -0.3332597017288208, "step": 959 }, { "epoch": 1.5122143420015761, "grad_norm": 0.3587821125984192, "learning_rate": 6.872597464457397e-07, "log_odds_chosen": 1.7171913385391235, "log_odds_ratio": -0.18248029053211212, "logits/chosen": -0.6445052027702332, "logits/rejected": -1.4844458103179932, "logps/chosen": -1.3720635175704956, "logps/rejected": -2.8570876121520996, "loss": 1.3911, "nll_loss": 1.3728132247924805, "rewards/accuracies": 1.0, "rewards/chosen": -0.13720636069774628, "rewards/margins": 0.1485024243593216, "rewards/rejected": -0.28570878505706787, "step": 960 }, { "epoch": 1.5137903861308115, "grad_norm": 0.4160895347595215, "learning_rate": 6.831174834093476e-07, "log_odds_chosen": 1.7886178493499756, "log_odds_ratio": -0.16238519549369812, "logits/chosen": -0.6922329068183899, "logits/rejected": -1.4965581893920898, "logps/chosen": -1.3912993669509888, "logps/rejected": -2.948406934738159, "loss": 1.4105, "nll_loss": 1.3942997455596924, "rewards/accuracies": 1.0, "rewards/chosen": -0.13912993669509888, "rewards/margins": 0.15571075677871704, "rewards/rejected": -0.2948406934738159, "step": 961 }, { "epoch": 1.5153664302600474, "grad_norm": 0.23221762478351593, "learning_rate": 6.789851687913784e-07, "log_odds_chosen": 1.5177117586135864, "log_odds_ratio": -0.22095687687397003, "logits/chosen": -0.7221294045448303, "logits/rejected": -1.587545394897461, "logps/chosen": -1.3432116508483887, "logps/rejected": -2.6421313285827637, "loss": 1.3738, "nll_loss": 1.3516777753829956, "rewards/accuracies": 1.0, "rewards/chosen": -0.1343211829662323, "rewards/margins": 0.12989196181297302, "rewards/rejected": -0.26421311497688293, "step": 962 }, { "epoch": 1.5169424743892828, "grad_norm": 0.22573482990264893, "learning_rate": 6.748628338094937e-07, "log_odds_chosen": 1.9619501829147339, "log_odds_ratio": -0.1419064700603485, "logits/chosen": -0.6415755152702332, "logits/rejected": -1.533616304397583, "logps/chosen": -1.3573863506317139, "logps/rejected": -3.067230463027954, "loss": 1.3843, "nll_loss": 1.3701223134994507, "rewards/accuracies": 1.0, "rewards/chosen": -0.13573864102363586, "rewards/margins": 0.1709844172000885, "rewards/rejected": -0.30672305822372437, "step": 963 }, { "epoch": 1.5185185185185186, "grad_norm": 0.2693047523498535, "learning_rate": 6.707505096059662e-07, "log_odds_chosen": 1.9928638935089111, "log_odds_ratio": -0.14110992848873138, "logits/chosen": -0.6593167781829834, "logits/rejected": -1.5224437713623047, "logps/chosen": -1.368179440498352, "logps/rejected": -3.118009090423584, "loss": 1.4031, "nll_loss": 1.3889775276184082, "rewards/accuracies": 1.0, "rewards/chosen": -0.13681794703006744, "rewards/margins": 0.1749829649925232, "rewards/rejected": -0.31180089712142944, "step": 964 }, { "epoch": 1.520094562647754, "grad_norm": 0.23469862341880798, "learning_rate": 6.666482272474412e-07, "log_odds_chosen": 2.061997413635254, "log_odds_ratio": -0.12642227113246918, "logits/chosen": -0.6676424145698547, "logits/rejected": -1.6210864782333374, "logps/chosen": -1.4189386367797852, "logps/rejected": -3.244745969772339, "loss": 1.4398, "nll_loss": 1.427161455154419, "rewards/accuracies": 1.0, "rewards/chosen": -0.1418938785791397, "rewards/margins": 0.18258069455623627, "rewards/rejected": -0.324474573135376, "step": 965 }, { "epoch": 1.5216706067769898, "grad_norm": 0.23726844787597656, "learning_rate": 6.625560177247023e-07, "log_odds_chosen": 1.8917025327682495, "log_odds_ratio": -0.1467244178056717, "logits/chosen": -0.6768726110458374, "logits/rejected": -1.567640781402588, "logps/chosen": -1.5120936632156372, "logps/rejected": -3.1917314529418945, "loss": 1.526, "nll_loss": 1.511344075202942, "rewards/accuracies": 1.0, "rewards/chosen": -0.15120935440063477, "rewards/margins": 0.16796378791332245, "rewards/rejected": -0.319173127412796, "step": 966 }, { "epoch": 1.5232466509062252, "grad_norm": 0.2998630106449127, "learning_rate": 6.584739119524383e-07, "log_odds_chosen": 2.130988836288452, "log_odds_ratio": -0.13314604759216309, "logits/chosen": -0.6467706561088562, "logits/rejected": -1.6562049388885498, "logps/chosen": -1.3789918422698975, "logps/rejected": -3.2636961936950684, "loss": 1.387, "nll_loss": 1.373658537864685, "rewards/accuracies": 1.0, "rewards/chosen": -0.13789919018745422, "rewards/margins": 0.18847045302391052, "rewards/rejected": -0.32636961340904236, "step": 967 }, { "epoch": 1.524822695035461, "grad_norm": 0.2360096573829651, "learning_rate": 6.544019407690077e-07, "log_odds_chosen": 1.5989983081817627, "log_odds_ratio": -0.20044191181659698, "logits/chosen": -0.6707049608230591, "logits/rejected": -1.4034690856933594, "logps/chosen": -1.4732283353805542, "logps/rejected": -2.875408172607422, "loss": 1.4888, "nll_loss": 1.4687124490737915, "rewards/accuracies": 1.0, "rewards/chosen": -0.1473228484392166, "rewards/margins": 0.14021798968315125, "rewards/rejected": -0.28754085302352905, "step": 968 }, { "epoch": 1.5263987391646965, "grad_norm": 0.23426444828510284, "learning_rate": 6.503401349362083e-07, "log_odds_chosen": 1.7697782516479492, "log_odds_ratio": -0.16758820414543152, "logits/chosen": -0.7855375409126282, "logits/rejected": -1.5976003408432007, "logps/chosen": -1.5018411874771118, "logps/rejected": -3.0670080184936523, "loss": 1.5198, "nll_loss": 1.5030412673950195, "rewards/accuracies": 1.0, "rewards/chosen": -0.15018412470817566, "rewards/margins": 0.1565166711807251, "rewards/rejected": -0.30670079588890076, "step": 969 }, { "epoch": 1.5279747832939323, "grad_norm": 0.2340129315853119, "learning_rate": 6.462885251390433e-07, "log_odds_chosen": 1.9121594429016113, "log_odds_ratio": -0.14837515354156494, "logits/chosen": -0.729539692401886, "logits/rejected": -1.4843120574951172, "logps/chosen": -1.4921919107437134, "logps/rejected": -3.192976236343384, "loss": 1.4954, "nll_loss": 1.4805225133895874, "rewards/accuracies": 1.0, "rewards/chosen": -0.14921918511390686, "rewards/margins": 0.17007845640182495, "rewards/rejected": -0.3192976415157318, "step": 970 }, { "epoch": 1.5295508274231677, "grad_norm": 0.21162472665309906, "learning_rate": 6.422471419854898e-07, "log_odds_chosen": 1.9907008409500122, "log_odds_ratio": -0.13672001659870148, "logits/chosen": -0.6383552551269531, "logits/rejected": -1.5342824459075928, "logps/chosen": -1.3227609395980835, "logps/rejected": -3.050245761871338, "loss": 1.3496, "nll_loss": 1.3359603881835938, "rewards/accuracies": 1.0, "rewards/chosen": -0.13227610290050507, "rewards/margins": 0.17274849116802216, "rewards/rejected": -0.3050246238708496, "step": 971 }, { "epoch": 1.5311268715524036, "grad_norm": 0.2712085247039795, "learning_rate": 6.382160160062662e-07, "log_odds_chosen": 2.169814348220825, "log_odds_ratio": -0.13167349994182587, "logits/chosen": -0.6404174566268921, "logits/rejected": -1.5612825155258179, "logps/chosen": -1.4152562618255615, "logps/rejected": -3.348329782485962, "loss": 1.4393, "nll_loss": 1.4260960817337036, "rewards/accuracies": 1.0, "rewards/chosen": -0.14152562618255615, "rewards/margins": 0.19330734014511108, "rewards/rejected": -0.33483296632766724, "step": 972 }, { "epoch": 1.532702915681639, "grad_norm": 0.2747749090194702, "learning_rate": 6.341951776546044e-07, "log_odds_chosen": 1.8643766641616821, "log_odds_ratio": -0.15171566605567932, "logits/chosen": -0.6093170046806335, "logits/rejected": -1.5533154010772705, "logps/chosen": -1.4239542484283447, "logps/rejected": -3.06095552444458, "loss": 1.4426, "nll_loss": 1.4274111986160278, "rewards/accuracies": 1.0, "rewards/chosen": -0.14239542186260223, "rewards/margins": 0.163700133562088, "rewards/rejected": -0.30609557032585144, "step": 973 }, { "epoch": 1.5342789598108748, "grad_norm": 0.2471790760755539, "learning_rate": 6.301846573060177e-07, "log_odds_chosen": 1.6332510709762573, "log_odds_ratio": -0.18624919652938843, "logits/chosen": -0.7112443447113037, "logits/rejected": -1.3776127099990845, "logps/chosen": -1.3730487823486328, "logps/rejected": -2.780224561691284, "loss": 1.4074, "nll_loss": 1.3887758255004883, "rewards/accuracies": 1.0, "rewards/chosen": -0.1373048722743988, "rewards/margins": 0.14071762561798096, "rewards/rejected": -0.2780224680900574, "step": 974 }, { "epoch": 1.5358550039401102, "grad_norm": 0.235744908452034, "learning_rate": 6.261844852580722e-07, "log_odds_chosen": 1.8206223249435425, "log_odds_ratio": -0.17329441010951996, "logits/chosen": -0.7152392268180847, "logits/rejected": -1.278751015663147, "logps/chosen": -1.3252991437911987, "logps/rejected": -2.9039957523345947, "loss": 1.3583, "nll_loss": 1.3409415483474731, "rewards/accuracies": 1.0, "rewards/chosen": -0.13252991437911987, "rewards/margins": 0.15786969661712646, "rewards/rejected": -0.29039961099624634, "step": 975 }, { "epoch": 1.537431048069346, "grad_norm": 0.2193654179573059, "learning_rate": 6.221946917301561e-07, "log_odds_chosen": 1.9541443586349487, "log_odds_ratio": -0.14933165907859802, "logits/chosen": -0.6110002994537354, "logits/rejected": -1.5597983598709106, "logps/chosen": -1.4144717454910278, "logps/rejected": -3.135641098022461, "loss": 1.4392, "nll_loss": 1.4242618083953857, "rewards/accuracies": 1.0, "rewards/chosen": -0.14144718647003174, "rewards/margins": 0.1721169352531433, "rewards/rejected": -0.31356412172317505, "step": 976 }, { "epoch": 1.5390070921985815, "grad_norm": 0.23790855705738068, "learning_rate": 6.182153068632545e-07, "log_odds_chosen": 1.9404877424240112, "log_odds_ratio": -0.15256814658641815, "logits/chosen": -0.6408179998397827, "logits/rejected": -1.6562281847000122, "logps/chosen": -1.4317872524261475, "logps/rejected": -3.1495001316070557, "loss": 1.467, "nll_loss": 1.4517197608947754, "rewards/accuracies": 1.0, "rewards/chosen": -0.14317873120307922, "rewards/margins": 0.1717713326215744, "rewards/rejected": -0.31495004892349243, "step": 977 }, { "epoch": 1.5405831363278173, "grad_norm": 0.26740771532058716, "learning_rate": 6.142463607197197e-07, "log_odds_chosen": 1.9451872110366821, "log_odds_ratio": -0.15472793579101562, "logits/chosen": -0.708315908908844, "logits/rejected": -1.6773669719696045, "logps/chosen": -1.4440866708755493, "logps/rejected": -3.1698157787323, "loss": 1.4514, "nll_loss": 1.4359383583068848, "rewards/accuracies": 1.0, "rewards/chosen": -0.1444086730480194, "rewards/margins": 0.17257292568683624, "rewards/rejected": -0.31698161363601685, "step": 978 }, { "epoch": 1.5421591804570527, "grad_norm": 0.22817708551883698, "learning_rate": 6.102878832830431e-07, "log_odds_chosen": 1.8373013734817505, "log_odds_ratio": -0.15536919236183167, "logits/chosen": -0.7249546051025391, "logits/rejected": -1.6927638053894043, "logps/chosen": -1.4438010454177856, "logps/rejected": -3.0623109340667725, "loss": 1.4726, "nll_loss": 1.4570345878601074, "rewards/accuracies": 1.0, "rewards/chosen": -0.1443801075220108, "rewards/margins": 0.16185101866722107, "rewards/rejected": -0.30623114109039307, "step": 979 }, { "epoch": 1.5437352245862885, "grad_norm": 0.22524091601371765, "learning_rate": 6.063399044576316e-07, "log_odds_chosen": 1.8837974071502686, "log_odds_ratio": -0.15109311044216156, "logits/chosen": -0.6819709539413452, "logits/rejected": -1.521268606185913, "logps/chosen": -1.3848626613616943, "logps/rejected": -3.028841733932495, "loss": 1.4119, "nll_loss": 1.39683198928833, "rewards/accuracies": 1.0, "rewards/chosen": -0.13848626613616943, "rewards/margins": 0.1643979251384735, "rewards/rejected": -0.30288416147232056, "step": 980 }, { "epoch": 1.545311268715524, "grad_norm": 0.2502342462539673, "learning_rate": 6.024024540685802e-07, "log_odds_chosen": 1.9373964071273804, "log_odds_ratio": -0.15239368379116058, "logits/chosen": -0.62217116355896, "logits/rejected": -1.4089964628219604, "logps/chosen": -1.3904428482055664, "logps/rejected": -3.0897319316864014, "loss": 1.4194, "nll_loss": 1.4041366577148438, "rewards/accuracies": 1.0, "rewards/chosen": -0.13904426991939545, "rewards/margins": 0.1699289232492447, "rewards/rejected": -0.30897319316864014, "step": 981 }, { "epoch": 1.5468873128447598, "grad_norm": 0.23337644338607788, "learning_rate": 5.984755618614443e-07, "log_odds_chosen": 1.7698785066604614, "log_odds_ratio": -0.168540820479393, "logits/chosen": -0.7300572991371155, "logits/rejected": -1.625712275505066, "logps/chosen": -1.4284956455230713, "logps/rejected": -2.9774229526519775, "loss": 1.4519, "nll_loss": 1.4350183010101318, "rewards/accuracies": 1.0, "rewards/chosen": -0.14284957945346832, "rewards/margins": 0.1548927277326584, "rewards/rejected": -0.2977423071861267, "step": 982 }, { "epoch": 1.5484633569739952, "grad_norm": 0.2333681285381317, "learning_rate": 5.945592575020199e-07, "log_odds_chosen": 1.8586804866790771, "log_odds_ratio": -0.1674220710992813, "logits/chosen": -0.5435483455657959, "logits/rejected": -1.4977864027023315, "logps/chosen": -1.3972073793411255, "logps/rejected": -3.027332067489624, "loss": 1.433, "nll_loss": 1.416232943534851, "rewards/accuracies": 1.0, "rewards/chosen": -0.13972075283527374, "rewards/margins": 0.16301245987415314, "rewards/rejected": -0.3027332127094269, "step": 983 }, { "epoch": 1.550039401103231, "grad_norm": 0.26622799038887024, "learning_rate": 5.90653570576116e-07, "log_odds_chosen": 2.3747029304504395, "log_odds_ratio": -0.10542309284210205, "logits/chosen": -0.7676352858543396, "logits/rejected": -1.6198368072509766, "logps/chosen": -1.4438918828964233, "logps/rejected": -3.5821423530578613, "loss": 1.4601, "nll_loss": 1.4495563507080078, "rewards/accuracies": 1.0, "rewards/chosen": -0.14438918232917786, "rewards/margins": 0.2138250321149826, "rewards/rejected": -0.35821419954299927, "step": 984 }, { "epoch": 1.5516154452324664, "grad_norm": 0.2317635416984558, "learning_rate": 5.867585305893315e-07, "log_odds_chosen": 1.8688759803771973, "log_odds_ratio": -0.16067832708358765, "logits/chosen": -0.7286397218704224, "logits/rejected": -1.4049956798553467, "logps/chosen": -1.366334080696106, "logps/rejected": -2.998530387878418, "loss": 1.4046, "nll_loss": 1.3885746002197266, "rewards/accuracies": 1.0, "rewards/chosen": -0.13663341104984283, "rewards/margins": 0.1632196605205536, "rewards/rejected": -0.29985305666923523, "step": 985 }, { "epoch": 1.5531914893617023, "grad_norm": 0.21900911629199982, "learning_rate": 5.828741669668337e-07, "log_odds_chosen": 2.1781530380249023, "log_odds_ratio": -0.11259491741657257, "logits/chosen": -0.6381757259368896, "logits/rejected": -1.6998456716537476, "logps/chosen": -1.4175000190734863, "logps/rejected": -3.354124069213867, "loss": 1.4296, "nll_loss": 1.4183080196380615, "rewards/accuracies": 1.0, "rewards/chosen": -0.1417500078678131, "rewards/margins": 0.19366240501403809, "rewards/rejected": -0.3354123830795288, "step": 986 }, { "epoch": 1.5547675334909377, "grad_norm": 0.217463880777359, "learning_rate": 5.790005090531333e-07, "log_odds_chosen": 2.2625317573547363, "log_odds_ratio": -0.12319906800985336, "logits/chosen": -0.6966750621795654, "logits/rejected": -1.684138536453247, "logps/chosen": -1.4314756393432617, "logps/rejected": -3.453754425048828, "loss": 1.4485, "nll_loss": 1.4361519813537598, "rewards/accuracies": 1.0, "rewards/chosen": -0.1431475728750229, "rewards/margins": 0.2022278755903244, "rewards/rejected": -0.3453754782676697, "step": 987 }, { "epoch": 1.5563435776201735, "grad_norm": 0.22708973288536072, "learning_rate": 5.751375861118666e-07, "log_odds_chosen": 2.0565907955169678, "log_odds_ratio": -0.15445038676261902, "logits/chosen": -0.6868225932121277, "logits/rejected": -1.5475994348526, "logps/chosen": -1.444994330406189, "logps/rejected": -3.2818796634674072, "loss": 1.4526, "nll_loss": 1.437137246131897, "rewards/accuracies": 1.0, "rewards/chosen": -0.14449943602085114, "rewards/margins": 0.18368852138519287, "rewards/rejected": -0.3281879127025604, "step": 988 }, { "epoch": 1.557919621749409, "grad_norm": 0.2407715618610382, "learning_rate": 5.712854273255707e-07, "log_odds_chosen": 1.7788302898406982, "log_odds_ratio": -0.1637771874666214, "logits/chosen": -0.5679078102111816, "logits/rejected": -1.6175519227981567, "logps/chosen": -1.4289250373840332, "logps/rejected": -2.9876627922058105, "loss": 1.4662, "nll_loss": 1.4498696327209473, "rewards/accuracies": 1.0, "rewards/chosen": -0.1428924947977066, "rewards/margins": 0.15587377548217773, "rewards/rejected": -0.29876625537872314, "step": 989 }, { "epoch": 1.5594956658786447, "grad_norm": 0.21809129416942596, "learning_rate": 5.674440617954659e-07, "log_odds_chosen": 1.9170739650726318, "log_odds_ratio": -0.14835508167743683, "logits/chosen": -0.7263450026512146, "logits/rejected": -1.3957607746124268, "logps/chosen": -1.3353750705718994, "logps/rejected": -2.998387336730957, "loss": 1.3553, "nll_loss": 1.3404512405395508, "rewards/accuracies": 1.0, "rewards/chosen": -0.13353751599788666, "rewards/margins": 0.16630125045776367, "rewards/rejected": -0.29983875155448914, "step": 990 }, { "epoch": 1.5610717100078801, "grad_norm": 0.24128296971321106, "learning_rate": 5.63613518541234e-07, "log_odds_chosen": 1.9754855632781982, "log_odds_ratio": -0.15521501004695892, "logits/chosen": -0.6127564311027527, "logits/rejected": -1.4858635663986206, "logps/chosen": -1.3786113262176514, "logps/rejected": -3.1153194904327393, "loss": 1.4152, "nll_loss": 1.3997262716293335, "rewards/accuracies": 1.0, "rewards/chosen": -0.13786114752292633, "rewards/margins": 0.17367081344127655, "rewards/rejected": -0.3115319609642029, "step": 991 }, { "epoch": 1.562647754137116, "grad_norm": 0.47279778122901917, "learning_rate": 5.597938265007993e-07, "log_odds_chosen": 2.017469882965088, "log_odds_ratio": -0.1291920691728592, "logits/chosen": -0.6799546480178833, "logits/rejected": -1.6301541328430176, "logps/chosen": -1.3704148530960083, "logps/rejected": -3.135061502456665, "loss": 1.3918, "nll_loss": 1.3788890838623047, "rewards/accuracies": 1.0, "rewards/chosen": -0.13704147934913635, "rewards/margins": 0.17646467685699463, "rewards/rejected": -0.313506156206131, "step": 992 }, { "epoch": 1.5642237982663514, "grad_norm": 0.2880786061286926, "learning_rate": 5.559850145301106e-07, "log_odds_chosen": 1.670323133468628, "log_odds_ratio": -0.18176257610321045, "logits/chosen": -0.6458690166473389, "logits/rejected": -1.3920232057571411, "logps/chosen": -1.407934546470642, "logps/rejected": -2.857358694076538, "loss": 1.4419, "nll_loss": 1.4237000942230225, "rewards/accuracies": 1.0, "rewards/chosen": -0.14079347252845764, "rewards/margins": 0.14494240283966064, "rewards/rejected": -0.2857358753681183, "step": 993 }, { "epoch": 1.5657998423955872, "grad_norm": 0.5181437730789185, "learning_rate": 5.521871114029233e-07, "log_odds_chosen": 1.8897773027420044, "log_odds_ratio": -0.1437710076570511, "logits/chosen": -0.7042770385742188, "logits/rejected": -1.5562762022018433, "logps/chosen": -1.3297470808029175, "logps/rejected": -2.9654946327209473, "loss": 1.3725, "nll_loss": 1.3580961227416992, "rewards/accuracies": 1.0, "rewards/chosen": -0.13297469913959503, "rewards/margins": 0.16357475519180298, "rewards/rejected": -0.2965494394302368, "step": 994 }, { "epoch": 1.5673758865248226, "grad_norm": 0.33190351724624634, "learning_rate": 5.484001458105823e-07, "log_odds_chosen": 1.7056673765182495, "log_odds_ratio": -0.18585379421710968, "logits/chosen": -0.7297174334526062, "logits/rejected": -1.2175813913345337, "logps/chosen": -1.4123536348342896, "logps/rejected": -2.898897171020508, "loss": 1.4367, "nll_loss": 1.4181629419326782, "rewards/accuracies": 1.0, "rewards/chosen": -0.1412353813648224, "rewards/margins": 0.14865434169769287, "rewards/rejected": -0.28988972306251526, "step": 995 }, { "epoch": 1.5689519306540585, "grad_norm": 0.2508215308189392, "learning_rate": 5.446241463618027e-07, "log_odds_chosen": 2.3727543354034424, "log_odds_ratio": -0.1468883752822876, "logits/chosen": -0.6941895484924316, "logits/rejected": -1.445560336112976, "logps/chosen": -1.4314374923706055, "logps/rejected": -3.574099540710449, "loss": 1.455, "nll_loss": 1.4403475522994995, "rewards/accuracies": 1.0, "rewards/chosen": -0.14314374327659607, "rewards/margins": 0.21426618099212646, "rewards/rejected": -0.3574099540710449, "step": 996 }, { "epoch": 1.5705279747832939, "grad_norm": 0.2683565020561218, "learning_rate": 5.40859141582457e-07, "log_odds_chosen": 2.3095686435699463, "log_odds_ratio": -0.10337819159030914, "logits/chosen": -0.7148799896240234, "logits/rejected": -1.6118800640106201, "logps/chosen": -1.2982076406478882, "logps/rejected": -3.3256571292877197, "loss": 1.32, "nll_loss": 1.3096983432769775, "rewards/accuracies": 1.0, "rewards/chosen": -0.12982076406478882, "rewards/margins": 0.2027449607849121, "rewards/rejected": -0.3325657248497009, "step": 997 }, { "epoch": 1.5721040189125297, "grad_norm": 0.260953813791275, "learning_rate": 5.371051599153582e-07, "log_odds_chosen": 1.7071082592010498, "log_odds_ratio": -0.2015705555677414, "logits/chosen": -0.6238905191421509, "logits/rejected": -1.2173104286193848, "logps/chosen": -1.3559688329696655, "logps/rejected": -2.8317506313323975, "loss": 1.3902, "nll_loss": 1.3700270652770996, "rewards/accuracies": 1.0, "rewards/chosen": -0.1355968713760376, "rewards/margins": 0.1475781798362732, "rewards/rejected": -0.2831750512123108, "step": 998 }, { "epoch": 1.573680063041765, "grad_norm": 0.24443098902702332, "learning_rate": 5.333622297200449e-07, "log_odds_chosen": 1.772143006324768, "log_odds_ratio": -0.16182053089141846, "logits/chosen": -0.6557694673538208, "logits/rejected": -1.3393594026565552, "logps/chosen": -1.3531529903411865, "logps/rejected": -2.8749842643737793, "loss": 1.3963, "nll_loss": 1.3801213502883911, "rewards/accuracies": 1.0, "rewards/chosen": -0.13531529903411865, "rewards/margins": 0.1521831452846527, "rewards/rejected": -0.287498414516449, "step": 999 }, { "epoch": 1.575256107171001, "grad_norm": 0.2207130491733551, "learning_rate": 5.296303792725676e-07, "log_odds_chosen": 2.081167221069336, "log_odds_ratio": -0.12388971447944641, "logits/chosen": -0.6979268789291382, "logits/rejected": -1.7110087871551514, "logps/chosen": -1.4649267196655273, "logps/rejected": -3.3177027702331543, "loss": 1.4761, "nll_loss": 1.463676929473877, "rewards/accuracies": 1.0, "rewards/chosen": -0.14649267494678497, "rewards/margins": 0.18527761101722717, "rewards/rejected": -0.33177027106285095, "step": 1000 } ], "logging_steps": 1, "max_steps": 1270, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }