{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 1089, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002761477390403866, "grad_norm": 0.6915842294692993, "learning_rate": 0.0, "log_odds_chosen": 0.39300817251205444, "log_odds_ratio": -0.5194607377052307, "logits/chosen": -0.6593188047409058, "logits/rejected": -0.11274649202823639, "logps/chosen": -1.9585602283477783, "logps/rejected": -2.3031246662139893, "loss": 2.2362, "nll_loss": 2.1842355728149414, "rewards/accuracies": 1.0, "rewards/chosen": -0.1958560198545456, "rewards/margins": 0.034456461668014526, "rewards/rejected": -0.23031246662139893, "step": 1 }, { "epoch": 0.005522954780807732, "grad_norm": 0.8511083722114563, "learning_rate": 4.587155963302753e-08, "log_odds_chosen": 0.4412614703178406, "log_odds_ratio": -0.49888327717781067, "logits/chosen": -0.6945286393165588, "logits/rejected": -0.08449751883745193, "logps/chosen": -1.9912033081054688, "logps/rejected": -2.382397413253784, "loss": 2.275, "nll_loss": 2.2251474857330322, "rewards/accuracies": 1.0, "rewards/chosen": -0.19912034273147583, "rewards/margins": 0.0391194187104702, "rewards/rejected": -0.23823975026607513, "step": 2 }, { "epoch": 0.008284432171211598, "grad_norm": 0.6988061666488647, "learning_rate": 9.174311926605506e-08, "log_odds_chosen": 0.49348145723342896, "log_odds_ratio": -0.4824022948741913, "logits/chosen": -0.7197601795196533, "logits/rejected": -0.13051480054855347, "logps/chosen": -1.9484779834747314, "logps/rejected": -2.385488271713257, "loss": 2.1977, "nll_loss": 2.149470567703247, "rewards/accuracies": 1.0, "rewards/chosen": -0.19484779238700867, "rewards/margins": 0.04370103031396866, "rewards/rejected": -0.23854880034923553, "step": 3 }, { "epoch": 0.011045909561615464, "grad_norm": 0.6778914928436279, "learning_rate": 1.376146788990826e-07, "log_odds_chosen": 0.4488891661167145, "log_odds_ratio": -0.5013620853424072, "logits/chosen": -0.6518482565879822, "logits/rejected": -0.08967436850070953, "logps/chosen": -1.9699251651763916, "logps/rejected": -2.3685381412506104, "loss": 2.204, "nll_loss": 2.1538803577423096, "rewards/accuracies": 0.875, "rewards/chosen": -0.19699251651763916, "rewards/margins": 0.0398612916469574, "rewards/rejected": -0.23685382306575775, "step": 4 }, { "epoch": 0.013807386952019331, "grad_norm": 0.7014802694320679, "learning_rate": 1.8348623853211012e-07, "log_odds_chosen": 0.4621143639087677, "log_odds_ratio": -0.5008962750434875, "logits/chosen": -0.6857364177703857, "logits/rejected": -0.15241342782974243, "logps/chosen": -1.9302603006362915, "logps/rejected": -2.335421562194824, "loss": 2.1786, "nll_loss": 2.128532648086548, "rewards/accuracies": 1.0, "rewards/chosen": -0.19302603602409363, "rewards/margins": 0.04051613062620163, "rewards/rejected": -0.23354215919971466, "step": 5 }, { "epoch": 0.016568864342423197, "grad_norm": 0.6745293140411377, "learning_rate": 2.2935779816513764e-07, "log_odds_chosen": 0.37698638439178467, "log_odds_ratio": -0.5254199504852295, "logits/chosen": -0.6411757469177246, "logits/rejected": -0.2817387282848358, "logps/chosen": -1.9658446311950684, "logps/rejected": -2.2975409030914307, "loss": 2.2215, "nll_loss": 2.168914794921875, "rewards/accuracies": 1.0, "rewards/chosen": -0.19658446311950684, "rewards/margins": 0.03316962346434593, "rewards/rejected": -0.22975412011146545, "step": 6 }, { "epoch": 0.019330341732827064, "grad_norm": 0.6645856499671936, "learning_rate": 2.752293577981652e-07, "log_odds_chosen": 0.22311115264892578, "log_odds_ratio": -0.5942606329917908, "logits/chosen": -0.5768107175827026, "logits/rejected": -0.23452866077423096, "logps/chosen": -2.0238585472106934, "logps/rejected": -2.223414421081543, "loss": 2.2796, "nll_loss": 2.2201786041259766, "rewards/accuracies": 0.875, "rewards/chosen": -0.20238585770130157, "rewards/margins": 0.019955584779381752, "rewards/rejected": -0.22234144806861877, "step": 7 }, { "epoch": 0.022091819123230928, "grad_norm": 0.7377482652664185, "learning_rate": 3.211009174311927e-07, "log_odds_chosen": 0.39164018630981445, "log_odds_ratio": -0.5184764266014099, "logits/chosen": -0.566084623336792, "logits/rejected": -0.12580394744873047, "logps/chosen": -2.0374059677124023, "logps/rejected": -2.3862533569335938, "loss": 2.3026, "nll_loss": 2.2507545948028564, "rewards/accuracies": 1.0, "rewards/chosen": -0.20374059677124023, "rewards/margins": 0.0348847433924675, "rewards/rejected": -0.23862534761428833, "step": 8 }, { "epoch": 0.024853296513634795, "grad_norm": 0.7108282446861267, "learning_rate": 3.6697247706422023e-07, "log_odds_chosen": 0.6321737170219421, "log_odds_ratio": -0.4328705072402954, "logits/chosen": -0.7941892743110657, "logits/rejected": -0.22760047018527985, "logps/chosen": -1.9183871746063232, "logps/rejected": -2.4786453247070312, "loss": 2.1856, "nll_loss": 2.142301082611084, "rewards/accuracies": 1.0, "rewards/chosen": -0.19183871150016785, "rewards/margins": 0.05602581799030304, "rewards/rejected": -0.24786454439163208, "step": 9 }, { "epoch": 0.027614773904038662, "grad_norm": 0.8324993252754211, "learning_rate": 4.128440366972478e-07, "log_odds_chosen": 0.39953452348709106, "log_odds_ratio": -0.5179459452629089, "logits/chosen": -0.7621469497680664, "logits/rejected": -0.25276613235473633, "logps/chosen": -2.0076873302459717, "logps/rejected": -2.3631021976470947, "loss": 2.2827, "nll_loss": 2.2309515476226807, "rewards/accuracies": 1.0, "rewards/chosen": -0.20076872408390045, "rewards/margins": 0.03554149717092514, "rewards/rejected": -0.23631024360656738, "step": 10 }, { "epoch": 0.030376251294442526, "grad_norm": 0.7557547092437744, "learning_rate": 4.587155963302753e-07, "log_odds_chosen": 0.5315287113189697, "log_odds_ratio": -0.4713534414768219, "logits/chosen": -0.6893935203552246, "logits/rejected": -0.10655307024717331, "logps/chosen": -1.9816298484802246, "logps/rejected": -2.457181930541992, "loss": 2.257, "nll_loss": 2.209847927093506, "rewards/accuracies": 0.875, "rewards/chosen": -0.1981630027294159, "rewards/margins": 0.04755519703030586, "rewards/rejected": -0.24571821093559265, "step": 11 }, { "epoch": 0.03313772868484639, "grad_norm": 0.7169145941734314, "learning_rate": 5.045871559633028e-07, "log_odds_chosen": 0.38786107301712036, "log_odds_ratio": -0.5263106822967529, "logits/chosen": -0.6816898584365845, "logits/rejected": -0.0623253732919693, "logps/chosen": -1.9207288026809692, "logps/rejected": -2.262866973876953, "loss": 2.1985, "nll_loss": 2.145916223526001, "rewards/accuracies": 0.875, "rewards/chosen": -0.19207286834716797, "rewards/margins": 0.0342138335108757, "rewards/rejected": -0.22628670930862427, "step": 12 }, { "epoch": 0.03589920607525026, "grad_norm": 0.6504296660423279, "learning_rate": 5.504587155963304e-07, "log_odds_chosen": 0.47358548641204834, "log_odds_ratio": -0.48640355467796326, "logits/chosen": -0.6333162784576416, "logits/rejected": -0.1628946214914322, "logps/chosen": -1.867828369140625, "logps/rejected": -2.280285596847534, "loss": 2.1385, "nll_loss": 2.089879035949707, "rewards/accuracies": 1.0, "rewards/chosen": -0.1867828369140625, "rewards/margins": 0.0412457212805748, "rewards/rejected": -0.2280285507440567, "step": 13 }, { "epoch": 0.03866068346565413, "grad_norm": 0.83363276720047, "learning_rate": 5.963302752293579e-07, "log_odds_chosen": 0.4213864207267761, "log_odds_ratio": -0.521136999130249, "logits/chosen": -0.7305208444595337, "logits/rejected": -0.08356200903654099, "logps/chosen": -2.022002935409546, "logps/rejected": -2.3972954750061035, "loss": 2.2795, "nll_loss": 2.2273383140563965, "rewards/accuracies": 0.75, "rewards/chosen": -0.2022002935409546, "rewards/margins": 0.03752923756837845, "rewards/rejected": -0.23972955346107483, "step": 14 }, { "epoch": 0.04142216085605799, "grad_norm": 0.8258711099624634, "learning_rate": 6.422018348623854e-07, "log_odds_chosen": 0.5449070930480957, "log_odds_ratio": -0.4581969678401947, "logits/chosen": -0.8105592131614685, "logits/rejected": 0.08067083358764648, "logps/chosen": -1.9474968910217285, "logps/rejected": -2.428217649459839, "loss": 2.2093, "nll_loss": 2.163527488708496, "rewards/accuracies": 1.0, "rewards/chosen": -0.19474971294403076, "rewards/margins": 0.04807208105921745, "rewards/rejected": -0.24282175302505493, "step": 15 }, { "epoch": 0.044183638246461855, "grad_norm": 0.7511261701583862, "learning_rate": 6.880733944954129e-07, "log_odds_chosen": 0.4355267882347107, "log_odds_ratio": -0.5088008046150208, "logits/chosen": -0.7654774188995361, "logits/rejected": 0.09955525398254395, "logps/chosen": -1.873271107673645, "logps/rejected": -2.249717950820923, "loss": 2.1545, "nll_loss": 2.1036317348480225, "rewards/accuracies": 0.875, "rewards/chosen": -0.18732713162899017, "rewards/margins": 0.03764466941356659, "rewards/rejected": -0.22497178614139557, "step": 16 }, { "epoch": 0.04694511563686572, "grad_norm": 0.7405815124511719, "learning_rate": 7.339449541284405e-07, "log_odds_chosen": 0.49768519401550293, "log_odds_ratio": -0.4804110527038574, "logits/chosen": -0.6771326661109924, "logits/rejected": 0.09095388650894165, "logps/chosen": -1.9401291608810425, "logps/rejected": -2.3801112174987793, "loss": 2.1836, "nll_loss": 2.1355254650115967, "rewards/accuracies": 1.0, "rewards/chosen": -0.19401292502880096, "rewards/margins": 0.04399820417165756, "rewards/rejected": -0.23801112174987793, "step": 17 }, { "epoch": 0.04970659302726959, "grad_norm": 0.7064201831817627, "learning_rate": 7.79816513761468e-07, "log_odds_chosen": 0.6101190447807312, "log_odds_ratio": -0.43982943892478943, "logits/chosen": -0.6823788285255432, "logits/rejected": -0.1550363153219223, "logps/chosen": -1.9634783267974854, "logps/rejected": -2.509146213531494, "loss": 2.2062, "nll_loss": 2.162249803543091, "rewards/accuracies": 1.0, "rewards/chosen": -0.19634784758090973, "rewards/margins": 0.05456679314374924, "rewards/rejected": -0.25091463327407837, "step": 18 }, { "epoch": 0.05246807041767346, "grad_norm": 0.709898829460144, "learning_rate": 8.256880733944956e-07, "log_odds_chosen": 0.22375424206256866, "log_odds_ratio": -0.5918958783149719, "logits/chosen": -0.6519614458084106, "logits/rejected": -0.030727151781320572, "logps/chosen": -1.9863125085830688, "logps/rejected": -2.1811602115631104, "loss": 2.2528, "nll_loss": 2.193619966506958, "rewards/accuracies": 0.875, "rewards/chosen": -0.19863125681877136, "rewards/margins": 0.01948476769030094, "rewards/rejected": -0.21811603009700775, "step": 19 }, { "epoch": 0.055229547808077324, "grad_norm": 0.7068823575973511, "learning_rate": 8.71559633027523e-07, "log_odds_chosen": 0.4315674602985382, "log_odds_ratio": -0.5065562129020691, "logits/chosen": -0.6818419694900513, "logits/rejected": -0.010788477957248688, "logps/chosen": -1.9227614402770996, "logps/rejected": -2.3006279468536377, "loss": 2.205, "nll_loss": 2.1543147563934326, "rewards/accuracies": 1.0, "rewards/chosen": -0.19227616488933563, "rewards/margins": 0.03778664767742157, "rewards/rejected": -0.23006278276443481, "step": 20 }, { "epoch": 0.057991025198481184, "grad_norm": 0.7781582474708557, "learning_rate": 9.174311926605506e-07, "log_odds_chosen": 0.2822119891643524, "log_odds_ratio": -0.5701829195022583, "logits/chosen": -0.5542811155319214, "logits/rejected": 0.0044986791908741, "logps/chosen": -2.092810869216919, "logps/rejected": -2.3459970951080322, "loss": 2.339, "nll_loss": 2.2819888591766357, "rewards/accuracies": 0.75, "rewards/chosen": -0.2092810869216919, "rewards/margins": 0.025318622589111328, "rewards/rejected": -0.23459972441196442, "step": 21 }, { "epoch": 0.06075250258888505, "grad_norm": 0.7080368399620056, "learning_rate": 9.633027522935782e-07, "log_odds_chosen": 0.46356773376464844, "log_odds_ratio": -0.504152774810791, "logits/chosen": -0.6566605567932129, "logits/rejected": -0.23016926646232605, "logps/chosen": -1.9449937343597412, "logps/rejected": -2.354017972946167, "loss": 2.1914, "nll_loss": 2.1410012245178223, "rewards/accuracies": 0.875, "rewards/chosen": -0.19449937343597412, "rewards/margins": 0.040902428328990936, "rewards/rejected": -0.23540180921554565, "step": 22 }, { "epoch": 0.06351397997928893, "grad_norm": 0.7032366394996643, "learning_rate": 1.0091743119266057e-06, "log_odds_chosen": 0.41283032298088074, "log_odds_ratio": -0.5154061913490295, "logits/chosen": -0.6851422786712646, "logits/rejected": -0.43942469358444214, "logps/chosen": -1.9216886758804321, "logps/rejected": -2.2877392768859863, "loss": 2.2005, "nll_loss": 2.1489334106445312, "rewards/accuracies": 1.0, "rewards/chosen": -0.19216886162757874, "rewards/margins": 0.03660505264997482, "rewards/rejected": -0.22877395153045654, "step": 23 }, { "epoch": 0.06627545736969279, "grad_norm": 0.759819746017456, "learning_rate": 1.055045871559633e-06, "log_odds_chosen": 0.5156121253967285, "log_odds_ratio": -0.46936866641044617, "logits/chosen": -0.6876986622810364, "logits/rejected": -0.031957462430000305, "logps/chosen": -1.9351065158843994, "logps/rejected": -2.390742063522339, "loss": 2.1928, "nll_loss": 2.1458399295806885, "rewards/accuracies": 1.0, "rewards/chosen": -0.19351065158843994, "rewards/margins": 0.04556357115507126, "rewards/rejected": -0.2390742301940918, "step": 24 }, { "epoch": 0.06903693476009665, "grad_norm": 0.8110620379447937, "learning_rate": 1.1009174311926608e-06, "log_odds_chosen": 0.6373166441917419, "log_odds_ratio": -0.4303508996963501, "logits/chosen": -0.6892279982566833, "logits/rejected": -0.09929540753364563, "logps/chosen": -1.9359254837036133, "logps/rejected": -2.5030038356781006, "loss": 2.2035, "nll_loss": 2.1604182720184326, "rewards/accuracies": 1.0, "rewards/chosen": -0.19359253346920013, "rewards/margins": 0.05670783296227455, "rewards/rejected": -0.2503003776073456, "step": 25 }, { "epoch": 0.07179841215050052, "grad_norm": 0.7390461564064026, "learning_rate": 1.1467889908256882e-06, "log_odds_chosen": 0.36132410168647766, "log_odds_ratio": -0.5341425538063049, "logits/chosen": -0.6445504426956177, "logits/rejected": -0.1332886964082718, "logps/chosen": -1.9676666259765625, "logps/rejected": -2.2869884967803955, "loss": 2.2373, "nll_loss": 2.1838440895080566, "rewards/accuracies": 1.0, "rewards/chosen": -0.1967666745185852, "rewards/margins": 0.03193218633532524, "rewards/rejected": -0.22869886457920074, "step": 26 }, { "epoch": 0.07455988954090438, "grad_norm": 0.6214093565940857, "learning_rate": 1.1926605504587159e-06, "log_odds_chosen": 0.4441767632961273, "log_odds_ratio": -0.5069965124130249, "logits/chosen": -0.5817967653274536, "logits/rejected": -0.287628710269928, "logps/chosen": -1.8485782146453857, "logps/rejected": -2.2337605953216553, "loss": 2.1166, "nll_loss": 2.065877914428711, "rewards/accuracies": 1.0, "rewards/chosen": -0.1848578304052353, "rewards/margins": 0.03851822763681412, "rewards/rejected": -0.22337606549263, "step": 27 }, { "epoch": 0.07732136693130826, "grad_norm": 0.6619555950164795, "learning_rate": 1.2385321100917433e-06, "log_odds_chosen": 0.5049865245819092, "log_odds_ratio": -0.47638368606567383, "logits/chosen": -0.6138817071914673, "logits/rejected": -0.2531018853187561, "logps/chosen": -1.9172720909118652, "logps/rejected": -2.361618995666504, "loss": 2.1756, "nll_loss": 2.127944231033325, "rewards/accuracies": 1.0, "rewards/chosen": -0.19172722101211548, "rewards/margins": 0.04443468153476715, "rewards/rejected": -0.23616188764572144, "step": 28 }, { "epoch": 0.08008284432171212, "grad_norm": 0.6942340135574341, "learning_rate": 1.2844036697247707e-06, "log_odds_chosen": 0.4293629825115204, "log_odds_ratio": -0.5098441243171692, "logits/chosen": -0.6483147740364075, "logits/rejected": -0.05790426582098007, "logps/chosen": -2.0040318965911865, "logps/rejected": -2.3848392963409424, "loss": 2.2622, "nll_loss": 2.2111713886260986, "rewards/accuracies": 1.0, "rewards/chosen": -0.20040319859981537, "rewards/margins": 0.03808073699474335, "rewards/rejected": -0.23848393559455872, "step": 29 }, { "epoch": 0.08284432171211598, "grad_norm": 0.7068150043487549, "learning_rate": 1.3302752293577984e-06, "log_odds_chosen": 0.6293392181396484, "log_odds_ratio": -0.43408164381980896, "logits/chosen": -0.5528810024261475, "logits/rejected": -0.038134124130010605, "logps/chosen": -1.9628605842590332, "logps/rejected": -2.524247407913208, "loss": 2.2025, "nll_loss": 2.1590917110443115, "rewards/accuracies": 1.0, "rewards/chosen": -0.19628606736660004, "rewards/margins": 0.05613870173692703, "rewards/rejected": -0.25242477655410767, "step": 30 }, { "epoch": 0.08560579910251985, "grad_norm": 0.7285196185112, "learning_rate": 1.3761467889908258e-06, "log_odds_chosen": 0.5635695457458496, "log_odds_ratio": -0.4573448598384857, "logits/chosen": -0.6580309271812439, "logits/rejected": -0.1388925164937973, "logps/chosen": -1.97215735912323, "logps/rejected": -2.47501540184021, "loss": 2.2267, "nll_loss": 2.1810009479522705, "rewards/accuracies": 1.0, "rewards/chosen": -0.1972157210111618, "rewards/margins": 0.050285838544368744, "rewards/rejected": -0.24750158190727234, "step": 31 }, { "epoch": 0.08836727649292371, "grad_norm": 0.663361132144928, "learning_rate": 1.4220183486238535e-06, "log_odds_chosen": 0.4357861876487732, "log_odds_ratio": -0.5044962763786316, "logits/chosen": -0.5250373482704163, "logits/rejected": -0.026727374643087387, "logps/chosen": -1.9720088243484497, "logps/rejected": -2.3577969074249268, "loss": 2.2324, "nll_loss": 2.1819026470184326, "rewards/accuracies": 1.0, "rewards/chosen": -0.19720089435577393, "rewards/margins": 0.0385788157582283, "rewards/rejected": -0.23577968776226044, "step": 32 }, { "epoch": 0.09112875388332758, "grad_norm": 0.6460317373275757, "learning_rate": 1.467889908256881e-06, "log_odds_chosen": 0.5821229219436646, "log_odds_ratio": -0.45059195160865784, "logits/chosen": -0.5484606027603149, "logits/rejected": -0.17182235419750214, "logps/chosen": -1.8812311887741089, "logps/rejected": -2.394681453704834, "loss": 2.1347, "nll_loss": 2.0896193981170654, "rewards/accuracies": 1.0, "rewards/chosen": -0.18812312185764313, "rewards/margins": 0.05134502053260803, "rewards/rejected": -0.23946812748908997, "step": 33 }, { "epoch": 0.09389023127373144, "grad_norm": 0.6461722254753113, "learning_rate": 1.5137614678899084e-06, "log_odds_chosen": 0.5589693784713745, "log_odds_ratio": -0.466278612613678, "logits/chosen": -0.5394806861877441, "logits/rejected": -0.11535287648439407, "logps/chosen": -1.8948417901992798, "logps/rejected": -2.3906807899475098, "loss": 2.1429, "nll_loss": 2.0962626934051514, "rewards/accuracies": 1.0, "rewards/chosen": -0.18948417901992798, "rewards/margins": 0.04958389326930046, "rewards/rejected": -0.23906809091567993, "step": 34 }, { "epoch": 0.09665170866413532, "grad_norm": 0.6749213933944702, "learning_rate": 1.559633027522936e-06, "log_odds_chosen": 0.4189353883266449, "log_odds_ratio": -0.5096418261528015, "logits/chosen": -0.8145462274551392, "logits/rejected": -0.184108167886734, "logps/chosen": -1.9128409624099731, "logps/rejected": -2.278416395187378, "loss": 2.1804, "nll_loss": 2.12943172454834, "rewards/accuracies": 1.0, "rewards/chosen": -0.19128410518169403, "rewards/margins": 0.036557577550411224, "rewards/rejected": -0.22784166038036346, "step": 35 }, { "epoch": 0.09941318605453918, "grad_norm": 0.7020057439804077, "learning_rate": 1.6055045871559635e-06, "log_odds_chosen": 0.5992317199707031, "log_odds_ratio": -0.4454975128173828, "logits/chosen": -0.755850613117218, "logits/rejected": -0.0925610214471817, "logps/chosen": -1.921108603477478, "logps/rejected": -2.4531819820404053, "loss": 2.1777, "nll_loss": 2.1331448554992676, "rewards/accuracies": 1.0, "rewards/chosen": -0.19211086630821228, "rewards/margins": 0.05320734530687332, "rewards/rejected": -0.245318204164505, "step": 36 }, { "epoch": 0.10217466344494304, "grad_norm": 0.7380937337875366, "learning_rate": 1.6513761467889911e-06, "log_odds_chosen": 0.5436604619026184, "log_odds_ratio": -0.4642760455608368, "logits/chosen": -0.5516951680183411, "logits/rejected": -0.17646433413028717, "logps/chosen": -2.041116952896118, "logps/rejected": -2.531487226486206, "loss": 2.3149, "nll_loss": 2.268436908721924, "rewards/accuracies": 1.0, "rewards/chosen": -0.20411169528961182, "rewards/margins": 0.04903702437877655, "rewards/rejected": -0.25314873456954956, "step": 37 }, { "epoch": 0.10493614083534691, "grad_norm": 0.697014331817627, "learning_rate": 1.6972477064220186e-06, "log_odds_chosen": 0.3754476308822632, "log_odds_ratio": -0.5259881615638733, "logits/chosen": -0.6446620225906372, "logits/rejected": -0.2302635908126831, "logps/chosen": -1.916767954826355, "logps/rejected": -2.246654748916626, "loss": 2.1948, "nll_loss": 2.142183303833008, "rewards/accuracies": 1.0, "rewards/chosen": -0.1916767954826355, "rewards/margins": 0.032988667488098145, "rewards/rejected": -0.22466546297073364, "step": 38 }, { "epoch": 0.10769761822575077, "grad_norm": 0.6962437033653259, "learning_rate": 1.743119266055046e-06, "log_odds_chosen": 0.4513578414916992, "log_odds_ratio": -0.5085355043411255, "logits/chosen": -0.5062450170516968, "logits/rejected": -0.0742938369512558, "logps/chosen": -2.0077242851257324, "logps/rejected": -2.4134199619293213, "loss": 2.2615, "nll_loss": 2.2106730937957764, "rewards/accuracies": 0.875, "rewards/chosen": -0.20077240467071533, "rewards/margins": 0.04056959226727486, "rewards/rejected": -0.24134202301502228, "step": 39 }, { "epoch": 0.11045909561615465, "grad_norm": 0.8404136896133423, "learning_rate": 1.7889908256880737e-06, "log_odds_chosen": 0.26594164967536926, "log_odds_ratio": -0.5785322189331055, "logits/chosen": -0.7206758260726929, "logits/rejected": -0.2084624469280243, "logps/chosen": -2.028856039047241, "logps/rejected": -2.2659192085266113, "loss": 2.3049, "nll_loss": 2.2470221519470215, "rewards/accuracies": 0.75, "rewards/chosen": -0.20288559794425964, "rewards/margins": 0.023706313222646713, "rewards/rejected": -0.22659191489219666, "step": 40 }, { "epoch": 0.11322057300655851, "grad_norm": 0.6305637359619141, "learning_rate": 1.8348623853211011e-06, "log_odds_chosen": 0.4789700508117676, "log_odds_ratio": -0.4894922971725464, "logits/chosen": -0.6578766107559204, "logits/rejected": -0.20148660242557526, "logps/chosen": -1.876691460609436, "logps/rejected": -2.2974884510040283, "loss": 2.1332, "nll_loss": 2.0842790603637695, "rewards/accuracies": 1.0, "rewards/chosen": -0.18766914308071136, "rewards/margins": 0.042079709470272064, "rewards/rejected": -0.22974886000156403, "step": 41 }, { "epoch": 0.11598205039696237, "grad_norm": 0.764166533946991, "learning_rate": 1.8807339449541288e-06, "log_odds_chosen": 0.32667985558509827, "log_odds_ratio": -0.5534327030181885, "logits/chosen": -0.6471579074859619, "logits/rejected": -0.24204234778881073, "logps/chosen": -2.054783582687378, "logps/rejected": -2.3455071449279785, "loss": 2.324, "nll_loss": 2.268658399581909, "rewards/accuracies": 0.75, "rewards/chosen": -0.20547834038734436, "rewards/margins": 0.0290723517537117, "rewards/rejected": -0.23455071449279785, "step": 42 }, { "epoch": 0.11874352778736624, "grad_norm": 0.6828538775444031, "learning_rate": 1.9266055045871564e-06, "log_odds_chosen": 0.39937809109687805, "log_odds_ratio": -0.5205667018890381, "logits/chosen": -0.6188192367553711, "logits/rejected": -0.2733380198478699, "logps/chosen": -1.9134750366210938, "logps/rejected": -2.2601161003112793, "loss": 2.1861, "nll_loss": 2.134024143218994, "rewards/accuracies": 0.875, "rewards/chosen": -0.19134750962257385, "rewards/margins": 0.0346641018986702, "rewards/rejected": -0.22601160407066345, "step": 43 }, { "epoch": 0.1215050051777701, "grad_norm": 0.8159171938896179, "learning_rate": 1.9724770642201837e-06, "log_odds_chosen": 0.32455721497535706, "log_odds_ratio": -0.5553261041641235, "logits/chosen": -0.669414758682251, "logits/rejected": -0.2208831012248993, "logps/chosen": -2.0293149948120117, "logps/rejected": -2.317753314971924, "loss": 2.2895, "nll_loss": 2.233963966369629, "rewards/accuracies": 0.75, "rewards/chosen": -0.20293152332305908, "rewards/margins": 0.028843821957707405, "rewards/rejected": -0.23177534341812134, "step": 44 }, { "epoch": 0.12426648256817398, "grad_norm": 0.6607564091682434, "learning_rate": 2.0183486238532113e-06, "log_odds_chosen": 0.615048885345459, "log_odds_ratio": -0.4402877390384674, "logits/chosen": -0.6855505704879761, "logits/rejected": -0.38655874133110046, "logps/chosen": -1.9126825332641602, "logps/rejected": -2.459097385406494, "loss": 2.1594, "nll_loss": 2.115415096282959, "rewards/accuracies": 1.0, "rewards/chosen": -0.19126826524734497, "rewards/margins": 0.054641470313072205, "rewards/rejected": -0.24590973556041718, "step": 45 }, { "epoch": 0.12702795995857785, "grad_norm": 0.6284722685813904, "learning_rate": 2.064220183486239e-06, "log_odds_chosen": 0.5434573888778687, "log_odds_ratio": -0.467115193605423, "logits/chosen": -0.5798700451850891, "logits/rejected": -0.13538922369480133, "logps/chosen": -1.7848091125488281, "logps/rejected": -2.2502312660217285, "loss": 2.0394, "nll_loss": 1.99272620677948, "rewards/accuracies": 1.0, "rewards/chosen": -0.17848090827465057, "rewards/margins": 0.046542223542928696, "rewards/rejected": -0.22502315044403076, "step": 46 }, { "epoch": 0.1297894373489817, "grad_norm": 0.6467039585113525, "learning_rate": 2.110091743119266e-06, "log_odds_chosen": 0.3773024082183838, "log_odds_ratio": -0.5347570180892944, "logits/chosen": -0.559330403804779, "logits/rejected": -0.16529709100723267, "logps/chosen": -2.0019242763519287, "logps/rejected": -2.339775323867798, "loss": 2.2632, "nll_loss": 2.209690809249878, "rewards/accuracies": 0.875, "rewards/chosen": -0.2001924067735672, "rewards/margins": 0.0337851420044899, "rewards/rejected": -0.2339775562286377, "step": 47 }, { "epoch": 0.13255091473938557, "grad_norm": 0.803294837474823, "learning_rate": 2.155963302752294e-06, "log_odds_chosen": 0.16557860374450684, "log_odds_ratio": -0.6379687190055847, "logits/chosen": -0.6091009378433228, "logits/rejected": -0.18343707919120789, "logps/chosen": -2.083127975463867, "logps/rejected": -2.241791009902954, "loss": 2.3479, "nll_loss": 2.2841379642486572, "rewards/accuracies": 0.875, "rewards/chosen": -0.20831279456615448, "rewards/margins": 0.015866274014115334, "rewards/rejected": -0.22417910397052765, "step": 48 }, { "epoch": 0.13531239212978943, "grad_norm": 0.7350826263427734, "learning_rate": 2.2018348623853215e-06, "log_odds_chosen": 0.663620114326477, "log_odds_ratio": -0.430046409368515, "logits/chosen": -0.6330848336219788, "logits/rejected": -0.12406854331493378, "logps/chosen": -1.9958436489105225, "logps/rejected": -2.595897912979126, "loss": 2.2445, "nll_loss": 2.201472282409668, "rewards/accuracies": 1.0, "rewards/chosen": -0.19958436489105225, "rewards/margins": 0.06000541150569916, "rewards/rejected": -0.2595897912979126, "step": 49 }, { "epoch": 0.1380738695201933, "grad_norm": 0.6953707337379456, "learning_rate": 2.2477064220183487e-06, "log_odds_chosen": 0.558549702167511, "log_odds_ratio": -0.4727778434753418, "logits/chosen": -0.6551685333251953, "logits/rejected": -0.10959107428789139, "logps/chosen": -1.9449548721313477, "logps/rejected": -2.4448442459106445, "loss": 2.1886, "nll_loss": 2.1412806510925293, "rewards/accuracies": 0.875, "rewards/chosen": -0.19449549913406372, "rewards/margins": 0.04998895525932312, "rewards/rejected": -0.24448445439338684, "step": 50 }, { "epoch": 0.14083534691059718, "grad_norm": 0.7255063056945801, "learning_rate": 2.2935779816513764e-06, "log_odds_chosen": 0.439910352230072, "log_odds_ratio": -0.5028793811798096, "logits/chosen": -0.5782126784324646, "logits/rejected": -0.3915577232837677, "logps/chosen": -1.9679124355316162, "logps/rejected": -2.3573005199432373, "loss": 2.2331, "nll_loss": 2.1828465461730957, "rewards/accuracies": 1.0, "rewards/chosen": -0.19679124653339386, "rewards/margins": 0.03893881291151047, "rewards/rejected": -0.23573008179664612, "step": 51 }, { "epoch": 0.14359682430100104, "grad_norm": 0.6826524138450623, "learning_rate": 2.339449541284404e-06, "log_odds_chosen": 0.4098314940929413, "log_odds_ratio": -0.5124779939651489, "logits/chosen": -0.5287759304046631, "logits/rejected": -0.16041654348373413, "logps/chosen": -1.9367855787277222, "logps/rejected": -2.296450614929199, "loss": 2.1899, "nll_loss": 2.1386733055114746, "rewards/accuracies": 1.0, "rewards/chosen": -0.19367855787277222, "rewards/margins": 0.03596651181578636, "rewards/rejected": -0.22964505851268768, "step": 52 }, { "epoch": 0.1463583016914049, "grad_norm": 0.7680190801620483, "learning_rate": 2.3853211009174317e-06, "log_odds_chosen": 0.3706192970275879, "log_odds_ratio": -0.5296811461448669, "logits/chosen": -0.707636833190918, "logits/rejected": -0.3318819999694824, "logps/chosen": -1.9044169187545776, "logps/rejected": -2.228182315826416, "loss": 2.1714, "nll_loss": 2.118480920791626, "rewards/accuracies": 1.0, "rewards/chosen": -0.19044168293476105, "rewards/margins": 0.03237656503915787, "rewards/rejected": -0.2228182554244995, "step": 53 }, { "epoch": 0.14911977908180876, "grad_norm": 0.6361218690872192, "learning_rate": 2.431192660550459e-06, "log_odds_chosen": 0.37140461802482605, "log_odds_ratio": -0.5330734848976135, "logits/chosen": -0.5706905126571655, "logits/rejected": -0.2818271815776825, "logps/chosen": -1.8717925548553467, "logps/rejected": -2.1973752975463867, "loss": 2.1391, "nll_loss": 2.0858311653137207, "rewards/accuracies": 0.875, "rewards/chosen": -0.18717925250530243, "rewards/margins": 0.03255828469991684, "rewards/rejected": -0.21973752975463867, "step": 54 }, { "epoch": 0.15188125647221262, "grad_norm": 0.6758925914764404, "learning_rate": 2.4770642201834866e-06, "log_odds_chosen": 0.351237416267395, "log_odds_ratio": -0.5363888740539551, "logits/chosen": -0.6902980804443359, "logits/rejected": -0.1499267816543579, "logps/chosen": -1.9404265880584717, "logps/rejected": -2.248072624206543, "loss": 2.2093, "nll_loss": 2.155627965927124, "rewards/accuracies": 1.0, "rewards/chosen": -0.19404268264770508, "rewards/margins": 0.030764613300561905, "rewards/rejected": -0.2248072773218155, "step": 55 }, { "epoch": 0.1546427338626165, "grad_norm": 0.7609654664993286, "learning_rate": 2.522935779816514e-06, "log_odds_chosen": 0.6203876733779907, "log_odds_ratio": -0.4391648769378662, "logits/chosen": -0.6777594685554504, "logits/rejected": -0.12744097411632538, "logps/chosen": -1.920168161392212, "logps/rejected": -2.470646381378174, "loss": 2.2005, "nll_loss": 2.1565370559692383, "rewards/accuracies": 1.0, "rewards/chosen": -0.1920168101787567, "rewards/margins": 0.055047836154699326, "rewards/rejected": -0.24706465005874634, "step": 56 }, { "epoch": 0.15740421125302037, "grad_norm": 0.6103418469429016, "learning_rate": 2.5688073394495415e-06, "log_odds_chosen": 0.3935878276824951, "log_odds_ratio": -0.52540522813797, "logits/chosen": -0.5779827833175659, "logits/rejected": -0.07134772837162018, "logps/chosen": -1.8075436353683472, "logps/rejected": -2.1494383811950684, "loss": 2.0685, "nll_loss": 2.0159270763397217, "rewards/accuracies": 0.875, "rewards/chosen": -0.18075433373451233, "rewards/margins": 0.03418949246406555, "rewards/rejected": -0.21494384109973907, "step": 57 }, { "epoch": 0.16016568864342423, "grad_norm": 0.6918542981147766, "learning_rate": 2.6146788990825687e-06, "log_odds_chosen": 0.508127748966217, "log_odds_ratio": -0.48639971017837524, "logits/chosen": -0.5526314377784729, "logits/rejected": -0.17470352351665497, "logps/chosen": -1.9131710529327393, "logps/rejected": -2.359304904937744, "loss": 2.1784, "nll_loss": 2.1297237873077393, "rewards/accuracies": 0.875, "rewards/chosen": -0.1913171112537384, "rewards/margins": 0.044613372534513474, "rewards/rejected": -0.23593048751354218, "step": 58 }, { "epoch": 0.1629271660338281, "grad_norm": 0.6938620209693909, "learning_rate": 2.6605504587155968e-06, "log_odds_chosen": 0.2925473153591156, "log_odds_ratio": -0.563460111618042, "logits/chosen": -0.5527081489562988, "logits/rejected": -0.2188190072774887, "logps/chosen": -1.9204390048980713, "logps/rejected": -2.175715684890747, "loss": 2.1995, "nll_loss": 2.1431734561920166, "rewards/accuracies": 1.0, "rewards/chosen": -0.19204390048980713, "rewards/margins": 0.025527678430080414, "rewards/rejected": -0.21757157146930695, "step": 59 }, { "epoch": 0.16568864342423195, "grad_norm": 0.6408482193946838, "learning_rate": 2.706422018348624e-06, "log_odds_chosen": 0.3286433815956116, "log_odds_ratio": -0.5471742749214172, "logits/chosen": -0.5031524300575256, "logits/rejected": -0.016629882156848907, "logps/chosen": -1.9514718055725098, "logps/rejected": -2.240278720855713, "loss": 2.2124, "nll_loss": 2.157729387283325, "rewards/accuracies": 0.875, "rewards/chosen": -0.19514718651771545, "rewards/margins": 0.028880706056952477, "rewards/rejected": -0.22402788698673248, "step": 60 }, { "epoch": 0.16845012081463584, "grad_norm": 0.5977794528007507, "learning_rate": 2.7522935779816517e-06, "log_odds_chosen": 0.6263030767440796, "log_odds_ratio": -0.43304184079170227, "logits/chosen": -0.5808561444282532, "logits/rejected": -0.16479042172431946, "logps/chosen": -1.874776840209961, "logps/rejected": -2.425903797149658, "loss": 2.1062, "nll_loss": 2.0628535747528076, "rewards/accuracies": 1.0, "rewards/chosen": -0.18747767806053162, "rewards/margins": 0.055112697184085846, "rewards/rejected": -0.24259036779403687, "step": 61 }, { "epoch": 0.1712115982050397, "grad_norm": 0.7107025980949402, "learning_rate": 2.798165137614679e-06, "log_odds_chosen": 0.38438883423805237, "log_odds_ratio": -0.5274062752723694, "logits/chosen": -0.5282328724861145, "logits/rejected": 0.017122909426689148, "logps/chosen": -2.0265655517578125, "logps/rejected": -2.3674638271331787, "loss": 2.2774, "nll_loss": 2.224700927734375, "rewards/accuracies": 1.0, "rewards/chosen": -0.202656552195549, "rewards/margins": 0.034089840948581696, "rewards/rejected": -0.2367464005947113, "step": 62 }, { "epoch": 0.17397307559544356, "grad_norm": 0.6914030313491821, "learning_rate": 2.844036697247707e-06, "log_odds_chosen": 0.4569448232650757, "log_odds_ratio": -0.4947759211063385, "logits/chosen": -0.564735472202301, "logits/rejected": -0.17074617743492126, "logps/chosen": -1.9550468921661377, "logps/rejected": -2.357658624649048, "loss": 2.1959, "nll_loss": 2.14642596244812, "rewards/accuracies": 1.0, "rewards/chosen": -0.19550471007823944, "rewards/margins": 0.040261153131723404, "rewards/rejected": -0.23576584458351135, "step": 63 }, { "epoch": 0.17673455298584742, "grad_norm": 0.6147691011428833, "learning_rate": 2.8899082568807342e-06, "log_odds_chosen": 0.5831525325775146, "log_odds_ratio": -0.4512834846973419, "logits/chosen": -0.6652324199676514, "logits/rejected": -0.30751606822013855, "logps/chosen": -1.925264835357666, "logps/rejected": -2.4442431926727295, "loss": 2.154, "nll_loss": 2.1088905334472656, "rewards/accuracies": 1.0, "rewards/chosen": -0.1925264596939087, "rewards/margins": 0.05189783126115799, "rewards/rejected": -0.24442431330680847, "step": 64 }, { "epoch": 0.17949603037625128, "grad_norm": 0.6069121360778809, "learning_rate": 2.935779816513762e-06, "log_odds_chosen": 0.3449505567550659, "log_odds_ratio": -0.5405150055885315, "logits/chosen": -0.46805867552757263, "logits/rejected": -0.031980015337467194, "logps/chosen": -1.8449336290359497, "logps/rejected": -2.141822576522827, "loss": 2.104, "nll_loss": 2.049985885620117, "rewards/accuracies": 1.0, "rewards/chosen": -0.18449336290359497, "rewards/margins": 0.02968890592455864, "rewards/rejected": -0.2141822725534439, "step": 65 }, { "epoch": 0.18225750776665517, "grad_norm": 0.5975824594497681, "learning_rate": 2.981651376146789e-06, "log_odds_chosen": 0.5348482728004456, "log_odds_ratio": -0.48265504837036133, "logits/chosen": -0.6339913606643677, "logits/rejected": -0.19266335666179657, "logps/chosen": -1.7976055145263672, "logps/rejected": -2.2617228031158447, "loss": 2.0549, "nll_loss": 2.006657600402832, "rewards/accuracies": 1.0, "rewards/chosen": -0.17976056039333344, "rewards/margins": 0.04641173034906387, "rewards/rejected": -0.2261722981929779, "step": 66 }, { "epoch": 0.18501898515705903, "grad_norm": 0.6583871245384216, "learning_rate": 3.0275229357798168e-06, "log_odds_chosen": 0.2934231162071228, "log_odds_ratio": -0.5610607862472534, "logits/chosen": -0.49614468216896057, "logits/rejected": -0.02480134554207325, "logps/chosen": -1.9567402601242065, "logps/rejected": -2.21272349357605, "loss": 2.1884, "nll_loss": 2.1323015689849854, "rewards/accuracies": 1.0, "rewards/chosen": -0.19567403197288513, "rewards/margins": 0.025598343461751938, "rewards/rejected": -0.22127236425876617, "step": 67 }, { "epoch": 0.1877804625474629, "grad_norm": 0.6593952775001526, "learning_rate": 3.073394495412844e-06, "log_odds_chosen": 0.39623624086380005, "log_odds_ratio": -0.5267960429191589, "logits/chosen": -0.32998380064964294, "logits/rejected": 0.019692357629537582, "logps/chosen": -1.9624451398849487, "logps/rejected": -2.3119962215423584, "loss": 2.2478, "nll_loss": 2.195082902908325, "rewards/accuracies": 0.875, "rewards/chosen": -0.1962445080280304, "rewards/margins": 0.03495512157678604, "rewards/rejected": -0.23119963705539703, "step": 68 }, { "epoch": 0.19054193993786675, "grad_norm": 0.7153931856155396, "learning_rate": 3.119266055045872e-06, "log_odds_chosen": 0.5069383382797241, "log_odds_ratio": -0.4813777804374695, "logits/chosen": -0.505609393119812, "logits/rejected": -0.05926704406738281, "logps/chosen": -2.0001449584960938, "logps/rejected": -2.453965187072754, "loss": 2.2604, "nll_loss": 2.212214231491089, "rewards/accuracies": 1.0, "rewards/chosen": -0.20001448690891266, "rewards/margins": 0.04538201540708542, "rewards/rejected": -0.24539650976657867, "step": 69 }, { "epoch": 0.19330341732827064, "grad_norm": 0.5841740965843201, "learning_rate": 3.1651376146788993e-06, "log_odds_chosen": 0.48365646600723267, "log_odds_ratio": -0.4906890094280243, "logits/chosen": -0.4503505527973175, "logits/rejected": -0.24621139466762543, "logps/chosen": -1.9794856309890747, "logps/rejected": -2.4092578887939453, "loss": 2.2046, "nll_loss": 2.155548095703125, "rewards/accuracies": 0.875, "rewards/chosen": -0.19794857501983643, "rewards/margins": 0.0429772287607193, "rewards/rejected": -0.24092580378055573, "step": 70 }, { "epoch": 0.1960648947186745, "grad_norm": 0.6693902015686035, "learning_rate": 3.211009174311927e-06, "log_odds_chosen": 0.4308607876300812, "log_odds_ratio": -0.5067814588546753, "logits/chosen": -0.47226929664611816, "logits/rejected": -0.22168010473251343, "logps/chosen": -1.9241232872009277, "logps/rejected": -2.3047096729278564, "loss": 2.191, "nll_loss": 2.14032244682312, "rewards/accuracies": 1.0, "rewards/chosen": -0.192412331700325, "rewards/margins": 0.038058653473854065, "rewards/rejected": -0.23047097027301788, "step": 71 }, { "epoch": 0.19882637210907836, "grad_norm": 0.6415713429450989, "learning_rate": 3.256880733944954e-06, "log_odds_chosen": 0.5713039636611938, "log_odds_ratio": -0.46779656410217285, "logits/chosen": -0.4027179479598999, "logits/rejected": -0.20848074555397034, "logps/chosen": -1.8328369855880737, "logps/rejected": -2.313088893890381, "loss": 2.0868, "nll_loss": 2.0400490760803223, "rewards/accuracies": 0.875, "rewards/chosen": -0.18328368663787842, "rewards/margins": 0.048025187104940414, "rewards/rejected": -0.23130889236927032, "step": 72 }, { "epoch": 0.20158784949948222, "grad_norm": 0.6714993715286255, "learning_rate": 3.3027522935779823e-06, "log_odds_chosen": 0.35765087604522705, "log_odds_ratio": -0.5315056443214417, "logits/chosen": -0.387838214635849, "logits/rejected": -0.10332845896482468, "logps/chosen": -2.011086940765381, "logps/rejected": -2.328169107437134, "loss": 2.2627, "nll_loss": 2.209596872329712, "rewards/accuracies": 1.0, "rewards/chosen": -0.20110869407653809, "rewards/margins": 0.03170822933316231, "rewards/rejected": -0.2328169345855713, "step": 73 }, { "epoch": 0.20434932688988608, "grad_norm": 0.6035195589065552, "learning_rate": 3.3486238532110095e-06, "log_odds_chosen": 0.37045401334762573, "log_odds_ratio": -0.528258740901947, "logits/chosen": -0.4163488447666168, "logits/rejected": -0.20020049810409546, "logps/chosen": -1.927206039428711, "logps/rejected": -2.2507896423339844, "loss": 2.1911, "nll_loss": 2.1382455825805664, "rewards/accuracies": 1.0, "rewards/chosen": -0.19272059202194214, "rewards/margins": 0.03235836327075958, "rewards/rejected": -0.22507895529270172, "step": 74 }, { "epoch": 0.20711080428028997, "grad_norm": 0.6681767702102661, "learning_rate": 3.394495412844037e-06, "log_odds_chosen": 0.4545160233974457, "log_odds_ratio": -0.4990374743938446, "logits/chosen": -0.4695533514022827, "logits/rejected": -0.14828726649284363, "logps/chosen": -1.9052660465240479, "logps/rejected": -2.3053171634674072, "loss": 2.171, "nll_loss": 2.121072769165039, "rewards/accuracies": 0.875, "rewards/chosen": -0.19052661955356598, "rewards/margins": 0.04000508412718773, "rewards/rejected": -0.2305317223072052, "step": 75 }, { "epoch": 0.20987228167069383, "grad_norm": 0.5848620533943176, "learning_rate": 3.4403669724770644e-06, "log_odds_chosen": 0.4162794351577759, "log_odds_ratio": -0.5107027292251587, "logits/chosen": -0.3651241362094879, "logits/rejected": -0.29924386739730835, "logps/chosen": -1.9842548370361328, "logps/rejected": -2.354065418243408, "loss": 2.2131, "nll_loss": 2.1620750427246094, "rewards/accuracies": 1.0, "rewards/chosen": -0.19842548668384552, "rewards/margins": 0.03698106110095978, "rewards/rejected": -0.2354065328836441, "step": 76 }, { "epoch": 0.2126337590610977, "grad_norm": 0.6766842007637024, "learning_rate": 3.486238532110092e-06, "log_odds_chosen": 0.3890829086303711, "log_odds_ratio": -0.5331251621246338, "logits/chosen": -0.5041220784187317, "logits/rejected": -0.256991982460022, "logps/chosen": -1.9675860404968262, "logps/rejected": -2.3138177394866943, "loss": 2.2335, "nll_loss": 2.1801676750183105, "rewards/accuracies": 0.875, "rewards/chosen": -0.19675858318805695, "rewards/margins": 0.03462318331003189, "rewards/rejected": -0.23138177394866943, "step": 77 }, { "epoch": 0.21539523645150155, "grad_norm": 0.6400946974754333, "learning_rate": 3.5321100917431193e-06, "log_odds_chosen": 0.3764447569847107, "log_odds_ratio": -0.5406405329704285, "logits/chosen": -0.30762779712677, "logits/rejected": -0.010896757245063782, "logps/chosen": -1.9988197088241577, "logps/rejected": -2.331996202468872, "loss": 2.246, "nll_loss": 2.191932201385498, "rewards/accuracies": 0.875, "rewards/chosen": -0.19988197088241577, "rewards/margins": 0.03331765532493591, "rewards/rejected": -0.23319962620735168, "step": 78 }, { "epoch": 0.2181567138419054, "grad_norm": 0.5379537343978882, "learning_rate": 3.5779816513761473e-06, "log_odds_chosen": 0.4429885745048523, "log_odds_ratio": -0.49778997898101807, "logits/chosen": -0.32647988200187683, "logits/rejected": -0.19796019792556763, "logps/chosen": -1.8951160907745361, "logps/rejected": -2.282485246658325, "loss": 2.1565, "nll_loss": 2.1067562103271484, "rewards/accuracies": 1.0, "rewards/chosen": -0.18951159715652466, "rewards/margins": 0.038736920803785324, "rewards/rejected": -0.22824853658676147, "step": 79 }, { "epoch": 0.2209181912323093, "grad_norm": 0.5976657867431641, "learning_rate": 3.6238532110091746e-06, "log_odds_chosen": 0.3772650957107544, "log_odds_ratio": -0.5323787927627563, "logits/chosen": -0.396470308303833, "logits/rejected": -0.1672373265028, "logps/chosen": -1.8558274507522583, "logps/rejected": -2.1821842193603516, "loss": 2.119, "nll_loss": 2.065802812576294, "rewards/accuracies": 1.0, "rewards/chosen": -0.1855827420949936, "rewards/margins": 0.032635681331157684, "rewards/rejected": -0.21821841597557068, "step": 80 }, { "epoch": 0.22367966862271316, "grad_norm": 0.5696468353271484, "learning_rate": 3.6697247706422022e-06, "log_odds_chosen": 0.3498988747596741, "log_odds_ratio": -0.5373408198356628, "logits/chosen": -0.32143616676330566, "logits/rejected": -0.08278146386146545, "logps/chosen": -1.867413878440857, "logps/rejected": -2.1695525646209717, "loss": 2.1158, "nll_loss": 2.062082052230835, "rewards/accuracies": 1.0, "rewards/chosen": -0.1867414116859436, "rewards/margins": 0.0302138552069664, "rewards/rejected": -0.2169552594423294, "step": 81 }, { "epoch": 0.22644114601311702, "grad_norm": 0.579322874546051, "learning_rate": 3.7155963302752295e-06, "log_odds_chosen": 0.3590673804283142, "log_odds_ratio": -0.5345906019210815, "logits/chosen": -0.3917956054210663, "logits/rejected": -0.1658124327659607, "logps/chosen": -1.9473357200622559, "logps/rejected": -2.2625551223754883, "loss": 2.2152, "nll_loss": 2.1617040634155273, "rewards/accuracies": 1.0, "rewards/chosen": -0.19473356008529663, "rewards/margins": 0.03152196481823921, "rewards/rejected": -0.22625553607940674, "step": 82 }, { "epoch": 0.22920262340352088, "grad_norm": 0.5707481503486633, "learning_rate": 3.7614678899082575e-06, "log_odds_chosen": 0.601745069026947, "log_odds_ratio": -0.44251349568367004, "logits/chosen": -0.35622307658195496, "logits/rejected": -0.2330722063779831, "logps/chosen": -1.8970295190811157, "logps/rejected": -2.4300365447998047, "loss": 2.141, "nll_loss": 2.096763849258423, "rewards/accuracies": 1.0, "rewards/chosen": -0.18970295786857605, "rewards/margins": 0.053300708532333374, "rewards/rejected": -0.24300366640090942, "step": 83 }, { "epoch": 0.23196410079392474, "grad_norm": 0.6338332891464233, "learning_rate": 3.8073394495412848e-06, "log_odds_chosen": 0.4194182753562927, "log_odds_ratio": -0.5094286799430847, "logits/chosen": -0.38890349864959717, "logits/rejected": -0.1751769334077835, "logps/chosen": -1.8893790245056152, "logps/rejected": -2.2541043758392334, "loss": 2.1687, "nll_loss": 2.1177978515625, "rewards/accuracies": 1.0, "rewards/chosen": -0.18893790245056152, "rewards/margins": 0.036472536623477936, "rewards/rejected": -0.22541043162345886, "step": 84 }, { "epoch": 0.23472557818432863, "grad_norm": 0.514668881893158, "learning_rate": 3.853211009174313e-06, "log_odds_chosen": 0.5391043424606323, "log_odds_ratio": -0.46963804960250854, "logits/chosen": -0.42746701836586, "logits/rejected": -0.19726645946502686, "logps/chosen": -1.8130801916122437, "logps/rejected": -2.285198211669922, "loss": 2.0648, "nll_loss": 2.017812490463257, "rewards/accuracies": 1.0, "rewards/chosen": -0.18130803108215332, "rewards/margins": 0.047211792320013046, "rewards/rejected": -0.22851979732513428, "step": 85 }, { "epoch": 0.2374870555747325, "grad_norm": 0.5241694450378418, "learning_rate": 3.89908256880734e-06, "log_odds_chosen": 0.5370965003967285, "log_odds_ratio": -0.47515052556991577, "logits/chosen": -0.18979063630104065, "logits/rejected": -0.3587040901184082, "logps/chosen": -1.9306432008743286, "logps/rejected": -2.4106080532073975, "loss": 2.1744, "nll_loss": 2.1269240379333496, "rewards/accuracies": 1.0, "rewards/chosen": -0.19306430220603943, "rewards/margins": 0.04799651354551315, "rewards/rejected": -0.24106080830097198, "step": 86 }, { "epoch": 0.24024853296513635, "grad_norm": 0.5388301610946655, "learning_rate": 3.944954128440367e-06, "log_odds_chosen": 0.47338563203811646, "log_odds_ratio": -0.4902949929237366, "logits/chosen": -0.2526980936527252, "logits/rejected": -0.27716919779777527, "logps/chosen": -1.8183417320251465, "logps/rejected": -2.227705955505371, "loss": 2.0679, "nll_loss": 2.0188400745391846, "rewards/accuracies": 0.875, "rewards/chosen": -0.1818341761827469, "rewards/margins": 0.0409364253282547, "rewards/rejected": -0.2227705866098404, "step": 87 }, { "epoch": 0.2430100103555402, "grad_norm": 0.5072680115699768, "learning_rate": 3.9908256880733945e-06, "log_odds_chosen": 0.5098553895950317, "log_odds_ratio": -0.4994601309299469, "logits/chosen": -0.4232753813266754, "logits/rejected": -0.2069588601589203, "logps/chosen": -1.7478512525558472, "logps/rejected": -2.1878132820129395, "loss": 2.0084, "nll_loss": 1.9584167003631592, "rewards/accuracies": 0.75, "rewards/chosen": -0.17478513717651367, "rewards/margins": 0.043996214866638184, "rewards/rejected": -0.21878135204315186, "step": 88 }, { "epoch": 0.24577148774594407, "grad_norm": 0.4936067461967468, "learning_rate": 4.036697247706423e-06, "log_odds_chosen": 0.5959604382514954, "log_odds_ratio": -0.4470658600330353, "logits/chosen": -0.26887303590774536, "logits/rejected": -0.2717263400554657, "logps/chosen": -1.8666045665740967, "logps/rejected": -2.3904991149902344, "loss": 2.1004, "nll_loss": 2.0557374954223633, "rewards/accuracies": 1.0, "rewards/chosen": -0.18666045367717743, "rewards/margins": 0.05238945782184601, "rewards/rejected": -0.23904991149902344, "step": 89 }, { "epoch": 0.24853296513634796, "grad_norm": 0.5457214713096619, "learning_rate": 4.08256880733945e-06, "log_odds_chosen": 0.5490537285804749, "log_odds_ratio": -0.45841851830482483, "logits/chosen": -0.34721043705940247, "logits/rejected": -0.10521189868450165, "logps/chosen": -1.8499623537063599, "logps/rejected": -2.329256296157837, "loss": 2.0961, "nll_loss": 2.050299644470215, "rewards/accuracies": 1.0, "rewards/chosen": -0.18499624729156494, "rewards/margins": 0.04792938381433487, "rewards/rejected": -0.2329256385564804, "step": 90 }, { "epoch": 0.2512944425267518, "grad_norm": 0.5049707293510437, "learning_rate": 4.128440366972478e-06, "log_odds_chosen": 0.4017772674560547, "log_odds_ratio": -0.5216570496559143, "logits/chosen": -0.26840728521347046, "logits/rejected": -0.19604754447937012, "logps/chosen": -1.8395251035690308, "logps/rejected": -2.1878998279571533, "loss": 2.0732, "nll_loss": 2.021005868911743, "rewards/accuracies": 0.875, "rewards/chosen": -0.18395252525806427, "rewards/margins": 0.034837473183870316, "rewards/rejected": -0.2187899798154831, "step": 91 }, { "epoch": 0.2540559199171557, "grad_norm": 0.4878016710281372, "learning_rate": 4.174311926605505e-06, "log_odds_chosen": 0.40200310945510864, "log_odds_ratio": -0.5181869268417358, "logits/chosen": -0.17307503521442413, "logits/rejected": -0.5424618721008301, "logps/chosen": -1.8756885528564453, "logps/rejected": -2.2274668216705322, "loss": 2.1098, "nll_loss": 2.0579941272735596, "rewards/accuracies": 1.0, "rewards/chosen": -0.18756884336471558, "rewards/margins": 0.035177819430828094, "rewards/rejected": -0.22274667024612427, "step": 92 }, { "epoch": 0.25681739730755954, "grad_norm": 0.5148739218711853, "learning_rate": 4.220183486238532e-06, "log_odds_chosen": 0.4867369532585144, "log_odds_ratio": -0.4866551160812378, "logits/chosen": -0.20501409471035004, "logits/rejected": -0.3177582919597626, "logps/chosen": -1.8571135997772217, "logps/rejected": -2.2809691429138184, "loss": 2.1117, "nll_loss": 2.0629868507385254, "rewards/accuracies": 0.875, "rewards/chosen": -0.1857113391160965, "rewards/margins": 0.04238557443022728, "rewards/rejected": -0.22809693217277527, "step": 93 }, { "epoch": 0.2595788746979634, "grad_norm": 0.48695462942123413, "learning_rate": 4.26605504587156e-06, "log_odds_chosen": 0.4320299029350281, "log_odds_ratio": -0.5084698796272278, "logits/chosen": -0.33521950244903564, "logits/rejected": -0.22524963319301605, "logps/chosen": -1.759456753730774, "logps/rejected": -2.130563974380493, "loss": 1.9918, "nll_loss": 1.9409611225128174, "rewards/accuracies": 0.875, "rewards/chosen": -0.17594566941261292, "rewards/margins": 0.03711073845624924, "rewards/rejected": -0.21305640041828156, "step": 94 }, { "epoch": 0.26234035208836726, "grad_norm": 0.4444352984428406, "learning_rate": 4.311926605504588e-06, "log_odds_chosen": 0.3173726201057434, "log_odds_ratio": -0.5506779551506042, "logits/chosen": -0.15516842901706696, "logits/rejected": -0.30819153785705566, "logps/chosen": -1.7852771282196045, "logps/rejected": -2.0567989349365234, "loss": 2.0242, "nll_loss": 1.969139814376831, "rewards/accuracies": 1.0, "rewards/chosen": -0.17852769792079926, "rewards/margins": 0.027152204886078835, "rewards/rejected": -0.20567990839481354, "step": 95 }, { "epoch": 0.26510182947877114, "grad_norm": 0.508222758769989, "learning_rate": 4.357798165137615e-06, "log_odds_chosen": 0.26060953736305237, "log_odds_ratio": -0.5766869187355042, "logits/chosen": -0.21468104422092438, "logits/rejected": -0.17443214356899261, "logps/chosen": -1.9513554573059082, "logps/rejected": -2.179452419281006, "loss": 2.1941, "nll_loss": 2.1364006996154785, "rewards/accuracies": 0.875, "rewards/chosen": -0.19513554871082306, "rewards/margins": 0.02280968800187111, "rewards/rejected": -0.21794524788856506, "step": 96 }, { "epoch": 0.26786330686917503, "grad_norm": 0.4931764602661133, "learning_rate": 4.403669724770643e-06, "log_odds_chosen": 0.5352882146835327, "log_odds_ratio": -0.4726255536079407, "logits/chosen": -0.10641247034072876, "logits/rejected": -0.164643332362175, "logps/chosen": -1.7959346771240234, "logps/rejected": -2.266484022140503, "loss": 2.0405, "nll_loss": 1.9932180643081665, "rewards/accuracies": 1.0, "rewards/chosen": -0.17959347367286682, "rewards/margins": 0.047054924070835114, "rewards/rejected": -0.22664840519428253, "step": 97 }, { "epoch": 0.27062478425957887, "grad_norm": 0.49030837416648865, "learning_rate": 4.44954128440367e-06, "log_odds_chosen": 0.38322845101356506, "log_odds_ratio": -0.5242493748664856, "logits/chosen": -0.15826918184757233, "logits/rejected": -0.20204174518585205, "logps/chosen": -1.8669074773788452, "logps/rejected": -2.199532985687256, "loss": 2.1032, "nll_loss": 2.0507290363311768, "rewards/accuracies": 1.0, "rewards/chosen": -0.18669073283672333, "rewards/margins": 0.03326254338026047, "rewards/rejected": -0.2199532836675644, "step": 98 }, { "epoch": 0.27338626164998275, "grad_norm": 0.49382349848747253, "learning_rate": 4.4954128440366975e-06, "log_odds_chosen": 0.5044716000556946, "log_odds_ratio": -0.4766765236854553, "logits/chosen": -0.1681555211544037, "logits/rejected": -0.3671070337295532, "logps/chosen": -1.9003745317459106, "logps/rejected": -2.344512939453125, "loss": 2.1245, "nll_loss": 2.076801061630249, "rewards/accuracies": 1.0, "rewards/chosen": -0.19003747403621674, "rewards/margins": 0.04441382735967636, "rewards/rejected": -0.2344512790441513, "step": 99 }, { "epoch": 0.2761477390403866, "grad_norm": 0.507435142993927, "learning_rate": 4.541284403669725e-06, "log_odds_chosen": 0.2962280511856079, "log_odds_ratio": -0.5599422454833984, "logits/chosen": -0.26080605387687683, "logits/rejected": -0.24701349437236786, "logps/chosen": -1.798446774482727, "logps/rejected": -2.050736427307129, "loss": 2.0576, "nll_loss": 2.001603841781616, "rewards/accuracies": 0.875, "rewards/chosen": -0.1798446923494339, "rewards/margins": 0.0252289529889822, "rewards/rejected": -0.20507365465164185, "step": 100 }, { "epoch": 0.2789092164307905, "grad_norm": 0.461052268743515, "learning_rate": 4.587155963302753e-06, "log_odds_chosen": 0.29842671751976013, "log_odds_ratio": -0.5633093118667603, "logits/chosen": -0.11747467517852783, "logits/rejected": -0.2521146833896637, "logps/chosen": -1.913146734237671, "logps/rejected": -2.1769111156463623, "loss": 2.1524, "nll_loss": 2.0960795879364014, "rewards/accuracies": 0.875, "rewards/chosen": -0.1913146674633026, "rewards/margins": 0.02637643553316593, "rewards/rejected": -0.21769112348556519, "step": 101 }, { "epoch": 0.28167069382119436, "grad_norm": 0.42991501092910767, "learning_rate": 4.63302752293578e-06, "log_odds_chosen": 0.5397889614105225, "log_odds_ratio": -0.4643154740333557, "logits/chosen": -0.17212505638599396, "logits/rejected": -0.3217812776565552, "logps/chosen": -1.7475972175598145, "logps/rejected": -2.209057331085205, "loss": 2.0036, "nll_loss": 1.9572076797485352, "rewards/accuracies": 1.0, "rewards/chosen": -0.17475973069667816, "rewards/margins": 0.046146005392074585, "rewards/rejected": -0.22090573608875275, "step": 102 }, { "epoch": 0.2844321712115982, "grad_norm": 0.47230181097984314, "learning_rate": 4.678899082568808e-06, "log_odds_chosen": 0.29523512721061707, "log_odds_ratio": -0.5644804239273071, "logits/chosen": -0.22446005046367645, "logits/rejected": -0.26140278577804565, "logps/chosen": -1.7972923517227173, "logps/rejected": -2.051152229309082, "loss": 2.0406, "nll_loss": 1.9841551780700684, "rewards/accuracies": 0.875, "rewards/chosen": -0.17972922325134277, "rewards/margins": 0.025385981425642967, "rewards/rejected": -0.20511522889137268, "step": 103 }, { "epoch": 0.2871936486020021, "grad_norm": 0.4233863651752472, "learning_rate": 4.724770642201835e-06, "log_odds_chosen": 0.3453969359397888, "log_odds_ratio": -0.5395044684410095, "logits/chosen": -0.18183927237987518, "logits/rejected": -0.3289150297641754, "logps/chosen": -1.7190299034118652, "logps/rejected": -2.0107898712158203, "loss": 1.9637, "nll_loss": 1.9097464084625244, "rewards/accuracies": 1.0, "rewards/chosen": -0.17190299928188324, "rewards/margins": 0.02917599491775036, "rewards/rejected": -0.20107899606227875, "step": 104 }, { "epoch": 0.2899551259924059, "grad_norm": 0.4307953417301178, "learning_rate": 4.770642201834863e-06, "log_odds_chosen": 0.46213921904563904, "log_odds_ratio": -0.4912022650241852, "logits/chosen": -0.1190250962972641, "logits/rejected": -0.19093452394008636, "logps/chosen": -1.7502868175506592, "logps/rejected": -2.1464428901672363, "loss": 1.9758, "nll_loss": 1.9266620874404907, "rewards/accuracies": 1.0, "rewards/chosen": -0.17502868175506592, "rewards/margins": 0.03961558640003204, "rewards/rejected": -0.21464428305625916, "step": 105 }, { "epoch": 0.2927166033828098, "grad_norm": 0.43505242466926575, "learning_rate": 4.816513761467891e-06, "log_odds_chosen": 0.4836673140525818, "log_odds_ratio": -0.49270570278167725, "logits/chosen": -0.08418025076389313, "logits/rejected": -0.21315333247184753, "logps/chosen": -1.7327262163162231, "logps/rejected": -2.145224094390869, "loss": 1.9644, "nll_loss": 1.9150831699371338, "rewards/accuracies": 0.875, "rewards/chosen": -0.17327262461185455, "rewards/margins": 0.041249774396419525, "rewards/rejected": -0.21452240645885468, "step": 106 }, { "epoch": 0.2954780807732137, "grad_norm": 0.4454819858074188, "learning_rate": 4.862385321100918e-06, "log_odds_chosen": 0.3297927975654602, "log_odds_ratio": -0.5459839701652527, "logits/chosen": -0.08551089465618134, "logits/rejected": -0.3492465913295746, "logps/chosen": -1.8108294010162354, "logps/rejected": -2.0928804874420166, "loss": 2.0393, "nll_loss": 1.9846614599227905, "rewards/accuracies": 0.875, "rewards/chosen": -0.18108293414115906, "rewards/margins": 0.02820511721074581, "rewards/rejected": -0.20928806066513062, "step": 107 }, { "epoch": 0.2982395581636175, "grad_norm": 0.4228799641132355, "learning_rate": 4.908256880733945e-06, "log_odds_chosen": 0.4139629602432251, "log_odds_ratio": -0.5125847458839417, "logits/chosen": -0.042439091950654984, "logits/rejected": -0.3392437696456909, "logps/chosen": -1.842297077178955, "logps/rejected": -2.203068971633911, "loss": 2.0532, "nll_loss": 2.00195050239563, "rewards/accuracies": 0.875, "rewards/chosen": -0.18422970175743103, "rewards/margins": 0.03607717901468277, "rewards/rejected": -0.2203068882226944, "step": 108 }, { "epoch": 0.3010010355540214, "grad_norm": 0.4132618308067322, "learning_rate": 4.954128440366973e-06, "log_odds_chosen": 0.5509217977523804, "log_odds_ratio": -0.4667437970638275, "logits/chosen": -0.047298721969127655, "logits/rejected": -0.492242693901062, "logps/chosen": -1.8542200326919556, "logps/rejected": -2.3400068283081055, "loss": 2.0568, "nll_loss": 2.0100908279418945, "rewards/accuracies": 1.0, "rewards/chosen": -0.18542201817035675, "rewards/margins": 0.04857867211103439, "rewards/rejected": -0.23400066792964935, "step": 109 }, { "epoch": 0.30376251294442524, "grad_norm": 0.40540555119514465, "learning_rate": 5e-06, "log_odds_chosen": 0.26367828249931335, "log_odds_ratio": -0.5857654213905334, "logits/chosen": 0.020869266241788864, "logits/rejected": -0.3348216116428375, "logps/chosen": -1.8042923212051392, "logps/rejected": -2.0284535884857178, "loss": 2.0564, "nll_loss": 1.997856616973877, "rewards/accuracies": 0.75, "rewards/chosen": -0.18042920529842377, "rewards/margins": 0.02241615764796734, "rewards/rejected": -0.20284539461135864, "step": 110 }, { "epoch": 0.30652399033482913, "grad_norm": 0.3964520990848541, "learning_rate": 4.999987154315977e-06, "log_odds_chosen": 0.39854103326797485, "log_odds_ratio": -0.5201252698898315, "logits/chosen": -0.07075213640928268, "logits/rejected": -0.26321282982826233, "logps/chosen": -1.7023441791534424, "logps/rejected": -2.03615665435791, "loss": 1.9356, "nll_loss": 1.883634090423584, "rewards/accuracies": 1.0, "rewards/chosen": -0.17023441195487976, "rewards/margins": 0.03338123857975006, "rewards/rejected": -0.20361566543579102, "step": 111 }, { "epoch": 0.309285467725233, "grad_norm": 0.4122212827205658, "learning_rate": 4.999948617395916e-06, "log_odds_chosen": 0.3810994029045105, "log_odds_ratio": -0.528425931930542, "logits/chosen": 0.04292220622301102, "logits/rejected": -0.24851810932159424, "logps/chosen": -1.7573786973953247, "logps/rejected": -2.084685802459717, "loss": 1.9844, "nll_loss": 1.931592583656311, "rewards/accuracies": 1.0, "rewards/chosen": -0.17573785781860352, "rewards/margins": 0.03273071348667145, "rewards/rejected": -0.20846858620643616, "step": 112 }, { "epoch": 0.31204694511563685, "grad_norm": 0.4462045729160309, "learning_rate": 4.999884389635843e-06, "log_odds_chosen": 0.392294704914093, "log_odds_ratio": -0.520103394985199, "logits/chosen": -0.046707406640052795, "logits/rejected": -0.22425656020641327, "logps/chosen": -1.8034018278121948, "logps/rejected": -2.140183687210083, "loss": 2.0382, "nll_loss": 1.9861979484558105, "rewards/accuracies": 1.0, "rewards/chosen": -0.18034018576145172, "rewards/margins": 0.03367818892002106, "rewards/rejected": -0.21401838958263397, "step": 113 }, { "epoch": 0.31480842250604074, "grad_norm": 0.40587544441223145, "learning_rate": 4.9997944716957985e-06, "log_odds_chosen": 0.48522624373435974, "log_odds_ratio": -0.49063414335250854, "logits/chosen": 0.049725521355867386, "logits/rejected": -0.3027104139328003, "logps/chosen": -1.8043849468231201, "logps/rejected": -2.224425792694092, "loss": 2.0299, "nll_loss": 1.9808719158172607, "rewards/accuracies": 0.875, "rewards/chosen": -0.18043850362300873, "rewards/margins": 0.04200407862663269, "rewards/rejected": -0.22244258224964142, "step": 114 }, { "epoch": 0.3175698998964446, "grad_norm": 0.38116294145584106, "learning_rate": 4.999678864499828e-06, "log_odds_chosen": 0.35890865325927734, "log_odds_ratio": -0.5324774384498596, "logits/chosen": 0.1463293731212616, "logits/rejected": -0.3165544271469116, "logps/chosen": -1.83231782913208, "logps/rejected": -2.1404545307159424, "loss": 2.0335, "nll_loss": 1.9802701473236084, "rewards/accuracies": 1.0, "rewards/chosen": -0.18323180079460144, "rewards/margins": 0.030813684687018394, "rewards/rejected": -0.21404549479484558, "step": 115 }, { "epoch": 0.32033137728684846, "grad_norm": 0.3846777081489563, "learning_rate": 4.999537569235975e-06, "log_odds_chosen": 0.31865543127059937, "log_odds_ratio": -0.5562014579772949, "logits/chosen": 0.06778667122125626, "logits/rejected": -0.2945130169391632, "logps/chosen": -1.8204419612884521, "logps/rejected": -2.0919411182403564, "loss": 2.0362, "nll_loss": 1.9805940389633179, "rewards/accuracies": 0.875, "rewards/chosen": -0.18204417824745178, "rewards/margins": 0.02714991755783558, "rewards/rejected": -0.2091941088438034, "step": 116 }, { "epoch": 0.32309285467725235, "grad_norm": 0.3859681189060211, "learning_rate": 4.999370587356267e-06, "log_odds_chosen": 0.2864968180656433, "log_odds_ratio": -0.5685579180717468, "logits/chosen": 0.021781759336590767, "logits/rejected": -0.41891583800315857, "logps/chosen": -1.8915698528289795, "logps/rejected": -2.1414055824279785, "loss": 2.1035, "nll_loss": 2.0466203689575195, "rewards/accuracies": 0.875, "rewards/chosen": -0.18915697932243347, "rewards/margins": 0.02498357556760311, "rewards/rejected": -0.21414057910442352, "step": 117 }, { "epoch": 0.3258543320676562, "grad_norm": 0.4066268801689148, "learning_rate": 4.9991779205767e-06, "log_odds_chosen": 0.2647791802883148, "log_odds_ratio": -0.5728141069412231, "logits/chosen": -0.04139568656682968, "logits/rejected": -0.6460073590278625, "logps/chosen": -1.757622480392456, "logps/rejected": -1.9820458889007568, "loss": 1.984, "nll_loss": 1.9267468452453613, "rewards/accuracies": 1.0, "rewards/chosen": -0.17576223611831665, "rewards/margins": 0.022442325949668884, "rewards/rejected": -0.19820457696914673, "step": 118 }, { "epoch": 0.32861580945806007, "grad_norm": 0.4128899574279785, "learning_rate": 4.998959570877224e-06, "log_odds_chosen": 0.3499869406223297, "log_odds_ratio": -0.5363956093788147, "logits/chosen": 0.16672371327877045, "logits/rejected": -0.34432125091552734, "logps/chosen": -1.9091435670852661, "logps/rejected": -2.2108755111694336, "loss": 2.1306, "nll_loss": 2.0769805908203125, "rewards/accuracies": 0.875, "rewards/chosen": -0.19091437757015228, "rewards/margins": 0.0301731638610363, "rewards/rejected": -0.2210875302553177, "step": 119 }, { "epoch": 0.3313772868484639, "grad_norm": 0.358536958694458, "learning_rate": 4.99871554050172e-06, "log_odds_chosen": 0.42555758357048035, "log_odds_ratio": -0.5050589442253113, "logits/chosen": 0.02474893629550934, "logits/rejected": -0.25200581550598145, "logps/chosen": -1.7013750076293945, "logps/rejected": -2.060541868209839, "loss": 1.9062, "nll_loss": 1.855684757232666, "rewards/accuracies": 1.0, "rewards/chosen": -0.17013752460479736, "rewards/margins": 0.03591667488217354, "rewards/rejected": -0.2060541808605194, "step": 120 }, { "epoch": 0.3341387642388678, "grad_norm": 0.394452840089798, "learning_rate": 4.9984458319579775e-06, "log_odds_chosen": 0.423836886882782, "log_odds_ratio": -0.5092670321464539, "logits/chosen": -0.0022555014584213495, "logits/rejected": -0.3431664705276489, "logps/chosen": -1.7344661951065063, "logps/rejected": -2.0950186252593994, "loss": 1.9335, "nll_loss": 1.8825750350952148, "rewards/accuracies": 1.0, "rewards/chosen": -0.17344661056995392, "rewards/margins": 0.03605526313185692, "rewards/rejected": -0.20950186252593994, "step": 121 }, { "epoch": 0.3369002416292717, "grad_norm": 0.3641405999660492, "learning_rate": 4.99815044801767e-06, "log_odds_chosen": 0.31487271189689636, "log_odds_ratio": -0.5565272569656372, "logits/chosen": 0.12278047204017639, "logits/rejected": -0.3144981861114502, "logps/chosen": -1.7826478481292725, "logps/rejected": -2.0509307384490967, "loss": 2.0075, "nll_loss": 1.951832890510559, "rewards/accuracies": 0.875, "rewards/chosen": -0.1782647967338562, "rewards/margins": 0.026828289031982422, "rewards/rejected": -0.20509308576583862, "step": 122 }, { "epoch": 0.3396617190196755, "grad_norm": 0.4089741110801697, "learning_rate": 4.9978293917163225e-06, "log_odds_chosen": 0.1852492243051529, "log_odds_ratio": -0.6086881756782532, "logits/chosen": 0.059370554983615875, "logits/rejected": -0.2047065794467926, "logps/chosen": -1.9199140071868896, "logps/rejected": -2.0819344520568848, "loss": 2.1197, "nll_loss": 2.0588343143463135, "rewards/accuracies": 0.75, "rewards/chosen": -0.1919914036989212, "rewards/margins": 0.016202054917812347, "rewards/rejected": -0.20819345116615295, "step": 123 }, { "epoch": 0.3424231964100794, "grad_norm": 0.39316678047180176, "learning_rate": 4.997482666353287e-06, "log_odds_chosen": 0.3156554102897644, "log_odds_ratio": -0.5562319755554199, "logits/chosen": -0.036051761358976364, "logits/rejected": -0.20177382230758667, "logps/chosen": -1.8153338432312012, "logps/rejected": -2.0875864028930664, "loss": 2.0271, "nll_loss": 1.9714655876159668, "rewards/accuracies": 0.75, "rewards/chosen": -0.18153339624404907, "rewards/margins": 0.027225244790315628, "rewards/rejected": -0.2087586373090744, "step": 124 }, { "epoch": 0.34518467380048323, "grad_norm": 0.37397369742393494, "learning_rate": 4.997110275491702e-06, "log_odds_chosen": 0.47729045152664185, "log_odds_ratio": -0.4998089373111725, "logits/chosen": 0.06823548674583435, "logits/rejected": -0.6389302015304565, "logps/chosen": -1.725682258605957, "logps/rejected": -2.133211612701416, "loss": 1.9424, "nll_loss": 1.8924020528793335, "rewards/accuracies": 1.0, "rewards/chosen": -0.17256823182106018, "rewards/margins": 0.04075293242931366, "rewards/rejected": -0.21332114934921265, "step": 125 }, { "epoch": 0.3479461511908871, "grad_norm": 0.36338478326797485, "learning_rate": 4.9967122229584614e-06, "log_odds_chosen": 0.46506431698799133, "log_odds_ratio": -0.49022355675697327, "logits/chosen": 0.062216617166996, "logits/rejected": -0.4744764268398285, "logps/chosen": -1.7237508296966553, "logps/rejected": -2.1211488246917725, "loss": 1.9404, "nll_loss": 1.891422152519226, "rewards/accuracies": 1.0, "rewards/chosen": -0.17237509787082672, "rewards/margins": 0.03973980247974396, "rewards/rejected": -0.2121148705482483, "step": 126 }, { "epoch": 0.350707628581291, "grad_norm": 0.3949665427207947, "learning_rate": 4.996288512844169e-06, "log_odds_chosen": 0.208975687623024, "log_odds_ratio": -0.603169322013855, "logits/chosen": 0.15239077806472778, "logits/rejected": -0.14533445239067078, "logps/chosen": -1.8812259435653687, "logps/rejected": -2.058507204055786, "loss": 2.1066, "nll_loss": 2.0462498664855957, "rewards/accuracies": 0.625, "rewards/chosen": -0.18812260031700134, "rewards/margins": 0.01772812381386757, "rewards/rejected": -0.2058507204055786, "step": 127 }, { "epoch": 0.35346910597169484, "grad_norm": 0.3274211585521698, "learning_rate": 4.995839149503103e-06, "log_odds_chosen": 0.3612444996833801, "log_odds_ratio": -0.5432137250900269, "logits/chosen": 0.15867066383361816, "logits/rejected": -0.37713369727134705, "logps/chosen": -1.7504801750183105, "logps/rejected": -2.060889959335327, "loss": 1.9636, "nll_loss": 1.9092310667037964, "rewards/accuracies": 0.875, "rewards/chosen": -0.17504799365997314, "rewards/margins": 0.03104100562632084, "rewards/rejected": -0.20608901977539062, "step": 128 }, { "epoch": 0.35623058336209873, "grad_norm": 0.3613954186439514, "learning_rate": 4.995364137553166e-06, "log_odds_chosen": 0.2536008656024933, "log_odds_ratio": -0.5839525461196899, "logits/chosen": 0.025492653250694275, "logits/rejected": -0.21177223324775696, "logps/chosen": -1.7983472347259521, "logps/rejected": -2.013514995574951, "loss": 2.0121, "nll_loss": 1.953747034072876, "rewards/accuracies": 0.875, "rewards/chosen": -0.1798347383737564, "rewards/margins": 0.021516768261790276, "rewards/rejected": -0.20135147869586945, "step": 129 }, { "epoch": 0.35899206075250256, "grad_norm": 0.33361607789993286, "learning_rate": 4.994863481875842e-06, "log_odds_chosen": 0.4904846251010895, "log_odds_ratio": -0.4827660620212555, "logits/chosen": 0.06080925092101097, "logits/rejected": -0.42839962244033813, "logps/chosen": -1.7444632053375244, "logps/rejected": -2.1671104431152344, "loss": 1.9554, "nll_loss": 1.9071358442306519, "rewards/accuracies": 1.0, "rewards/chosen": -0.17444632947444916, "rewards/margins": 0.04226472228765488, "rewards/rejected": -0.21671104431152344, "step": 130 }, { "epoch": 0.36175353814290645, "grad_norm": 0.38626936078071594, "learning_rate": 4.99433718761614e-06, "log_odds_chosen": 0.4231716990470886, "log_odds_ratio": -0.5083524584770203, "logits/chosen": 0.15689942240715027, "logits/rejected": -0.27488887310028076, "logps/chosen": -1.7832603454589844, "logps/rejected": -2.1480154991149902, "loss": 1.9872, "nll_loss": 1.9363242387771606, "rewards/accuracies": 1.0, "rewards/chosen": -0.1783260554075241, "rewards/margins": 0.036475520581007004, "rewards/rejected": -0.21480156481266022, "step": 131 }, { "epoch": 0.36451501553331034, "grad_norm": 0.3283027410507202, "learning_rate": 4.993785260182552e-06, "log_odds_chosen": 0.4390679895877838, "log_odds_ratio": -0.5016705393791199, "logits/chosen": 0.13413819670677185, "logits/rejected": -0.5499831438064575, "logps/chosen": -1.7239453792572021, "logps/rejected": -2.099557399749756, "loss": 1.9329, "nll_loss": 1.882704734802246, "rewards/accuracies": 1.0, "rewards/chosen": -0.1723945438861847, "rewards/margins": 0.037561215460300446, "rewards/rejected": -0.20995575189590454, "step": 132 }, { "epoch": 0.36727649292371417, "grad_norm": 0.29404011368751526, "learning_rate": 4.993207705246983e-06, "log_odds_chosen": 0.33607491850852966, "log_odds_ratio": -0.5448395609855652, "logits/chosen": 0.09872237592935562, "logits/rejected": -0.3414973318576813, "logps/chosen": -1.7783253192901611, "logps/rejected": -2.0684056282043457, "loss": 1.9796, "nll_loss": 1.925091028213501, "rewards/accuracies": 1.0, "rewards/chosen": -0.17783252894878387, "rewards/margins": 0.029008038341999054, "rewards/rejected": -0.20684055984020233, "step": 133 }, { "epoch": 0.37003797031411806, "grad_norm": 0.303219199180603, "learning_rate": 4.992604528744705e-06, "log_odds_chosen": 0.37202000617980957, "log_odds_ratio": -0.5278716087341309, "logits/chosen": 0.0883011743426323, "logits/rejected": -0.6590756177902222, "logps/chosen": -1.7620971202850342, "logps/rejected": -2.0787768363952637, "loss": 1.9649, "nll_loss": 1.9120779037475586, "rewards/accuracies": 1.0, "rewards/chosen": -0.1762097179889679, "rewards/margins": 0.03166797757148743, "rewards/rejected": -0.20787768065929413, "step": 134 }, { "epoch": 0.37279944770452195, "grad_norm": 0.3061475455760956, "learning_rate": 4.9919757368742895e-06, "log_odds_chosen": 0.43486303091049194, "log_odds_ratio": -0.5079333782196045, "logits/chosen": 0.13486391305923462, "logits/rejected": -0.44202011823654175, "logps/chosen": -1.683558464050293, "logps/rejected": -2.0501785278320312, "loss": 1.8894, "nll_loss": 1.8386157751083374, "rewards/accuracies": 1.0, "rewards/chosen": -0.16835585236549377, "rewards/margins": 0.0366620309650898, "rewards/rejected": -0.20501787960529327, "step": 135 }, { "epoch": 0.3755609250949258, "grad_norm": 0.3027716279029846, "learning_rate": 4.991321336097546e-06, "log_odds_chosen": 0.3602009117603302, "log_odds_ratio": -0.5340627431869507, "logits/chosen": 0.09272217750549316, "logits/rejected": -0.37184590101242065, "logps/chosen": -1.8042678833007812, "logps/rejected": -2.1151676177978516, "loss": 1.9799, "nll_loss": 1.9264534711837769, "rewards/accuracies": 1.0, "rewards/chosen": -0.18042679131031036, "rewards/margins": 0.031089982017874718, "rewards/rejected": -0.21151678264141083, "step": 136 }, { "epoch": 0.37832240248532967, "grad_norm": 0.2968030273914337, "learning_rate": 4.990641333139455e-06, "log_odds_chosen": 0.3864700496196747, "log_odds_ratio": -0.5240495204925537, "logits/chosen": 0.15802569687366486, "logits/rejected": -0.40965431928634644, "logps/chosen": -1.6786091327667236, "logps/rejected": -2.002851724624634, "loss": 1.8816, "nll_loss": 1.8292038440704346, "rewards/accuracies": 1.0, "rewards/chosen": -0.16786089539527893, "rewards/margins": 0.032424286007881165, "rewards/rejected": -0.2002851963043213, "step": 137 }, { "epoch": 0.3810838798757335, "grad_norm": 0.2909601330757141, "learning_rate": 4.989935734988098e-06, "log_odds_chosen": 0.2820569574832916, "log_odds_ratio": -0.5659909844398499, "logits/chosen": 0.20683330297470093, "logits/rejected": -0.5098833441734314, "logps/chosen": -1.7661350965499878, "logps/rejected": -2.0048882961273193, "loss": 1.9544, "nll_loss": 1.8978168964385986, "rewards/accuracies": 0.875, "rewards/chosen": -0.17661352455615997, "rewards/margins": 0.023875314742326736, "rewards/rejected": -0.2004888355731964, "step": 138 }, { "epoch": 0.3838453572661374, "grad_norm": 0.3169582188129425, "learning_rate": 4.989204548894589e-06, "log_odds_chosen": 0.27746957540512085, "log_odds_ratio": -0.5728934407234192, "logits/chosen": 0.26577499508857727, "logits/rejected": -0.32127267122268677, "logps/chosen": -1.846369743347168, "logps/rejected": -2.0836005210876465, "loss": 2.0312, "nll_loss": 1.973919153213501, "rewards/accuracies": 0.75, "rewards/chosen": -0.18463698029518127, "rewards/margins": 0.023723063990473747, "rewards/rejected": -0.20836003124713898, "step": 139 }, { "epoch": 0.3866068346565413, "grad_norm": 0.2856157720088959, "learning_rate": 4.988447782372996e-06, "log_odds_chosen": 0.2677723169326782, "log_odds_ratio": -0.5744235515594482, "logits/chosen": 0.08042767643928528, "logits/rejected": -0.489827036857605, "logps/chosen": -1.6772124767303467, "logps/rejected": -1.9024029970169067, "loss": 1.8706, "nll_loss": 1.8131682872772217, "rewards/accuracies": 0.875, "rewards/chosen": -0.1677212417125702, "rewards/margins": 0.022519057616591454, "rewards/rejected": -0.1902403086423874, "step": 140 }, { "epoch": 0.3893683120469451, "grad_norm": 0.26832813024520874, "learning_rate": 4.9876654432002655e-06, "log_odds_chosen": 0.3760983943939209, "log_odds_ratio": -0.5314284563064575, "logits/chosen": 0.22744062542915344, "logits/rejected": -0.6199472546577454, "logps/chosen": -1.7124302387237549, "logps/rejected": -2.035111665725708, "loss": 1.8915, "nll_loss": 1.838379979133606, "rewards/accuracies": 0.875, "rewards/chosen": -0.17124304175376892, "rewards/margins": 0.032268136739730835, "rewards/rejected": -0.20351116359233856, "step": 141 }, { "epoch": 0.392129789437349, "grad_norm": 0.2787982225418091, "learning_rate": 4.986857539416144e-06, "log_odds_chosen": 0.4650447368621826, "log_odds_ratio": -0.49311909079551697, "logits/chosen": 0.13020291924476624, "logits/rejected": -0.6509877443313599, "logps/chosen": -1.6808263063430786, "logps/rejected": -2.0721030235290527, "loss": 1.8625, "nll_loss": 1.813225269317627, "rewards/accuracies": 0.875, "rewards/chosen": -0.16808265447616577, "rewards/margins": 0.039127662777900696, "rewards/rejected": -0.20721031725406647, "step": 142 }, { "epoch": 0.39489126682775283, "grad_norm": 0.2971290349960327, "learning_rate": 4.986024079323092e-06, "log_odds_chosen": 0.20813080668449402, "log_odds_ratio": -0.5995200276374817, "logits/chosen": 0.1712779998779297, "logits/rejected": -0.2880682647228241, "logps/chosen": -1.6474640369415283, "logps/rejected": -1.8199793100357056, "loss": 1.873, "nll_loss": 1.8130154609680176, "rewards/accuracies": 0.875, "rewards/chosen": -0.16474640369415283, "rewards/margins": 0.017251526936888695, "rewards/rejected": -0.18199792504310608, "step": 143 }, { "epoch": 0.3976527442181567, "grad_norm": 0.28045693039894104, "learning_rate": 4.985165071486201e-06, "log_odds_chosen": 0.3738395869731903, "log_odds_ratio": -0.5252775549888611, "logits/chosen": 0.10901468247175217, "logits/rejected": -0.5548363924026489, "logps/chosen": -1.7671691179275513, "logps/rejected": -2.086446762084961, "loss": 1.9397, "nll_loss": 1.8872014284133911, "rewards/accuracies": 1.0, "rewards/chosen": -0.1767169088125229, "rewards/margins": 0.031927771866321564, "rewards/rejected": -0.20864468812942505, "step": 144 }, { "epoch": 0.4004142216085606, "grad_norm": 0.28425726294517517, "learning_rate": 4.984280524733107e-06, "log_odds_chosen": 0.44026607275009155, "log_odds_ratio": -0.5015271902084351, "logits/chosen": 0.08948800712823868, "logits/rejected": -0.4244663119316101, "logps/chosen": -1.698014736175537, "logps/rejected": -2.068047285079956, "loss": 1.8842, "nll_loss": 1.8340739011764526, "rewards/accuracies": 1.0, "rewards/chosen": -0.1698014885187149, "rewards/margins": 0.037003256380558014, "rewards/rejected": -0.20680472254753113, "step": 145 }, { "epoch": 0.40317569899896444, "grad_norm": 0.27275997400283813, "learning_rate": 4.983370448153896e-06, "log_odds_chosen": 0.34414318203926086, "log_odds_ratio": -0.53827965259552, "logits/chosen": 0.12077900767326355, "logits/rejected": -0.45666831731796265, "logps/chosen": -1.7140839099884033, "logps/rejected": -2.004852056503296, "loss": 1.8944, "nll_loss": 1.8405368328094482, "rewards/accuracies": 1.0, "rewards/chosen": -0.17140838503837585, "rewards/margins": 0.029076814651489258, "rewards/rejected": -0.2004851996898651, "step": 146 }, { "epoch": 0.4059371763893683, "grad_norm": 0.2800770699977875, "learning_rate": 4.9824348511010115e-06, "log_odds_chosen": 0.33458200097084045, "log_odds_ratio": -0.5480896830558777, "logits/chosen": 0.24399758875370026, "logits/rejected": -0.5033762454986572, "logps/chosen": -1.8108501434326172, "logps/rejected": -2.099644422531128, "loss": 1.983, "nll_loss": 1.9281907081604004, "rewards/accuracies": 1.0, "rewards/chosen": -0.1810850203037262, "rewards/margins": 0.02887941710650921, "rewards/rejected": -0.20996443927288055, "step": 147 }, { "epoch": 0.40869865377977216, "grad_norm": 0.25797146558761597, "learning_rate": 4.981473743189163e-06, "log_odds_chosen": 0.5845157504081726, "log_odds_ratio": -0.45159903168678284, "logits/chosen": 0.005943242460489273, "logits/rejected": -0.7690349221229553, "logps/chosen": -1.5877741575241089, "logps/rejected": -2.0803065299987793, "loss": 1.7773, "nll_loss": 1.73209547996521, "rewards/accuracies": 1.0, "rewards/chosen": -0.1587774157524109, "rewards/margins": 0.04925324767827988, "rewards/rejected": -0.20803067088127136, "step": 148 }, { "epoch": 0.41146013117017605, "grad_norm": 0.27324819564819336, "learning_rate": 4.98048713429522e-06, "log_odds_chosen": 0.3932475745677948, "log_odds_ratio": -0.5176883339881897, "logits/chosen": 0.20163178443908691, "logits/rejected": -0.283429354429245, "logps/chosen": -1.7028628587722778, "logps/rejected": -2.03279709815979, "loss": 1.8839, "nll_loss": 1.8321328163146973, "rewards/accuracies": 1.0, "rewards/chosen": -0.17028628289699554, "rewards/margins": 0.032993413507938385, "rewards/rejected": -0.20327968895435333, "step": 149 }, { "epoch": 0.41422160856057993, "grad_norm": 0.30329883098602295, "learning_rate": 4.979475034558115e-06, "log_odds_chosen": 0.3415054678916931, "log_odds_ratio": -0.5400842428207397, "logits/chosen": 0.27814820408821106, "logits/rejected": -0.420722633600235, "logps/chosen": -1.834177017211914, "logps/rejected": -2.1293845176696777, "loss": 2.0005, "nll_loss": 1.9464527368545532, "rewards/accuracies": 1.0, "rewards/chosen": -0.18341770768165588, "rewards/margins": 0.029520753771066666, "rewards/rejected": -0.21293845772743225, "step": 150 }, { "epoch": 0.41698308595098377, "grad_norm": 0.2631519138813019, "learning_rate": 4.978437454378741e-06, "log_odds_chosen": 0.31423458456993103, "log_odds_ratio": -0.5544165968894958, "logits/chosen": 0.25030598044395447, "logits/rejected": -0.45834630727767944, "logps/chosen": -1.7350115776062012, "logps/rejected": -1.998989462852478, "loss": 1.9237, "nll_loss": 1.8682823181152344, "rewards/accuracies": 0.875, "rewards/chosen": -0.1735011488199234, "rewards/margins": 0.026397792622447014, "rewards/rejected": -0.19989895820617676, "step": 151 }, { "epoch": 0.41974456334138766, "grad_norm": 0.28724023699760437, "learning_rate": 4.977374404419838e-06, "log_odds_chosen": 0.34610986709594727, "log_odds_ratio": -0.5409384965896606, "logits/chosen": 0.23072689771652222, "logits/rejected": -0.4912022054195404, "logps/chosen": -1.7317731380462646, "logps/rejected": -2.0284037590026855, "loss": 1.9099, "nll_loss": 1.8557640314102173, "rewards/accuracies": 1.0, "rewards/chosen": -0.1731773018836975, "rewards/margins": 0.029663076624274254, "rewards/rejected": -0.2028403878211975, "step": 152 }, { "epoch": 0.4225060407317915, "grad_norm": 0.2612636089324951, "learning_rate": 4.976285895605888e-06, "log_odds_chosen": 0.4580684304237366, "log_odds_ratio": -0.4960138499736786, "logits/chosen": 0.18824972212314606, "logits/rejected": -0.5351172685623169, "logps/chosen": -1.6807944774627686, "logps/rejected": -2.0613059997558594, "loss": 1.8584, "nll_loss": 1.8087894916534424, "rewards/accuracies": 1.0, "rewards/chosen": -0.1680794358253479, "rewards/margins": 0.038051165640354156, "rewards/rejected": -0.20613062381744385, "step": 153 }, { "epoch": 0.4252675181221954, "grad_norm": 0.2724829614162445, "learning_rate": 4.9751719391230055e-06, "log_odds_chosen": 0.24739423394203186, "log_odds_ratio": -0.5839247703552246, "logits/chosen": 0.24238519370555878, "logits/rejected": -0.4675644040107727, "logps/chosen": -1.6522984504699707, "logps/rejected": -1.8578873872756958, "loss": 1.8598, "nll_loss": 1.8013783693313599, "rewards/accuracies": 0.875, "rewards/chosen": -0.16522985696792603, "rewards/margins": 0.02055889368057251, "rewards/rejected": -0.18578873574733734, "step": 154 }, { "epoch": 0.42802899551259926, "grad_norm": 0.27581965923309326, "learning_rate": 4.974032546418816e-06, "log_odds_chosen": 0.42455926537513733, "log_odds_ratio": -0.5114243030548096, "logits/chosen": 0.2685522735118866, "logits/rejected": -0.49730056524276733, "logps/chosen": -1.6981408596038818, "logps/rejected": -2.056694984436035, "loss": 1.8954, "nll_loss": 1.8442238569259644, "rewards/accuracies": 1.0, "rewards/chosen": -0.1698141098022461, "rewards/margins": 0.035855405032634735, "rewards/rejected": -0.20566949248313904, "step": 155 }, { "epoch": 0.4307904729030031, "grad_norm": 0.2632681131362915, "learning_rate": 4.9728677292023405e-06, "log_odds_chosen": 0.17779600620269775, "log_odds_ratio": -0.6146007776260376, "logits/chosen": 0.3470558226108551, "logits/rejected": -0.47237658500671387, "logps/chosen": -1.8082042932510376, "logps/rejected": -1.9588541984558105, "loss": 1.9785, "nll_loss": 1.9170793294906616, "rewards/accuracies": 0.875, "rewards/chosen": -0.18082045018672943, "rewards/margins": 0.015064971521496773, "rewards/rejected": -0.19588540494441986, "step": 156 }, { "epoch": 0.433551950293407, "grad_norm": 0.25831714272499084, "learning_rate": 4.971677499443882e-06, "log_odds_chosen": 0.30535322427749634, "log_odds_ratio": -0.5562926530838013, "logits/chosen": 0.22543753683567047, "logits/rejected": -0.5224329829216003, "logps/chosen": -1.7110515832901, "logps/rejected": -1.9678313732147217, "loss": 1.8946, "nll_loss": 1.838951587677002, "rewards/accuracies": 0.875, "rewards/chosen": -0.17110514640808105, "rewards/margins": 0.025677980855107307, "rewards/rejected": -0.1967831254005432, "step": 157 }, { "epoch": 0.4363134276838108, "grad_norm": 0.24926158785820007, "learning_rate": 4.97046186937489e-06, "log_odds_chosen": 0.2828327715396881, "log_odds_ratio": -0.5760643482208252, "logits/chosen": 0.1486169695854187, "logits/rejected": -0.5880769491195679, "logps/chosen": -1.690523386001587, "logps/rejected": -1.9291006326675415, "loss": 1.8625, "nll_loss": 1.804898977279663, "rewards/accuracies": 0.875, "rewards/chosen": -0.1690523475408554, "rewards/margins": 0.023857703432440758, "rewards/rejected": -0.1929100602865219, "step": 158 }, { "epoch": 0.4390749050742147, "grad_norm": 0.26394572854042053, "learning_rate": 4.9692208514878445e-06, "log_odds_chosen": 0.21585100889205933, "log_odds_ratio": -0.593828558921814, "logits/chosen": 0.3207181692123413, "logits/rejected": -0.3474537134170532, "logps/chosen": -1.8141443729400635, "logps/rejected": -1.9961789846420288, "loss": 1.9958, "nll_loss": 1.9364500045776367, "rewards/accuracies": 1.0, "rewards/chosen": -0.1814144253730774, "rewards/margins": 0.01820346899330616, "rewards/rejected": -0.1996179074048996, "step": 159 }, { "epoch": 0.4418363824646186, "grad_norm": 0.26699379086494446, "learning_rate": 4.967954458536126e-06, "log_odds_chosen": 0.34597551822662354, "log_odds_ratio": -0.5411776900291443, "logits/chosen": 0.29670047760009766, "logits/rejected": -0.25073421001434326, "logps/chosen": -1.6443804502487183, "logps/rejected": -1.9350707530975342, "loss": 1.833, "nll_loss": 1.7788795232772827, "rewards/accuracies": 1.0, "rewards/chosen": -0.16443803906440735, "rewards/margins": 0.029069023206830025, "rewards/rejected": -0.19350707530975342, "step": 160 }, { "epoch": 0.4445978598550224, "grad_norm": 0.2796018719673157, "learning_rate": 4.96666270353388e-06, "log_odds_chosen": 0.3187919855117798, "log_odds_ratio": -0.5501160025596619, "logits/chosen": 0.24211375415325165, "logits/rejected": -0.5377556681632996, "logps/chosen": -1.704611897468567, "logps/rejected": -1.9735724925994873, "loss": 1.8668, "nll_loss": 1.8117769956588745, "rewards/accuracies": 1.0, "rewards/chosen": -0.17046120762825012, "rewards/margins": 0.02689606510102749, "rewards/rejected": -0.19735725224018097, "step": 161 }, { "epoch": 0.4473593372454263, "grad_norm": 0.2614901065826416, "learning_rate": 4.965345599755888e-06, "log_odds_chosen": 0.3870270848274231, "log_odds_ratio": -0.5230943560600281, "logits/chosen": 0.20484450459480286, "logits/rejected": -0.6116334199905396, "logps/chosen": -1.7666045427322388, "logps/rejected": -2.0972108840942383, "loss": 1.9248, "nll_loss": 1.8725322484970093, "rewards/accuracies": 1.0, "rewards/chosen": -0.1766604632139206, "rewards/margins": 0.03306063264608383, "rewards/rejected": -0.20972107350826263, "step": 162 }, { "epoch": 0.45012081463583015, "grad_norm": 0.253493994474411, "learning_rate": 4.964003160737429e-06, "log_odds_chosen": 0.35205185413360596, "log_odds_ratio": -0.5439150333404541, "logits/chosen": 0.24726349115371704, "logits/rejected": -0.7317219972610474, "logps/chosen": -1.6719884872436523, "logps/rejected": -1.9640897512435913, "loss": 1.8488, "nll_loss": 1.7943804264068604, "rewards/accuracies": 0.875, "rewards/chosen": -0.16719885170459747, "rewards/margins": 0.029210133478045464, "rewards/rejected": -0.19640898704528809, "step": 163 }, { "epoch": 0.45288229202623403, "grad_norm": 0.23761652410030365, "learning_rate": 4.9626354002741424e-06, "log_odds_chosen": 0.38894832134246826, "log_odds_ratio": -0.5217959880828857, "logits/chosen": 0.2658887505531311, "logits/rejected": -0.6289904117584229, "logps/chosen": -1.6253769397735596, "logps/rejected": -1.9467687606811523, "loss": 1.7998, "nll_loss": 1.747636318206787, "rewards/accuracies": 1.0, "rewards/chosen": -0.16253769397735596, "rewards/margins": 0.03213919699192047, "rewards/rejected": -0.19467687606811523, "step": 164 }, { "epoch": 0.4556437694166379, "grad_norm": 0.2694617509841919, "learning_rate": 4.9612423324218816e-06, "log_odds_chosen": 0.383113294839859, "log_odds_ratio": -0.523147463798523, "logits/chosen": 0.29493993520736694, "logits/rejected": -0.8007028102874756, "logps/chosen": -1.798284649848938, "logps/rejected": -2.128204584121704, "loss": 1.9598, "nll_loss": 1.907509684562683, "rewards/accuracies": 1.0, "rewards/chosen": -0.1798284649848938, "rewards/margins": 0.03299199044704437, "rewards/rejected": -0.21282047033309937, "step": 165 }, { "epoch": 0.45840524680704176, "grad_norm": 0.27424952387809753, "learning_rate": 4.959823971496575e-06, "log_odds_chosen": 0.3006168007850647, "log_odds_ratio": -0.5665806531906128, "logits/chosen": 0.21390017867088318, "logits/rejected": -0.4561406373977661, "logps/chosen": -1.7170822620391846, "logps/rejected": -1.9759535789489746, "loss": 1.8963, "nll_loss": 1.8396110534667969, "rewards/accuracies": 0.75, "rewards/chosen": -0.17170822620391846, "rewards/margins": 0.025887131690979004, "rewards/rejected": -0.19759535789489746, "step": 166 }, { "epoch": 0.46116672419744564, "grad_norm": 0.28332701325416565, "learning_rate": 4.958380332074074e-06, "log_odds_chosen": 0.3598293364048004, "log_odds_ratio": -0.5315272212028503, "logits/chosen": 0.3352913558483124, "logits/rejected": -0.34136950969696045, "logps/chosen": -1.6895995140075684, "logps/rejected": -1.9940197467803955, "loss": 1.8818, "nll_loss": 1.8286209106445312, "rewards/accuracies": 1.0, "rewards/chosen": -0.16895994544029236, "rewards/margins": 0.030442016199231148, "rewards/rejected": -0.19940195977687836, "step": 167 }, { "epoch": 0.4639282015878495, "grad_norm": 0.2582882344722748, "learning_rate": 4.95691142899001e-06, "log_odds_chosen": 0.4415035843849182, "log_odds_ratio": -0.5072202682495117, "logits/chosen": 0.23933230340480804, "logits/rejected": -0.5575604438781738, "logps/chosen": -1.6840240955352783, "logps/rejected": -2.0607402324676514, "loss": 1.8592, "nll_loss": 1.8084406852722168, "rewards/accuracies": 1.0, "rewards/chosen": -0.16840240359306335, "rewards/margins": 0.037671614438295364, "rewards/rejected": -0.20607402920722961, "step": 168 }, { "epoch": 0.46668967897825336, "grad_norm": 0.2700425982475281, "learning_rate": 4.955417277339633e-06, "log_odds_chosen": 0.33803892135620117, "log_odds_ratio": -0.54216468334198, "logits/chosen": 0.2614552974700928, "logits/rejected": -0.5953667163848877, "logps/chosen": -1.7099006175994873, "logps/rejected": -1.994602918624878, "loss": 1.8657, "nll_loss": 1.8114964962005615, "rewards/accuracies": 0.875, "rewards/chosen": -0.17099007964134216, "rewards/margins": 0.028470242395997047, "rewards/rejected": -0.19946029782295227, "step": 169 }, { "epoch": 0.46945115636865725, "grad_norm": 0.2598535716533661, "learning_rate": 4.953897892477664e-06, "log_odds_chosen": 0.37775442004203796, "log_odds_ratio": -0.5379059910774231, "logits/chosen": 0.19298075139522552, "logits/rejected": -0.7334005832672119, "logps/chosen": -1.6427866220474243, "logps/rejected": -1.9675416946411133, "loss": 1.8149, "nll_loss": 1.7611249685287476, "rewards/accuracies": 0.75, "rewards/chosen": -0.16427867114543915, "rewards/margins": 0.032475508749485016, "rewards/rejected": -0.19675418734550476, "step": 170 }, { "epoch": 0.4722126337590611, "grad_norm": 0.26744216680526733, "learning_rate": 4.952353290018132e-06, "log_odds_chosen": 0.37955784797668457, "log_odds_ratio": -0.5295799374580383, "logits/chosen": 0.27365049719810486, "logits/rejected": -0.16527330875396729, "logps/chosen": -1.7106455564498901, "logps/rejected": -2.0328187942504883, "loss": 1.8951, "nll_loss": 1.842104196548462, "rewards/accuracies": 1.0, "rewards/chosen": -0.171064555644989, "rewards/margins": 0.032217323780059814, "rewards/rejected": -0.20328189432621002, "step": 171 }, { "epoch": 0.474974111149465, "grad_norm": 0.298951655626297, "learning_rate": 4.950783485834218e-06, "log_odds_chosen": 0.4051695168018341, "log_odds_ratio": -0.5118554830551147, "logits/chosen": 0.23584626615047455, "logits/rejected": -0.5780523419380188, "logps/chosen": -1.6387077569961548, "logps/rejected": -1.975178599357605, "loss": 1.8228, "nll_loss": 1.7716267108917236, "rewards/accuracies": 1.0, "rewards/chosen": -0.16387078166007996, "rewards/margins": 0.0336470901966095, "rewards/rejected": -0.19751787185668945, "step": 172 }, { "epoch": 0.4777355885398688, "grad_norm": 0.23760944604873657, "learning_rate": 4.949188496058089e-06, "log_odds_chosen": 0.308282732963562, "log_odds_ratio": -0.5617873668670654, "logits/chosen": 0.2496112585067749, "logits/rejected": -0.595442533493042, "logps/chosen": -1.6498539447784424, "logps/rejected": -1.9084365367889404, "loss": 1.8249, "nll_loss": 1.768703818321228, "rewards/accuracies": 0.875, "rewards/chosen": -0.16498540341854095, "rewards/margins": 0.02585826814174652, "rewards/rejected": -0.19084367156028748, "step": 173 }, { "epoch": 0.4804970659302727, "grad_norm": 0.27792298793792725, "learning_rate": 4.947568337080733e-06, "log_odds_chosen": 0.12510845065116882, "log_odds_ratio": -0.6408029198646545, "logits/chosen": 0.19703012704849243, "logits/rejected": -0.5250687599182129, "logps/chosen": -1.7701200246810913, "logps/rejected": -1.877974033355713, "loss": 1.9402, "nll_loss": 1.876126766204834, "rewards/accuracies": 0.625, "rewards/chosen": -0.17701202630996704, "rewards/margins": 0.010785380378365517, "rewards/rejected": -0.1877973973751068, "step": 174 }, { "epoch": 0.4832585433206766, "grad_norm": 0.26834768056869507, "learning_rate": 4.945923025551789e-06, "log_odds_chosen": 0.3476155996322632, "log_odds_ratio": -0.535990834236145, "logits/chosen": 0.25689467787742615, "logits/rejected": -0.616847813129425, "logps/chosen": -1.7164323329925537, "logps/rejected": -2.0102245807647705, "loss": 1.8654, "nll_loss": 1.8118302822113037, "rewards/accuracies": 1.0, "rewards/chosen": -0.1716432273387909, "rewards/margins": 0.029379239305853844, "rewards/rejected": -0.2010224610567093, "step": 175 }, { "epoch": 0.4860200207110804, "grad_norm": 0.2374398410320282, "learning_rate": 4.944252578379379e-06, "log_odds_chosen": 0.2779533267021179, "log_odds_ratio": -0.5700355172157288, "logits/chosen": 0.19578704237937927, "logits/rejected": -0.759753942489624, "logps/chosen": -1.6441093683242798, "logps/rejected": -1.8757362365722656, "loss": 1.8093, "nll_loss": 1.752274990081787, "rewards/accuracies": 0.875, "rewards/chosen": -0.16441094875335693, "rewards/margins": 0.023162685334682465, "rewards/rejected": -0.1875736117362976, "step": 176 }, { "epoch": 0.4887814981014843, "grad_norm": 0.259815514087677, "learning_rate": 4.942557012729933e-06, "log_odds_chosen": 0.3541259169578552, "log_odds_ratio": -0.5466777086257935, "logits/chosen": 0.26096147298812866, "logits/rejected": -0.6699164509773254, "logps/chosen": -1.7012748718261719, "logps/rejected": -1.9999008178710938, "loss": 1.8742, "nll_loss": 1.819519281387329, "rewards/accuracies": 0.875, "rewards/chosen": -0.1701274961233139, "rewards/margins": 0.02986258640885353, "rewards/rejected": -0.19999009370803833, "step": 177 }, { "epoch": 0.49154297549188813, "grad_norm": 0.2553425133228302, "learning_rate": 4.940836346028011e-06, "log_odds_chosen": 0.5353389978408813, "log_odds_ratio": -0.46747249364852905, "logits/chosen": 0.19104404747486115, "logits/rejected": -0.7688809633255005, "logps/chosen": -1.6563342809677124, "logps/rejected": -2.104980945587158, "loss": 1.8241, "nll_loss": 1.7773433923721313, "rewards/accuracies": 1.0, "rewards/chosen": -0.165633425116539, "rewards/margins": 0.04486468806862831, "rewards/rejected": -0.21049810945987701, "step": 178 }, { "epoch": 0.494304452882292, "grad_norm": 0.2521730661392212, "learning_rate": 4.9390905959561254e-06, "log_odds_chosen": 0.37528765201568604, "log_odds_ratio": -0.5245440602302551, "logits/chosen": 0.32407164573669434, "logits/rejected": -0.7876682281494141, "logps/chosen": -1.7645964622497559, "logps/rejected": -2.0821878910064697, "loss": 1.9004, "nll_loss": 1.847988247871399, "rewards/accuracies": 1.0, "rewards/chosen": -0.1764596402645111, "rewards/margins": 0.03175915777683258, "rewards/rejected": -0.2082187980413437, "step": 179 }, { "epoch": 0.4970659302726959, "grad_norm": 0.263175904750824, "learning_rate": 4.937319780454559e-06, "log_odds_chosen": 0.2128462940454483, "log_odds_ratio": -0.5969148874282837, "logits/chosen": 0.281113862991333, "logits/rejected": -0.6618841290473938, "logps/chosen": -1.717787504196167, "logps/rejected": -1.8978548049926758, "loss": 1.8852, "nll_loss": 1.8254907131195068, "rewards/accuracies": 0.75, "rewards/chosen": -0.17177876830101013, "rewards/margins": 0.018006734549999237, "rewards/rejected": -0.18978549540042877, "step": 180 }, { "epoch": 0.49982740766309974, "grad_norm": 0.2627811133861542, "learning_rate": 4.935523917721182e-06, "log_odds_chosen": 0.32824423909187317, "log_odds_ratio": -0.5476853847503662, "logits/chosen": 0.3165915608406067, "logits/rejected": -0.5085580945014954, "logps/chosen": -1.7242554426193237, "logps/rejected": -1.9997884035110474, "loss": 1.9077, "nll_loss": 1.852901816368103, "rewards/accuracies": 0.875, "rewards/chosen": -0.1724255532026291, "rewards/margins": 0.027553284540772438, "rewards/rejected": -0.19997884333133698, "step": 181 }, { "epoch": 0.5025888850535036, "grad_norm": 0.2430126965045929, "learning_rate": 4.933703026211262e-06, "log_odds_chosen": 0.37905246019363403, "log_odds_ratio": -0.528121292591095, "logits/chosen": 0.3522907495498657, "logits/rejected": -0.9080434441566467, "logps/chosen": -1.7351850271224976, "logps/rejected": -2.059718608856201, "loss": 1.8866, "nll_loss": 1.8337947130203247, "rewards/accuracies": 1.0, "rewards/chosen": -0.17351850867271423, "rewards/margins": 0.03245338052511215, "rewards/rejected": -0.2059718817472458, "step": 182 }, { "epoch": 0.5053503624439075, "grad_norm": 0.2511756420135498, "learning_rate": 4.931857124637276e-06, "log_odds_chosen": 0.3801102638244629, "log_odds_ratio": -0.5302396416664124, "logits/chosen": 0.29899683594703674, "logits/rejected": -0.5272728204727173, "logps/chosen": -1.6140385866165161, "logps/rejected": -1.9330276250839233, "loss": 1.7807, "nll_loss": 1.7276661396026611, "rewards/accuracies": 0.875, "rewards/chosen": -0.16140387952327728, "rewards/margins": 0.031898900866508484, "rewards/rejected": -0.19330278038978577, "step": 183 }, { "epoch": 0.5081118398343114, "grad_norm": 0.33855873346328735, "learning_rate": 4.92998623196872e-06, "log_odds_chosen": 0.3525073528289795, "log_odds_ratio": -0.5346016883850098, "logits/chosen": 0.18561817705631256, "logits/rejected": -0.9478727579116821, "logps/chosen": -1.6743947267532349, "logps/rejected": -1.9694938659667969, "loss": 1.8278, "nll_loss": 1.7743027210235596, "rewards/accuracies": 1.0, "rewards/chosen": -0.16743947565555573, "rewards/margins": 0.029509922489523888, "rewards/rejected": -0.19694939255714417, "step": 184 }, { "epoch": 0.5108733172247152, "grad_norm": 0.24746204912662506, "learning_rate": 4.92809036743191e-06, "log_odds_chosen": 0.4261741042137146, "log_odds_ratio": -0.5064104199409485, "logits/chosen": 0.21586617827415466, "logits/rejected": -0.7548010349273682, "logps/chosen": -1.5876227617263794, "logps/rejected": -1.9392573833465576, "loss": 1.7364, "nll_loss": 1.6857661008834839, "rewards/accuracies": 1.0, "rewards/chosen": -0.15876229107379913, "rewards/margins": 0.035163454711437225, "rewards/rejected": -0.19392573833465576, "step": 185 }, { "epoch": 0.5136347946151191, "grad_norm": 0.256515771150589, "learning_rate": 4.926169550509787e-06, "log_odds_chosen": 0.2986356019973755, "log_odds_ratio": -0.5593284964561462, "logits/chosen": 0.3137073516845703, "logits/rejected": -0.5803498029708862, "logps/chosen": -1.7011560201644897, "logps/rejected": -1.9512310028076172, "loss": 1.8413, "nll_loss": 1.7853296995162964, "rewards/accuracies": 0.875, "rewards/chosen": -0.1701156049966812, "rewards/margins": 0.025007493793964386, "rewards/rejected": -0.1951231062412262, "step": 186 }, { "epoch": 0.516396272005523, "grad_norm": 0.26078805327415466, "learning_rate": 4.924223800941718e-06, "log_odds_chosen": 0.17883820831775665, "log_odds_ratio": -0.6130943894386292, "logits/chosen": 0.2678496241569519, "logits/rejected": -0.6333581209182739, "logps/chosen": -1.6809213161468506, "logps/rejected": -1.829279899597168, "loss": 1.8416, "nll_loss": 1.7803112268447876, "rewards/accuracies": 0.75, "rewards/chosen": -0.16809213161468506, "rewards/margins": 0.014835860580205917, "rewards/rejected": -0.18292801082134247, "step": 187 }, { "epoch": 0.5191577493959268, "grad_norm": 0.3295902609825134, "learning_rate": 4.9222531387232885e-06, "log_odds_chosen": 0.2605365216732025, "log_odds_ratio": -0.5762373208999634, "logits/chosen": 0.3522469103336334, "logits/rejected": -0.6649764180183411, "logps/chosen": -1.7675739526748657, "logps/rejected": -1.9888498783111572, "loss": 1.9141, "nll_loss": 1.8564814329147339, "rewards/accuracies": 0.875, "rewards/chosen": -0.17675741016864777, "rewards/margins": 0.022127564996480942, "rewards/rejected": -0.1988849639892578, "step": 188 }, { "epoch": 0.5219192267863307, "grad_norm": 0.2640776038169861, "learning_rate": 4.920257584106104e-06, "log_odds_chosen": 0.26505714654922485, "log_odds_ratio": -0.5721904039382935, "logits/chosen": 0.2708177864551544, "logits/rejected": -0.7849910855293274, "logps/chosen": -1.7207906246185303, "logps/rejected": -1.9439318180084229, "loss": 1.8705, "nll_loss": 1.8132869005203247, "rewards/accuracies": 1.0, "rewards/chosen": -0.17207907140254974, "rewards/margins": 0.022314125671982765, "rewards/rejected": -0.19439318776130676, "step": 189 }, { "epoch": 0.5246807041767345, "grad_norm": 0.2825080156326294, "learning_rate": 4.918237157597574e-06, "log_odds_chosen": 0.343722403049469, "log_odds_ratio": -0.5388997197151184, "logits/chosen": 0.17086902260780334, "logits/rejected": -0.6501764059066772, "logps/chosen": -1.6524600982666016, "logps/rejected": -1.9383505582809448, "loss": 1.8225, "nll_loss": 1.768616795539856, "rewards/accuracies": 1.0, "rewards/chosen": -0.16524602472782135, "rewards/margins": 0.0285890344530344, "rewards/rejected": -0.1938350349664688, "step": 190 }, { "epoch": 0.5274421815671384, "grad_norm": 0.27104640007019043, "learning_rate": 4.916191879960708e-06, "log_odds_chosen": 0.38741055130958557, "log_odds_ratio": -0.5215877294540405, "logits/chosen": 0.2510019838809967, "logits/rejected": -0.7692076563835144, "logps/chosen": -1.7732622623443604, "logps/rejected": -2.102111577987671, "loss": 1.9158, "nll_loss": 1.8636667728424072, "rewards/accuracies": 1.0, "rewards/chosen": -0.17732621729373932, "rewards/margins": 0.03288493677973747, "rewards/rejected": -0.2102111577987671, "step": 191 }, { "epoch": 0.5302036589575423, "grad_norm": 0.23343509435653687, "learning_rate": 4.914121772213898e-06, "log_odds_chosen": 0.36985254287719727, "log_odds_ratio": -0.5267210006713867, "logits/chosen": 0.28792741894721985, "logits/rejected": -0.6279172301292419, "logps/chosen": -1.6510688066482544, "logps/rejected": -1.9592763185501099, "loss": 1.7875, "nll_loss": 1.7348387241363525, "rewards/accuracies": 1.0, "rewards/chosen": -0.1651068925857544, "rewards/margins": 0.03082074038684368, "rewards/rejected": -0.19592763483524323, "step": 192 }, { "epoch": 0.5329651363479462, "grad_norm": 0.2630840837955475, "learning_rate": 4.912026855630703e-06, "log_odds_chosen": 0.4057270884513855, "log_odds_ratio": -0.5138529539108276, "logits/chosen": 0.2938230633735657, "logits/rejected": -0.5338449478149414, "logps/chosen": -1.6583514213562012, "logps/rejected": -1.9981603622436523, "loss": 1.804, "nll_loss": 1.7526516914367676, "rewards/accuracies": 1.0, "rewards/chosen": -0.1658351570367813, "rewards/margins": 0.03398089110851288, "rewards/rejected": -0.1998160481452942, "step": 193 }, { "epoch": 0.5357266137383501, "grad_norm": 0.32278531789779663, "learning_rate": 4.909907151739634e-06, "log_odds_chosen": 0.36290958523750305, "log_odds_ratio": -0.5369880795478821, "logits/chosen": 0.2148250937461853, "logits/rejected": -0.6797182559967041, "logps/chosen": -1.6431670188903809, "logps/rejected": -1.9445240497589111, "loss": 1.7982, "nll_loss": 1.7444565296173096, "rewards/accuracies": 0.875, "rewards/chosen": -0.16431671380996704, "rewards/margins": 0.030135709792375565, "rewards/rejected": -0.1944524198770523, "step": 194 }, { "epoch": 0.5384880911287538, "grad_norm": 0.27246755361557007, "learning_rate": 4.907762682323926e-06, "log_odds_chosen": 0.5421338081359863, "log_odds_ratio": -0.5049780011177063, "logits/chosen": 0.2726617455482483, "logits/rejected": -0.6698200702667236, "logps/chosen": -1.649804711341858, "logps/rejected": -2.089289426803589, "loss": 1.8247, "nll_loss": 1.774214506149292, "rewards/accuracies": 0.875, "rewards/chosen": -0.1649804711341858, "rewards/margins": 0.04394847899675369, "rewards/rejected": -0.2089289426803589, "step": 195 }, { "epoch": 0.5412495685191577, "grad_norm": 0.25183457136154175, "learning_rate": 4.905593469421323e-06, "log_odds_chosen": 0.23532448709011078, "log_odds_ratio": -0.5886967778205872, "logits/chosen": 0.26476889848709106, "logits/rejected": -1.049080729484558, "logps/chosen": -1.6535186767578125, "logps/rejected": -1.8511910438537598, "loss": 1.8103, "nll_loss": 1.7514796257019043, "rewards/accuracies": 0.75, "rewards/chosen": -0.16535183787345886, "rewards/margins": 0.01976725272834301, "rewards/rejected": -0.18511910736560822, "step": 196 }, { "epoch": 0.5440110459095616, "grad_norm": 0.4395514130592346, "learning_rate": 4.90339953532384e-06, "log_odds_chosen": 0.3145284354686737, "log_odds_ratio": -0.5530616641044617, "logits/chosen": 0.2953311502933502, "logits/rejected": -0.7025068402290344, "logps/chosen": -1.677915096282959, "logps/rejected": -1.9411128759384155, "loss": 1.8327, "nll_loss": 1.7774040699005127, "rewards/accuracies": 0.875, "rewards/chosen": -0.16779151558876038, "rewards/margins": 0.02631976269185543, "rewards/rejected": -0.19411127269268036, "step": 197 }, { "epoch": 0.5467725232999655, "grad_norm": 0.34831514954566956, "learning_rate": 4.901180902577549e-06, "log_odds_chosen": 0.3553253412246704, "log_odds_ratio": -0.5363969206809998, "logits/chosen": 0.32768774032592773, "logits/rejected": -0.8499129414558411, "logps/chosen": -1.6655187606811523, "logps/rejected": -1.9629868268966675, "loss": 1.8077, "nll_loss": 1.7540751695632935, "rewards/accuracies": 1.0, "rewards/chosen": -0.1665518581867218, "rewards/margins": 0.02974681742489338, "rewards/rejected": -0.19629870355129242, "step": 198 }, { "epoch": 0.5495340006903694, "grad_norm": 0.2638852894306183, "learning_rate": 4.8989375939823305e-06, "log_odds_chosen": 0.2459593415260315, "log_odds_ratio": -0.5942656993865967, "logits/chosen": 0.28430262207984924, "logits/rejected": -0.9709011316299438, "logps/chosen": -1.6272318363189697, "logps/rejected": -1.8355400562286377, "loss": 1.7824, "nll_loss": 1.7229478359222412, "rewards/accuracies": 0.875, "rewards/chosen": -0.16272318363189697, "rewards/margins": 0.020830810070037842, "rewards/rejected": -0.183554008603096, "step": 199 }, { "epoch": 0.5522954780807732, "grad_norm": 0.25321170687675476, "learning_rate": 4.896669632591652e-06, "log_odds_chosen": 0.4053685665130615, "log_odds_ratio": -0.5148746371269226, "logits/chosen": 0.26351821422576904, "logits/rejected": -0.6612182259559631, "logps/chosen": -1.6538773775100708, "logps/rejected": -1.9941370487213135, "loss": 1.8067, "nll_loss": 1.7551857233047485, "rewards/accuracies": 1.0, "rewards/chosen": -0.16538773477077484, "rewards/margins": 0.03402596712112427, "rewards/rejected": -0.1994137167930603, "step": 200 }, { "epoch": 0.5550569554711771, "grad_norm": 0.25087103247642517, "learning_rate": 4.894377041712327e-06, "log_odds_chosen": 0.2785293161869049, "log_odds_ratio": -0.5666120052337646, "logits/chosen": 0.31340447068214417, "logits/rejected": -0.8899152278900146, "logps/chosen": -1.7040454149246216, "logps/rejected": -1.9381954669952393, "loss": 1.8502, "nll_loss": 1.7935274839401245, "rewards/accuracies": 0.875, "rewards/chosen": -0.17040453851222992, "rewards/margins": 0.023415017873048782, "rewards/rejected": -0.1938195526599884, "step": 201 }, { "epoch": 0.557818432861581, "grad_norm": 0.25045764446258545, "learning_rate": 4.892059844904273e-06, "log_odds_chosen": 0.3241802752017975, "log_odds_ratio": -0.5491434335708618, "logits/chosen": 0.21476463973522186, "logits/rejected": -0.7644988894462585, "logps/chosen": -1.6328078508377075, "logps/rejected": -1.9039433002471924, "loss": 1.7911, "nll_loss": 1.7361786365509033, "rewards/accuracies": 1.0, "rewards/chosen": -0.16328078508377075, "rewards/margins": 0.027113551273941994, "rewards/rejected": -0.190394327044487, "step": 202 }, { "epoch": 0.5605799102519848, "grad_norm": 0.23429127037525177, "learning_rate": 4.889718065980272e-06, "log_odds_chosen": 0.3891683518886566, "log_odds_ratio": -0.5242507457733154, "logits/chosen": 0.25736698508262634, "logits/rejected": -0.9445868730545044, "logps/chosen": -1.7043213844299316, "logps/rejected": -2.0325913429260254, "loss": 1.8321, "nll_loss": 1.7796692848205566, "rewards/accuracies": 1.0, "rewards/chosen": -0.17043213546276093, "rewards/margins": 0.032826997339725494, "rewards/rejected": -0.20325914025306702, "step": 203 }, { "epoch": 0.5633413876423887, "grad_norm": 0.23629231750965118, "learning_rate": 4.8873517290057265e-06, "log_odds_chosen": 0.5146141052246094, "log_odds_ratio": -0.470738023519516, "logits/chosen": 0.21375544369220734, "logits/rejected": -0.9923663139343262, "logps/chosen": -1.6708019971847534, "logps/rejected": -2.108680248260498, "loss": 1.8157, "nll_loss": 1.7685816287994385, "rewards/accuracies": 1.0, "rewards/chosen": -0.16708020865917206, "rewards/margins": 0.04378781467676163, "rewards/rejected": -0.2108680158853531, "step": 204 }, { "epoch": 0.5661028650327925, "grad_norm": 0.2618404030799866, "learning_rate": 4.88496085829841e-06, "log_odds_chosen": 0.3119449019432068, "log_odds_ratio": -0.5510252714157104, "logits/chosen": 0.24412189424037933, "logits/rejected": -0.44529297947883606, "logps/chosen": -1.6430044174194336, "logps/rejected": -1.902493953704834, "loss": 1.787, "nll_loss": 1.73191237449646, "rewards/accuracies": 1.0, "rewards/chosen": -0.16430042684078217, "rewards/margins": 0.02594897150993347, "rewards/rejected": -0.19024939835071564, "step": 205 }, { "epoch": 0.5688643424231964, "grad_norm": 0.2525455057621002, "learning_rate": 4.882545478428219e-06, "log_odds_chosen": 0.4682312607765198, "log_odds_ratio": -0.5022274851799011, "logits/chosen": 0.31566041707992554, "logits/rejected": -0.9525866508483887, "logps/chosen": -1.62907075881958, "logps/rejected": -2.020961046218872, "loss": 1.7875, "nll_loss": 1.7372711896896362, "rewards/accuracies": 0.875, "rewards/chosen": -0.16290709376335144, "rewards/margins": 0.039189018309116364, "rewards/rejected": -0.2020961046218872, "step": 206 }, { "epoch": 0.5716258198136003, "grad_norm": 0.29324284195899963, "learning_rate": 4.880105614216917e-06, "log_odds_chosen": 0.3941385746002197, "log_odds_ratio": -0.5201365947723389, "logits/chosen": 0.3187817335128784, "logits/rejected": -0.8300870656967163, "logps/chosen": -1.800098180770874, "logps/rejected": -2.1378254890441895, "loss": 1.9198, "nll_loss": 1.8678245544433594, "rewards/accuracies": 1.0, "rewards/chosen": -0.18000982701778412, "rewards/margins": 0.033772725611925125, "rewards/rejected": -0.21378254890441895, "step": 207 }, { "epoch": 0.5743872972040042, "grad_norm": 0.2698252499103546, "learning_rate": 4.8776412907378845e-06, "log_odds_chosen": 0.32584843039512634, "log_odds_ratio": -0.5465385913848877, "logits/chosen": 0.34170591831207275, "logits/rejected": -0.9024736285209656, "logps/chosen": -1.7152948379516602, "logps/rejected": -1.9904066324234009, "loss": 1.8523, "nll_loss": 1.7976171970367432, "rewards/accuracies": 1.0, "rewards/chosen": -0.17152947187423706, "rewards/margins": 0.02751118130981922, "rewards/rejected": -0.19904065132141113, "step": 208 }, { "epoch": 0.577148774594408, "grad_norm": 0.24523906409740448, "learning_rate": 4.875152533315859e-06, "log_odds_chosen": 0.3781871199607849, "log_odds_ratio": -0.5249034762382507, "logits/chosen": 0.22711794078350067, "logits/rejected": -0.8147018551826477, "logps/chosen": -1.5907987356185913, "logps/rejected": -1.901723861694336, "loss": 1.7362, "nll_loss": 1.6837434768676758, "rewards/accuracies": 1.0, "rewards/chosen": -0.15907986462116241, "rewards/margins": 0.031092505902051926, "rewards/rejected": -0.19017238914966583, "step": 209 }, { "epoch": 0.5799102519848118, "grad_norm": 0.25919055938720703, "learning_rate": 4.872639367526672e-06, "log_odds_chosen": 0.39655864238739014, "log_odds_ratio": -0.5207105875015259, "logits/chosen": 0.26906242966651917, "logits/rejected": -0.7278945446014404, "logps/chosen": -1.6198142766952515, "logps/rejected": -1.9519634246826172, "loss": 1.7641, "nll_loss": 1.7119861841201782, "rewards/accuracies": 1.0, "rewards/chosen": -0.16198144853115082, "rewards/margins": 0.03321490436792374, "rewards/rejected": -0.19519636034965515, "step": 210 }, { "epoch": 0.5826717293752157, "grad_norm": 0.2771226763725281, "learning_rate": 4.870101819196992e-06, "log_odds_chosen": 0.33355456590652466, "log_odds_ratio": -0.5614390969276428, "logits/chosen": 0.23215994238853455, "logits/rejected": -0.7148041129112244, "logps/chosen": -1.6759493350982666, "logps/rejected": -1.9613184928894043, "loss": 1.8249, "nll_loss": 1.7687922716140747, "rewards/accuracies": 0.875, "rewards/chosen": -0.16759493947029114, "rewards/margins": 0.028536932542920113, "rewards/rejected": -0.1961318552494049, "step": 211 }, { "epoch": 0.5854332067656196, "grad_norm": 0.24981357157230377, "learning_rate": 4.8675399144040535e-06, "log_odds_chosen": 0.44918757677078247, "log_odds_ratio": -0.5007237792015076, "logits/chosen": 0.2401760071516037, "logits/rejected": -1.0861955881118774, "logps/chosen": -1.5890363454818726, "logps/rejected": -1.9614393711090088, "loss": 1.7223, "nll_loss": 1.6722639799118042, "rewards/accuracies": 1.0, "rewards/chosen": -0.15890364348888397, "rewards/margins": 0.03724028915166855, "rewards/rejected": -0.19614392518997192, "step": 212 }, { "epoch": 0.5881946841560235, "grad_norm": 0.2642802894115448, "learning_rate": 4.864953679475392e-06, "log_odds_chosen": 0.35757988691329956, "log_odds_ratio": -0.5399623513221741, "logits/chosen": 0.2965427339076996, "logits/rejected": -1.087183952331543, "logps/chosen": -1.645439863204956, "logps/rejected": -1.9395672082901, "loss": 1.7654, "nll_loss": 1.7114168405532837, "rewards/accuracies": 0.75, "rewards/chosen": -0.1645439863204956, "rewards/margins": 0.02941274270415306, "rewards/rejected": -0.19395673274993896, "step": 213 }, { "epoch": 0.5909561615464274, "grad_norm": 0.3119313418865204, "learning_rate": 4.862343140988573e-06, "log_odds_chosen": 0.3508715331554413, "log_odds_ratio": -0.5419243574142456, "logits/chosen": 0.2440934181213379, "logits/rejected": -0.9715545773506165, "logps/chosen": -1.691695213317871, "logps/rejected": -1.9900875091552734, "loss": 1.8129, "nll_loss": 1.75870680809021, "rewards/accuracies": 0.875, "rewards/chosen": -0.16916951537132263, "rewards/margins": 0.029839247465133667, "rewards/rejected": -0.1990087330341339, "step": 214 }, { "epoch": 0.5937176389368312, "grad_norm": 0.2919383943080902, "learning_rate": 4.859708325770919e-06, "log_odds_chosen": 0.32950735092163086, "log_odds_ratio": -0.5485372543334961, "logits/chosen": 0.314483106136322, "logits/rejected": -0.6885538697242737, "logps/chosen": -1.6899092197418213, "logps/rejected": -1.96530020236969, "loss": 1.8463, "nll_loss": 1.791460633277893, "rewards/accuracies": 0.875, "rewards/chosen": -0.16899092495441437, "rewards/margins": 0.02753911167383194, "rewards/rejected": -0.1965300291776657, "step": 215 }, { "epoch": 0.596479116327235, "grad_norm": 0.28394845128059387, "learning_rate": 4.857049260899233e-06, "log_odds_chosen": 0.37926068902015686, "log_odds_ratio": -0.5276373624801636, "logits/chosen": 0.21555066108703613, "logits/rejected": -0.8712125420570374, "logps/chosen": -1.565875768661499, "logps/rejected": -1.873572826385498, "loss": 1.7163, "nll_loss": 1.6635123491287231, "rewards/accuracies": 0.875, "rewards/chosen": -0.15658757090568542, "rewards/margins": 0.030769716948270798, "rewards/rejected": -0.18735727667808533, "step": 216 }, { "epoch": 0.5992405937176389, "grad_norm": 0.2573164105415344, "learning_rate": 4.854365973699519e-06, "log_odds_chosen": 0.26477721333503723, "log_odds_ratio": -0.5851221680641174, "logits/chosen": 0.2476685345172882, "logits/rejected": -0.7407121658325195, "logps/chosen": -1.657826542854309, "logps/rejected": -1.8721026182174683, "loss": 1.8129, "nll_loss": 1.7544105052947998, "rewards/accuracies": 0.625, "rewards/chosen": -0.16578267514705658, "rewards/margins": 0.021427594125270844, "rewards/rejected": -0.18721026182174683, "step": 217 }, { "epoch": 0.6020020711080428, "grad_norm": 0.2776831388473511, "learning_rate": 4.851658491746707e-06, "log_odds_chosen": 0.3577430248260498, "log_odds_ratio": -0.5342705249786377, "logits/chosen": 0.17239025235176086, "logits/rejected": -0.9049993753433228, "logps/chosen": -1.6406420469284058, "logps/rejected": -1.9406909942626953, "loss": 1.773, "nll_loss": 1.719523549079895, "rewards/accuracies": 0.875, "rewards/chosen": -0.1640642285346985, "rewards/margins": 0.030004894360899925, "rewards/rejected": -0.19406910240650177, "step": 218 }, { "epoch": 0.6047635484984467, "grad_norm": 0.2520454227924347, "learning_rate": 4.848926842864361e-06, "log_odds_chosen": 0.4466186463832855, "log_odds_ratio": -0.4973817467689514, "logits/chosen": 0.17908504605293274, "logits/rejected": -1.1680846214294434, "logps/chosen": -1.6161878108978271, "logps/rejected": -1.989192247390747, "loss": 1.7485, "nll_loss": 1.6987885236740112, "rewards/accuracies": 1.0, "rewards/chosen": -0.16161878407001495, "rewards/margins": 0.03730044513940811, "rewards/rejected": -0.19891922175884247, "step": 219 }, { "epoch": 0.6075250258888505, "grad_norm": 0.2567070722579956, "learning_rate": 4.846171055124401e-06, "log_odds_chosen": 0.28004616498947144, "log_odds_ratio": -0.5727553963661194, "logits/chosen": 0.2623513638973236, "logits/rejected": -0.7589821815490723, "logps/chosen": -1.692448377609253, "logps/rejected": -1.9293184280395508, "loss": 1.8107, "nll_loss": 1.7533810138702393, "rewards/accuracies": 1.0, "rewards/chosen": -0.16924485564231873, "rewards/margins": 0.023687003180384636, "rewards/rejected": -0.19293184578418732, "step": 220 }, { "epoch": 0.6102865032792544, "grad_norm": 0.2756041884422302, "learning_rate": 4.843391156846811e-06, "log_odds_chosen": 0.4713705778121948, "log_odds_ratio": -0.4867982268333435, "logits/chosen": 0.29524314403533936, "logits/rejected": -0.8037136793136597, "logps/chosen": -1.6270312070846558, "logps/rejected": -2.019744396209717, "loss": 1.7549, "nll_loss": 1.7062143087387085, "rewards/accuracies": 1.0, "rewards/chosen": -0.16270311176776886, "rewards/margins": 0.039271317422389984, "rewards/rejected": -0.20197445154190063, "step": 221 }, { "epoch": 0.6130479806696583, "grad_norm": 0.2521502375602722, "learning_rate": 4.8405871765993435e-06, "log_odds_chosen": 0.5144533514976501, "log_odds_ratio": -0.478085458278656, "logits/chosen": 0.1983458399772644, "logits/rejected": -0.9259991645812988, "logps/chosen": -1.681555151939392, "logps/rejected": -2.111989974975586, "loss": 1.8047, "nll_loss": 1.7569189071655273, "rewards/accuracies": 1.0, "rewards/chosen": -0.1681555211544037, "rewards/margins": 0.043043479323387146, "rewards/rejected": -0.21119900047779083, "step": 222 }, { "epoch": 0.6158094580600622, "grad_norm": 0.2711774706840515, "learning_rate": 4.837759143197237e-06, "log_odds_chosen": 0.29049259424209595, "log_odds_ratio": -0.5618168711662292, "logits/chosen": 0.18880629539489746, "logits/rejected": -1.0936201810836792, "logps/chosen": -1.6453745365142822, "logps/rejected": -1.8913843631744385, "loss": 1.7765, "nll_loss": 1.7203593254089355, "rewards/accuracies": 1.0, "rewards/chosen": -0.1645374447107315, "rewards/margins": 0.02460099384188652, "rewards/rejected": -0.18913844227790833, "step": 223 }, { "epoch": 0.618570935450466, "grad_norm": 0.2559034824371338, "learning_rate": 4.834907085702909e-06, "log_odds_chosen": 0.39943575859069824, "log_odds_ratio": -0.516412615776062, "logits/chosen": 0.22049731016159058, "logits/rejected": -0.741055428981781, "logps/chosen": -1.5809171199798584, "logps/rejected": -1.9103319644927979, "loss": 1.7174, "nll_loss": 1.6657910346984863, "rewards/accuracies": 1.0, "rewards/chosen": -0.1580917090177536, "rewards/margins": 0.032941486686468124, "rewards/rejected": -0.19103318452835083, "step": 224 }, { "epoch": 0.6213324128408698, "grad_norm": 0.2551354169845581, "learning_rate": 4.832031033425663e-06, "log_odds_chosen": 0.45995259284973145, "log_odds_ratio": -0.4912871718406677, "logits/chosen": 0.14968952536582947, "logits/rejected": -1.1501446962356567, "logps/chosen": -1.5380923748016357, "logps/rejected": -1.914685845375061, "loss": 1.6661, "nll_loss": 1.6169726848602295, "rewards/accuracies": 1.0, "rewards/chosen": -0.15380924940109253, "rewards/margins": 0.03765934333205223, "rewards/rejected": -0.19146858155727386, "step": 225 }, { "epoch": 0.6240938902312737, "grad_norm": 0.2664376199245453, "learning_rate": 4.829131015921386e-06, "log_odds_chosen": 0.3153410255908966, "log_odds_ratio": -0.5497522950172424, "logits/chosen": 0.353019654750824, "logits/rejected": -0.7054316997528076, "logps/chosen": -1.708608865737915, "logps/rejected": -1.975217342376709, "loss": 1.8398, "nll_loss": 1.784870982170105, "rewards/accuracies": 1.0, "rewards/chosen": -0.1708608865737915, "rewards/margins": 0.02666083350777626, "rewards/rejected": -0.19752173125743866, "step": 226 }, { "epoch": 0.6268553676216776, "grad_norm": 0.25821202993392944, "learning_rate": 4.826207062992245e-06, "log_odds_chosen": 0.44117486476898193, "log_odds_ratio": -0.5039249062538147, "logits/chosen": 0.22692835330963135, "logits/rejected": -0.9342468976974487, "logps/chosen": -1.7786613702774048, "logps/rejected": -2.1593728065490723, "loss": 1.8966, "nll_loss": 1.846253752708435, "rewards/accuracies": 1.0, "rewards/chosen": -0.1778661459684372, "rewards/margins": 0.038071148097515106, "rewards/rejected": -0.2159372866153717, "step": 227 }, { "epoch": 0.6296168450120815, "grad_norm": 0.28877097368240356, "learning_rate": 4.82325920468638e-06, "log_odds_chosen": 0.2263374626636505, "log_odds_ratio": -0.5921541452407837, "logits/chosen": 0.26566582918167114, "logits/rejected": -0.8801894187927246, "logps/chosen": -1.7435269355773926, "logps/rejected": -1.9335991144180298, "loss": 1.8735, "nll_loss": 1.8142577409744263, "rewards/accuracies": 0.875, "rewards/chosen": -0.17435269057750702, "rewards/margins": 0.01900722086429596, "rewards/rejected": -0.19335991144180298, "step": 228 }, { "epoch": 0.6323783224024854, "grad_norm": 0.286994069814682, "learning_rate": 4.820287471297598e-06, "log_odds_chosen": 0.34056615829467773, "log_odds_ratio": -0.5455049872398376, "logits/chosen": 0.15769648551940918, "logits/rejected": -0.9189969897270203, "logps/chosen": -1.612159252166748, "logps/rejected": -1.8956931829452515, "loss": 1.7497, "nll_loss": 1.6951847076416016, "rewards/accuracies": 0.875, "rewards/chosen": -0.1612159162759781, "rewards/margins": 0.028353393077850342, "rewards/rejected": -0.18956929445266724, "step": 229 }, { "epoch": 0.6351397997928891, "grad_norm": 0.25925716757774353, "learning_rate": 4.817291893365055e-06, "log_odds_chosen": 0.4459936022758484, "log_odds_ratio": -0.50335294008255, "logits/chosen": 0.03054118901491165, "logits/rejected": -1.0507367849349976, "logps/chosen": -1.4849401712417603, "logps/rejected": -1.8426207304000854, "loss": 1.6416, "nll_loss": 1.591292142868042, "rewards/accuracies": 1.0, "rewards/chosen": -0.14849400520324707, "rewards/margins": 0.035768061876297, "rewards/rejected": -0.18426206707954407, "step": 230 }, { "epoch": 0.637901277183293, "grad_norm": 0.2912357449531555, "learning_rate": 4.81427250167295e-06, "log_odds_chosen": 0.20216652750968933, "log_odds_ratio": -0.6004579663276672, "logits/chosen": 0.10540774464607239, "logits/rejected": -1.1034070253372192, "logps/chosen": -1.6417537927627563, "logps/rejected": -1.8084138631820679, "loss": 1.7758, "nll_loss": 1.715754508972168, "rewards/accuracies": 0.75, "rewards/chosen": -0.1641753911972046, "rewards/margins": 0.01666601374745369, "rewards/rejected": -0.1808413863182068, "step": 231 }, { "epoch": 0.6406627545736969, "grad_norm": 0.28148511052131653, "learning_rate": 4.811229327250204e-06, "log_odds_chosen": 0.40823695063591003, "log_odds_ratio": -0.5194427967071533, "logits/chosen": 0.19764533638954163, "logits/rejected": -1.0543047189712524, "logps/chosen": -1.68423330783844, "logps/rejected": -2.0337371826171875, "loss": 1.8128, "nll_loss": 1.760822057723999, "rewards/accuracies": 0.875, "rewards/chosen": -0.1684233546257019, "rewards/margins": 0.034950368106365204, "rewards/rejected": -0.20337370038032532, "step": 232 }, { "epoch": 0.6434242319641008, "grad_norm": 0.26084744930267334, "learning_rate": 4.8081624013701435e-06, "log_odds_chosen": 0.38514161109924316, "log_odds_ratio": -0.5289287567138672, "logits/chosen": 0.16184748709201813, "logits/rejected": -1.2881039381027222, "logps/chosen": -1.6542613506317139, "logps/rejected": -1.9764081239700317, "loss": 1.7775, "nll_loss": 1.7246254682540894, "rewards/accuracies": 1.0, "rewards/chosen": -0.1654261350631714, "rewards/margins": 0.03221467137336731, "rewards/rejected": -0.1976408213376999, "step": 233 }, { "epoch": 0.6461857093545047, "grad_norm": 0.26910293102264404, "learning_rate": 4.805071755550177e-06, "log_odds_chosen": 0.37266138195991516, "log_odds_ratio": -0.5266379117965698, "logits/chosen": 0.2432277947664261, "logits/rejected": -1.2453206777572632, "logps/chosen": -1.676849603652954, "logps/rejected": -1.9903044700622559, "loss": 1.7904, "nll_loss": 1.7377511262893677, "rewards/accuracies": 1.0, "rewards/chosen": -0.16768495738506317, "rewards/margins": 0.031345486640930176, "rewards/rejected": -0.19903044402599335, "step": 234 }, { "epoch": 0.6489471867449085, "grad_norm": 0.26596587896347046, "learning_rate": 4.8019574215514705e-06, "log_odds_chosen": 0.320311963558197, "log_odds_ratio": -0.5477744340896606, "logits/chosen": 0.1768263727426529, "logits/rejected": -0.8458906412124634, "logps/chosen": -1.6462191343307495, "logps/rejected": -1.9113428592681885, "loss": 1.7877, "nll_loss": 1.7328777313232422, "rewards/accuracies": 1.0, "rewards/chosen": -0.16462190449237823, "rewards/margins": 0.026512393727898598, "rewards/rejected": -0.19113430380821228, "step": 235 }, { "epoch": 0.6517086641353124, "grad_norm": 0.23039527237415314, "learning_rate": 4.7988194313786275e-06, "log_odds_chosen": 0.3400653004646301, "log_odds_ratio": -0.5446557998657227, "logits/chosen": 0.17479516565799713, "logits/rejected": -0.9590482711791992, "logps/chosen": -1.5938714742660522, "logps/rejected": -1.8736517429351807, "loss": 1.706, "nll_loss": 1.6515262126922607, "rewards/accuracies": 1.0, "rewards/chosen": -0.15938714146614075, "rewards/margins": 0.027978049591183662, "rewards/rejected": -0.18736517429351807, "step": 236 }, { "epoch": 0.6544701415257163, "grad_norm": 0.25597652792930603, "learning_rate": 4.795657817279349e-06, "log_odds_chosen": 0.35750117897987366, "log_odds_ratio": -0.5432247519493103, "logits/chosen": 0.10542917996644974, "logits/rejected": -1.0094666481018066, "logps/chosen": -1.5345871448516846, "logps/rejected": -1.8278987407684326, "loss": 1.6735, "nll_loss": 1.619143009185791, "rewards/accuracies": 0.875, "rewards/chosen": -0.15345871448516846, "rewards/margins": 0.02933114767074585, "rewards/rejected": -0.1827898770570755, "step": 237 }, { "epoch": 0.6572316189161201, "grad_norm": 0.27119508385658264, "learning_rate": 4.7924726117441135e-06, "log_odds_chosen": 0.4193371534347534, "log_odds_ratio": -0.5230008363723755, "logits/chosen": 0.08888234198093414, "logits/rejected": -1.0837329626083374, "logps/chosen": -1.6472194194793701, "logps/rejected": -1.9990077018737793, "loss": 1.7734, "nll_loss": 1.7211425304412842, "rewards/accuracies": 0.875, "rewards/chosen": -0.16472195088863373, "rewards/margins": 0.035178832709789276, "rewards/rejected": -0.1999007910490036, "step": 238 }, { "epoch": 0.659993096306524, "grad_norm": 0.26720085740089417, "learning_rate": 4.789263847505835e-06, "log_odds_chosen": 0.4661468267440796, "log_odds_ratio": -0.49137797951698303, "logits/chosen": 0.14774960279464722, "logits/rejected": -0.9653093814849854, "logps/chosen": -1.634422779083252, "logps/rejected": -2.0227272510528564, "loss": 1.7557, "nll_loss": 1.706547737121582, "rewards/accuracies": 1.0, "rewards/chosen": -0.16344228386878967, "rewards/margins": 0.038830459117889404, "rewards/rejected": -0.20227274298667908, "step": 239 }, { "epoch": 0.6627545736969278, "grad_norm": 0.26344889402389526, "learning_rate": 4.786031557539532e-06, "log_odds_chosen": 0.620901882648468, "log_odds_ratio": -0.43870899081230164, "logits/chosen": 0.1005333811044693, "logits/rejected": -1.179240345954895, "logps/chosen": -1.5813204050064087, "logps/rejected": -2.10603666305542, "loss": 1.7039, "nll_loss": 1.66001558303833, "rewards/accuracies": 1.0, "rewards/chosen": -0.15813204646110535, "rewards/margins": 0.05247163027524948, "rewards/rejected": -0.21060366928577423, "step": 240 }, { "epoch": 0.6655160510873317, "grad_norm": 0.2564525306224823, "learning_rate": 4.782775775061983e-06, "log_odds_chosen": 0.3930521011352539, "log_odds_ratio": -0.5189710855484009, "logits/chosen": 0.17282657325267792, "logits/rejected": -0.9231711626052856, "logps/chosen": -1.5853371620178223, "logps/rejected": -1.9094042778015137, "loss": 1.704, "nll_loss": 1.6520962715148926, "rewards/accuracies": 1.0, "rewards/chosen": -0.1585337519645691, "rewards/margins": 0.03240669518709183, "rewards/rejected": -0.19094042479991913, "step": 241 }, { "epoch": 0.6682775284777356, "grad_norm": 0.23007996380329132, "learning_rate": 4.779496533531393e-06, "log_odds_chosen": 0.49815472960472107, "log_odds_ratio": -0.47664713859558105, "logits/chosen": 0.16657163202762604, "logits/rejected": -1.0556104183197021, "logps/chosen": -1.5395145416259766, "logps/rejected": -1.9480966329574585, "loss": 1.6541, "nll_loss": 1.6063947677612305, "rewards/accuracies": 1.0, "rewards/chosen": -0.15395145118236542, "rewards/margins": 0.04085820913314819, "rewards/rejected": -0.19480964541435242, "step": 242 }, { "epoch": 0.6710390058681395, "grad_norm": 0.2536675035953522, "learning_rate": 4.7761938666470405e-06, "log_odds_chosen": 0.3562057316303253, "log_odds_ratio": -0.5365646481513977, "logits/chosen": 0.18961578607559204, "logits/rejected": -1.129898190498352, "logps/chosen": -1.6646479368209839, "logps/rejected": -1.964529037475586, "loss": 1.7902, "nll_loss": 1.736580729484558, "rewards/accuracies": 1.0, "rewards/chosen": -0.16646480560302734, "rewards/margins": 0.029988115653395653, "rewards/rejected": -0.19645293056964874, "step": 243 }, { "epoch": 0.6738004832585434, "grad_norm": 0.241102397441864, "learning_rate": 4.7728678083489375e-06, "log_odds_chosen": 0.287707656621933, "log_odds_ratio": -0.5653382539749146, "logits/chosen": 0.18390116095542908, "logits/rejected": -1.0134061574935913, "logps/chosen": -1.5824017524719238, "logps/rejected": -1.8214263916015625, "loss": 1.707, "nll_loss": 1.6505118608474731, "rewards/accuracies": 0.875, "rewards/chosen": -0.1582401841878891, "rewards/margins": 0.02390245348215103, "rewards/rejected": -0.18214263021945953, "step": 244 }, { "epoch": 0.6765619606489471, "grad_norm": 0.24698680639266968, "learning_rate": 4.7695183928174804e-06, "log_odds_chosen": 0.426491916179657, "log_odds_ratio": -0.5090312361717224, "logits/chosen": 0.09436272829771042, "logits/rejected": -1.081894040107727, "logps/chosen": -1.614341378211975, "logps/rejected": -1.9695953130722046, "loss": 1.7422, "nll_loss": 1.6912728548049927, "rewards/accuracies": 0.875, "rewards/chosen": -0.161434143781662, "rewards/margins": 0.03552539646625519, "rewards/rejected": -0.19695955514907837, "step": 245 }, { "epoch": 0.679323438039351, "grad_norm": 0.25812989473342896, "learning_rate": 4.766145654473096e-06, "log_odds_chosen": 0.33238697052001953, "log_odds_ratio": -0.5477334260940552, "logits/chosen": 0.17383219301700592, "logits/rejected": -1.1996134519577026, "logps/chosen": -1.6853795051574707, "logps/rejected": -1.9657583236694336, "loss": 1.797, "nll_loss": 1.7422523498535156, "rewards/accuracies": 0.875, "rewards/chosen": -0.1685379445552826, "rewards/margins": 0.028037890791893005, "rewards/rejected": -0.1965758353471756, "step": 246 }, { "epoch": 0.6820849154297549, "grad_norm": 0.25600266456604004, "learning_rate": 4.762749627975888e-06, "log_odds_chosen": 0.3794538974761963, "log_odds_ratio": -0.5263408422470093, "logits/chosen": 0.07988805323839188, "logits/rejected": -1.091023325920105, "logps/chosen": -1.5263158082962036, "logps/rejected": -1.8329474925994873, "loss": 1.6541, "nll_loss": 1.6014518737792969, "rewards/accuracies": 1.0, "rewards/chosen": -0.15263158082962036, "rewards/margins": 0.030663155019283295, "rewards/rejected": -0.18329472839832306, "step": 247 }, { "epoch": 0.6848463928201588, "grad_norm": 0.24254651367664337, "learning_rate": 4.7593303482252835e-06, "log_odds_chosen": 0.34609463810920715, "log_odds_ratio": -0.541566014289856, "logits/chosen": 0.14708423614501953, "logits/rejected": -0.7518667578697205, "logps/chosen": -1.6179258823394775, "logps/rejected": -1.905765414237976, "loss": 1.7327, "nll_loss": 1.6784985065460205, "rewards/accuracies": 1.0, "rewards/chosen": -0.1617926061153412, "rewards/margins": 0.02878393605351448, "rewards/rejected": -0.19057652354240417, "step": 248 }, { "epoch": 0.6876078702105627, "grad_norm": 0.22665560245513916, "learning_rate": 4.755887850359673e-06, "log_odds_chosen": 0.5504661798477173, "log_odds_ratio": -0.4719165861606598, "logits/chosen": 0.09460557997226715, "logits/rejected": -1.225687861442566, "logps/chosen": -1.4793003797531128, "logps/rejected": -1.9250984191894531, "loss": 1.6118, "nll_loss": 1.5646475553512573, "rewards/accuracies": 0.875, "rewards/chosen": -0.1479300558567047, "rewards/margins": 0.04457978904247284, "rewards/rejected": -0.19250985980033875, "step": 249 }, { "epoch": 0.6903693476009665, "grad_norm": 0.2510414719581604, "learning_rate": 4.752422169756048e-06, "log_odds_chosen": 0.3988419771194458, "log_odds_ratio": -0.5172514915466309, "logits/chosen": 0.16052858531475067, "logits/rejected": -1.2513781785964966, "logps/chosen": -1.6576679944992065, "logps/rejected": -1.9923875331878662, "loss": 1.7851, "nll_loss": 1.7334227561950684, "rewards/accuracies": 1.0, "rewards/chosen": -0.16576679050922394, "rewards/margins": 0.033471960574388504, "rewards/rejected": -0.19923876225948334, "step": 250 }, { "epoch": 0.6931308249913704, "grad_norm": 0.2423536330461502, "learning_rate": 4.748933342029639e-06, "log_odds_chosen": 0.5282813310623169, "log_odds_ratio": -0.4706994891166687, "logits/chosen": 0.10225804150104523, "logits/rejected": -1.2281630039215088, "logps/chosen": -1.5280183553695679, "logps/rejected": -1.9655184745788574, "loss": 1.6654, "nll_loss": 1.6183708906173706, "rewards/accuracies": 1.0, "rewards/chosen": -0.15280182659626007, "rewards/margins": 0.04375001788139343, "rewards/rejected": -0.1965518444776535, "step": 251 }, { "epoch": 0.6958923023817742, "grad_norm": 0.23662064969539642, "learning_rate": 4.745421403033548e-06, "log_odds_chosen": 0.36522558331489563, "log_odds_ratio": -0.5290880799293518, "logits/chosen": 0.10924719274044037, "logits/rejected": -1.0550241470336914, "logps/chosen": -1.6013975143432617, "logps/rejected": -1.901705026626587, "loss": 1.7128, "nll_loss": 1.6598960161209106, "rewards/accuracies": 1.0, "rewards/chosen": -0.1601397693157196, "rewards/margins": 0.0300307534635067, "rewards/rejected": -0.1901705265045166, "step": 252 }, { "epoch": 0.6986537797721781, "grad_norm": 0.2725629508495331, "learning_rate": 4.741886388858384e-06, "log_odds_chosen": 0.31865060329437256, "log_odds_ratio": -0.5531626343727112, "logits/chosen": 0.15934640169143677, "logits/rejected": -1.1095128059387207, "logps/chosen": -1.6118751764297485, "logps/rejected": -1.871192455291748, "loss": 1.7355, "nll_loss": 1.6801382303237915, "rewards/accuracies": 1.0, "rewards/chosen": -0.1611875295639038, "rewards/margins": 0.02593172900378704, "rewards/rejected": -0.187119260430336, "step": 253 }, { "epoch": 0.701415257162582, "grad_norm": 0.2601236402988434, "learning_rate": 4.738328335831883e-06, "log_odds_chosen": 0.3496936559677124, "log_odds_ratio": -0.5388615131378174, "logits/chosen": 0.12353205680847168, "logits/rejected": -1.3011374473571777, "logps/chosen": -1.5815935134887695, "logps/rejected": -1.8707093000411987, "loss": 1.7024, "nll_loss": 1.6484651565551758, "rewards/accuracies": 1.0, "rewards/chosen": -0.15815936028957367, "rewards/margins": 0.028911583125591278, "rewards/rejected": -0.18707093596458435, "step": 254 }, { "epoch": 0.7041767345529858, "grad_norm": 0.2376219779253006, "learning_rate": 4.734747280518549e-06, "log_odds_chosen": 0.4198550879955292, "log_odds_ratio": -0.5078388452529907, "logits/chosen": 0.030534474179148674, "logits/rejected": -1.399457573890686, "logps/chosen": -1.6249712705612183, "logps/rejected": -1.9735503196716309, "loss": 1.7447, "nll_loss": 1.693869948387146, "rewards/accuracies": 1.0, "rewards/chosen": -0.1624971330165863, "rewards/margins": 0.034857891499996185, "rewards/rejected": -0.19735503196716309, "step": 255 }, { "epoch": 0.7069382119433897, "grad_norm": 0.23082926869392395, "learning_rate": 4.7311432597192655e-06, "log_odds_chosen": 0.368667870759964, "log_odds_ratio": -0.5342724323272705, "logits/chosen": 0.03557804599404335, "logits/rejected": -1.3495283126831055, "logps/chosen": -1.6255284547805786, "logps/rejected": -1.9313668012619019, "loss": 1.7347, "nll_loss": 1.6812456846237183, "rewards/accuracies": 0.875, "rewards/chosen": -0.1625528484582901, "rewards/margins": 0.030583834275603294, "rewards/rejected": -0.19313669204711914, "step": 256 }, { "epoch": 0.7096996893337936, "grad_norm": 0.2494223415851593, "learning_rate": 4.72751631047092e-06, "log_odds_chosen": 0.5292459726333618, "log_odds_ratio": -0.4693402051925659, "logits/chosen": -0.008031047880649567, "logits/rejected": -1.0098199844360352, "logps/chosen": -1.6526001691818237, "logps/rejected": -2.0972676277160645, "loss": 1.7807, "nll_loss": 1.7337857484817505, "rewards/accuracies": 1.0, "rewards/chosen": -0.16526003181934357, "rewards/margins": 0.044466737657785416, "rewards/rejected": -0.2097267508506775, "step": 257 }, { "epoch": 0.7124611667241975, "grad_norm": 0.24077369272708893, "learning_rate": 4.72386647004603e-06, "log_odds_chosen": 0.37760159373283386, "log_odds_ratio": -0.5266671180725098, "logits/chosen": 0.1133025735616684, "logits/rejected": -1.0056575536727905, "logps/chosen": -1.6364243030548096, "logps/rejected": -1.9491901397705078, "loss": 1.7621, "nll_loss": 1.7094427347183228, "rewards/accuracies": 1.0, "rewards/chosen": -0.16364243626594543, "rewards/margins": 0.03127656877040863, "rewards/rejected": -0.19491900503635406, "step": 258 }, { "epoch": 0.7152226441146013, "grad_norm": 0.24284473061561584, "learning_rate": 4.720193775952352e-06, "log_odds_chosen": 0.2508072853088379, "log_odds_ratio": -0.5792805552482605, "logits/chosen": 0.11287318915128708, "logits/rejected": -1.0705785751342773, "logps/chosen": -1.6278916597366333, "logps/rejected": -1.833707332611084, "loss": 1.7407, "nll_loss": 1.6827445030212402, "rewards/accuracies": 1.0, "rewards/chosen": -0.16278916597366333, "rewards/margins": 0.02058156207203865, "rewards/rejected": -0.18337073922157288, "step": 259 }, { "epoch": 0.7179841215050051, "grad_norm": 0.2317313551902771, "learning_rate": 4.716498265932501e-06, "log_odds_chosen": 0.5105437636375427, "log_odds_ratio": -0.4731099009513855, "logits/chosen": -0.00561100710183382, "logits/rejected": -1.0285159349441528, "logps/chosen": -1.4508588314056396, "logps/rejected": -1.8631662130355835, "loss": 1.5697, "nll_loss": 1.5224125385284424, "rewards/accuracies": 1.0, "rewards/chosen": -0.1450859010219574, "rewards/margins": 0.04123072326183319, "rewards/rejected": -0.1863166093826294, "step": 260 }, { "epoch": 0.720745598895409, "grad_norm": 0.25082987546920776, "learning_rate": 4.712779977963559e-06, "log_odds_chosen": 0.3830725848674774, "log_odds_ratio": -0.5252867937088013, "logits/chosen": 0.09645551443099976, "logits/rejected": -0.733791172504425, "logps/chosen": -1.56308114528656, "logps/rejected": -1.8771851062774658, "loss": 1.695, "nll_loss": 1.642515778541565, "rewards/accuracies": 0.875, "rewards/chosen": -0.156308114528656, "rewards/margins": 0.031410399824380875, "rewards/rejected": -0.18771851062774658, "step": 261 }, { "epoch": 0.7235070762858129, "grad_norm": 0.22368477284908295, "learning_rate": 4.7090389502566884e-06, "log_odds_chosen": 0.48179319500923157, "log_odds_ratio": -0.4859163165092468, "logits/chosen": 0.042292360216379166, "logits/rejected": -1.0789254903793335, "logps/chosen": -1.5697331428527832, "logps/rejected": -1.9673210382461548, "loss": 1.6673, "nll_loss": 1.61872136592865, "rewards/accuracies": 1.0, "rewards/chosen": -0.15697331726551056, "rewards/margins": 0.039758771657943726, "rewards/rejected": -0.19673210382461548, "step": 262 }, { "epoch": 0.7262685536762168, "grad_norm": 0.22603513300418854, "learning_rate": 4.705275221256738e-06, "log_odds_chosen": 0.3837694227695465, "log_odds_ratio": -0.5233887434005737, "logits/chosen": 0.11829128116369247, "logits/rejected": -1.0814515352249146, "logps/chosen": -1.612618088722229, "logps/rejected": -1.9323992729187012, "loss": 1.7336, "nll_loss": 1.6812975406646729, "rewards/accuracies": 0.875, "rewards/chosen": -0.16126182675361633, "rewards/margins": 0.03197810798883438, "rewards/rejected": -0.1932399421930313, "step": 263 }, { "epoch": 0.7290300310666207, "grad_norm": 0.23297946155071259, "learning_rate": 4.701488829641845e-06, "log_odds_chosen": 0.3633509874343872, "log_odds_ratio": -0.5302541255950928, "logits/chosen": 0.08365479856729507, "logits/rejected": -1.0365325212478638, "logps/chosen": -1.5728099346160889, "logps/rejected": -1.870044231414795, "loss": 1.6965, "nll_loss": 1.6434751749038696, "rewards/accuracies": 1.0, "rewards/chosen": -0.15728099644184113, "rewards/margins": 0.02972342073917389, "rewards/rejected": -0.18700441718101501, "step": 264 }, { "epoch": 0.7317915084570245, "grad_norm": 0.215961292386055, "learning_rate": 4.697679814323044e-06, "log_odds_chosen": 0.35131096839904785, "log_odds_ratio": -0.540593147277832, "logits/chosen": 0.052778296172618866, "logits/rejected": -1.2138348817825317, "logps/chosen": -1.6011123657226562, "logps/rejected": -1.8878912925720215, "loss": 1.7054, "nll_loss": 1.6513090133666992, "rewards/accuracies": 1.0, "rewards/chosen": -0.16011124849319458, "rewards/margins": 0.028677886351943016, "rewards/rejected": -0.18878912925720215, "step": 265 }, { "epoch": 0.7345529858474283, "grad_norm": 0.2192918211221695, "learning_rate": 4.693848214443858e-06, "log_odds_chosen": 0.3489964008331299, "log_odds_ratio": -0.538882315158844, "logits/chosen": 0.0374111570417881, "logits/rejected": -1.4135199785232544, "logps/chosen": -1.66756010055542, "logps/rejected": -1.9600346088409424, "loss": 1.7678, "nll_loss": 1.713918924331665, "rewards/accuracies": 0.875, "rewards/chosen": -0.1667560189962387, "rewards/margins": 0.029247449710965157, "rewards/rejected": -0.19600346684455872, "step": 266 }, { "epoch": 0.7373144632378322, "grad_norm": 0.23613475263118744, "learning_rate": 4.689994069379905e-06, "log_odds_chosen": 0.6149622797966003, "log_odds_ratio": -0.43481168150901794, "logits/chosen": 0.08960134536027908, "logits/rejected": -1.5427201986312866, "logps/chosen": -1.5492388010025024, "logps/rejected": -2.061069965362549, "loss": 1.6539, "nll_loss": 1.6104521751403809, "rewards/accuracies": 1.0, "rewards/chosen": -0.15492388606071472, "rewards/margins": 0.05118309706449509, "rewards/rejected": -0.2061069756746292, "step": 267 }, { "epoch": 0.7400759406282361, "grad_norm": 0.22785541415214539, "learning_rate": 4.686117418738489e-06, "log_odds_chosen": 0.46340838074684143, "log_odds_ratio": -0.4908517003059387, "logits/chosen": 0.09018822759389877, "logits/rejected": -1.188814640045166, "logps/chosen": -1.607946753501892, "logps/rejected": -1.9937546253204346, "loss": 1.7214, "nll_loss": 1.6723337173461914, "rewards/accuracies": 1.0, "rewards/chosen": -0.1607946753501892, "rewards/margins": 0.03858078643679619, "rewards/rejected": -0.1993754655122757, "step": 268 }, { "epoch": 0.74283741801864, "grad_norm": 0.20759011805057526, "learning_rate": 4.6822183023581945e-06, "log_odds_chosen": 0.39548274874687195, "log_odds_ratio": -0.5299872756004333, "logits/chosen": 0.08991736173629761, "logits/rejected": -1.5129450559616089, "logps/chosen": -1.5805068016052246, "logps/rejected": -1.9086334705352783, "loss": 1.705, "nll_loss": 1.6519887447357178, "rewards/accuracies": 0.875, "rewards/chosen": -0.15805068612098694, "rewards/margins": 0.03281266242265701, "rewards/rejected": -0.19086334109306335, "step": 269 }, { "epoch": 0.7455988954090439, "grad_norm": 0.21977882087230682, "learning_rate": 4.678296760308474e-06, "log_odds_chosen": 0.2966475486755371, "log_odds_ratio": -0.5605870485305786, "logits/chosen": -0.026812348514795303, "logits/rejected": -1.3540985584259033, "logps/chosen": -1.5566442012786865, "logps/rejected": -1.7983969449996948, "loss": 1.6709, "nll_loss": 1.6148244142532349, "rewards/accuracies": 1.0, "rewards/chosen": -0.15566441416740417, "rewards/margins": 0.024175278842449188, "rewards/rejected": -0.17983968555927277, "step": 270 }, { "epoch": 0.7483603727994477, "grad_norm": 0.24105672538280487, "learning_rate": 4.674352832889239e-06, "log_odds_chosen": 0.5675957798957825, "log_odds_ratio": -0.4524817168712616, "logits/chosen": 0.017950108274817467, "logits/rejected": -1.1157424449920654, "logps/chosen": -1.617455005645752, "logps/rejected": -2.094006299972534, "loss": 1.7444, "nll_loss": 1.6991225481033325, "rewards/accuracies": 1.0, "rewards/chosen": -0.16174551844596863, "rewards/margins": 0.0476551279425621, "rewards/rejected": -0.20940065383911133, "step": 271 }, { "epoch": 0.7511218501898516, "grad_norm": 0.20193441212177277, "learning_rate": 4.670386560630446e-06, "log_odds_chosen": 0.4167816638946533, "log_odds_ratio": -0.5149449110031128, "logits/chosen": -0.06302893161773682, "logits/rejected": -1.2852458953857422, "logps/chosen": -1.538407325744629, "logps/rejected": -1.8780553340911865, "loss": 1.6523, "nll_loss": 1.600759744644165, "rewards/accuracies": 0.875, "rewards/chosen": -0.15384072065353394, "rewards/margins": 0.03396480157971382, "rewards/rejected": -0.18780553340911865, "step": 272 }, { "epoch": 0.7538833275802554, "grad_norm": 0.2135000228881836, "learning_rate": 4.66639798429168e-06, "log_odds_chosen": 0.5502100586891174, "log_odds_ratio": -0.45903506875038147, "logits/chosen": 0.02270820550620556, "logits/rejected": -1.4955902099609375, "logps/chosen": -1.6026809215545654, "logps/rejected": -2.0662648677825928, "loss": 1.6999, "nll_loss": 1.6539511680603027, "rewards/accuracies": 1.0, "rewards/chosen": -0.1602681279182434, "rewards/margins": 0.0463583879172802, "rewards/rejected": -0.2066265046596527, "step": 273 }, { "epoch": 0.7566448049706593, "grad_norm": 0.23999927937984467, "learning_rate": 4.6623871448617345e-06, "log_odds_chosen": 0.3460523784160614, "log_odds_ratio": -0.5406045913696289, "logits/chosen": -0.06589086353778839, "logits/rejected": -1.3238555192947388, "logps/chosen": -1.5511431694030762, "logps/rejected": -1.8326784372329712, "loss": 1.6633, "nll_loss": 1.6092641353607178, "rewards/accuracies": 0.875, "rewards/chosen": -0.1551143229007721, "rewards/margins": 0.028153544291853905, "rewards/rejected": -0.18326784670352936, "step": 274 }, { "epoch": 0.7594062823610632, "grad_norm": 0.2286817878484726, "learning_rate": 4.6583540835581885e-06, "log_odds_chosen": 0.4262576103210449, "log_odds_ratio": -0.5114239454269409, "logits/chosen": -0.04603245109319687, "logits/rejected": -1.176690936088562, "logps/chosen": -1.551270842552185, "logps/rejected": -1.902989387512207, "loss": 1.6683, "nll_loss": 1.6171845197677612, "rewards/accuracies": 1.0, "rewards/chosen": -0.15512707829475403, "rewards/margins": 0.03517187386751175, "rewards/rejected": -0.19029895961284637, "step": 275 }, { "epoch": 0.762167759751467, "grad_norm": 0.20076729357242584, "learning_rate": 4.654298841826988e-06, "log_odds_chosen": 0.35074901580810547, "log_odds_ratio": -0.5370126962661743, "logits/chosen": -0.04673296585679054, "logits/rejected": -1.0621964931488037, "logps/chosen": -1.4891098737716675, "logps/rejected": -1.7715747356414795, "loss": 1.5999, "nll_loss": 1.5461980104446411, "rewards/accuracies": 1.0, "rewards/chosen": -0.1489109843969345, "rewards/margins": 0.02824649028480053, "rewards/rejected": -0.1771574765443802, "step": 276 }, { "epoch": 0.7649292371418709, "grad_norm": 0.2110532820224762, "learning_rate": 4.6502214613420164e-06, "log_odds_chosen": 0.6417544484138489, "log_odds_ratio": -0.4257214069366455, "logits/chosen": -0.04618730768561363, "logits/rejected": -1.1318330764770508, "logps/chosen": -1.457794189453125, "logps/rejected": -1.9827320575714111, "loss": 1.5723, "nll_loss": 1.5297592878341675, "rewards/accuracies": 1.0, "rewards/chosen": -0.14577943086624146, "rewards/margins": 0.052493780851364136, "rewards/rejected": -0.1982732117176056, "step": 277 }, { "epoch": 0.7676907145322748, "grad_norm": 0.23329485952854156, "learning_rate": 4.646121984004666e-06, "log_odds_chosen": 0.5122417211532593, "log_odds_ratio": -0.47840094566345215, "logits/chosen": -0.014363419264554977, "logits/rejected": -1.0884288549423218, "logps/chosen": -1.582539677619934, "logps/rejected": -2.0093653202056885, "loss": 1.7056, "nll_loss": 1.6577521562576294, "rewards/accuracies": 1.0, "rewards/chosen": -0.15825395286083221, "rewards/margins": 0.042682573199272156, "rewards/rejected": -0.20093654096126556, "step": 278 }, { "epoch": 0.7704521919226787, "grad_norm": 0.21924971044063568, "learning_rate": 4.642000451943409e-06, "log_odds_chosen": 0.4158315062522888, "log_odds_ratio": -0.5117031335830688, "logits/chosen": 0.08326566219329834, "logits/rejected": -1.0792198181152344, "logps/chosen": -1.6467266082763672, "logps/rejected": -1.9971718788146973, "loss": 1.7532, "nll_loss": 1.7020772695541382, "rewards/accuracies": 1.0, "rewards/chosen": -0.16467267274856567, "rewards/margins": 0.03504452481865883, "rewards/rejected": -0.199717178940773, "step": 279 }, { "epoch": 0.7732136693130826, "grad_norm": 0.2058788388967514, "learning_rate": 4.637856907513366e-06, "log_odds_chosen": 0.5298818945884705, "log_odds_ratio": -0.4738863706588745, "logits/chosen": -0.052150044590234756, "logits/rejected": -1.148370623588562, "logps/chosen": -1.6041690111160278, "logps/rejected": -2.045445680618286, "loss": 1.697, "nll_loss": 1.6495710611343384, "rewards/accuracies": 1.0, "rewards/chosen": -0.16041693091392517, "rewards/margins": 0.04412766546010971, "rewards/rejected": -0.2045445740222931, "step": 280 }, { "epoch": 0.7759751467034863, "grad_norm": 0.19594821333885193, "learning_rate": 4.633691393295865e-06, "log_odds_chosen": 0.33417803049087524, "log_odds_ratio": -0.5438077449798584, "logits/chosen": -0.02928408607840538, "logits/rejected": -1.2465791702270508, "logps/chosen": -1.6147441864013672, "logps/rejected": -1.8932818174362183, "loss": 1.7171, "nll_loss": 1.6626923084259033, "rewards/accuracies": 0.875, "rewards/chosen": -0.16147442162036896, "rewards/margins": 0.027853764593601227, "rewards/rejected": -0.18932819366455078, "step": 281 }, { "epoch": 0.7787366240938902, "grad_norm": 0.2078380137681961, "learning_rate": 4.629503952098011e-06, "log_odds_chosen": 0.6047598123550415, "log_odds_ratio": -0.4436852037906647, "logits/chosen": -0.0367959700524807, "logits/rejected": -1.4540506601333618, "logps/chosen": -1.5447849035263062, "logps/rejected": -2.0499391555786133, "loss": 1.6528, "nll_loss": 1.6084190607070923, "rewards/accuracies": 1.0, "rewards/chosen": -0.1544785052537918, "rewards/margins": 0.050515398383140564, "rewards/rejected": -0.20499390363693237, "step": 282 }, { "epoch": 0.7814981014842941, "grad_norm": 0.20601390302181244, "learning_rate": 4.6252946269522406e-06, "log_odds_chosen": 0.38651254773139954, "log_odds_ratio": -0.5361747741699219, "logits/chosen": -0.06736142188310623, "logits/rejected": -1.498019814491272, "logps/chosen": -1.5710289478302002, "logps/rejected": -1.8959977626800537, "loss": 1.6776, "nll_loss": 1.6239646673202515, "rewards/accuracies": 0.75, "rewards/chosen": -0.15710289776325226, "rewards/margins": 0.03249688073992729, "rewards/rejected": -0.18959978222846985, "step": 283 }, { "epoch": 0.784259578874698, "grad_norm": 0.20410317182540894, "learning_rate": 4.621063461115882e-06, "log_odds_chosen": 0.36583784222602844, "log_odds_ratio": -0.5439432263374329, "logits/chosen": -0.001009071245789528, "logits/rejected": -1.3512407541275024, "logps/chosen": -1.6412224769592285, "logps/rejected": -1.9503740072250366, "loss": 1.7443, "nll_loss": 1.6899352073669434, "rewards/accuracies": 0.875, "rewards/chosen": -0.16412223875522614, "rewards/margins": 0.030915159732103348, "rewards/rejected": -0.19503742456436157, "step": 284 }, { "epoch": 0.7870210562651019, "grad_norm": 0.2131362408399582, "learning_rate": 4.6168104980707105e-06, "log_odds_chosen": 0.44328027963638306, "log_odds_ratio": -0.49936914443969727, "logits/chosen": -0.058265671133995056, "logits/rejected": -1.3667389154434204, "logps/chosen": -1.5707849264144897, "logps/rejected": -1.9372496604919434, "loss": 1.6771, "nll_loss": 1.6271687746047974, "rewards/accuracies": 1.0, "rewards/chosen": -0.15707850456237793, "rewards/margins": 0.03664647787809372, "rewards/rejected": -0.19372497498989105, "step": 285 }, { "epoch": 0.7897825336555057, "grad_norm": 0.1902783066034317, "learning_rate": 4.612535781522504e-06, "log_odds_chosen": 0.34869980812072754, "log_odds_ratio": -0.5359938144683838, "logits/chosen": -0.04153807833790779, "logits/rejected": -1.3222988843917847, "logps/chosen": -1.537316918373108, "logps/rejected": -1.8218798637390137, "loss": 1.6401, "nll_loss": 1.5864717960357666, "rewards/accuracies": 1.0, "rewards/chosen": -0.15373168885707855, "rewards/margins": 0.02845628745853901, "rewards/rejected": -0.1821879893541336, "step": 286 }, { "epoch": 0.7925440110459095, "grad_norm": 0.20296384394168854, "learning_rate": 4.6082393554005855e-06, "log_odds_chosen": 0.5364274978637695, "log_odds_ratio": -0.4683885872364044, "logits/chosen": 0.05323365330696106, "logits/rejected": -1.2694860696792603, "logps/chosen": -1.5045408010482788, "logps/rejected": -1.949967384338379, "loss": 1.6241, "nll_loss": 1.5772613286972046, "rewards/accuracies": 0.875, "rewards/chosen": -0.1504540890455246, "rewards/margins": 0.04454266279935837, "rewards/rejected": -0.19499675929546356, "step": 287 }, { "epoch": 0.7953054884363134, "grad_norm": 0.2286815494298935, "learning_rate": 4.6039212638573835e-06, "log_odds_chosen": 0.2805465757846832, "log_odds_ratio": -0.5681173205375671, "logits/chosen": -0.09262797981500626, "logits/rejected": -1.4461908340454102, "logps/chosen": -1.680091381072998, "logps/rejected": -1.9138922691345215, "loss": 1.7686, "nll_loss": 1.7117558717727661, "rewards/accuracies": 0.875, "rewards/chosen": -0.16800916194915771, "rewards/margins": 0.02338009513914585, "rewards/rejected": -0.19138923287391663, "step": 288 }, { "epoch": 0.7980669658267173, "grad_norm": 0.21770860254764557, "learning_rate": 4.599581551267969e-06, "log_odds_chosen": 0.5229898691177368, "log_odds_ratio": -0.47206243872642517, "logits/chosen": 0.0574706494808197, "logits/rejected": -1.1304949522018433, "logps/chosen": -1.6498278379440308, "logps/rejected": -2.090548038482666, "loss": 1.7491, "nll_loss": 1.701880931854248, "rewards/accuracies": 1.0, "rewards/chosen": -0.16498279571533203, "rewards/margins": 0.04407202824950218, "rewards/rejected": -0.20905481278896332, "step": 289 }, { "epoch": 0.8008284432171212, "grad_norm": 0.2001868039369583, "learning_rate": 4.5952202622296015e-06, "log_odds_chosen": 0.33771640062332153, "log_odds_ratio": -0.5422191619873047, "logits/chosen": -0.08145216107368469, "logits/rejected": -1.286673903465271, "logps/chosen": -1.5776478052139282, "logps/rejected": -1.857193946838379, "loss": 1.6829, "nll_loss": 1.6286838054656982, "rewards/accuracies": 1.0, "rewards/chosen": -0.15776477754116058, "rewards/margins": 0.027954626828432083, "rewards/rejected": -0.18571940064430237, "step": 290 }, { "epoch": 0.803589920607525, "grad_norm": 0.19392110407352448, "learning_rate": 4.590837441561277e-06, "log_odds_chosen": 0.4482704699039459, "log_odds_ratio": -0.504833459854126, "logits/chosen": -0.05314670130610466, "logits/rejected": -1.3596844673156738, "logps/chosen": -1.596375584602356, "logps/rejected": -1.9696097373962402, "loss": 1.6868, "nll_loss": 1.6362988948822021, "rewards/accuracies": 1.0, "rewards/chosen": -0.15963755548000336, "rewards/margins": 0.037323422729969025, "rewards/rejected": -0.19696098566055298, "step": 291 }, { "epoch": 0.8063513979979289, "grad_norm": 0.18595081567764282, "learning_rate": 4.586433134303257e-06, "log_odds_chosen": 0.5394923090934753, "log_odds_ratio": -0.4622240662574768, "logits/chosen": -0.05867187678813934, "logits/rejected": -1.1816052198410034, "logps/chosen": -1.4675815105438232, "logps/rejected": -1.904748797416687, "loss": 1.5911, "nll_loss": 1.5448615550994873, "rewards/accuracies": 1.0, "rewards/chosen": -0.14675816893577576, "rewards/margins": 0.04371672868728638, "rewards/rejected": -0.19047488272190094, "step": 292 }, { "epoch": 0.8091128753883328, "grad_norm": 0.2025194615125656, "learning_rate": 4.582007385716614e-06, "log_odds_chosen": 0.3834781348705292, "log_odds_ratio": -0.5252506732940674, "logits/chosen": -0.03208540380001068, "logits/rejected": -1.2696198225021362, "logps/chosen": -1.5880941152572632, "logps/rejected": -1.9049972295761108, "loss": 1.7106, "nll_loss": 1.6580520868301392, "rewards/accuracies": 1.0, "rewards/chosen": -0.15880942344665527, "rewards/margins": 0.0316903218626976, "rewards/rejected": -0.19049973785877228, "step": 293 }, { "epoch": 0.8118743527787367, "grad_norm": 0.21143269538879395, "learning_rate": 4.57756024128276e-06, "log_odds_chosen": 0.5385761260986328, "log_odds_ratio": -0.4631249010562897, "logits/chosen": -0.12074042856693268, "logits/rejected": -1.2522529363632202, "logps/chosen": -1.5002479553222656, "logps/rejected": -1.9420486688613892, "loss": 1.6132, "nll_loss": 1.566838026046753, "rewards/accuracies": 1.0, "rewards/chosen": -0.15002478659152985, "rewards/margins": 0.044180065393447876, "rewards/rejected": -0.19420485198497772, "step": 294 }, { "epoch": 0.8146358301691405, "grad_norm": 0.1943599432706833, "learning_rate": 4.573091746702988e-06, "log_odds_chosen": 0.5673821568489075, "log_odds_ratio": -0.45537808537483215, "logits/chosen": -0.1322491317987442, "logits/rejected": -1.209002137184143, "logps/chosen": -1.5007110834121704, "logps/rejected": -1.9674400091171265, "loss": 1.6187, "nll_loss": 1.573127031326294, "rewards/accuracies": 1.0, "rewards/chosen": -0.15007109940052032, "rewards/margins": 0.04667289927601814, "rewards/rejected": -0.19674400985240936, "step": 295 }, { "epoch": 0.8173973075595443, "grad_norm": 0.17433036863803864, "learning_rate": 4.5686019478979915e-06, "log_odds_chosen": 0.6013860702514648, "log_odds_ratio": -0.4401004910469055, "logits/chosen": 0.0062029119580984116, "logits/rejected": -1.6436456441879272, "logps/chosen": -1.534691572189331, "logps/rejected": -2.034040927886963, "loss": 1.6251, "nll_loss": 1.5810506343841553, "rewards/accuracies": 1.0, "rewards/chosen": -0.15346917510032654, "rewards/margins": 0.049934931099414825, "rewards/rejected": -0.20340411365032196, "step": 296 }, { "epoch": 0.8201587849499482, "grad_norm": 0.20673449337482452, "learning_rate": 4.564090891007401e-06, "log_odds_chosen": 0.5072333812713623, "log_odds_ratio": -0.47898009419441223, "logits/chosen": -0.12156552821397781, "logits/rejected": -1.4179987907409668, "logps/chosen": -1.63102388381958, "logps/rejected": -2.0585782527923584, "loss": 1.7285, "nll_loss": 1.6806358098983765, "rewards/accuracies": 1.0, "rewards/chosen": -0.1631024032831192, "rewards/margins": 0.04275544360280037, "rewards/rejected": -0.20585784316062927, "step": 297 }, { "epoch": 0.8229202623403521, "grad_norm": 0.1835467368364334, "learning_rate": 4.559558622389304e-06, "log_odds_chosen": 0.5705875754356384, "log_odds_ratio": -0.45824331045150757, "logits/chosen": -0.08186288177967072, "logits/rejected": -1.502951979637146, "logps/chosen": -1.5210040807724, "logps/rejected": -1.987526774406433, "loss": 1.6191, "nll_loss": 1.5732712745666504, "rewards/accuracies": 1.0, "rewards/chosen": -0.15210041403770447, "rewards/margins": 0.046652257442474365, "rewards/rejected": -0.19875267148017883, "step": 298 }, { "epoch": 0.825681739730756, "grad_norm": 0.19064722955226898, "learning_rate": 4.555005188619776e-06, "log_odds_chosen": 0.5287789106369019, "log_odds_ratio": -0.473359614610672, "logits/chosen": -0.1356291025876999, "logits/rejected": -1.412203073501587, "logps/chosen": -1.5497990846633911, "logps/rejected": -1.9918665885925293, "loss": 1.6437, "nll_loss": 1.5964010953903198, "rewards/accuracies": 0.875, "rewards/chosen": -0.1549799144268036, "rewards/margins": 0.04420673847198486, "rewards/rejected": -0.19918665289878845, "step": 299 }, { "epoch": 0.8284432171211599, "grad_norm": 0.1883833110332489, "learning_rate": 4.55043063649239e-06, "log_odds_chosen": 0.6203457117080688, "log_odds_ratio": -0.4358068108558655, "logits/chosen": -0.1520073264837265, "logits/rejected": -1.6608895063400269, "logps/chosen": -1.5645240545272827, "logps/rejected": -2.0832083225250244, "loss": 1.6559, "nll_loss": 1.6122933626174927, "rewards/accuracies": 1.0, "rewards/chosen": -0.15645241737365723, "rewards/margins": 0.05186842754483223, "rewards/rejected": -0.20832082629203796, "step": 300 }, { "epoch": 0.8312046945115636, "grad_norm": 0.18228144943714142, "learning_rate": 4.54583501301775e-06, "log_odds_chosen": 0.5522174835205078, "log_odds_ratio": -0.46315470337867737, "logits/chosen": -0.10763005167245865, "logits/rejected": -1.706877589225769, "logps/chosen": -1.5923174619674683, "logps/rejected": -2.0555925369262695, "loss": 1.6766, "nll_loss": 1.630320429801941, "rewards/accuracies": 1.0, "rewards/chosen": -0.1592317670583725, "rewards/margins": 0.046327486634254456, "rewards/rejected": -0.20555922389030457, "step": 301 }, { "epoch": 0.8339661719019675, "grad_norm": 0.18666355311870575, "learning_rate": 4.541218365422997e-06, "log_odds_chosen": 0.6547673344612122, "log_odds_ratio": -0.4269846975803375, "logits/chosen": -0.1366514414548874, "logits/rejected": -1.3694720268249512, "logps/chosen": -1.4531749486923218, "logps/rejected": -1.9791964292526245, "loss": 1.5613, "nll_loss": 1.5185647010803223, "rewards/accuracies": 1.0, "rewards/chosen": -0.14531749486923218, "rewards/margins": 0.05260216072201729, "rewards/rejected": -0.19791966676712036, "step": 302 }, { "epoch": 0.8367276492923714, "grad_norm": 0.19289466738700867, "learning_rate": 4.536580741151328e-06, "log_odds_chosen": 0.42363226413726807, "log_odds_ratio": -0.5060065984725952, "logits/chosen": -0.1389242559671402, "logits/rejected": -1.133392095565796, "logps/chosen": -1.5939542055130005, "logps/rejected": -1.9430160522460938, "loss": 1.6864, "nll_loss": 1.63578200340271, "rewards/accuracies": 1.0, "rewards/chosen": -0.15939541161060333, "rewards/margins": 0.034906186163425446, "rewards/rejected": -0.19430160522460938, "step": 303 }, { "epoch": 0.8394891266827753, "grad_norm": 0.20372274518013, "learning_rate": 4.531922187861507e-06, "log_odds_chosen": 0.6614566445350647, "log_odds_ratio": -0.41916701197624207, "logits/chosen": -0.11737221479415894, "logits/rejected": -1.2571710348129272, "logps/chosen": -1.4728827476501465, "logps/rejected": -2.0177319049835205, "loss": 1.5909, "nll_loss": 1.548986792564392, "rewards/accuracies": 1.0, "rewards/chosen": -0.1472882777452469, "rewards/margins": 0.05448490381240845, "rewards/rejected": -0.20177316665649414, "step": 304 }, { "epoch": 0.8422506040731792, "grad_norm": 0.20864824950695038, "learning_rate": 4.527242753427378e-06, "log_odds_chosen": 0.4375610649585724, "log_odds_ratio": -0.5043913722038269, "logits/chosen": -0.14712439477443695, "logits/rejected": -1.1074727773666382, "logps/chosen": -1.6707913875579834, "logps/rejected": -2.040156126022339, "loss": 1.7741, "nll_loss": 1.723702073097229, "rewards/accuracies": 1.0, "rewards/chosen": -0.1670791655778885, "rewards/margins": 0.036936454474925995, "rewards/rejected": -0.20401562750339508, "step": 305 }, { "epoch": 0.845012081463583, "grad_norm": 0.1923041045665741, "learning_rate": 4.522542485937369e-06, "log_odds_chosen": 0.6749688982963562, "log_odds_ratio": -0.4191080629825592, "logits/chosen": -0.11244256049394608, "logits/rejected": -1.4451961517333984, "logps/chosen": -1.528746485710144, "logps/rejected": -2.0891406536102295, "loss": 1.644, "nll_loss": 1.602088212966919, "rewards/accuracies": 1.0, "rewards/chosen": -0.1528746634721756, "rewards/margins": 0.05603940039873123, "rewards/rejected": -0.20891407132148743, "step": 306 }, { "epoch": 0.8477735588539869, "grad_norm": 0.19692490994930267, "learning_rate": 4.5178214336940015e-06, "log_odds_chosen": 0.536999523639679, "log_odds_ratio": -0.4662824273109436, "logits/chosen": -0.12849773466587067, "logits/rejected": -1.3011747598648071, "logps/chosen": -1.537503957748413, "logps/rejected": -1.9821370840072632, "loss": 1.6458, "nll_loss": 1.5992212295532227, "rewards/accuracies": 1.0, "rewards/chosen": -0.15375038981437683, "rewards/margins": 0.044463321566581726, "rewards/rejected": -0.19821371138095856, "step": 307 }, { "epoch": 0.8505350362443908, "grad_norm": 0.19530101120471954, "learning_rate": 4.513079645213391e-06, "log_odds_chosen": 0.5806478261947632, "log_odds_ratio": -0.45124712586402893, "logits/chosen": -0.14092004299163818, "logits/rejected": -1.2499042749404907, "logps/chosen": -1.447067141532898, "logps/rejected": -1.9178260564804077, "loss": 1.543, "nll_loss": 1.4978904724121094, "rewards/accuracies": 1.0, "rewards/chosen": -0.14470671117305756, "rewards/margins": 0.04707589000463486, "rewards/rejected": -0.19178259372711182, "step": 308 }, { "epoch": 0.8532965136347946, "grad_norm": 0.21278323233127594, "learning_rate": 4.508317169224752e-06, "log_odds_chosen": 0.2714424431324005, "log_odds_ratio": -0.5724983215332031, "logits/chosen": -0.23690785467624664, "logits/rejected": -1.2109217643737793, "logps/chosen": -1.568101406097412, "logps/rejected": -1.7880043983459473, "loss": 1.6669, "nll_loss": 1.6096391677856445, "rewards/accuracies": 0.875, "rewards/chosen": -0.15681014955043793, "rewards/margins": 0.021990297362208366, "rewards/rejected": -0.17880043387413025, "step": 309 }, { "epoch": 0.8560579910251985, "grad_norm": 0.3061252534389496, "learning_rate": 4.5035340546698915e-06, "log_odds_chosen": 0.5074098110198975, "log_odds_ratio": -0.47923699021339417, "logits/chosen": -0.08015923947095871, "logits/rejected": -1.4736651182174683, "logps/chosen": -1.5646302700042725, "logps/rejected": -1.9794241189956665, "loss": 1.6599, "nll_loss": 1.6120116710662842, "rewards/accuracies": 1.0, "rewards/chosen": -0.15646302700042725, "rewards/margins": 0.041479405015707016, "rewards/rejected": -0.19794242084026337, "step": 310 }, { "epoch": 0.8588194684156023, "grad_norm": 0.20862670242786407, "learning_rate": 4.4987303507027155e-06, "log_odds_chosen": 0.32739633321762085, "log_odds_ratio": -0.5497077703475952, "logits/chosen": -0.057409606873989105, "logits/rejected": -1.186546802520752, "logps/chosen": -1.6279197931289673, "logps/rejected": -1.8978025913238525, "loss": 1.7296, "nll_loss": 1.6746082305908203, "rewards/accuracies": 0.75, "rewards/chosen": -0.16279199719429016, "rewards/margins": 0.026988260447978973, "rewards/rejected": -0.18978025019168854, "step": 311 }, { "epoch": 0.8615809458060062, "grad_norm": 0.1961701512336731, "learning_rate": 4.493906106688712e-06, "log_odds_chosen": 0.6683472394943237, "log_odds_ratio": -0.4163239598274231, "logits/chosen": -0.08115644752979279, "logits/rejected": -1.3991774320602417, "logps/chosen": -1.5248966217041016, "logps/rejected": -2.0810930728912354, "loss": 1.633, "nll_loss": 1.5913276672363281, "rewards/accuracies": 1.0, "rewards/chosen": -0.15248967707157135, "rewards/margins": 0.05561964958906174, "rewards/rejected": -0.20810934901237488, "step": 312 }, { "epoch": 0.8643424231964101, "grad_norm": 0.4846391975879669, "learning_rate": 4.4890613722044526e-06, "log_odds_chosen": 0.5416931509971619, "log_odds_ratio": -0.46186453104019165, "logits/chosen": -0.13847319781780243, "logits/rejected": -1.388474702835083, "logps/chosen": -1.5097135305404663, "logps/rejected": -1.9569801092147827, "loss": 1.59, "nll_loss": 1.5437886714935303, "rewards/accuracies": 1.0, "rewards/chosen": -0.15097135305404663, "rewards/margins": 0.04472666233778, "rewards/rejected": -0.19569800794124603, "step": 313 }, { "epoch": 0.867103900586814, "grad_norm": 0.2009696215391159, "learning_rate": 4.484196197037082e-06, "log_odds_chosen": 0.5546321868896484, "log_odds_ratio": -0.45888563990592957, "logits/chosen": -0.07164500653743744, "logits/rejected": -1.3182318210601807, "logps/chosen": -1.561537265777588, "logps/rejected": -2.02632212638855, "loss": 1.6542, "nll_loss": 1.6082967519760132, "rewards/accuracies": 1.0, "rewards/chosen": -0.15615373849868774, "rewards/margins": 0.046478480100631714, "rewards/rejected": -0.20263221859931946, "step": 314 }, { "epoch": 0.8698653779772179, "grad_norm": 0.18780747056007385, "learning_rate": 4.4793106311838e-06, "log_odds_chosen": 0.5521085262298584, "log_odds_ratio": -0.46405351161956787, "logits/chosen": -0.17451435327529907, "logits/rejected": -1.2368041276931763, "logps/chosen": -1.547911524772644, "logps/rejected": -2.007674217224121, "loss": 1.6276, "nll_loss": 1.5811690092086792, "rewards/accuracies": 1.0, "rewards/chosen": -0.15479114651679993, "rewards/margins": 0.04597627371549606, "rewards/rejected": -0.2007674127817154, "step": 315 }, { "epoch": 0.8726268553676216, "grad_norm": 0.19249524176120758, "learning_rate": 4.474404724851356e-06, "log_odds_chosen": 0.49972984194755554, "log_odds_ratio": -0.4845547676086426, "logits/chosen": -0.12106968462467194, "logits/rejected": -1.4282002449035645, "logps/chosen": -1.599500060081482, "logps/rejected": -2.0155510902404785, "loss": 1.6902, "nll_loss": 1.641719102859497, "rewards/accuracies": 1.0, "rewards/chosen": -0.15995000302791595, "rewards/margins": 0.04160511493682861, "rewards/rejected": -0.20155511796474457, "step": 316 }, { "epoch": 0.8753883327580255, "grad_norm": 0.2032056301832199, "learning_rate": 4.469478528455529e-06, "log_odds_chosen": 0.5046655535697937, "log_odds_ratio": -0.47710105776786804, "logits/chosen": -0.27712228894233704, "logits/rejected": -1.1458078622817993, "logps/chosen": -1.587306261062622, "logps/rejected": -2.0054972171783447, "loss": 1.6787, "nll_loss": 1.6309565305709839, "rewards/accuracies": 1.0, "rewards/chosen": -0.1587306261062622, "rewards/margins": 0.041819095611572266, "rewards/rejected": -0.20054972171783447, "step": 317 }, { "epoch": 0.8781498101484294, "grad_norm": 0.19460327923297882, "learning_rate": 4.464532092620607e-06, "log_odds_chosen": 0.5168122053146362, "log_odds_ratio": -0.474026620388031, "logits/chosen": -0.1055205836892128, "logits/rejected": -1.2003237009048462, "logps/chosen": -1.6242094039916992, "logps/rejected": -2.0614237785339355, "loss": 1.7035, "nll_loss": 1.6561068296432495, "rewards/accuracies": 1.0, "rewards/chosen": -0.16242094337940216, "rewards/margins": 0.04372143745422363, "rewards/rejected": -0.206142395734787, "step": 318 }, { "epoch": 0.8809112875388333, "grad_norm": 0.17680254578590393, "learning_rate": 4.4595654681788715e-06, "log_odds_chosen": 0.6278887987136841, "log_odds_ratio": -0.43415001034736633, "logits/chosen": -0.15030619502067566, "logits/rejected": -1.5502567291259766, "logps/chosen": -1.5051651000976562, "logps/rejected": -2.024726629257202, "loss": 1.5904, "nll_loss": 1.5469478368759155, "rewards/accuracies": 1.0, "rewards/chosen": -0.15051651000976562, "rewards/margins": 0.051956143230199814, "rewards/rejected": -0.20247265696525574, "step": 319 }, { "epoch": 0.8836727649292372, "grad_norm": 0.20075084269046783, "learning_rate": 4.454578706170075e-06, "log_odds_chosen": 0.3344058692455292, "log_odds_ratio": -0.547435998916626, "logits/chosen": -0.09762324392795563, "logits/rejected": -1.3721954822540283, "logps/chosen": -1.610334873199463, "logps/rejected": -1.8859809637069702, "loss": 1.6984, "nll_loss": 1.6436134576797485, "rewards/accuracies": 1.0, "rewards/chosen": -0.1610334813594818, "rewards/margins": 0.02756461501121521, "rewards/rejected": -0.18859811127185822, "step": 320 }, { "epoch": 0.886434242319641, "grad_norm": 0.1969553381204605, "learning_rate": 4.449571857840911e-06, "log_odds_chosen": 0.5939600467681885, "log_odds_ratio": -0.4538188576698303, "logits/chosen": -0.02511260285973549, "logits/rejected": -1.4034056663513184, "logps/chosen": -1.554402232170105, "logps/rejected": -2.0581822395324707, "loss": 1.647, "nll_loss": 1.6016532182693481, "rewards/accuracies": 0.875, "rewards/chosen": -0.15544021129608154, "rewards/margins": 0.05037800967693329, "rewards/rejected": -0.20581823587417603, "step": 321 }, { "epoch": 0.8891957197100449, "grad_norm": 0.20333482325077057, "learning_rate": 4.444544974644493e-06, "log_odds_chosen": 0.4322926700115204, "log_odds_ratio": -0.5066258311271667, "logits/chosen": -0.1251221001148224, "logits/rejected": -1.2058238983154297, "logps/chosen": -1.5241694450378418, "logps/rejected": -1.878300666809082, "loss": 1.6259, "nll_loss": 1.5751992464065552, "rewards/accuracies": 0.875, "rewards/chosen": -0.15241695940494537, "rewards/margins": 0.035413116216659546, "rewards/rejected": -0.18783007562160492, "step": 322 }, { "epoch": 0.8919571971004487, "grad_norm": 0.19495686888694763, "learning_rate": 4.4394981082398254e-06, "log_odds_chosen": 0.4637213945388794, "log_odds_ratio": -0.5005385875701904, "logits/chosen": -0.19393548369407654, "logits/rejected": -1.3736821413040161, "logps/chosen": -1.5801210403442383, "logps/rejected": -1.9673550128936768, "loss": 1.678, "nll_loss": 1.6279520988464355, "rewards/accuracies": 1.0, "rewards/chosen": -0.15801210701465607, "rewards/margins": 0.0387234091758728, "rewards/rejected": -0.19673550128936768, "step": 323 }, { "epoch": 0.8947186744908526, "grad_norm": 0.21592693030834198, "learning_rate": 4.434431310491267e-06, "log_odds_chosen": 0.6085124015808105, "log_odds_ratio": -0.44234418869018555, "logits/chosen": -0.20342035591602325, "logits/rejected": -1.3250751495361328, "logps/chosen": -1.6450177431106567, "logps/rejected": -2.15885329246521, "loss": 1.7148, "nll_loss": 1.6705245971679688, "rewards/accuracies": 1.0, "rewards/chosen": -0.16450178623199463, "rewards/margins": 0.05138356238603592, "rewards/rejected": -0.21588534116744995, "step": 324 }, { "epoch": 0.8974801518812565, "grad_norm": 0.2041964828968048, "learning_rate": 4.429344633468005e-06, "log_odds_chosen": 0.5157475471496582, "log_odds_ratio": -0.4731763005256653, "logits/chosen": -0.1373744010925293, "logits/rejected": -1.3682070970535278, "logps/chosen": -1.5091853141784668, "logps/rejected": -1.9341413974761963, "loss": 1.5962, "nll_loss": 1.5488858222961426, "rewards/accuracies": 1.0, "rewards/chosen": -0.15091852843761444, "rewards/margins": 0.042495615780353546, "rewards/rejected": -0.19341415166854858, "step": 325 }, { "epoch": 0.9002416292716603, "grad_norm": 0.21775774657726288, "learning_rate": 4.424238129443515e-06, "log_odds_chosen": 0.483593225479126, "log_odds_ratio": -0.48364073038101196, "logits/chosen": -0.09863223880529404, "logits/rejected": -1.0222870111465454, "logps/chosen": -1.6142656803131104, "logps/rejected": -2.01713228225708, "loss": 1.7025, "nll_loss": 1.6541244983673096, "rewards/accuracies": 1.0, "rewards/chosen": -0.1614265739917755, "rewards/margins": 0.04028663784265518, "rewards/rejected": -0.2017132043838501, "step": 326 }, { "epoch": 0.9030031066620642, "grad_norm": 0.20912012457847595, "learning_rate": 4.4191118508950286e-06, "log_odds_chosen": 0.5832557678222656, "log_odds_ratio": -0.44807419180870056, "logits/chosen": -0.18956468999385834, "logits/rejected": -1.2079969644546509, "logps/chosen": -1.5554307699203491, "logps/rejected": -2.04107666015625, "loss": 1.659, "nll_loss": 1.6141505241394043, "rewards/accuracies": 1.0, "rewards/chosen": -0.15554308891296387, "rewards/margins": 0.04856458306312561, "rewards/rejected": -0.20410768687725067, "step": 327 }, { "epoch": 0.9057645840524681, "grad_norm": 0.1959802657365799, "learning_rate": 4.413965850502987e-06, "log_odds_chosen": 0.6536615490913391, "log_odds_ratio": -0.42614060640335083, "logits/chosen": -0.2154429852962494, "logits/rejected": -1.3865175247192383, "logps/chosen": -1.4310425519943237, "logps/rejected": -1.9644105434417725, "loss": 1.4983, "nll_loss": 1.455698013305664, "rewards/accuracies": 1.0, "rewards/chosen": -0.14310424029827118, "rewards/margins": 0.05333680659532547, "rewards/rejected": -0.19644105434417725, "step": 328 }, { "epoch": 0.908526061442872, "grad_norm": 0.19480550289154053, "learning_rate": 4.408800181150509e-06, "log_odds_chosen": 0.6978083252906799, "log_odds_ratio": -0.4107431471347809, "logits/chosen": -0.20347651839256287, "logits/rejected": -1.4490666389465332, "logps/chosen": -1.508465051651001, "logps/rejected": -2.0856833457946777, "loss": 1.5878, "nll_loss": 1.5467225313186646, "rewards/accuracies": 1.0, "rewards/chosen": -0.15084651112556458, "rewards/margins": 0.0577218271791935, "rewards/rejected": -0.20856834948062897, "step": 329 }, { "epoch": 0.9112875388332758, "grad_norm": 0.20806226134300232, "learning_rate": 4.4036148959228365e-06, "log_odds_chosen": 0.5531899333000183, "log_odds_ratio": -0.455844908952713, "logits/chosen": -0.12704087793827057, "logits/rejected": -1.144600510597229, "logps/chosen": -1.5709285736083984, "logps/rejected": -2.0322890281677246, "loss": 1.6525, "nll_loss": 1.6068720817565918, "rewards/accuracies": 1.0, "rewards/chosen": -0.1570928692817688, "rewards/margins": 0.046136029064655304, "rewards/rejected": -0.2032289057970047, "step": 330 }, { "epoch": 0.9140490162236796, "grad_norm": 0.20405174791812897, "learning_rate": 4.3984100481068e-06, "log_odds_chosen": 0.5182926654815674, "log_odds_ratio": -0.4803735911846161, "logits/chosen": -0.05097893998026848, "logits/rejected": -1.530269980430603, "logps/chosen": -1.5223273038864136, "logps/rejected": -1.9506477117538452, "loss": 1.602, "nll_loss": 1.5539740324020386, "rewards/accuracies": 0.875, "rewards/chosen": -0.15223273634910583, "rewards/margins": 0.04283204674720764, "rewards/rejected": -0.19506478309631348, "step": 331 }, { "epoch": 0.9168104936140835, "grad_norm": 0.1879170536994934, "learning_rate": 4.3931856911902635e-06, "log_odds_chosen": 0.6412795782089233, "log_odds_ratio": -0.4325963854789734, "logits/chosen": -0.23800577223300934, "logits/rejected": -1.4617422819137573, "logps/chosen": -1.5373084545135498, "logps/rejected": -2.0728628635406494, "loss": 1.6293, "nll_loss": 1.586013674736023, "rewards/accuracies": 1.0, "rewards/chosen": -0.1537308394908905, "rewards/margins": 0.0535554476082325, "rewards/rejected": -0.2072862982749939, "step": 332 }, { "epoch": 0.9195719710044874, "grad_norm": 0.19721606373786926, "learning_rate": 4.387941878861578e-06, "log_odds_chosen": 0.43159598112106323, "log_odds_ratio": -0.5106831192970276, "logits/chosen": -0.2015897035598755, "logits/rejected": -1.1725409030914307, "logps/chosen": -1.4626684188842773, "logps/rejected": -1.816298246383667, "loss": 1.5551, "nll_loss": 1.5040326118469238, "rewards/accuracies": 0.875, "rewards/chosen": -0.1462668478488922, "rewards/margins": 0.03536297380924225, "rewards/rejected": -0.18162982165813446, "step": 333 }, { "epoch": 0.9223334483948913, "grad_norm": 0.19862483441829681, "learning_rate": 4.382678665009028e-06, "log_odds_chosen": 0.5599039196968079, "log_odds_ratio": -0.45722323656082153, "logits/chosen": -0.1434212028980255, "logits/rejected": -1.4838988780975342, "logps/chosen": -1.5819792747497559, "logps/rejected": -2.048774003982544, "loss": 1.6595, "nll_loss": 1.6138123273849487, "rewards/accuracies": 1.0, "rewards/chosen": -0.15819790959358215, "rewards/margins": 0.04667946696281433, "rewards/rejected": -0.20487739145755768, "step": 334 }, { "epoch": 0.9250949257852952, "grad_norm": 0.20085540413856506, "learning_rate": 4.3773961037202784e-06, "log_odds_chosen": 0.5849855542182922, "log_odds_ratio": -0.4470357596874237, "logits/chosen": -0.16471579670906067, "logits/rejected": -1.48972749710083, "logps/chosen": -1.6449600458145142, "logps/rejected": -2.1387722492218018, "loss": 1.7056, "nll_loss": 1.6609022617340088, "rewards/accuracies": 1.0, "rewards/chosen": -0.16449600458145142, "rewards/margins": 0.04938122630119324, "rewards/rejected": -0.21387723088264465, "step": 335 }, { "epoch": 0.927856403175699, "grad_norm": 0.19542387127876282, "learning_rate": 4.37209424928182e-06, "log_odds_chosen": 0.5759137272834778, "log_odds_ratio": -0.4505816698074341, "logits/chosen": -0.1124081164598465, "logits/rejected": -1.1634758710861206, "logps/chosen": -1.5444155931472778, "logps/rejected": -2.0244972705841064, "loss": 1.6261, "nll_loss": 1.581070065498352, "rewards/accuracies": 1.0, "rewards/chosen": -0.15444158017635345, "rewards/margins": 0.048008158802986145, "rewards/rejected": -0.2024497389793396, "step": 336 }, { "epoch": 0.9306178805661028, "grad_norm": 0.187562957406044, "learning_rate": 4.366773156178413e-06, "log_odds_chosen": 0.40636616945266724, "log_odds_ratio": -0.5145981311798096, "logits/chosen": -0.23331299424171448, "logits/rejected": -1.272404432296753, "logps/chosen": -1.4672964811325073, "logps/rejected": -1.792708396911621, "loss": 1.5577, "nll_loss": 1.5061949491500854, "rewards/accuracies": 1.0, "rewards/chosen": -0.14672963321208954, "rewards/margins": 0.03254120051860809, "rewards/rejected": -0.17927084863185883, "step": 337 }, { "epoch": 0.9333793579565067, "grad_norm": 0.19676648080348969, "learning_rate": 4.361432879092518e-06, "log_odds_chosen": 0.6147382259368896, "log_odds_ratio": -0.444987416267395, "logits/chosen": -0.2584179937839508, "logits/rejected": -1.2025710344314575, "logps/chosen": -1.4777214527130127, "logps/rejected": -1.9875514507293701, "loss": 1.5673, "nll_loss": 1.5228395462036133, "rewards/accuracies": 1.0, "rewards/chosen": -0.1477721631526947, "rewards/margins": 0.0509830042719841, "rewards/rejected": -0.198755145072937, "step": 338 }, { "epoch": 0.9361408353469106, "grad_norm": 0.20113791525363922, "learning_rate": 4.356073472903747e-06, "log_odds_chosen": 0.5940407514572144, "log_odds_ratio": -0.4446149170398712, "logits/chosen": -0.09678924083709717, "logits/rejected": -1.2151576280593872, "logps/chosen": -1.4379996061325073, "logps/rejected": -1.9237509965896606, "loss": 1.5294, "nll_loss": 1.4849097728729248, "rewards/accuracies": 1.0, "rewards/chosen": -0.14379996061325073, "rewards/margins": 0.04857514798641205, "rewards/rejected": -0.19237510859966278, "step": 339 }, { "epoch": 0.9389023127373145, "grad_norm": 0.18967628479003906, "learning_rate": 4.350694992688289e-06, "log_odds_chosen": 0.6668899059295654, "log_odds_ratio": -0.427124559879303, "logits/chosen": -0.08107070624828339, "logits/rejected": -1.2518596649169922, "logps/chosen": -1.471956491470337, "logps/rejected": -2.021543025970459, "loss": 1.5567, "nll_loss": 1.513990879058838, "rewards/accuracies": 1.0, "rewards/chosen": -0.14719566702842712, "rewards/margins": 0.05495864897966385, "rewards/rejected": -0.20215432345867157, "step": 340 }, { "epoch": 0.9416637901277183, "grad_norm": 0.1954769492149353, "learning_rate": 4.345297493718352e-06, "log_odds_chosen": 0.5373238325119019, "log_odds_ratio": -0.46630507707595825, "logits/chosen": -0.24432893097400665, "logits/rejected": -1.0526390075683594, "logps/chosen": -1.472176432609558, "logps/rejected": -1.9091975688934326, "loss": 1.5654, "nll_loss": 1.5187859535217285, "rewards/accuracies": 1.0, "rewards/chosen": -0.14721764624118805, "rewards/margins": 0.04370209947228432, "rewards/rejected": -0.19091975688934326, "step": 341 }, { "epoch": 0.9444252675181222, "grad_norm": 0.18435297906398773, "learning_rate": 4.339881031461588e-06, "log_odds_chosen": 0.46578601002693176, "log_odds_ratio": -0.49112096428871155, "logits/chosen": -0.18604259192943573, "logits/rejected": -1.3026976585388184, "logps/chosen": -1.4346725940704346, "logps/rejected": -1.7989139556884766, "loss": 1.5414, "nll_loss": 1.4923009872436523, "rewards/accuracies": 1.0, "rewards/chosen": -0.1434672772884369, "rewards/margins": 0.036424119025468826, "rewards/rejected": -0.17989139258861542, "step": 342 }, { "epoch": 0.9471867449085261, "grad_norm": 0.19713670015335083, "learning_rate": 4.334445661580527e-06, "log_odds_chosen": 0.5911546945571899, "log_odds_ratio": -0.444367378950119, "logits/chosen": -0.24173688888549805, "logits/rejected": -1.4439072608947754, "logps/chosen": -1.5436309576034546, "logps/rejected": -2.035305976867676, "loss": 1.6258, "nll_loss": 1.5813885927200317, "rewards/accuracies": 1.0, "rewards/chosen": -0.15436309576034546, "rewards/margins": 0.049167513847351074, "rewards/rejected": -0.20353063941001892, "step": 343 }, { "epoch": 0.94994822229893, "grad_norm": 0.19868247210979462, "learning_rate": 4.328991439932003e-06, "log_odds_chosen": 0.646172046661377, "log_odds_ratio": -0.42435139417648315, "logits/chosen": -0.21409977972507477, "logits/rejected": -1.1630572080612183, "logps/chosen": -1.475473165512085, "logps/rejected": -2.007708787918091, "loss": 1.5666, "nll_loss": 1.5241310596466064, "rewards/accuracies": 1.0, "rewards/chosen": -0.14754730463027954, "rewards/margins": 0.053223565220832825, "rewards/rejected": -0.20077086985111237, "step": 344 }, { "epoch": 0.9527096996893338, "grad_norm": 0.2008034884929657, "learning_rate": 4.323518422566586e-06, "log_odds_chosen": 0.7171238660812378, "log_odds_ratio": -0.4020853638648987, "logits/chosen": -0.14678461849689484, "logits/rejected": -1.1799049377441406, "logps/chosen": -1.5431439876556396, "logps/rejected": -2.141486167907715, "loss": 1.639, "nll_loss": 1.5987637042999268, "rewards/accuracies": 1.0, "rewards/chosen": -0.15431438386440277, "rewards/margins": 0.05983421206474304, "rewards/rejected": -0.214148610830307, "step": 345 }, { "epoch": 0.9554711770797376, "grad_norm": 0.18002885580062866, "learning_rate": 4.318026665727993e-06, "log_odds_chosen": 0.7151846885681152, "log_odds_ratio": -0.40609210729599, "logits/chosen": -0.2377801239490509, "logits/rejected": -1.4111603498458862, "logps/chosen": -1.4105151891708374, "logps/rejected": -1.9907076358795166, "loss": 1.4953, "nll_loss": 1.4546504020690918, "rewards/accuracies": 1.0, "rewards/chosen": -0.1410515308380127, "rewards/margins": 0.058019235730171204, "rewards/rejected": -0.1990707665681839, "step": 346 }, { "epoch": 0.9582326544701415, "grad_norm": 0.2088988721370697, "learning_rate": 4.3125162258525265e-06, "log_odds_chosen": 0.37778812646865845, "log_odds_ratio": -0.5246101021766663, "logits/chosen": -0.26066213846206665, "logits/rejected": -1.111747145652771, "logps/chosen": -1.6180295944213867, "logps/rejected": -1.932243824005127, "loss": 1.7004, "nll_loss": 1.6479332447052002, "rewards/accuracies": 1.0, "rewards/chosen": -0.1618029624223709, "rewards/margins": 0.03142143785953522, "rewards/rejected": -0.19322440028190613, "step": 347 }, { "epoch": 0.9609941318605454, "grad_norm": 0.23166809976100922, "learning_rate": 4.3069871595684795e-06, "log_odds_chosen": 0.5429641604423523, "log_odds_ratio": -0.46485579013824463, "logits/chosen": -0.26122069358825684, "logits/rejected": -1.2304470539093018, "logps/chosen": -1.482200026512146, "logps/rejected": -1.9262490272521973, "loss": 1.5751, "nll_loss": 1.5286585092544556, "rewards/accuracies": 1.0, "rewards/chosen": -0.1482200026512146, "rewards/margins": 0.044404882937669754, "rewards/rejected": -0.19262489676475525, "step": 348 }, { "epoch": 0.9637556092509493, "grad_norm": 0.19496473670005798, "learning_rate": 4.3014395236955635e-06, "log_odds_chosen": 0.6688944101333618, "log_odds_ratio": -0.4200526475906372, "logits/chosen": -0.2004820704460144, "logits/rejected": -1.322332501411438, "logps/chosen": -1.4777374267578125, "logps/rejected": -2.0316760540008545, "loss": 1.5606, "nll_loss": 1.5185458660125732, "rewards/accuracies": 1.0, "rewards/chosen": -0.14777372777462006, "rewards/margins": 0.055393870919942856, "rewards/rejected": -0.2031676024198532, "step": 349 }, { "epoch": 0.9665170866413532, "grad_norm": 0.18388359248638153, "learning_rate": 4.295873375244319e-06, "log_odds_chosen": 0.5681695938110352, "log_odds_ratio": -0.4566437602043152, "logits/chosen": -0.21363496780395508, "logits/rejected": -1.1769250631332397, "logps/chosen": -1.439588189125061, "logps/rejected": -1.9001712799072266, "loss": 1.5306, "nll_loss": 1.4849416017532349, "rewards/accuracies": 1.0, "rewards/chosen": -0.14395882189273834, "rewards/margins": 0.04605831205844879, "rewards/rejected": -0.19001714885234833, "step": 350 }, { "epoch": 0.9692785640317569, "grad_norm": 0.20012839138507843, "learning_rate": 4.290288771415536e-06, "log_odds_chosen": 0.5067012906074524, "log_odds_ratio": -0.477801650762558, "logits/chosen": -0.20678414404392242, "logits/rejected": -1.340362787246704, "logps/chosen": -1.5872958898544312, "logps/rejected": -2.0096263885498047, "loss": 1.6634, "nll_loss": 1.6156083345413208, "rewards/accuracies": 1.0, "rewards/chosen": -0.15872959792613983, "rewards/margins": 0.04223306477069855, "rewards/rejected": -0.20096264779567719, "step": 351 }, { "epoch": 0.9720400414221608, "grad_norm": 0.1952664852142334, "learning_rate": 4.284685769599658e-06, "log_odds_chosen": 0.5722883343696594, "log_odds_ratio": -0.45633140206336975, "logits/chosen": -0.2586257755756378, "logits/rejected": -1.3633008003234863, "logps/chosen": -1.495697259902954, "logps/rejected": -1.9731667041778564, "loss": 1.5729, "nll_loss": 1.5272736549377441, "rewards/accuracies": 1.0, "rewards/chosen": -0.14956970512866974, "rewards/margins": 0.04774694889783859, "rewards/rejected": -0.19731666147708893, "step": 352 }, { "epoch": 0.9748015188125647, "grad_norm": 0.194318950176239, "learning_rate": 4.279064427376199e-06, "log_odds_chosen": 0.5692598819732666, "log_odds_ratio": -0.4534481167793274, "logits/chosen": -0.21069735288619995, "logits/rejected": -1.2419835329055786, "logps/chosen": -1.5351362228393555, "logps/rejected": -2.0084264278411865, "loss": 1.6002, "nll_loss": 1.5548111200332642, "rewards/accuracies": 1.0, "rewards/chosen": -0.15351362526416779, "rewards/margins": 0.04732901602983475, "rewards/rejected": -0.20084263384342194, "step": 353 }, { "epoch": 0.9775629962029686, "grad_norm": 0.20712882280349731, "learning_rate": 4.273424802513145e-06, "log_odds_chosen": 0.6014809012413025, "log_odds_ratio": -0.4396442174911499, "logits/chosen": -0.2209128886461258, "logits/rejected": -1.3539609909057617, "logps/chosen": -1.5357590913772583, "logps/rejected": -2.0328547954559326, "loss": 1.6122, "nll_loss": 1.5682051181793213, "rewards/accuracies": 1.0, "rewards/chosen": -0.15357591211795807, "rewards/margins": 0.04970954358577728, "rewards/rejected": -0.20328545570373535, "step": 354 }, { "epoch": 0.9803244735933725, "grad_norm": 0.18310385942459106, "learning_rate": 4.267766952966369e-06, "log_odds_chosen": 0.7377204298973083, "log_odds_ratio": -0.40729206800460815, "logits/chosen": -0.15849949419498444, "logits/rejected": -1.2910916805267334, "logps/chosen": -1.3705458641052246, "logps/rejected": -1.968411922454834, "loss": 1.4587, "nll_loss": 1.4179855585098267, "rewards/accuracies": 1.0, "rewards/chosen": -0.13705459237098694, "rewards/margins": 0.05978662520647049, "rewards/rejected": -0.19684121012687683, "step": 355 }, { "epoch": 0.9830859509837763, "grad_norm": 0.18806028366088867, "learning_rate": 4.26209093687903e-06, "log_odds_chosen": 0.4816596806049347, "log_odds_ratio": -0.48845529556274414, "logits/chosen": -0.2249145656824112, "logits/rejected": -1.4047096967697144, "logps/chosen": -1.5253814458847046, "logps/rejected": -1.9247775077819824, "loss": 1.6102, "nll_loss": 1.5613718032836914, "rewards/accuracies": 1.0, "rewards/chosen": -0.15253815054893494, "rewards/margins": 0.03993961960077286, "rewards/rejected": -0.1924777626991272, "step": 356 }, { "epoch": 0.9858474283741802, "grad_norm": 0.19746609032154083, "learning_rate": 4.2563968125809734e-06, "log_odds_chosen": 0.6187906265258789, "log_odds_ratio": -0.43648919463157654, "logits/chosen": -0.08207077533006668, "logits/rejected": -1.3270372152328491, "logps/chosen": -1.5937108993530273, "logps/rejected": -2.113786458969116, "loss": 1.6592, "nll_loss": 1.615505576133728, "rewards/accuracies": 1.0, "rewards/chosen": -0.15937110781669617, "rewards/margins": 0.05200754106044769, "rewards/rejected": -0.21137863397598267, "step": 357 }, { "epoch": 0.988608905764584, "grad_norm": 0.19708895683288574, "learning_rate": 4.2506846385881375e-06, "log_odds_chosen": 0.6972445845603943, "log_odds_ratio": -0.4114294648170471, "logits/chosen": -0.37635722756385803, "logits/rejected": -1.2826493978500366, "logps/chosen": -1.4048396348953247, "logps/rejected": -1.9649187326431274, "loss": 1.4948, "nll_loss": 1.4536077976226807, "rewards/accuracies": 1.0, "rewards/chosen": -0.14048396050930023, "rewards/margins": 0.056007932871580124, "rewards/rejected": -0.19649189710617065, "step": 358 }, { "epoch": 0.9913703831549879, "grad_norm": 0.19348447024822235, "learning_rate": 4.2449544736019486e-06, "log_odds_chosen": 0.5735771656036377, "log_odds_ratio": -0.4517831802368164, "logits/chosen": -0.15842154622077942, "logits/rejected": -1.160691738128662, "logps/chosen": -1.4741443395614624, "logps/rejected": -1.9411314725875854, "loss": 1.559, "nll_loss": 1.513805866241455, "rewards/accuracies": 1.0, "rewards/chosen": -0.147414430975914, "rewards/margins": 0.04669870436191559, "rewards/rejected": -0.1941131353378296, "step": 359 }, { "epoch": 0.9941318605453918, "grad_norm": 0.18270692229270935, "learning_rate": 4.239206376508716e-06, "log_odds_chosen": 0.7792839407920837, "log_odds_ratio": -0.38477823138237, "logits/chosen": -0.15978127717971802, "logits/rejected": -1.5017080307006836, "logps/chosen": -1.494106411933899, "logps/rejected": -2.147690534591675, "loss": 1.5826, "nll_loss": 1.5441505908966064, "rewards/accuracies": 1.0, "rewards/chosen": -0.14941063523292542, "rewards/margins": 0.06535841524600983, "rewards/rejected": -0.21476906538009644, "step": 360 }, { "epoch": 0.9968933379357956, "grad_norm": 0.18168719112873077, "learning_rate": 4.233440406379032e-06, "log_odds_chosen": 0.5889392495155334, "log_odds_ratio": -0.4472864866256714, "logits/chosen": -0.28017136454582214, "logits/rejected": -1.22226881980896, "logps/chosen": -1.414659857749939, "logps/rejected": -1.88670814037323, "loss": 1.505, "nll_loss": 1.460268497467041, "rewards/accuracies": 1.0, "rewards/chosen": -0.141465961933136, "rewards/margins": 0.047204844653606415, "rewards/rejected": -0.1886708289384842, "step": 361 }, { "epoch": 0.9996548153261995, "grad_norm": 0.19425641000270844, "learning_rate": 4.227656622467162e-06, "log_odds_chosen": 0.6574209928512573, "log_odds_ratio": -0.4261106252670288, "logits/chosen": -0.2706007957458496, "logits/rejected": -1.1785038709640503, "logps/chosen": -1.516379475593567, "logps/rejected": -2.065363883972168, "loss": 1.5932, "nll_loss": 1.550638198852539, "rewards/accuracies": 1.0, "rewards/chosen": -0.1516379415988922, "rewards/margins": 0.054898452013731, "rewards/rejected": -0.2065364122390747, "step": 362 }, { "epoch": 1.0, "grad_norm": 0.46724027395248413, "learning_rate": 4.221855084210433e-06, "log_odds_chosen": 0.3892417550086975, "log_odds_ratio": -0.5173466205596924, "logits/chosen": -0.4694238007068634, "logits/rejected": -1.6226561069488525, "logps/chosen": -1.5516700744628906, "logps/rejected": -1.8701450824737549, "loss": 1.6319, "nll_loss": 1.5801714658737183, "rewards/accuracies": 1.0, "rewards/chosen": -0.15516701340675354, "rewards/margins": 0.03184749186038971, "rewards/rejected": -0.18701450526714325, "step": 363 }, { "epoch": 1.0027614773904039, "grad_norm": 0.19933797419071198, "learning_rate": 4.2160358512286266e-06, "log_odds_chosen": 0.612895667552948, "log_odds_ratio": -0.4389383792877197, "logits/chosen": -0.19393223524093628, "logits/rejected": -1.2420426607131958, "logps/chosen": -1.498990535736084, "logps/rejected": -2.0020782947540283, "loss": 1.5772, "nll_loss": 1.5332666635513306, "rewards/accuracies": 1.0, "rewards/chosen": -0.14989906549453735, "rewards/margins": 0.05030875653028488, "rewards/rejected": -0.20020782947540283, "step": 364 }, { "epoch": 1.0055229547808078, "grad_norm": 0.18562446534633636, "learning_rate": 4.210198983323366e-06, "log_odds_chosen": 0.6299260258674622, "log_odds_ratio": -0.4328676760196686, "logits/chosen": -0.18327558040618896, "logits/rejected": -1.4133291244506836, "logps/chosen": -1.5473169088363647, "logps/rejected": -2.070706605911255, "loss": 1.6099, "nll_loss": 1.5666420459747314, "rewards/accuracies": 1.0, "rewards/chosen": -0.15473169088363647, "rewards/margins": 0.052338968962430954, "rewards/rejected": -0.20707066357135773, "step": 365 }, { "epoch": 1.0082844321712117, "grad_norm": 0.19783955812454224, "learning_rate": 4.204344540477499e-06, "log_odds_chosen": 0.6581704020500183, "log_odds_ratio": -0.42357465624809265, "logits/chosen": -0.21236993372440338, "logits/rejected": -1.2212865352630615, "logps/chosen": -1.5292942523956299, "logps/rejected": -2.0793232917785645, "loss": 1.6086, "nll_loss": 1.5662219524383545, "rewards/accuracies": 1.0, "rewards/chosen": -0.152929425239563, "rewards/margins": 0.055002905428409576, "rewards/rejected": -0.20793233811855316, "step": 366 }, { "epoch": 1.0110459095616156, "grad_norm": 0.18359708786010742, "learning_rate": 4.1984725828544855e-06, "log_odds_chosen": 0.7003411054611206, "log_odds_ratio": -0.40691572427749634, "logits/chosen": -0.18985068798065186, "logits/rejected": -1.434674620628357, "logps/chosen": -1.4541864395141602, "logps/rejected": -2.0293405055999756, "loss": 1.531, "nll_loss": 1.4903078079223633, "rewards/accuracies": 1.0, "rewards/chosen": -0.1454186588525772, "rewards/margins": 0.05751540884375572, "rewards/rejected": -0.20293407142162323, "step": 367 }, { "epoch": 1.0138073869520194, "grad_norm": 0.19799236953258514, "learning_rate": 4.192583170797775e-06, "log_odds_chosen": 0.524876594543457, "log_odds_ratio": -0.4676755368709564, "logits/chosen": -0.2710600793361664, "logits/rejected": -1.0796301364898682, "logps/chosen": -1.492266058921814, "logps/rejected": -1.9222991466522217, "loss": 1.5776, "nll_loss": 1.5308009386062622, "rewards/accuracies": 1.0, "rewards/chosen": -0.1492266058921814, "rewards/margins": 0.04300331324338913, "rewards/rejected": -0.19222994148731232, "step": 368 }, { "epoch": 1.016568864342423, "grad_norm": 0.18693482875823975, "learning_rate": 4.186676364830187e-06, "log_odds_chosen": 0.5753119587898254, "log_odds_ratio": -0.4493744671344757, "logits/chosen": -0.1405227780342102, "logits/rejected": -1.2737160921096802, "logps/chosen": -1.5617775917053223, "logps/rejected": -2.0400755405426025, "loss": 1.6285, "nll_loss": 1.5835627317428589, "rewards/accuracies": 1.0, "rewards/chosen": -0.15617777407169342, "rewards/margins": 0.047829799354076385, "rewards/rejected": -0.2040075659751892, "step": 369 }, { "epoch": 1.019330341732827, "grad_norm": 0.21877062320709229, "learning_rate": 4.1807522256532925e-06, "log_odds_chosen": 0.6495308876037598, "log_odds_ratio": -0.4310915768146515, "logits/chosen": -0.20181499421596527, "logits/rejected": -1.3658957481384277, "logps/chosen": -1.6181979179382324, "logps/rejected": -2.170933485031128, "loss": 1.6861, "nll_loss": 1.6429781913757324, "rewards/accuracies": 1.0, "rewards/chosen": -0.16181980073451996, "rewards/margins": 0.05527355521917343, "rewards/rejected": -0.217093363404274, "step": 370 }, { "epoch": 1.0220918191232309, "grad_norm": 0.18539805710315704, "learning_rate": 4.174810814146789e-06, "log_odds_chosen": 0.6834878921508789, "log_odds_ratio": -0.4169776141643524, "logits/chosen": -0.08832372725009918, "logits/rejected": -1.0964831113815308, "logps/chosen": -1.5441397428512573, "logps/rejected": -2.117156505584717, "loss": 1.6166, "nll_loss": 1.5748991966247559, "rewards/accuracies": 1.0, "rewards/chosen": -0.15441398322582245, "rewards/margins": 0.057301655411720276, "rewards/rejected": -0.21171565353870392, "step": 371 }, { "epoch": 1.0248532965136348, "grad_norm": 0.1914224624633789, "learning_rate": 4.1688521913678706e-06, "log_odds_chosen": 0.7822255492210388, "log_odds_ratio": -0.38675639033317566, "logits/chosen": -0.16042651236057281, "logits/rejected": -1.2148573398590088, "logps/chosen": -1.4989123344421387, "logps/rejected": -2.1540937423706055, "loss": 1.5648, "nll_loss": 1.5261112451553345, "rewards/accuracies": 1.0, "rewards/chosen": -0.14989124238491058, "rewards/margins": 0.06551814079284668, "rewards/rejected": -0.21540936827659607, "step": 372 }, { "epoch": 1.0276147739040387, "grad_norm": 0.1842961460351944, "learning_rate": 4.162876418550606e-06, "log_odds_chosen": 0.5339972376823425, "log_odds_ratio": -0.46532902121543884, "logits/chosen": -0.2638809084892273, "logits/rejected": -1.291076421737671, "logps/chosen": -1.506029486656189, "logps/rejected": -1.944475769996643, "loss": 1.5811, "nll_loss": 1.5345618724822998, "rewards/accuracies": 1.0, "rewards/chosen": -0.15060293674468994, "rewards/margins": 0.043844640254974365, "rewards/rejected": -0.1944475769996643, "step": 373 }, { "epoch": 1.0303762512944425, "grad_norm": 0.19836845993995667, "learning_rate": 4.156883557105308e-06, "log_odds_chosen": 0.5632570385932922, "log_odds_ratio": -0.46161651611328125, "logits/chosen": -0.23987069725990295, "logits/rejected": -1.331959843635559, "logps/chosen": -1.4848788976669312, "logps/rejected": -1.9496028423309326, "loss": 1.5517, "nll_loss": 1.5055512189865112, "rewards/accuracies": 1.0, "rewards/chosen": -0.1484878808259964, "rewards/margins": 0.04647241160273552, "rewards/rejected": -0.19496029615402222, "step": 374 }, { "epoch": 1.0331377286848464, "grad_norm": 0.20077650249004364, "learning_rate": 4.150873668617899e-06, "log_odds_chosen": 0.5416385531425476, "log_odds_ratio": -0.4598068594932556, "logits/chosen": -0.2892989218235016, "logits/rejected": -1.227949619293213, "logps/chosen": -1.5431418418884277, "logps/rejected": -1.9903440475463867, "loss": 1.6064, "nll_loss": 1.5604456663131714, "rewards/accuracies": 1.0, "rewards/chosen": -0.15431420505046844, "rewards/margins": 0.04472021758556366, "rewards/rejected": -0.1990344226360321, "step": 375 }, { "epoch": 1.0358992060752503, "grad_norm": 0.1922207772731781, "learning_rate": 4.144846814849282e-06, "log_odds_chosen": 0.5037797093391418, "log_odds_ratio": -0.4755171537399292, "logits/chosen": -0.10908980667591095, "logits/rejected": -1.1120904684066772, "logps/chosen": -1.5532734394073486, "logps/rejected": -1.9692023992538452, "loss": 1.6357, "nll_loss": 1.5881571769714355, "rewards/accuracies": 1.0, "rewards/chosen": -0.15532734990119934, "rewards/margins": 0.04159289598464966, "rewards/rejected": -0.1969202607870102, "step": 376 }, { "epoch": 1.0386606834656542, "grad_norm": 0.19230647385120392, "learning_rate": 4.138803057734705e-06, "log_odds_chosen": 0.7433000802993774, "log_odds_ratio": -0.3926478624343872, "logits/chosen": -0.16731296479701996, "logits/rejected": -1.154883623123169, "logps/chosen": -1.4572123289108276, "logps/rejected": -2.0705974102020264, "loss": 1.5333, "nll_loss": 1.4939861297607422, "rewards/accuracies": 1.0, "rewards/chosen": -0.14572124183177948, "rewards/margins": 0.06133852154016495, "rewards/rejected": -0.20705974102020264, "step": 377 }, { "epoch": 1.041422160856058, "grad_norm": 0.1747986078262329, "learning_rate": 4.132742459383122e-06, "log_odds_chosen": 0.8132926821708679, "log_odds_ratio": -0.37535253167152405, "logits/chosen": -0.19614934921264648, "logits/rejected": -1.3223804235458374, "logps/chosen": -1.3767224550247192, "logps/rejected": -2.0322914123535156, "loss": 1.4591, "nll_loss": 1.4215335845947266, "rewards/accuracies": 1.0, "rewards/chosen": -0.13767226040363312, "rewards/margins": 0.06555688381195068, "rewards/rejected": -0.2032291442155838, "step": 378 }, { "epoch": 1.0441836382464618, "grad_norm": 0.18519026041030884, "learning_rate": 4.126665082076559e-06, "log_odds_chosen": 0.5479438900947571, "log_odds_ratio": -0.46051234006881714, "logits/chosen": -0.21864745020866394, "logits/rejected": -0.9870752096176147, "logps/chosen": -1.5094008445739746, "logps/rejected": -1.9554524421691895, "loss": 1.5949, "nll_loss": 1.5488578081130981, "rewards/accuracies": 1.0, "rewards/chosen": -0.15094009041786194, "rewards/margins": 0.044605158269405365, "rewards/rejected": -0.1955452561378479, "step": 379 }, { "epoch": 1.0469451156368657, "grad_norm": 0.1884365975856781, "learning_rate": 4.120570988269472e-06, "log_odds_chosen": 0.6451675891876221, "log_odds_ratio": -0.4251843988895416, "logits/chosen": -0.28254202008247375, "logits/rejected": -1.3985097408294678, "logps/chosen": -1.5068163871765137, "logps/rejected": -2.0416600704193115, "loss": 1.5559, "nll_loss": 1.5133352279663086, "rewards/accuracies": 1.0, "rewards/chosen": -0.15068164467811584, "rewards/margins": 0.053484346717596054, "rewards/rejected": -0.2041660100221634, "step": 380 }, { "epoch": 1.0497065930272695, "grad_norm": 0.20361238718032837, "learning_rate": 4.114460240588101e-06, "log_odds_chosen": 0.8017259836196899, "log_odds_ratio": -0.3787066638469696, "logits/chosen": -0.18577685952186584, "logits/rejected": -1.1138646602630615, "logps/chosen": -1.5362298488616943, "logps/rejected": -2.2140719890594482, "loss": 1.6003, "nll_loss": 1.5624499320983887, "rewards/accuracies": 1.0, "rewards/chosen": -0.15362299978733063, "rewards/margins": 0.06778421998023987, "rewards/rejected": -0.2214072048664093, "step": 381 }, { "epoch": 1.0524680704176734, "grad_norm": 0.18294233083724976, "learning_rate": 4.1083329018298356e-06, "log_odds_chosen": 0.7440272569656372, "log_odds_ratio": -0.39435288310050964, "logits/chosen": -0.18778713047504425, "logits/rejected": -1.1963489055633545, "logps/chosen": -1.4442341327667236, "logps/rejected": -2.0572502613067627, "loss": 1.5068, "nll_loss": 1.4673796892166138, "rewards/accuracies": 1.0, "rewards/chosen": -0.14442341029644012, "rewards/margins": 0.06130162253975868, "rewards/rejected": -0.2057250291109085, "step": 382 }, { "epoch": 1.0552295478080773, "grad_norm": 0.19120609760284424, "learning_rate": 4.102189034962561e-06, "log_odds_chosen": 0.6499487161636353, "log_odds_ratio": -0.4234417676925659, "logits/chosen": -0.12440590560436249, "logits/rejected": -1.0700702667236328, "logps/chosen": -1.5787014961242676, "logps/rejected": -2.125746726989746, "loss": 1.6325, "nll_loss": 1.5901223421096802, "rewards/accuracies": 1.0, "rewards/chosen": -0.15787014365196228, "rewards/margins": 0.05470450222492218, "rewards/rejected": -0.21257463097572327, "step": 383 }, { "epoch": 1.0579910251984812, "grad_norm": 0.20611584186553955, "learning_rate": 4.096028703124014e-06, "log_odds_chosen": 0.6581447720527649, "log_odds_ratio": -0.427496999502182, "logits/chosen": -0.243414044380188, "logits/rejected": -1.0197745561599731, "logps/chosen": -1.5716708898544312, "logps/rejected": -2.1266674995422363, "loss": 1.6172, "nll_loss": 1.5744341611862183, "rewards/accuracies": 1.0, "rewards/chosen": -0.15716709196567535, "rewards/margins": 0.055499687790870667, "rewards/rejected": -0.21266677975654602, "step": 384 }, { "epoch": 1.060752502588885, "grad_norm": 0.19265590608119965, "learning_rate": 4.089851969621138e-06, "log_odds_chosen": 0.7516465783119202, "log_odds_ratio": -0.4052088260650635, "logits/chosen": -0.19772981107234955, "logits/rejected": -1.2006863355636597, "logps/chosen": -1.4199714660644531, "logps/rejected": -2.0437204837799072, "loss": 1.5074, "nll_loss": 1.466860055923462, "rewards/accuracies": 1.0, "rewards/chosen": -0.14199714362621307, "rewards/margins": 0.062374889850616455, "rewards/rejected": -0.20437204837799072, "step": 385 }, { "epoch": 1.063513979979289, "grad_norm": 0.21174004673957825, "learning_rate": 4.083658897929425e-06, "log_odds_chosen": 0.6503540277481079, "log_odds_ratio": -0.4217776358127594, "logits/chosen": -0.2460387498140335, "logits/rejected": -1.1329982280731201, "logps/chosen": -1.5417426824569702, "logps/rejected": -2.0824155807495117, "loss": 1.605, "nll_loss": 1.5628407001495361, "rewards/accuracies": 1.0, "rewards/chosen": -0.15417428314685822, "rewards/margins": 0.05406728759407997, "rewards/rejected": -0.2082415521144867, "step": 386 }, { "epoch": 1.0662754573696929, "grad_norm": 0.19454148411750793, "learning_rate": 4.077449551692268e-06, "log_odds_chosen": 0.5684500336647034, "log_odds_ratio": -0.45333224534988403, "logits/chosen": -0.13818205893039703, "logits/rejected": -1.2238332033157349, "logps/chosen": -1.5496622323989868, "logps/rejected": -2.0205225944519043, "loss": 1.6156, "nll_loss": 1.570224404335022, "rewards/accuracies": 1.0, "rewards/chosen": -0.15496623516082764, "rewards/margins": 0.047086022794246674, "rewards/rejected": -0.2020522505044937, "step": 387 }, { "epoch": 1.0690369347600965, "grad_norm": 0.19282881915569305, "learning_rate": 4.071223994720309e-06, "log_odds_chosen": 0.7608233690261841, "log_odds_ratio": -0.3879122734069824, "logits/chosen": -0.26540690660476685, "logits/rejected": -1.2793084383010864, "logps/chosen": -1.4896953105926514, "logps/rejected": -2.125817060470581, "loss": 1.557, "nll_loss": 1.518183946609497, "rewards/accuracies": 1.0, "rewards/chosen": -0.14896953105926514, "rewards/margins": 0.06361216306686401, "rewards/rejected": -0.21258167922496796, "step": 388 }, { "epoch": 1.0717984121505004, "grad_norm": 0.18759317696094513, "learning_rate": 4.064982290990777e-06, "log_odds_chosen": 0.6020365357398987, "log_odds_ratio": -0.44241786003112793, "logits/chosen": -0.2355424463748932, "logits/rejected": -1.0852099657058716, "logps/chosen": -1.4403340816497803, "logps/rejected": -1.9294434785842896, "loss": 1.5147, "nll_loss": 1.4704298973083496, "rewards/accuracies": 1.0, "rewards/chosen": -0.14403343200683594, "rewards/margins": 0.04891093447804451, "rewards/rejected": -0.19294434785842896, "step": 389 }, { "epoch": 1.0745598895409043, "grad_norm": 0.19443197548389435, "learning_rate": 4.058724504646834e-06, "log_odds_chosen": 0.725110650062561, "log_odds_ratio": -0.4022667109966278, "logits/chosen": -0.25374311208724976, "logits/rejected": -1.207698106765747, "logps/chosen": -1.4946649074554443, "logps/rejected": -2.0988166332244873, "loss": 1.5592, "nll_loss": 1.518977403640747, "rewards/accuracies": 1.0, "rewards/chosen": -0.14946648478507996, "rewards/margins": 0.060415178537368774, "rewards/rejected": -0.20988167822360992, "step": 390 }, { "epoch": 1.0773213669313082, "grad_norm": 0.19158363342285156, "learning_rate": 4.0524506999969185e-06, "log_odds_chosen": 0.5608286261558533, "log_odds_ratio": -0.4582407772541046, "logits/chosen": -0.21252039074897766, "logits/rejected": -1.3294453620910645, "logps/chosen": -1.54707670211792, "logps/rejected": -2.0072052478790283, "loss": 1.606, "nll_loss": 1.560204029083252, "rewards/accuracies": 1.0, "rewards/chosen": -0.15470768511295319, "rewards/margins": 0.04601283743977547, "rewards/rejected": -0.20072051882743835, "step": 391 }, { "epoch": 1.080082844321712, "grad_norm": 0.18796156346797943, "learning_rate": 4.046160941514079e-06, "log_odds_chosen": 0.7382424473762512, "log_odds_ratio": -0.3966708481311798, "logits/chosen": -0.1265019178390503, "logits/rejected": -0.962821900844574, "logps/chosen": -1.4349395036697388, "logps/rejected": -2.038311719894409, "loss": 1.4915, "nll_loss": 1.4517992734909058, "rewards/accuracies": 1.0, "rewards/chosen": -0.14349395036697388, "rewards/margins": 0.06033723056316376, "rewards/rejected": -0.20383116602897644, "step": 392 }, { "epoch": 1.082844321712116, "grad_norm": 0.18811720609664917, "learning_rate": 4.039855293835316e-06, "log_odds_chosen": 0.6325368881225586, "log_odds_ratio": -0.4284062683582306, "logits/chosen": -0.2596244513988495, "logits/rejected": -1.2793852090835571, "logps/chosen": -1.4744428396224976, "logps/rejected": -1.99100923538208, "loss": 1.5311, "nll_loss": 1.4882373809814453, "rewards/accuracies": 1.0, "rewards/chosen": -0.14744427800178528, "rewards/margins": 0.05165664479136467, "rewards/rejected": -0.19910094141960144, "step": 393 }, { "epoch": 1.0856057991025199, "grad_norm": 0.17588721215724945, "learning_rate": 4.033533821760917e-06, "log_odds_chosen": 0.8095629215240479, "log_odds_ratio": -0.3785027265548706, "logits/chosen": -0.2389419972896576, "logits/rejected": -1.3366788625717163, "logps/chosen": -1.4380571842193604, "logps/rejected": -2.1090192794799805, "loss": 1.5041, "nll_loss": 1.4662492275238037, "rewards/accuracies": 1.0, "rewards/chosen": -0.14380571246147156, "rewards/margins": 0.06709621846675873, "rewards/rejected": -0.21090193092823029, "step": 394 }, { "epoch": 1.0883672764929238, "grad_norm": 0.18705010414123535, "learning_rate": 4.027196590253786e-06, "log_odds_chosen": 0.5284130573272705, "log_odds_ratio": -0.469849556684494, "logits/chosen": -0.17493271827697754, "logits/rejected": -1.0048437118530273, "logps/chosen": -1.4886696338653564, "logps/rejected": -1.9172314405441284, "loss": 1.5668, "nll_loss": 1.5198521614074707, "rewards/accuracies": 1.0, "rewards/chosen": -0.1488669514656067, "rewards/margins": 0.04285619407892227, "rewards/rejected": -0.19172316789627075, "step": 395 }, { "epoch": 1.0911287538833276, "grad_norm": 0.1907300055027008, "learning_rate": 4.020843664438783e-06, "log_odds_chosen": 0.6281604766845703, "log_odds_ratio": -0.43829861283302307, "logits/chosen": -0.19279785454273224, "logits/rejected": -1.2376521825790405, "logps/chosen": -1.4428083896636963, "logps/rejected": -1.9556207656860352, "loss": 1.5237, "nll_loss": 1.4798742532730103, "rewards/accuracies": 1.0, "rewards/chosen": -0.1442808359861374, "rewards/margins": 0.05128122866153717, "rewards/rejected": -0.19556206464767456, "step": 396 }, { "epoch": 1.0938902312737315, "grad_norm": 0.18668295443058014, "learning_rate": 4.01447510960205e-06, "log_odds_chosen": 0.7544000148773193, "log_odds_ratio": -0.39146313071250916, "logits/chosen": -0.2915095388889313, "logits/rejected": -1.2768161296844482, "logps/chosen": -1.5208818912506104, "logps/rejected": -2.149923801422119, "loss": 1.5759, "nll_loss": 1.5367605686187744, "rewards/accuracies": 1.0, "rewards/chosen": -0.1520881950855255, "rewards/margins": 0.06290420144796371, "rewards/rejected": -0.21499237418174744, "step": 397 }, { "epoch": 1.0966517086641354, "grad_norm": 0.1973486691713333, "learning_rate": 4.008090991190341e-06, "log_odds_chosen": 0.5326504111289978, "log_odds_ratio": -0.46492505073547363, "logits/chosen": -0.09128017723560333, "logits/rejected": -1.2047038078308105, "logps/chosen": -1.5613563060760498, "logps/rejected": -2.0032505989074707, "loss": 1.6159, "nll_loss": 1.569425344467163, "rewards/accuracies": 1.0, "rewards/chosen": -0.15613561868667603, "rewards/margins": 0.0441894605755806, "rewards/rejected": -0.20032507181167603, "step": 398 }, { "epoch": 1.099413186054539, "grad_norm": 0.17714130878448486, "learning_rate": 4.001691374810352e-06, "log_odds_chosen": 0.815579354763031, "log_odds_ratio": -0.3770541846752167, "logits/chosen": -0.1730899065732956, "logits/rejected": -1.437078833580017, "logps/chosen": -1.513384222984314, "logps/rejected": -2.199054718017578, "loss": 1.5737, "nll_loss": 1.5360060930252075, "rewards/accuracies": 1.0, "rewards/chosen": -0.15133842825889587, "rewards/margins": 0.06856706738471985, "rewards/rejected": -0.21990549564361572, "step": 399 }, { "epoch": 1.102174663444943, "grad_norm": 0.16967174410820007, "learning_rate": 3.99527632622804e-06, "log_odds_chosen": 0.694899320602417, "log_odds_ratio": -0.4119797945022583, "logits/chosen": -0.21181365847587585, "logits/rejected": -1.1213148832321167, "logps/chosen": -1.4379510879516602, "logps/rejected": -2.007990837097168, "loss": 1.5198, "nll_loss": 1.478610634803772, "rewards/accuracies": 1.0, "rewards/chosen": -0.14379511773586273, "rewards/margins": 0.05700398236513138, "rewards/rejected": -0.2007990926504135, "step": 400 }, { "epoch": 1.1049361408353469, "grad_norm": 0.1991468220949173, "learning_rate": 3.988845911367957e-06, "log_odds_chosen": 0.665392279624939, "log_odds_ratio": -0.423291951417923, "logits/chosen": -0.19501212239265442, "logits/rejected": -1.1544811725616455, "logps/chosen": -1.5978668928146362, "logps/rejected": -2.159170389175415, "loss": 1.6409, "nll_loss": 1.598615050315857, "rewards/accuracies": 1.0, "rewards/chosen": -0.15978670120239258, "rewards/margins": 0.05613034963607788, "rewards/rejected": -0.21591705083847046, "step": 401 }, { "epoch": 1.1076976182257507, "grad_norm": 0.17201201617717743, "learning_rate": 3.982400196312565e-06, "log_odds_chosen": 0.7169694900512695, "log_odds_ratio": -0.39898163080215454, "logits/chosen": -0.18280558288097382, "logits/rejected": -1.0723071098327637, "logps/chosen": -1.4075336456298828, "logps/rejected": -1.985740303993225, "loss": 1.4851, "nll_loss": 1.4452052116394043, "rewards/accuracies": 1.0, "rewards/chosen": -0.140753373503685, "rewards/margins": 0.05782065540552139, "rewards/rejected": -0.198574036359787, "step": 402 }, { "epoch": 1.1104590956161546, "grad_norm": 0.17941723763942719, "learning_rate": 3.975939247301558e-06, "log_odds_chosen": 0.6592923998832703, "log_odds_ratio": -0.420391321182251, "logits/chosen": -0.22413836419582367, "logits/rejected": -1.327505111694336, "logps/chosen": -1.541399598121643, "logps/rejected": -2.0909154415130615, "loss": 1.6127, "nll_loss": 1.570648193359375, "rewards/accuracies": 1.0, "rewards/chosen": -0.1541399508714676, "rewards/margins": 0.05495157092809677, "rewards/rejected": -0.20909152925014496, "step": 403 }, { "epoch": 1.1132205730065585, "grad_norm": 0.17807722091674805, "learning_rate": 3.969463130731183e-06, "log_odds_chosen": 0.6306886076927185, "log_odds_ratio": -0.43381085991859436, "logits/chosen": -0.14202114939689636, "logits/rejected": -1.2107291221618652, "logps/chosen": -1.4741935729980469, "logps/rejected": -1.9913864135742188, "loss": 1.5431, "nll_loss": 1.4997388124465942, "rewards/accuracies": 1.0, "rewards/chosen": -0.14741936326026917, "rewards/margins": 0.051719292998313904, "rewards/rejected": -0.19913865625858307, "step": 404 }, { "epoch": 1.1159820503969624, "grad_norm": 0.1876784712076187, "learning_rate": 3.9629719131535595e-06, "log_odds_chosen": 0.5967572331428528, "log_odds_ratio": -0.4506050944328308, "logits/chosen": -0.24889226257801056, "logits/rejected": -1.0915982723236084, "logps/chosen": -1.4914859533309937, "logps/rejected": -1.9845982789993286, "loss": 1.5603, "nll_loss": 1.5152877569198608, "rewards/accuracies": 1.0, "rewards/chosen": -0.1491485983133316, "rewards/margins": 0.049311213195323944, "rewards/rejected": -0.19845981895923615, "step": 405 }, { "epoch": 1.1187435277873663, "grad_norm": 0.16931727528572083, "learning_rate": 3.9564656612759904e-06, "log_odds_chosen": 0.6932112574577332, "log_odds_ratio": -0.4113208055496216, "logits/chosen": -0.1822027862071991, "logits/rejected": -1.3579068183898926, "logps/chosen": -1.496368646621704, "logps/rejected": -2.0719945430755615, "loss": 1.563, "nll_loss": 1.521822214126587, "rewards/accuracies": 1.0, "rewards/chosen": -0.14963684976100922, "rewards/margins": 0.05756259709596634, "rewards/rejected": -0.20719945430755615, "step": 406 }, { "epoch": 1.1215050051777702, "grad_norm": 0.18602602183818817, "learning_rate": 3.94994444196028e-06, "log_odds_chosen": 0.7077181935310364, "log_odds_ratio": -0.4127015471458435, "logits/chosen": -0.15379171073436737, "logits/rejected": -1.1528825759887695, "logps/chosen": -1.5301098823547363, "logps/rejected": -2.1211650371551514, "loss": 1.5721, "nll_loss": 1.5308395624160767, "rewards/accuracies": 1.0, "rewards/chosen": -0.15301097929477692, "rewards/margins": 0.0591055229306221, "rewards/rejected": -0.21211649477481842, "step": 407 }, { "epoch": 1.124266482568174, "grad_norm": 0.17830830812454224, "learning_rate": 3.943408322222049e-06, "log_odds_chosen": 0.7005403637886047, "log_odds_ratio": -0.41374269127845764, "logits/chosen": -0.1289825439453125, "logits/rejected": -0.9252943396568298, "logps/chosen": -1.4544087648391724, "logps/rejected": -2.0350112915039062, "loss": 1.5331, "nll_loss": 1.4917351007461548, "rewards/accuracies": 1.0, "rewards/chosen": -0.14544087648391724, "rewards/margins": 0.05806024372577667, "rewards/rejected": -0.2035011202096939, "step": 408 }, { "epoch": 1.127027959958578, "grad_norm": 0.2026163637638092, "learning_rate": 3.936857369230037e-06, "log_odds_chosen": 0.5416731834411621, "log_odds_ratio": -0.4673037528991699, "logits/chosen": -0.12443944066762924, "logits/rejected": -1.1075230836868286, "logps/chosen": -1.5275638103485107, "logps/rejected": -1.9750971794128418, "loss": 1.5827, "nll_loss": 1.5359312295913696, "rewards/accuracies": 1.0, "rewards/chosen": -0.15275639295578003, "rewards/margins": 0.04475332424044609, "rewards/rejected": -0.19750970602035522, "step": 409 }, { "epoch": 1.1297894373489816, "grad_norm": 0.19184917211532593, "learning_rate": 3.930291650305424e-06, "log_odds_chosen": 0.741117000579834, "log_odds_ratio": -0.39208146929740906, "logits/chosen": -0.14100222289562225, "logits/rejected": -0.9723483920097351, "logps/chosen": -1.4884588718414307, "logps/rejected": -2.1040899753570557, "loss": 1.5337, "nll_loss": 1.494455099105835, "rewards/accuracies": 1.0, "rewards/chosen": -0.14884589612483978, "rewards/margins": 0.061563119292259216, "rewards/rejected": -0.2104090005159378, "step": 410 }, { "epoch": 1.1325509147393855, "grad_norm": 0.18398518860340118, "learning_rate": 3.92371123292113e-06, "log_odds_chosen": 0.665544867515564, "log_odds_ratio": -0.41778889298439026, "logits/chosen": -0.13632512092590332, "logits/rejected": -1.2031248807907104, "logps/chosen": -1.5206879377365112, "logps/rejected": -2.072800636291504, "loss": 1.5771, "nll_loss": 1.5352935791015625, "rewards/accuracies": 1.0, "rewards/chosen": -0.15206880867481232, "rewards/margins": 0.05521126464009285, "rewards/rejected": -0.20728005468845367, "step": 411 }, { "epoch": 1.1353123921297894, "grad_norm": 0.18211683630943298, "learning_rate": 3.917116184701125e-06, "log_odds_chosen": 0.7499443292617798, "log_odds_ratio": -0.39630863070487976, "logits/chosen": -0.17687299847602844, "logits/rejected": -1.3003441095352173, "logps/chosen": -1.4918804168701172, "logps/rejected": -2.1197023391723633, "loss": 1.5502, "nll_loss": 1.5105260610580444, "rewards/accuracies": 1.0, "rewards/chosen": -0.14918804168701172, "rewards/margins": 0.06278219819068909, "rewards/rejected": -0.2119702398777008, "step": 412 }, { "epoch": 1.1380738695201933, "grad_norm": 0.20253390073776245, "learning_rate": 3.910506573419734e-06, "log_odds_chosen": 0.9193601012229919, "log_odds_ratio": -0.34398138523101807, "logits/chosen": -0.14900241792201996, "logits/rejected": -1.0627330541610718, "logps/chosen": -1.4866719245910645, "logps/rejected": -2.259812593460083, "loss": 1.5385, "nll_loss": 1.5040783882141113, "rewards/accuracies": 1.0, "rewards/chosen": -0.14866718649864197, "rewards/margins": 0.07731407880783081, "rewards/rejected": -0.22598126530647278, "step": 413 }, { "epoch": 1.1408353469105972, "grad_norm": 0.18668848276138306, "learning_rate": 3.903882467000938e-06, "log_odds_chosen": 0.8500868678092957, "log_odds_ratio": -0.3602939248085022, "logits/chosen": -0.13838572800159454, "logits/rejected": -1.1434731483459473, "logps/chosen": -1.5281840562820435, "logps/rejected": -2.244469404220581, "loss": 1.5785, "nll_loss": 1.542461633682251, "rewards/accuracies": 1.0, "rewards/chosen": -0.15281839668750763, "rewards/margins": 0.07162855565547943, "rewards/rejected": -0.22444695234298706, "step": 414 }, { "epoch": 1.143596824301001, "grad_norm": 0.18311648070812225, "learning_rate": 3.897243933517679e-06, "log_odds_chosen": 0.7283087968826294, "log_odds_ratio": -0.40107017755508423, "logits/chosen": -0.2569331228733063, "logits/rejected": -1.2123123407363892, "logps/chosen": -1.4449797868728638, "logps/rejected": -2.044313669204712, "loss": 1.5036, "nll_loss": 1.463518738746643, "rewards/accuracies": 1.0, "rewards/chosen": -0.14449797570705414, "rewards/margins": 0.05993340164422989, "rewards/rejected": -0.20443139970302582, "step": 415 }, { "epoch": 1.146358301691405, "grad_norm": 0.18268708884716034, "learning_rate": 3.890591041191162e-06, "log_odds_chosen": 0.8010820150375366, "log_odds_ratio": -0.38091176748275757, "logits/chosen": -0.15429654717445374, "logits/rejected": -1.0080270767211914, "logps/chosen": -1.4056251049041748, "logps/rejected": -2.0608270168304443, "loss": 1.4656, "nll_loss": 1.4274916648864746, "rewards/accuracies": 1.0, "rewards/chosen": -0.1405625194311142, "rewards/margins": 0.06552018225193024, "rewards/rejected": -0.20608270168304443, "step": 416 }, { "epoch": 1.1491197790818088, "grad_norm": 0.198701411485672, "learning_rate": 3.883923858390149e-06, "log_odds_chosen": 0.818614661693573, "log_odds_ratio": -0.36802011728286743, "logits/chosen": -0.189392551779747, "logits/rejected": -1.2750192880630493, "logps/chosen": -1.5129953622817993, "logps/rejected": -2.201220750808716, "loss": 1.5794, "nll_loss": 1.5426466464996338, "rewards/accuracies": 1.0, "rewards/chosen": -0.15129955112934113, "rewards/margins": 0.06882252544164658, "rewards/rejected": -0.2201220840215683, "step": 417 }, { "epoch": 1.1518812564722127, "grad_norm": 0.17471423745155334, "learning_rate": 3.8772424536302565e-06, "log_odds_chosen": 0.6130560040473938, "log_odds_ratio": -0.4377739131450653, "logits/chosen": -0.19274210929870605, "logits/rejected": -1.2090108394622803, "logps/chosen": -1.4925827980041504, "logps/rejected": -1.9969313144683838, "loss": 1.5448, "nll_loss": 1.50102698802948, "rewards/accuracies": 1.0, "rewards/chosen": -0.14925827085971832, "rewards/margins": 0.05043485015630722, "rewards/rejected": -0.19969312846660614, "step": 418 }, { "epoch": 1.1546427338626164, "grad_norm": 0.19965821504592896, "learning_rate": 3.870546895573258e-06, "log_odds_chosen": 0.7444138526916504, "log_odds_ratio": -0.39596325159072876, "logits/chosen": -0.24373914301395416, "logits/rejected": -0.921118438243866, "logps/chosen": -1.4715092182159424, "logps/rejected": -2.086350202560425, "loss": 1.5252, "nll_loss": 1.4855574369430542, "rewards/accuracies": 1.0, "rewards/chosen": -0.147150918841362, "rewards/margins": 0.06148412078619003, "rewards/rejected": -0.20863503217697144, "step": 419 }, { "epoch": 1.1574042112530203, "grad_norm": 0.18526706099510193, "learning_rate": 3.863837253026372e-06, "log_odds_chosen": 0.8123873472213745, "log_odds_ratio": -0.37982794642448425, "logits/chosen": -0.23291738331317902, "logits/rejected": -1.1363976001739502, "logps/chosen": -1.4304343461990356, "logps/rejected": -2.1056971549987793, "loss": 1.4989, "nll_loss": 1.460911512374878, "rewards/accuracies": 1.0, "rewards/chosen": -0.1430434137582779, "rewards/margins": 0.06752629578113556, "rewards/rejected": -0.21056970953941345, "step": 420 }, { "epoch": 1.1601656886434242, "grad_norm": 0.18150660395622253, "learning_rate": 3.857113594941556e-06, "log_odds_chosen": 0.6511182188987732, "log_odds_ratio": -0.4259107708930969, "logits/chosen": -0.19661128520965576, "logits/rejected": -1.3280483484268188, "logps/chosen": -1.527599811553955, "logps/rejected": -2.070669174194336, "loss": 1.5857, "nll_loss": 1.5431334972381592, "rewards/accuracies": 1.0, "rewards/chosen": -0.15275999903678894, "rewards/margins": 0.054306939244270325, "rewards/rejected": -0.20706692337989807, "step": 421 }, { "epoch": 1.162927166033828, "grad_norm": 0.18328295648097992, "learning_rate": 3.8503759904148005e-06, "log_odds_chosen": 0.8013736605644226, "log_odds_ratio": -0.3758004903793335, "logits/chosen": -0.16402505338191986, "logits/rejected": -1.2019381523132324, "logps/chosen": -1.4596202373504639, "logps/rejected": -2.1256799697875977, "loss": 1.5176, "nll_loss": 1.479976773262024, "rewards/accuracies": 1.0, "rewards/chosen": -0.14596202969551086, "rewards/margins": 0.06660597026348114, "rewards/rejected": -0.2125680148601532, "step": 422 }, { "epoch": 1.165688643424232, "grad_norm": 0.18774062395095825, "learning_rate": 3.843624508685416e-06, "log_odds_chosen": 0.6741689443588257, "log_odds_ratio": -0.41605666279792786, "logits/chosen": -0.20964159071445465, "logits/rejected": -1.2953227758407593, "logps/chosen": -1.5373965501785278, "logps/rejected": -2.098203659057617, "loss": 1.6025, "nll_loss": 1.5609112977981567, "rewards/accuracies": 1.0, "rewards/chosen": -0.15373964607715607, "rewards/margins": 0.05608072504401207, "rewards/rejected": -0.20982035994529724, "step": 423 }, { "epoch": 1.1684501208146358, "grad_norm": 0.1771015226840973, "learning_rate": 3.8368592191353246e-06, "log_odds_chosen": 0.7563647031784058, "log_odds_ratio": -0.3966205418109894, "logits/chosen": -0.19912122189998627, "logits/rejected": -1.2726441621780396, "logps/chosen": -1.4663722515106201, "logps/rejected": -2.0917129516601562, "loss": 1.5165, "nll_loss": 1.476802945137024, "rewards/accuracies": 1.0, "rewards/chosen": -0.1466372162103653, "rewards/margins": 0.06253407895565033, "rewards/rejected": -0.20917129516601562, "step": 424 }, { "epoch": 1.1712115982050397, "grad_norm": 0.17853626608848572, "learning_rate": 3.830080191288342e-06, "log_odds_chosen": 0.672751784324646, "log_odds_ratio": -0.41323938965797424, "logits/chosen": -0.1510731726884842, "logits/rejected": -1.1135554313659668, "logps/chosen": -1.4936147928237915, "logps/rejected": -2.0494277477264404, "loss": 1.5598, "nll_loss": 1.518498420715332, "rewards/accuracies": 1.0, "rewards/chosen": -0.1493614763021469, "rewards/margins": 0.05558129400014877, "rewards/rejected": -0.20494277775287628, "step": 425 }, { "epoch": 1.1739730755954436, "grad_norm": 0.19094951450824738, "learning_rate": 3.823287494809469e-06, "log_odds_chosen": 0.7997311949729919, "log_odds_ratio": -0.3781062364578247, "logits/chosen": -0.25045046210289, "logits/rejected": -1.166719913482666, "logps/chosen": -1.472108244895935, "logps/rejected": -2.1340489387512207, "loss": 1.5392, "nll_loss": 1.501369595527649, "rewards/accuracies": 1.0, "rewards/chosen": -0.14721082150936127, "rewards/margins": 0.0661940723657608, "rewards/rejected": -0.21340489387512207, "step": 426 }, { "epoch": 1.1767345529858475, "grad_norm": 0.1843455731868744, "learning_rate": 3.816481199504171e-06, "log_odds_chosen": 0.6943097114562988, "log_odds_ratio": -0.4069713354110718, "logits/chosen": -0.2397070825099945, "logits/rejected": -1.1366225481033325, "logps/chosen": -1.5034641027450562, "logps/rejected": -2.079597234725952, "loss": 1.5747, "nll_loss": 1.5340169668197632, "rewards/accuracies": 1.0, "rewards/chosen": -0.15034641325473785, "rewards/margins": 0.057613298296928406, "rewards/rejected": -0.20795971155166626, "step": 427 }, { "epoch": 1.1794960303762512, "grad_norm": 0.17388851940631866, "learning_rate": 3.8096613753176635e-06, "log_odds_chosen": 0.6214280128479004, "log_odds_ratio": -0.436628133058548, "logits/chosen": -0.1547197550535202, "logits/rejected": -1.0357825756072998, "logps/chosen": -1.3858633041381836, "logps/rejected": -1.8819453716278076, "loss": 1.4633, "nll_loss": 1.4196813106536865, "rewards/accuracies": 1.0, "rewards/chosen": -0.13858634233474731, "rewards/margins": 0.04960820823907852, "rewards/rejected": -0.18819454312324524, "step": 428 }, { "epoch": 1.1822575077666553, "grad_norm": 0.1828448623418808, "learning_rate": 3.8028280923341927e-06, "log_odds_chosen": 0.7184998393058777, "log_odds_ratio": -0.40824440121650696, "logits/chosen": -0.2581023573875427, "logits/rejected": -1.0807780027389526, "logps/chosen": -1.4268940687179565, "logps/rejected": -2.0194613933563232, "loss": 1.4935, "nll_loss": 1.4526987075805664, "rewards/accuracies": 1.0, "rewards/chosen": -0.14268942177295685, "rewards/margins": 0.05925672873854637, "rewards/rejected": -0.20194613933563232, "step": 429 }, { "epoch": 1.185018985157059, "grad_norm": 0.2609389126300812, "learning_rate": 3.7959814207763134e-06, "log_odds_chosen": 0.8415360450744629, "log_odds_ratio": -0.366547167301178, "logits/chosen": -0.2016637921333313, "logits/rejected": -1.2747056484222412, "logps/chosen": -1.539664387702942, "logps/rejected": -2.2510623931884766, "loss": 1.5835, "nll_loss": 1.5468266010284424, "rewards/accuracies": 1.0, "rewards/chosen": -0.15396642684936523, "rewards/margins": 0.07113979011774063, "rewards/rejected": -0.22510623931884766, "step": 430 }, { "epoch": 1.1877804625474628, "grad_norm": 0.18396784365177155, "learning_rate": 3.789121431004168e-06, "log_odds_chosen": 0.6930549144744873, "log_odds_ratio": -0.4097321629524231, "logits/chosen": -0.24285678565502167, "logits/rejected": -1.232874870300293, "logps/chosen": -1.5071865320205688, "logps/rejected": -2.07889986038208, "loss": 1.5539, "nll_loss": 1.5129239559173584, "rewards/accuracies": 1.0, "rewards/chosen": -0.15071865916252136, "rewards/margins": 0.05717131495475769, "rewards/rejected": -0.20788997411727905, "step": 431 }, { "epoch": 1.1905419399378667, "grad_norm": 0.1961180865764618, "learning_rate": 3.782248193514766e-06, "log_odds_chosen": 0.6803969144821167, "log_odds_ratio": -0.41614609956741333, "logits/chosen": -0.1777564138174057, "logits/rejected": -1.077903151512146, "logps/chosen": -1.4115687608718872, "logps/rejected": -1.9654287099838257, "loss": 1.4906, "nll_loss": 1.4489516019821167, "rewards/accuracies": 1.0, "rewards/chosen": -0.14115691184997559, "rewards/margins": 0.05538597330451012, "rewards/rejected": -0.1965428739786148, "step": 432 }, { "epoch": 1.1933034173282706, "grad_norm": 0.18955354392528534, "learning_rate": 3.775361778941257e-06, "log_odds_chosen": 0.7743215560913086, "log_odds_ratio": -0.3833809196949005, "logits/chosen": -0.19383692741394043, "logits/rejected": -1.1276659965515137, "logps/chosen": -1.4961435794830322, "logps/rejected": -2.144127607345581, "loss": 1.5554, "nll_loss": 1.5170167684555054, "rewards/accuracies": 1.0, "rewards/chosen": -0.1496143639087677, "rewards/margins": 0.06479839235544205, "rewards/rejected": -0.21441277861595154, "step": 433 }, { "epoch": 1.1960648947186745, "grad_norm": 0.19281843304634094, "learning_rate": 3.7684622580522057e-06, "log_odds_chosen": 0.45453017950057983, "log_odds_ratio": -0.49613097310066223, "logits/chosen": -0.1650485396385193, "logits/rejected": -1.2051373720169067, "logps/chosen": -1.447697401046753, "logps/rejected": -1.8144056797027588, "loss": 1.5077, "nll_loss": 1.4580379724502563, "rewards/accuracies": 1.0, "rewards/chosen": -0.14476974308490753, "rewards/margins": 0.036670833826065063, "rewards/rejected": -0.1814405769109726, "step": 434 }, { "epoch": 1.1988263721090784, "grad_norm": 0.17417070269584656, "learning_rate": 3.761549701750865e-06, "log_odds_chosen": 0.802248477935791, "log_odds_ratio": -0.38353192806243896, "logits/chosen": -0.18037042021751404, "logits/rejected": -1.3770548105239868, "logps/chosen": -1.4576908349990845, "logps/rejected": -2.126838445663452, "loss": 1.5085, "nll_loss": 1.4701937437057495, "rewards/accuracies": 1.0, "rewards/chosen": -0.14576907455921173, "rewards/margins": 0.06691478192806244, "rewards/rejected": -0.21268387138843536, "step": 435 }, { "epoch": 1.2015878494994823, "grad_norm": 0.17217062413692474, "learning_rate": 3.7546241810744444e-06, "log_odds_chosen": 0.6275637149810791, "log_odds_ratio": -0.4340207278728485, "logits/chosen": -0.18775464594364166, "logits/rejected": -0.9219359159469604, "logps/chosen": -1.4593117237091064, "logps/rejected": -1.9751626253128052, "loss": 1.5097, "nll_loss": 1.4663246870040894, "rewards/accuracies": 1.0, "rewards/chosen": -0.1459311693906784, "rewards/margins": 0.05158507823944092, "rewards/rejected": -0.19751626253128052, "step": 436 }, { "epoch": 1.2043493268898862, "grad_norm": 0.19680339097976685, "learning_rate": 3.747685767193385e-06, "log_odds_chosen": 0.6149947047233582, "log_odds_ratio": -0.4397943317890167, "logits/chosen": -0.24492347240447998, "logits/rejected": -1.0724200010299683, "logps/chosen": -1.5251544713974, "logps/rejected": -2.0360567569732666, "loss": 1.5969, "nll_loss": 1.5528908967971802, "rewards/accuracies": 1.0, "rewards/chosen": -0.1525154560804367, "rewards/margins": 0.05109023675322533, "rewards/rejected": -0.20360569655895233, "step": 437 }, { "epoch": 1.20711080428029, "grad_norm": 0.2228655368089676, "learning_rate": 3.740734531410626e-06, "log_odds_chosen": 0.7606922388076782, "log_odds_ratio": -0.39337587356567383, "logits/chosen": -0.2465159147977829, "logits/rejected": -1.2846262454986572, "logps/chosen": -1.448561191558838, "logps/rejected": -2.069669008255005, "loss": 1.5204, "nll_loss": 1.4810588359832764, "rewards/accuracies": 1.0, "rewards/chosen": -0.14485612511634827, "rewards/margins": 0.062110789120197296, "rewards/rejected": -0.20696690678596497, "step": 438 }, { "epoch": 1.2098722816706937, "grad_norm": 0.18277090787887573, "learning_rate": 3.7337705451608676e-06, "log_odds_chosen": 0.9290235638618469, "log_odds_ratio": -0.3463304042816162, "logits/chosen": -0.3210224509239197, "logits/rejected": -1.3052295446395874, "logps/chosen": -1.3797541856765747, "logps/rejected": -2.1371095180511475, "loss": 1.4436, "nll_loss": 1.4089810848236084, "rewards/accuracies": 1.0, "rewards/chosen": -0.13797542452812195, "rewards/margins": 0.07573550194501877, "rewards/rejected": -0.2137109339237213, "step": 439 }, { "epoch": 1.2126337590610976, "grad_norm": 0.19237011671066284, "learning_rate": 3.7267938800098454e-06, "log_odds_chosen": 0.9403778910636902, "log_odds_ratio": -0.3412759602069855, "logits/chosen": -0.4038398861885071, "logits/rejected": -1.273897409439087, "logps/chosen": -1.3479474782943726, "logps/rejected": -2.1216483116149902, "loss": 1.4158, "nll_loss": 1.3816239833831787, "rewards/accuracies": 1.0, "rewards/chosen": -0.13479475677013397, "rewards/margins": 0.0773700550198555, "rewards/rejected": -0.21216480433940887, "step": 440 }, { "epoch": 1.2153952364515015, "grad_norm": 0.2002696543931961, "learning_rate": 3.7198046076535865e-06, "log_odds_chosen": 0.7807128429412842, "log_odds_ratio": -0.3827586770057678, "logits/chosen": -0.2198580801486969, "logits/rejected": -1.0754787921905518, "logps/chosen": -1.4956470727920532, "logps/rejected": -2.1461679935455322, "loss": 1.5459, "nll_loss": 1.507658839225769, "rewards/accuracies": 1.0, "rewards/chosen": -0.1495647132396698, "rewards/margins": 0.0650520920753479, "rewards/rejected": -0.2146168202161789, "step": 441 }, { "epoch": 1.2181567138419054, "grad_norm": 0.18718883395195007, "learning_rate": 3.71280279991768e-06, "log_odds_chosen": 0.8355327248573303, "log_odds_ratio": -0.36628708243370056, "logits/chosen": -0.27813613414764404, "logits/rejected": -1.077792763710022, "logps/chosen": -1.4404205083847046, "logps/rejected": -2.134765625, "loss": 1.5121, "nll_loss": 1.4754693508148193, "rewards/accuracies": 1.0, "rewards/chosen": -0.1440420299768448, "rewards/margins": 0.0694345235824585, "rewards/rejected": -0.21347656846046448, "step": 442 }, { "epoch": 1.2209181912323093, "grad_norm": 0.17261095345020294, "learning_rate": 3.705788528756533e-06, "log_odds_chosen": 0.8777885437011719, "log_odds_ratio": -0.3522123098373413, "logits/chosen": -0.30880072712898254, "logits/rejected": -1.328255534172058, "logps/chosen": -1.4321343898773193, "logps/rejected": -2.1607189178466797, "loss": 1.4899, "nll_loss": 1.4546407461166382, "rewards/accuracies": 1.0, "rewards/chosen": -0.1432134509086609, "rewards/margins": 0.07285845279693604, "rewards/rejected": -0.21607188880443573, "step": 443 }, { "epoch": 1.2236796686227132, "grad_norm": 0.2517964541912079, "learning_rate": 3.698761866252635e-06, "log_odds_chosen": 0.6931225061416626, "log_odds_ratio": -0.42155617475509644, "logits/chosen": -0.262115478515625, "logits/rejected": -1.257841944694519, "logps/chosen": -1.5295952558517456, "logps/rejected": -2.1139965057373047, "loss": 1.5875, "nll_loss": 1.5453757047653198, "rewards/accuracies": 1.0, "rewards/chosen": -0.15295952558517456, "rewards/margins": 0.05844012275338173, "rewards/rejected": -0.211399644613266, "step": 444 }, { "epoch": 1.226441146013117, "grad_norm": 0.19526347517967224, "learning_rate": 3.691722884615814e-06, "log_odds_chosen": 0.6913182735443115, "log_odds_ratio": -0.41308411955833435, "logits/chosen": -0.17629948258399963, "logits/rejected": -0.7438918352127075, "logps/chosen": -1.4464210271835327, "logps/rejected": -2.0083394050598145, "loss": 1.5191, "nll_loss": 1.4777559041976929, "rewards/accuracies": 1.0, "rewards/chosen": -0.14464209973812103, "rewards/margins": 0.05619185417890549, "rewards/rejected": -0.20083396136760712, "step": 445 }, { "epoch": 1.229202623403521, "grad_norm": 0.1902356892824173, "learning_rate": 3.684671656182497e-06, "log_odds_chosen": 0.8285303115844727, "log_odds_ratio": -0.36952173709869385, "logits/chosen": -0.2879321277141571, "logits/rejected": -1.2174959182739258, "logps/chosen": -1.4798123836517334, "logps/rejected": -2.1687145233154297, "loss": 1.5442, "nll_loss": 1.507254719734192, "rewards/accuracies": 1.0, "rewards/chosen": -0.14798125624656677, "rewards/margins": 0.06889019906520844, "rewards/rejected": -0.216871440410614, "step": 446 }, { "epoch": 1.2319641007939248, "grad_norm": 0.19624045491218567, "learning_rate": 3.6776082534149664e-06, "log_odds_chosen": 0.7684917449951172, "log_odds_ratio": -0.3862813413143158, "logits/chosen": -0.272920161485672, "logits/rejected": -1.2850197553634644, "logps/chosen": -1.5005940198898315, "logps/rejected": -2.1409354209899902, "loss": 1.5377, "nll_loss": 1.4990581274032593, "rewards/accuracies": 1.0, "rewards/chosen": -0.15005940198898315, "rewards/margins": 0.06403413414955139, "rewards/rejected": -0.21409353613853455, "step": 447 }, { "epoch": 1.2347255781843287, "grad_norm": 0.1784961074590683, "learning_rate": 3.670532748900615e-06, "log_odds_chosen": 0.8203001618385315, "log_odds_ratio": -0.37937578558921814, "logits/chosen": -0.2516147494316101, "logits/rejected": -1.1510305404663086, "logps/chosen": -1.4173564910888672, "logps/rejected": -2.08891224861145, "loss": 1.4755, "nll_loss": 1.4375797510147095, "rewards/accuracies": 1.0, "rewards/chosen": -0.14173565804958344, "rewards/margins": 0.06715556979179382, "rewards/rejected": -0.20889122784137726, "step": 448 }, { "epoch": 1.2374870555747326, "grad_norm": 0.1783873736858368, "learning_rate": 3.663445215351198e-06, "log_odds_chosen": 0.9026142954826355, "log_odds_ratio": -0.35365813970565796, "logits/chosen": -0.3238006830215454, "logits/rejected": -1.0488125085830688, "logps/chosen": -1.4057360887527466, "logps/rejected": -2.1597814559936523, "loss": 1.4666, "nll_loss": 1.4311884641647339, "rewards/accuracies": 1.0, "rewards/chosen": -0.14057360589504242, "rewards/margins": 0.07540452480316162, "rewards/rejected": -0.21597814559936523, "step": 449 }, { "epoch": 1.2402485329651363, "grad_norm": 0.1859315186738968, "learning_rate": 3.656345725602089e-06, "log_odds_chosen": 0.9581824541091919, "log_odds_ratio": -0.3327292799949646, "logits/chosen": -0.22859053313732147, "logits/rejected": -1.3163652420043945, "logps/chosen": -1.5037806034088135, "logps/rejected": -2.316061019897461, "loss": 1.5517, "nll_loss": 1.5183771848678589, "rewards/accuracies": 1.0, "rewards/chosen": -0.1503780633211136, "rewards/margins": 0.08122803270816803, "rewards/rejected": -0.23160609602928162, "step": 450 }, { "epoch": 1.2430100103555402, "grad_norm": 0.19541142880916595, "learning_rate": 3.6492343526115292e-06, "log_odds_chosen": 0.7478545308113098, "log_odds_ratio": -0.3891213536262512, "logits/chosen": -0.1598832607269287, "logits/rejected": -1.259619116783142, "logps/chosen": -1.5255135297775269, "logps/rejected": -2.1506118774414062, "loss": 1.5591, "nll_loss": 1.5201823711395264, "rewards/accuracies": 1.0, "rewards/chosen": -0.15255135297775269, "rewards/margins": 0.06250984221696854, "rewards/rejected": -0.21506118774414062, "step": 451 }, { "epoch": 1.245771487745944, "grad_norm": 0.18374916911125183, "learning_rate": 3.642111169459879e-06, "log_odds_chosen": 0.5907194018363953, "log_odds_ratio": -0.4446646273136139, "logits/chosen": -0.2301802933216095, "logits/rejected": -1.1353992223739624, "logps/chosen": -1.4880855083465576, "logps/rejected": -1.973372459411621, "loss": 1.5312, "nll_loss": 1.4867122173309326, "rewards/accuracies": 1.0, "rewards/chosen": -0.148808553814888, "rewards/margins": 0.04852868244051933, "rewards/rejected": -0.19733723998069763, "step": 452 }, { "epoch": 1.248532965136348, "grad_norm": 0.17803093791007996, "learning_rate": 3.634976249348867e-06, "log_odds_chosen": 0.7315319776535034, "log_odds_ratio": -0.4015371799468994, "logits/chosen": -0.2719256281852722, "logits/rejected": -1.3226613998413086, "logps/chosen": -1.4791927337646484, "logps/rejected": -2.084644317626953, "loss": 1.54, "nll_loss": 1.4997994899749756, "rewards/accuracies": 1.0, "rewards/chosen": -0.14791928231716156, "rewards/margins": 0.06054516136646271, "rewards/rejected": -0.20846444368362427, "step": 453 }, { "epoch": 1.2512944425267518, "grad_norm": 0.2094297856092453, "learning_rate": 3.6278296656008366e-06, "log_odds_chosen": 0.9680303931236267, "log_odds_ratio": -0.3453172743320465, "logits/chosen": -0.2988622188568115, "logits/rejected": -1.1323243379592896, "logps/chosen": -1.350219964981079, "logps/rejected": -2.144009590148926, "loss": 1.4208, "nll_loss": 1.3862643241882324, "rewards/accuracies": 1.0, "rewards/chosen": -0.13502199947834015, "rewards/margins": 0.07937898486852646, "rewards/rejected": -0.21440096199512482, "step": 454 }, { "epoch": 1.2540559199171557, "grad_norm": 0.16929712891578674, "learning_rate": 3.6206714916579925e-06, "log_odds_chosen": 0.8621344566345215, "log_odds_ratio": -0.36102256178855896, "logits/chosen": -0.259186714887619, "logits/rejected": -1.2395236492156982, "logps/chosen": -1.3943541049957275, "logps/rejected": -2.1026394367218018, "loss": 1.4532, "nll_loss": 1.4170664548873901, "rewards/accuracies": 1.0, "rewards/chosen": -0.13943539559841156, "rewards/margins": 0.07082855701446533, "rewards/rejected": -0.2102639526128769, "step": 455 }, { "epoch": 1.2568173973075596, "grad_norm": 0.18482358753681183, "learning_rate": 3.613501801081648e-06, "log_odds_chosen": 0.6896051168441772, "log_odds_ratio": -0.4120646119117737, "logits/chosen": -0.24805930256843567, "logits/rejected": -1.0325236320495605, "logps/chosen": -1.4436662197113037, "logps/rejected": -2.008178472518921, "loss": 1.5044, "nll_loss": 1.463235855102539, "rewards/accuracies": 1.0, "rewards/chosen": -0.14436662197113037, "rewards/margins": 0.056451231241226196, "rewards/rejected": -0.20081785321235657, "step": 456 }, { "epoch": 1.2595788746979635, "grad_norm": 0.19444166123867035, "learning_rate": 3.606320667551466e-06, "log_odds_chosen": 0.815831184387207, "log_odds_ratio": -0.3756023645401001, "logits/chosen": -0.2379026561975479, "logits/rejected": -1.2224661111831665, "logps/chosen": -1.4957644939422607, "logps/rejected": -2.179534673690796, "loss": 1.5552, "nll_loss": 1.5175931453704834, "rewards/accuracies": 1.0, "rewards/chosen": -0.14957645535469055, "rewards/margins": 0.06837702542543411, "rewards/rejected": -0.21795348823070526, "step": 457 }, { "epoch": 1.2623403520883674, "grad_norm": 0.18903037905693054, "learning_rate": 3.599128164864706e-06, "log_odds_chosen": 0.8830035924911499, "log_odds_ratio": -0.35656800866127014, "logits/chosen": -0.3331121802330017, "logits/rejected": -1.2537503242492676, "logps/chosen": -1.4486404657363892, "logps/rejected": -2.1861586570739746, "loss": 1.4942, "nll_loss": 1.458591341972351, "rewards/accuracies": 1.0, "rewards/chosen": -0.1448640525341034, "rewards/margins": 0.07375183701515198, "rewards/rejected": -0.21861587464809418, "step": 458 }, { "epoch": 1.265101829478771, "grad_norm": 0.19099265336990356, "learning_rate": 3.5919243669354585e-06, "log_odds_chosen": 0.653681218624115, "log_odds_ratio": -0.4278097450733185, "logits/chosen": -0.3065020442008972, "logits/rejected": -0.8541854023933411, "logps/chosen": -1.5121026039123535, "logps/rejected": -2.0558552742004395, "loss": 1.5649, "nll_loss": 1.5221350193023682, "rewards/accuracies": 1.0, "rewards/chosen": -0.1512102633714676, "rewards/margins": 0.054375264793634415, "rewards/rejected": -0.2055855393409729, "step": 459 }, { "epoch": 1.2678633068691751, "grad_norm": 0.2050788551568985, "learning_rate": 3.5847093477938955e-06, "log_odds_chosen": 0.719269871711731, "log_odds_ratio": -0.403872549533844, "logits/chosen": -0.3556126356124878, "logits/rejected": -1.1764421463012695, "logps/chosen": -1.5112565755844116, "logps/rejected": -2.108696460723877, "loss": 1.5556, "nll_loss": 1.5151852369308472, "rewards/accuracies": 1.0, "rewards/chosen": -0.15112565457820892, "rewards/margins": 0.059744007885456085, "rewards/rejected": -0.2108696550130844, "step": 460 }, { "epoch": 1.2706247842595788, "grad_norm": 0.18415868282318115, "learning_rate": 3.5774831815855017e-06, "log_odds_chosen": 0.9078155755996704, "log_odds_ratio": -0.34306374192237854, "logits/chosen": -0.34916579723358154, "logits/rejected": -1.1399445533752441, "logps/chosen": -1.4003783464431763, "logps/rejected": -2.1475114822387695, "loss": 1.4682, "nll_loss": 1.4339377880096436, "rewards/accuracies": 1.0, "rewards/chosen": -0.14003783464431763, "rewards/margins": 0.0747133195400238, "rewards/rejected": -0.21475116908550262, "step": 461 }, { "epoch": 1.2733862616499827, "grad_norm": 0.1681346744298935, "learning_rate": 3.5702459425703146e-06, "log_odds_chosen": 0.9646233320236206, "log_odds_ratio": -0.3330087959766388, "logits/chosen": -0.3065449595451355, "logits/rejected": -1.3354847431182861, "logps/chosen": -1.4999949932098389, "logps/rejected": -2.319182872772217, "loss": 1.5538, "nll_loss": 1.5204527378082275, "rewards/accuracies": 1.0, "rewards/chosen": -0.14999951422214508, "rewards/margins": 0.08191878348588943, "rewards/rejected": -0.23191829025745392, "step": 462 }, { "epoch": 1.2761477390403866, "grad_norm": 0.18663005530834198, "learning_rate": 3.562997705122162e-06, "log_odds_chosen": 0.8870431184768677, "log_odds_ratio": -0.34840965270996094, "logits/chosen": -0.2869050204753876, "logits/rejected": -1.1682400703430176, "logps/chosen": -1.4178146123886108, "logps/rejected": -2.1509153842926025, "loss": 1.4721, "nll_loss": 1.4372905492782593, "rewards/accuracies": 1.0, "rewards/chosen": -0.14178146421909332, "rewards/margins": 0.07331006228923798, "rewards/rejected": -0.2150915116071701, "step": 463 }, { "epoch": 1.2789092164307905, "grad_norm": 0.17033839225769043, "learning_rate": 3.5557385437279e-06, "log_odds_chosen": 0.923761785030365, "log_odds_ratio": -0.3432496190071106, "logits/chosen": -0.2502524256706238, "logits/rejected": -0.997616708278656, "logps/chosen": -1.4954802989959717, "logps/rejected": -2.2727675437927246, "loss": 1.5311, "nll_loss": 1.4967255592346191, "rewards/accuracies": 1.0, "rewards/chosen": -0.1495480239391327, "rewards/margins": 0.07772872596979141, "rewards/rejected": -0.2272767573595047, "step": 464 }, { "epoch": 1.2816706938211944, "grad_norm": 0.195389986038208, "learning_rate": 3.5484685329866424e-06, "log_odds_chosen": 0.6268041133880615, "log_odds_ratio": -0.4326058626174927, "logits/chosen": -0.2863166928291321, "logits/rejected": -1.0226547718048096, "logps/chosen": -1.5096933841705322, "logps/rejected": -2.028193950653076, "loss": 1.5479, "nll_loss": 1.5046144723892212, "rewards/accuracies": 1.0, "rewards/chosen": -0.15096935629844666, "rewards/margins": 0.05185003951191902, "rewards/rejected": -0.20281939208507538, "step": 465 }, { "epoch": 1.2844321712115983, "grad_norm": 0.17330817878246307, "learning_rate": 3.541187747608998e-06, "log_odds_chosen": 1.0745373964309692, "log_odds_ratio": -0.31021982431411743, "logits/chosen": -0.37704816460609436, "logits/rejected": -1.22407865524292, "logps/chosen": -1.5697916746139526, "logps/rejected": -2.4969704151153564, "loss": 1.5937, "nll_loss": 1.562636375427246, "rewards/accuracies": 1.0, "rewards/chosen": -0.15697917342185974, "rewards/margins": 0.09271789342164993, "rewards/rejected": -0.24969705939292908, "step": 466 }, { "epoch": 1.2871936486020021, "grad_norm": 0.20620717108249664, "learning_rate": 3.533896262416302e-06, "log_odds_chosen": 0.8361949920654297, "log_odds_ratio": -0.36747199296951294, "logits/chosen": -0.31524112820625305, "logits/rejected": -1.093992829322815, "logps/chosen": -1.4830838441848755, "logps/rejected": -2.1828489303588867, "loss": 1.5323, "nll_loss": 1.4955836534500122, "rewards/accuracies": 1.0, "rewards/chosen": -0.1483083963394165, "rewards/margins": 0.06997650861740112, "rewards/rejected": -0.21828490495681763, "step": 467 }, { "epoch": 1.2899551259924058, "grad_norm": 0.20733030140399933, "learning_rate": 3.5265941523398455e-06, "log_odds_chosen": 0.9227453470230103, "log_odds_ratio": -0.3383994400501251, "logits/chosen": -0.3382907211780548, "logits/rejected": -1.0753273963928223, "logps/chosen": -1.5061396360397339, "logps/rejected": -2.2841649055480957, "loss": 1.5394, "nll_loss": 1.5055965185165405, "rewards/accuracies": 1.0, "rewards/chosen": -0.1506139636039734, "rewards/margins": 0.07780253887176514, "rewards/rejected": -0.22841650247573853, "step": 468 }, { "epoch": 1.29271660338281, "grad_norm": 0.1836915910243988, "learning_rate": 3.519281492420108e-06, "log_odds_chosen": 0.5979352593421936, "log_odds_ratio": -0.446799635887146, "logits/chosen": -0.29712000489234924, "logits/rejected": -1.041902780532837, "logps/chosen": -1.5359019041061401, "logps/rejected": -2.0312960147857666, "loss": 1.5753, "nll_loss": 1.5306230783462524, "rewards/accuracies": 1.0, "rewards/chosen": -0.15359018743038177, "rewards/margins": 0.04953942447900772, "rewards/rejected": -0.2031296193599701, "step": 469 }, { "epoch": 1.2954780807732136, "grad_norm": 0.19433459639549255, "learning_rate": 3.5119583578059845e-06, "log_odds_chosen": 0.7800890803337097, "log_odds_ratio": -0.3985646963119507, "logits/chosen": -0.2822642922401428, "logits/rejected": -1.1507655382156372, "logps/chosen": -1.6121432781219482, "logps/rejected": -2.281339406967163, "loss": 1.6286, "nll_loss": 1.588757038116455, "rewards/accuracies": 1.0, "rewards/chosen": -0.16121432185173035, "rewards/margins": 0.06691960990428925, "rewards/rejected": -0.2281339317560196, "step": 470 }, { "epoch": 1.2982395581636175, "grad_norm": 0.1745569258928299, "learning_rate": 3.504624823754014e-06, "log_odds_chosen": 0.9689381122589111, "log_odds_ratio": -0.32553234696388245, "logits/chosen": -0.3070724904537201, "logits/rejected": -1.4188494682312012, "logps/chosen": -1.4123187065124512, "logps/rejected": -2.2118630409240723, "loss": 1.4628, "nll_loss": 1.4302698373794556, "rewards/accuracies": 1.0, "rewards/chosen": -0.14123189449310303, "rewards/margins": 0.07995443046092987, "rewards/rejected": -0.2211862951517105, "step": 471 }, { "epoch": 1.3010010355540214, "grad_norm": 0.18898555636405945, "learning_rate": 3.4972809656276047e-06, "log_odds_chosen": 0.8960251212120056, "log_odds_ratio": -0.3498467206954956, "logits/chosen": -0.273061066865921, "logits/rejected": -1.3158522844314575, "logps/chosen": -1.4974440336227417, "logps/rejected": -2.2527313232421875, "loss": 1.5449, "nll_loss": 1.5098881721496582, "rewards/accuracies": 1.0, "rewards/chosen": -0.1497444063425064, "rewards/margins": 0.07552873343229294, "rewards/rejected": -0.22527314722537994, "step": 472 }, { "epoch": 1.3037625129444252, "grad_norm": 0.1694575846195221, "learning_rate": 3.4899268588962613e-06, "log_odds_chosen": 0.9621706604957581, "log_odds_ratio": -0.32910794019699097, "logits/chosen": -0.3364591598510742, "logits/rejected": -1.1894400119781494, "logps/chosen": -1.363714575767517, "logps/rejected": -2.1495935916900635, "loss": 1.4326, "nll_loss": 1.3997074365615845, "rewards/accuracies": 1.0, "rewards/chosen": -0.1363714635372162, "rewards/margins": 0.07858789712190628, "rewards/rejected": -0.21495933830738068, "step": 473 }, { "epoch": 1.3065239903348291, "grad_norm": 0.18671829998493195, "learning_rate": 3.4825625791348093e-06, "log_odds_chosen": 0.9318122863769531, "log_odds_ratio": -0.3445737957954407, "logits/chosen": -0.29330548644065857, "logits/rejected": -1.1094152927398682, "logps/chosen": -1.4132261276245117, "logps/rejected": -2.188481569290161, "loss": 1.4629, "nll_loss": 1.4284807443618774, "rewards/accuracies": 1.0, "rewards/chosen": -0.14132261276245117, "rewards/margins": 0.0775255486369133, "rewards/rejected": -0.21884815394878387, "step": 474 }, { "epoch": 1.309285467725233, "grad_norm": 0.18166309595108032, "learning_rate": 3.4751882020226174e-06, "log_odds_chosen": 0.8953260779380798, "log_odds_ratio": -0.35324275493621826, "logits/chosen": -0.30813199281692505, "logits/rejected": -1.1583383083343506, "logps/chosen": -1.5182853937149048, "logps/rejected": -2.2794241905212402, "loss": 1.5593, "nll_loss": 1.5240164995193481, "rewards/accuracies": 1.0, "rewards/chosen": -0.15182854235172272, "rewards/margins": 0.07611385732889175, "rewards/rejected": -0.22794242203235626, "step": 475 }, { "epoch": 1.312046945115637, "grad_norm": 0.1913100630044937, "learning_rate": 3.467803803342821e-06, "log_odds_chosen": 0.8503288626670837, "log_odds_ratio": -0.36000919342041016, "logits/chosen": -0.2365928590297699, "logits/rejected": -1.175534725189209, "logps/chosen": -1.4916030168533325, "logps/rejected": -2.2044079303741455, "loss": 1.5322, "nll_loss": 1.4961673021316528, "rewards/accuracies": 1.0, "rewards/chosen": -0.14916031062602997, "rewards/margins": 0.07128050923347473, "rewards/rejected": -0.2204408198595047, "step": 476 }, { "epoch": 1.3148084225060408, "grad_norm": 0.1990288347005844, "learning_rate": 3.4604094589815402e-06, "log_odds_chosen": 0.7814106345176697, "log_odds_ratio": -0.3809904456138611, "logits/chosen": -0.33347588777542114, "logits/rejected": -1.1712278127670288, "logps/chosen": -1.4147124290466309, "logps/rejected": -2.0545654296875, "loss": 1.4683, "nll_loss": 1.4301787614822388, "rewards/accuracies": 1.0, "rewards/chosen": -0.1414712369441986, "rewards/margins": 0.06398531049489975, "rewards/rejected": -0.20545653998851776, "step": 477 }, { "epoch": 1.3175698998964447, "grad_norm": 0.18192388117313385, "learning_rate": 3.4530052449271044e-06, "log_odds_chosen": 0.9670149087905884, "log_odds_ratio": -0.33280444145202637, "logits/chosen": -0.372698038816452, "logits/rejected": -1.1448473930358887, "logps/chosen": -1.4152264595031738, "logps/rejected": -2.2173709869384766, "loss": 1.4648, "nll_loss": 1.4315613508224487, "rewards/accuracies": 1.0, "rewards/chosen": -0.14152264595031738, "rewards/margins": 0.08021444082260132, "rewards/rejected": -0.2217371016740799, "step": 478 }, { "epoch": 1.3203313772868484, "grad_norm": 0.19329139590263367, "learning_rate": 3.4455912372692696e-06, "log_odds_chosen": 0.6430727243423462, "log_odds_ratio": -0.430493026971817, "logits/chosen": -0.28841686248779297, "logits/rejected": -1.1759401559829712, "logps/chosen": -1.4796922206878662, "logps/rejected": -2.0091733932495117, "loss": 1.5189, "nll_loss": 1.475854516029358, "rewards/accuracies": 1.0, "rewards/chosen": -0.14796923100948334, "rewards/margins": 0.05294811725616455, "rewards/rejected": -0.2009173333644867, "step": 479 }, { "epoch": 1.3230928546772525, "grad_norm": 0.18555399775505066, "learning_rate": 3.438167512198436e-06, "log_odds_chosen": 0.6809258460998535, "log_odds_ratio": -0.4141601622104645, "logits/chosen": -0.31838759779930115, "logits/rejected": -1.10935640335083, "logps/chosen": -1.522485613822937, "logps/rejected": -2.0886290073394775, "loss": 1.5706, "nll_loss": 1.529231309890747, "rewards/accuracies": 1.0, "rewards/chosen": -0.1522485464811325, "rewards/margins": 0.05661435052752495, "rewards/rejected": -0.20886291563510895, "step": 480 }, { "epoch": 1.3258543320676561, "grad_norm": 0.185867041349411, "learning_rate": 3.4307341460048633e-06, "log_odds_chosen": 0.8023340106010437, "log_odds_ratio": -0.37456732988357544, "logits/chosen": -0.29839807748794556, "logits/rejected": -1.1352858543395996, "logps/chosen": -1.5517854690551758, "logps/rejected": -2.226045846939087, "loss": 1.5881, "nll_loss": 1.5506727695465088, "rewards/accuracies": 1.0, "rewards/chosen": -0.15517854690551758, "rewards/margins": 0.06742605566978455, "rewards/rejected": -0.22260460257530212, "step": 481 }, { "epoch": 1.32861580945806, "grad_norm": 0.19534382224082947, "learning_rate": 3.4232912150778914e-06, "log_odds_chosen": 0.8740422129631042, "log_odds_ratio": -0.35599130392074585, "logits/chosen": -0.3661443293094635, "logits/rejected": -1.2379000186920166, "logps/chosen": -1.4489246606826782, "logps/rejected": -2.17354416847229, "loss": 1.5169, "nll_loss": 1.4813332557678223, "rewards/accuracies": 1.0, "rewards/chosen": -0.14489248394966125, "rewards/margins": 0.07246193289756775, "rewards/rejected": -0.2173544317483902, "step": 482 }, { "epoch": 1.331377286848464, "grad_norm": 0.18097054958343506, "learning_rate": 3.415838795905151e-06, "log_odds_chosen": 1.0503449440002441, "log_odds_ratio": -0.3068162798881531, "logits/chosen": -0.4407689571380615, "logits/rejected": -1.4052069187164307, "logps/chosen": -1.4251679182052612, "logps/rejected": -2.2970073223114014, "loss": 1.4703, "nll_loss": 1.4396311044692993, "rewards/accuracies": 1.0, "rewards/chosen": -0.14251679182052612, "rewards/margins": 0.08718395978212357, "rewards/rejected": -0.2297007441520691, "step": 483 }, { "epoch": 1.3341387642388678, "grad_norm": 0.19162198901176453, "learning_rate": 3.408376965071779e-06, "log_odds_chosen": 0.7587519884109497, "log_odds_ratio": -0.3879585862159729, "logits/chosen": -0.28846538066864014, "logits/rejected": -1.1753207445144653, "logps/chosen": -1.5185155868530273, "logps/rejected": -2.149603843688965, "loss": 1.5523, "nll_loss": 1.5134819746017456, "rewards/accuracies": 1.0, "rewards/chosen": -0.1518515795469284, "rewards/margins": 0.06310880929231644, "rewards/rejected": -0.21496038138866425, "step": 484 }, { "epoch": 1.3369002416292717, "grad_norm": 0.1803843080997467, "learning_rate": 3.400905799259634e-06, "log_odds_chosen": 0.7836498022079468, "log_odds_ratio": -0.3856756389141083, "logits/chosen": -0.3168273866176605, "logits/rejected": -1.1095941066741943, "logps/chosen": -1.4380414485931396, "logps/rejected": -2.0824549198150635, "loss": 1.4834, "nll_loss": 1.444858431816101, "rewards/accuracies": 1.0, "rewards/chosen": -0.143804132938385, "rewards/margins": 0.06444136798381805, "rewards/rejected": -0.20824551582336426, "step": 485 }, { "epoch": 1.3396617190196756, "grad_norm": 0.18349698185920715, "learning_rate": 3.393425375246503e-06, "log_odds_chosen": 0.7499913573265076, "log_odds_ratio": -0.3889506757259369, "logits/chosen": -0.34318801760673523, "logits/rejected": -1.1768746376037598, "logps/chosen": -1.4634371995925903, "logps/rejected": -2.080674648284912, "loss": 1.5059, "nll_loss": 1.4669642448425293, "rewards/accuracies": 1.0, "rewards/chosen": -0.14634370803833008, "rewards/margins": 0.0617237389087677, "rewards/rejected": -0.20806746184825897, "step": 486 }, { "epoch": 1.3424231964100795, "grad_norm": 0.19035322964191437, "learning_rate": 3.3859357699053165e-06, "log_odds_chosen": 0.8554298877716064, "log_odds_ratio": -0.36611807346343994, "logits/chosen": -0.31382206082344055, "logits/rejected": -1.058935523033142, "logps/chosen": -1.3999180793762207, "logps/rejected": -2.1072771549224854, "loss": 1.4434, "nll_loss": 1.4067771434783936, "rewards/accuracies": 1.0, "rewards/chosen": -0.13999181985855103, "rewards/margins": 0.0707358866930008, "rewards/rejected": -0.210727721452713, "step": 487 }, { "epoch": 1.3451846738004831, "grad_norm": 0.1878138780593872, "learning_rate": 3.3784370602033572e-06, "log_odds_chosen": 0.992766797542572, "log_odds_ratio": -0.3239111304283142, "logits/chosen": -0.40365132689476013, "logits/rejected": -1.291216254234314, "logps/chosen": -1.4312440156936646, "logps/rejected": -2.263004779815674, "loss": 1.4795, "nll_loss": 1.4471262693405151, "rewards/accuracies": 1.0, "rewards/chosen": -0.14312440156936646, "rewards/margins": 0.08317607641220093, "rewards/rejected": -0.22630049288272858, "step": 488 }, { "epoch": 1.3479461511908872, "grad_norm": 0.19307951629161835, "learning_rate": 3.3709293232014705e-06, "log_odds_chosen": 0.8466112613677979, "log_odds_ratio": -0.36574095487594604, "logits/chosen": -0.36816614866256714, "logits/rejected": -1.4561880826950073, "logps/chosen": -1.4958292245864868, "logps/rejected": -2.210787534713745, "loss": 1.5377, "nll_loss": 1.501145839691162, "rewards/accuracies": 1.0, "rewards/chosen": -0.14958293735980988, "rewards/margins": 0.07149583101272583, "rewards/rejected": -0.2210787534713745, "step": 489 }, { "epoch": 1.350707628581291, "grad_norm": 0.1835506707429886, "learning_rate": 3.3634126360532694e-06, "log_odds_chosen": 0.9759219288825989, "log_odds_ratio": -0.3324146568775177, "logits/chosen": -0.24411487579345703, "logits/rejected": -1.3185425996780396, "logps/chosen": -1.5735536813735962, "logps/rejected": -2.4141926765441895, "loss": 1.6083, "nll_loss": 1.5750340223312378, "rewards/accuracies": 1.0, "rewards/chosen": -0.15735535323619843, "rewards/margins": 0.08406390994787216, "rewards/rejected": -0.24141928553581238, "step": 490 }, { "epoch": 1.3534691059716948, "grad_norm": 0.20382438600063324, "learning_rate": 3.355887076004345e-06, "log_odds_chosen": 0.8967198133468628, "log_odds_ratio": -0.3447577953338623, "logits/chosen": -0.45378029346466064, "logits/rejected": -1.0346810817718506, "logps/chosen": -1.4815733432769775, "logps/rejected": -2.233039140701294, "loss": 1.5336, "nll_loss": 1.4991440773010254, "rewards/accuracies": 1.0, "rewards/chosen": -0.14815731346607208, "rewards/margins": 0.07514660060405731, "rewards/rejected": -0.2233039289712906, "step": 491 }, { "epoch": 1.3562305833620987, "grad_norm": 0.1867835968732834, "learning_rate": 3.3483527203914694e-06, "log_odds_chosen": 0.9929035902023315, "log_odds_ratio": -0.3237488567829132, "logits/chosen": -0.3180537819862366, "logits/rejected": -1.2090908288955688, "logps/chosen": -1.517293095588684, "logps/rejected": -2.360647678375244, "loss": 1.5624, "nll_loss": 1.5299937725067139, "rewards/accuracies": 1.0, "rewards/chosen": -0.15172931551933289, "rewards/margins": 0.08433545380830765, "rewards/rejected": -0.23606477677822113, "step": 492 }, { "epoch": 1.3589920607525026, "grad_norm": 0.18780100345611572, "learning_rate": 3.340809646641805e-06, "log_odds_chosen": 0.8353948593139648, "log_odds_ratio": -0.36322087049484253, "logits/chosen": -0.23974277079105377, "logits/rejected": -1.23904550075531, "logps/chosen": -1.5875341892242432, "logps/rejected": -2.3006584644317627, "loss": 1.6096, "nll_loss": 1.5732414722442627, "rewards/accuracies": 1.0, "rewards/chosen": -0.1587534248828888, "rewards/margins": 0.07131239771842957, "rewards/rejected": -0.23006582260131836, "step": 493 }, { "epoch": 1.3617535381429065, "grad_norm": 0.18200430274009705, "learning_rate": 3.333257932272105e-06, "log_odds_chosen": 0.9004358053207397, "log_odds_ratio": -0.3532771170139313, "logits/chosen": -0.2815527021884918, "logits/rejected": -1.1550835371017456, "logps/chosen": -1.4390370845794678, "logps/rejected": -2.194286346435547, "loss": 1.4934, "nll_loss": 1.4581207036972046, "rewards/accuracies": 1.0, "rewards/chosen": -0.1439037024974823, "rewards/margins": 0.07552491873502731, "rewards/rejected": -0.21942861378192902, "step": 494 }, { "epoch": 1.3645150155333103, "grad_norm": 0.19926373660564423, "learning_rate": 3.3256976548879183e-06, "log_odds_chosen": 0.8690962195396423, "log_odds_ratio": -0.3561793863773346, "logits/chosen": -0.3800399601459503, "logits/rejected": -1.047258973121643, "logps/chosen": -1.4909493923187256, "logps/rejected": -2.219834327697754, "loss": 1.5367, "nll_loss": 1.5010950565338135, "rewards/accuracies": 1.0, "rewards/chosen": -0.14909493923187256, "rewards/margins": 0.07288849353790283, "rewards/rejected": -0.2219834327697754, "step": 495 }, { "epoch": 1.3672764929237142, "grad_norm": 0.18317997455596924, "learning_rate": 3.3181288921827925e-06, "log_odds_chosen": 0.9415011405944824, "log_odds_ratio": -0.3410661220550537, "logits/chosen": -0.27754640579223633, "logits/rejected": -1.0399134159088135, "logps/chosen": -1.4013381004333496, "logps/rejected": -2.179905414581299, "loss": 1.465, "nll_loss": 1.4309097528457642, "rewards/accuracies": 1.0, "rewards/chosen": -0.1401338279247284, "rewards/margins": 0.07785671204328537, "rewards/rejected": -0.21799054741859436, "step": 496 }, { "epoch": 1.3700379703141181, "grad_norm": 0.1992860585451126, "learning_rate": 3.310551721937475e-06, "log_odds_chosen": 0.9155603647232056, "log_odds_ratio": -0.340352326631546, "logits/chosen": -0.30399274826049805, "logits/rejected": -1.2022323608398438, "logps/chosen": -1.4278628826141357, "logps/rejected": -2.1877853870391846, "loss": 1.4555, "nll_loss": 1.4214258193969727, "rewards/accuracies": 1.0, "rewards/chosen": -0.14278629422187805, "rewards/margins": 0.07599223405122757, "rewards/rejected": -0.21877853572368622, "step": 497 }, { "epoch": 1.372799447704522, "grad_norm": 0.2043248564004898, "learning_rate": 3.3029662220191146e-06, "log_odds_chosen": 0.9598441123962402, "log_odds_ratio": -0.3286186754703522, "logits/chosen": -0.38024866580963135, "logits/rejected": -1.2354223728179932, "logps/chosen": -1.33193838596344, "logps/rejected": -2.1157238483428955, "loss": 1.388, "nll_loss": 1.3551721572875977, "rewards/accuracies": 1.0, "rewards/chosen": -0.13319383561611176, "rewards/margins": 0.07837854325771332, "rewards/rejected": -0.21157239377498627, "step": 498 }, { "epoch": 1.3755609250949257, "grad_norm": 0.17761792242527008, "learning_rate": 3.2953724703804572e-06, "log_odds_chosen": 1.0203739404678345, "log_odds_ratio": -0.3161318898200989, "logits/chosen": -0.31488168239593506, "logits/rejected": -1.2814425230026245, "logps/chosen": -1.4082773923873901, "logps/rejected": -2.2629306316375732, "loss": 1.4532, "nll_loss": 1.4215905666351318, "rewards/accuracies": 1.0, "rewards/chosen": -0.1408277451992035, "rewards/margins": 0.08546529710292816, "rewards/rejected": -0.22629307210445404, "step": 499 }, { "epoch": 1.3783224024853298, "grad_norm": 0.19437268376350403, "learning_rate": 3.2877705450590525e-06, "log_odds_chosen": 0.8638174533843994, "log_odds_ratio": -0.35613882541656494, "logits/chosen": -0.3166804015636444, "logits/rejected": -1.1927976608276367, "logps/chosen": -1.4706673622131348, "logps/rejected": -2.1903886795043945, "loss": 1.5439, "nll_loss": 1.5082558393478394, "rewards/accuracies": 1.0, "rewards/chosen": -0.14706675708293915, "rewards/margins": 0.07197214663028717, "rewards/rejected": -0.21903888881206512, "step": 500 }, { "epoch": 1.3810838798757334, "grad_norm": 0.17707180976867676, "learning_rate": 3.2801605241764432e-06, "log_odds_chosen": 0.9180523753166199, "log_odds_ratio": -0.34557586908340454, "logits/chosen": -0.30843862891197205, "logits/rejected": -1.2783384323120117, "logps/chosen": -1.4498966932296753, "logps/rejected": -2.2136971950531006, "loss": 1.5081, "nll_loss": 1.4735546112060547, "rewards/accuracies": 1.0, "rewards/chosen": -0.14498966932296753, "rewards/margins": 0.07638007402420044, "rewards/rejected": -0.22136974334716797, "step": 501 }, { "epoch": 1.3838453572661373, "grad_norm": 0.18479248881340027, "learning_rate": 3.272542485937369e-06, "log_odds_chosen": 0.8546453714370728, "log_odds_ratio": -0.36667194962501526, "logits/chosen": -0.3115421533584595, "logits/rejected": -1.017000675201416, "logps/chosen": -1.5001187324523926, "logps/rejected": -2.216719627380371, "loss": 1.5475, "nll_loss": 1.51079261302948, "rewards/accuracies": 1.0, "rewards/chosen": -0.15001188218593597, "rewards/margins": 0.07166009396314621, "rewards/rejected": -0.2216719686985016, "step": 502 }, { "epoch": 1.3866068346565412, "grad_norm": 0.23358802497386932, "learning_rate": 3.2649165086289597e-06, "log_odds_chosen": 0.8840019702911377, "log_odds_ratio": -0.3495294749736786, "logits/chosen": -0.3989594876766205, "logits/rejected": -1.4850833415985107, "logps/chosen": -1.4817544221878052, "logps/rejected": -2.223788022994995, "loss": 1.5166, "nll_loss": 1.4816381931304932, "rewards/accuracies": 1.0, "rewards/chosen": -0.148175448179245, "rewards/margins": 0.07420337200164795, "rewards/rejected": -0.22237882018089294, "step": 503 }, { "epoch": 1.389368312046945, "grad_norm": 0.19259649515151978, "learning_rate": 3.2572826706199304e-06, "log_odds_chosen": 1.1401124000549316, "log_odds_ratio": -0.2851974070072174, "logits/chosen": -0.4010545313358307, "logits/rejected": -1.2275235652923584, "logps/chosen": -1.359872579574585, "logps/rejected": -2.307187080383301, "loss": 1.4284, "nll_loss": 1.3999295234680176, "rewards/accuracies": 1.0, "rewards/chosen": -0.1359872668981552, "rewards/margins": 0.0947314128279686, "rewards/rejected": -0.2307186871767044, "step": 504 }, { "epoch": 1.392129789437349, "grad_norm": 0.1858515590429306, "learning_rate": 3.249641050359779e-06, "log_odds_chosen": 0.906593382358551, "log_odds_ratio": -0.3474721610546112, "logits/chosen": -0.3227766156196594, "logits/rejected": -1.0939997434616089, "logps/chosen": -1.5134148597717285, "logps/rejected": -2.277780055999756, "loss": 1.5509, "nll_loss": 1.5161436796188354, "rewards/accuracies": 1.0, "rewards/chosen": -0.1513414978981018, "rewards/margins": 0.07643650472164154, "rewards/rejected": -0.22777798771858215, "step": 505 }, { "epoch": 1.3948912668277529, "grad_norm": 0.20403239130973816, "learning_rate": 3.2419917263779765e-06, "log_odds_chosen": 0.813700258731842, "log_odds_ratio": -0.36978182196617126, "logits/chosen": -0.4090678095817566, "logits/rejected": -1.1975152492523193, "logps/chosen": -1.4790825843811035, "logps/rejected": -2.156320810317993, "loss": 1.5241, "nll_loss": 1.487076997756958, "rewards/accuracies": 1.0, "rewards/chosen": -0.1479082703590393, "rewards/margins": 0.06772380322217941, "rewards/rejected": -0.21563206613063812, "step": 506 }, { "epoch": 1.3976527442181568, "grad_norm": 0.17158633470535278, "learning_rate": 3.234334777283162e-06, "log_odds_chosen": 1.0563242435455322, "log_odds_ratio": -0.30689284205436707, "logits/chosen": -0.43664678931236267, "logits/rejected": -1.2676841020584106, "logps/chosen": -1.4273220300674438, "logps/rejected": -2.3132920265197754, "loss": 1.4653, "nll_loss": 1.4345909357070923, "rewards/accuracies": 1.0, "rewards/chosen": -0.1427321881055832, "rewards/margins": 0.08859699964523315, "rewards/rejected": -0.23132917284965515, "step": 507 }, { "epoch": 1.4004142216085607, "grad_norm": 0.19566792249679565, "learning_rate": 3.2266702817623348e-06, "log_odds_chosen": 0.7950919270515442, "log_odds_ratio": -0.3772909641265869, "logits/chosen": -0.37184882164001465, "logits/rejected": -1.0912429094314575, "logps/chosen": -1.5028144121170044, "logps/rejected": -2.1690046787261963, "loss": 1.5625, "nll_loss": 1.5247732400894165, "rewards/accuracies": 1.0, "rewards/chosen": -0.15028144419193268, "rewards/margins": 0.06661904603242874, "rewards/rejected": -0.21690048277378082, "step": 508 }, { "epoch": 1.4031756989989645, "grad_norm": 0.1792566031217575, "learning_rate": 3.218998318580043e-06, "log_odds_chosen": 1.0348693132400513, "log_odds_ratio": -0.30789241194725037, "logits/chosen": -0.41302406787872314, "logits/rejected": -1.22664475440979, "logps/chosen": -1.4119377136230469, "logps/rejected": -2.2752819061279297, "loss": 1.4666, "nll_loss": 1.4358162879943848, "rewards/accuracies": 1.0, "rewards/chosen": -0.14119377732276917, "rewards/margins": 0.08633442223072052, "rewards/rejected": -0.2275281846523285, "step": 509 }, { "epoch": 1.4059371763893682, "grad_norm": 0.17888224124908447, "learning_rate": 3.2113189665775812e-06, "log_odds_chosen": 0.9151650667190552, "log_odds_ratio": -0.3469938635826111, "logits/chosen": -0.29533159732818604, "logits/rejected": -1.2474136352539062, "logps/chosen": -1.5112439393997192, "logps/rejected": -2.2847676277160645, "loss": 1.5513, "nll_loss": 1.5165679454803467, "rewards/accuracies": 1.0, "rewards/chosen": -0.15112441778182983, "rewards/margins": 0.07735235244035721, "rewards/rejected": -0.22847676277160645, "step": 510 }, { "epoch": 1.408698653779772, "grad_norm": 0.18235231935977936, "learning_rate": 3.203632304672172e-06, "log_odds_chosen": 1.0543142557144165, "log_odds_ratio": -0.3037016987800598, "logits/chosen": -0.3121378719806671, "logits/rejected": -1.4559226036071777, "logps/chosen": -1.4455695152282715, "logps/rejected": -2.3296966552734375, "loss": 1.4981, "nll_loss": 1.4677432775497437, "rewards/accuracies": 1.0, "rewards/chosen": -0.14455696940422058, "rewards/margins": 0.08841272443532944, "rewards/rejected": -0.23296970129013062, "step": 511 }, { "epoch": 1.411460131170176, "grad_norm": 0.186997190117836, "learning_rate": 3.1959384118561587e-06, "log_odds_chosen": 1.343024492263794, "log_odds_ratio": -0.26705506443977356, "logits/chosen": -0.23677915334701538, "logits/rejected": -1.1647669076919556, "logps/chosen": -1.309016466140747, "logps/rejected": -2.4087283611297607, "loss": 1.3879, "nll_loss": 1.36118483543396, "rewards/accuracies": 1.0, "rewards/chosen": -0.13090163469314575, "rewards/margins": 0.10997119545936584, "rewards/rejected": -0.2408728450536728, "step": 512 }, { "epoch": 1.4142216085605799, "grad_norm": 0.18079784512519836, "learning_rate": 3.188237367196194e-06, "log_odds_chosen": 0.9898212552070618, "log_odds_ratio": -0.32014667987823486, "logits/chosen": -0.4330415725708008, "logits/rejected": -1.1821650266647339, "logps/chosen": -1.4573217630386353, "logps/rejected": -2.289856433868408, "loss": 1.5149, "nll_loss": 1.4828686714172363, "rewards/accuracies": 1.0, "rewards/chosen": -0.14573219418525696, "rewards/margins": 0.08325345814228058, "rewards/rejected": -0.22898563742637634, "step": 513 }, { "epoch": 1.4169830859509838, "grad_norm": 0.1948278695344925, "learning_rate": 3.180529249832428e-06, "log_odds_chosen": 1.0559723377227783, "log_odds_ratio": -0.301472544670105, "logits/chosen": -0.36995819211006165, "logits/rejected": -1.3074886798858643, "logps/chosen": -1.469174861907959, "logps/rejected": -2.3622772693634033, "loss": 1.5107, "nll_loss": 1.4805397987365723, "rewards/accuracies": 1.0, "rewards/chosen": -0.14691749215126038, "rewards/margins": 0.08931022882461548, "rewards/rejected": -0.23622773587703705, "step": 514 }, { "epoch": 1.4197445633413877, "grad_norm": 0.18570828437805176, "learning_rate": 3.172814138977692e-06, "log_odds_chosen": 0.7720561623573303, "log_odds_ratio": -0.38324815034866333, "logits/chosen": -0.3680783212184906, "logits/rejected": -0.9790282249450684, "logps/chosen": -1.4506253004074097, "logps/rejected": -2.089714288711548, "loss": 1.4965, "nll_loss": 1.4582042694091797, "rewards/accuracies": 1.0, "rewards/chosen": -0.14506252110004425, "rewards/margins": 0.0639088824391365, "rewards/rejected": -0.20897141098976135, "step": 515 }, { "epoch": 1.4225060407317915, "grad_norm": 0.20693336427211761, "learning_rate": 3.165092113916688e-06, "log_odds_chosen": 1.1290223598480225, "log_odds_ratio": -0.2917245924472809, "logits/chosen": -0.3547555208206177, "logits/rejected": -1.1518139839172363, "logps/chosen": -1.4400185346603394, "logps/rejected": -2.3950414657592773, "loss": 1.486, "nll_loss": 1.4568506479263306, "rewards/accuracies": 1.0, "rewards/chosen": -0.14400185644626617, "rewards/margins": 0.09550228714942932, "rewards/rejected": -0.2395041584968567, "step": 516 }, { "epoch": 1.4252675181221954, "grad_norm": 0.1988285332918167, "learning_rate": 3.1573632540051702e-06, "log_odds_chosen": 1.0142998695373535, "log_odds_ratio": -0.32072144746780396, "logits/chosen": -0.324567049741745, "logits/rejected": -0.9946063756942749, "logps/chosen": -1.4054532051086426, "logps/rejected": -2.2474942207336426, "loss": 1.4717, "nll_loss": 1.4395849704742432, "rewards/accuracies": 1.0, "rewards/chosen": -0.1405453383922577, "rewards/margins": 0.08420407772064209, "rewards/rejected": -0.22474941611289978, "step": 517 }, { "epoch": 1.4280289955125993, "grad_norm": 0.19122861325740814, "learning_rate": 3.1496276386691327e-06, "log_odds_chosen": 0.7923365831375122, "log_odds_ratio": -0.3814864456653595, "logits/chosen": -0.3203206956386566, "logits/rejected": -1.144274115562439, "logps/chosen": -1.5352751016616821, "logps/rejected": -2.2041633129119873, "loss": 1.5649, "nll_loss": 1.5267633199691772, "rewards/accuracies": 1.0, "rewards/chosen": -0.15352752804756165, "rewards/margins": 0.06688882410526276, "rewards/rejected": -0.2204163521528244, "step": 518 }, { "epoch": 1.430790472903003, "grad_norm": 0.20250949263572693, "learning_rate": 3.1418853474039913e-06, "log_odds_chosen": 1.0723575353622437, "log_odds_ratio": -0.3095499575138092, "logits/chosen": -0.4371488690376282, "logits/rejected": -1.3240580558776855, "logps/chosen": -1.4372949600219727, "logps/rejected": -2.3369717597961426, "loss": 1.4619, "nll_loss": 1.4309086799621582, "rewards/accuracies": 1.0, "rewards/chosen": -0.14372947812080383, "rewards/margins": 0.08996766805648804, "rewards/rejected": -0.23369714617729187, "step": 519 }, { "epoch": 1.433551950293407, "grad_norm": 0.19551275670528412, "learning_rate": 3.1341364597737684e-06, "log_odds_chosen": 0.9039003849029541, "log_odds_ratio": -0.34478437900543213, "logits/chosen": -0.29235294461250305, "logits/rejected": -1.0886852741241455, "logps/chosen": -1.4417136907577515, "logps/rejected": -2.1943249702453613, "loss": 1.481, "nll_loss": 1.4464728832244873, "rewards/accuracies": 1.0, "rewards/chosen": -0.14417137205600739, "rewards/margins": 0.07526111602783203, "rewards/rejected": -0.2194325029850006, "step": 520 }, { "epoch": 1.4363134276838108, "grad_norm": 0.19355492293834686, "learning_rate": 3.126381055410274e-06, "log_odds_chosen": 1.0555334091186523, "log_odds_ratio": -0.30305859446525574, "logits/chosen": -0.3876825273036957, "logits/rejected": -1.0105053186416626, "logps/chosen": -1.442361831665039, "logps/rejected": -2.3280749320983887, "loss": 1.503, "nll_loss": 1.4726606607437134, "rewards/accuracies": 1.0, "rewards/chosen": -0.14423617720603943, "rewards/margins": 0.08857134729623795, "rewards/rejected": -0.23280753195285797, "step": 521 }, { "epoch": 1.4390749050742146, "grad_norm": 0.192452535033226, "learning_rate": 3.1186192140122863e-06, "log_odds_chosen": 0.7785193920135498, "log_odds_ratio": -0.38462716341018677, "logits/chosen": -0.3711756467819214, "logits/rejected": -0.9321059584617615, "logps/chosen": -1.4820040464401245, "logps/rejected": -2.1274261474609375, "loss": 1.5359, "nll_loss": 1.497480034828186, "rewards/accuracies": 1.0, "rewards/chosen": -0.1482004076242447, "rewards/margins": 0.06454220414161682, "rewards/rejected": -0.2127426117658615, "step": 522 }, { "epoch": 1.4418363824646185, "grad_norm": 0.18870854377746582, "learning_rate": 3.1108510153447352e-06, "log_odds_chosen": 0.840644896030426, "log_odds_ratio": -0.3709460496902466, "logits/chosen": -0.42261266708374023, "logits/rejected": -1.1406811475753784, "logps/chosen": -1.4828242063522339, "logps/rejected": -2.191493034362793, "loss": 1.532, "nll_loss": 1.4948935508728027, "rewards/accuracies": 1.0, "rewards/chosen": -0.14828240871429443, "rewards/margins": 0.07086686789989471, "rewards/rejected": -0.21914930641651154, "step": 523 }, { "epoch": 1.4445978598550224, "grad_norm": 0.19503174722194672, "learning_rate": 3.1030765392378813e-06, "log_odds_chosen": 1.1250898838043213, "log_odds_ratio": -0.2840951085090637, "logits/chosen": -0.391498863697052, "logits/rejected": -1.1013753414154053, "logps/chosen": -1.417677402496338, "logps/rejected": -2.3645308017730713, "loss": 1.4462, "nll_loss": 1.417814016342163, "rewards/accuracies": 1.0, "rewards/chosen": -0.14176775515079498, "rewards/margins": 0.09468530118465424, "rewards/rejected": -0.23645305633544922, "step": 524 }, { "epoch": 1.4473593372454263, "grad_norm": 0.19254902005195618, "learning_rate": 3.0952958655864957e-06, "log_odds_chosen": 1.0171841382980347, "log_odds_ratio": -0.3201075494289398, "logits/chosen": -0.3991393744945526, "logits/rejected": -1.1858185529708862, "logps/chosen": -1.4647375345230103, "logps/rejected": -2.3222553730010986, "loss": 1.5096, "nll_loss": 1.4776382446289062, "rewards/accuracies": 1.0, "rewards/chosen": -0.14647376537322998, "rewards/margins": 0.08575180172920227, "rewards/rejected": -0.23222555220127106, "step": 525 }, { "epoch": 1.4501208146358302, "grad_norm": 0.18993216753005981, "learning_rate": 3.0875090743490383e-06, "log_odds_chosen": 1.1832982301712036, "log_odds_ratio": -0.2701611816883087, "logits/chosen": -0.30588698387145996, "logits/rejected": -1.298948049545288, "logps/chosen": -1.4776264429092407, "logps/rejected": -2.4894192218780518, "loss": 1.5195, "nll_loss": 1.4924474954605103, "rewards/accuracies": 1.0, "rewards/chosen": -0.14776265621185303, "rewards/margins": 0.10117927938699722, "rewards/rejected": -0.24894192814826965, "step": 526 }, { "epoch": 1.452882292026234, "grad_norm": 0.2328139990568161, "learning_rate": 3.0797162455468367e-06, "log_odds_chosen": 1.089007019996643, "log_odds_ratio": -0.29314038157463074, "logits/chosen": -0.35738593339920044, "logits/rejected": -1.2926090955734253, "logps/chosen": -1.442283272743225, "logps/rejected": -2.359701156616211, "loss": 1.4862, "nll_loss": 1.4569061994552612, "rewards/accuracies": 1.0, "rewards/chosen": -0.14422833919525146, "rewards/margins": 0.09174177050590515, "rewards/rejected": -0.23597010970115662, "step": 527 }, { "epoch": 1.455643769416638, "grad_norm": 0.19524161517620087, "learning_rate": 3.071917459263264e-06, "log_odds_chosen": 0.979629635810852, "log_odds_ratio": -0.3238218426704407, "logits/chosen": -0.3761988580226898, "logits/rejected": -1.1847519874572754, "logps/chosen": -1.4806503057479858, "logps/rejected": -2.3052115440368652, "loss": 1.5352, "nll_loss": 1.5027679204940796, "rewards/accuracies": 1.0, "rewards/chosen": -0.14806503057479858, "rewards/margins": 0.08245614171028137, "rewards/rejected": -0.23052117228507996, "step": 528 }, { "epoch": 1.4584052468070419, "grad_norm": 0.18576502799987793, "learning_rate": 3.0641127956429157e-06, "log_odds_chosen": 1.271224856376648, "log_odds_ratio": -0.2648095190525055, "logits/chosen": -0.28413140773773193, "logits/rejected": -1.2785409688949585, "logps/chosen": -1.492474913597107, "logps/rejected": -2.5942752361297607, "loss": 1.5287, "nll_loss": 1.502196192741394, "rewards/accuracies": 1.0, "rewards/chosen": -0.14924749732017517, "rewards/margins": 0.11018005013465881, "rewards/rejected": -0.259427547454834, "step": 529 }, { "epoch": 1.4611667241974455, "grad_norm": 0.18335674703121185, "learning_rate": 3.056302334890786e-06, "log_odds_chosen": 1.133495569229126, "log_odds_ratio": -0.29078611731529236, "logits/chosen": -0.3989834487438202, "logits/rejected": -1.2690210342407227, "logps/chosen": -1.408272385597229, "logps/rejected": -2.363489866256714, "loss": 1.4457, "nll_loss": 1.4166673421859741, "rewards/accuracies": 1.0, "rewards/chosen": -0.1408272385597229, "rewards/margins": 0.09552174806594849, "rewards/rejected": -0.2363489866256714, "step": 530 }, { "epoch": 1.4639282015878494, "grad_norm": 0.19092051684856415, "learning_rate": 3.0484861572714446e-06, "log_odds_chosen": 1.2101211547851562, "log_odds_ratio": -0.26364564895629883, "logits/chosen": -0.3666624426841736, "logits/rejected": -1.3500810861587524, "logps/chosen": -1.4388892650604248, "logps/rejected": -2.464836359024048, "loss": 1.4736, "nll_loss": 1.4472087621688843, "rewards/accuracies": 1.0, "rewards/chosen": -0.143888920545578, "rewards/margins": 0.10259471088647842, "rewards/rejected": -0.24648365378379822, "step": 531 }, { "epoch": 1.4666896789782533, "grad_norm": 0.22723126411437988, "learning_rate": 3.0406643431082088e-06, "log_odds_chosen": 1.2191773653030396, "log_odds_ratio": -0.2683591842651367, "logits/chosen": -0.4292551577091217, "logits/rejected": -1.2706406116485596, "logps/chosen": -1.4183095693588257, "logps/rejected": -2.452069044113159, "loss": 1.4576, "nll_loss": 1.4307693243026733, "rewards/accuracies": 1.0, "rewards/chosen": -0.14183096587657928, "rewards/margins": 0.10337594151496887, "rewards/rejected": -0.24520690739154816, "step": 532 }, { "epoch": 1.4694511563686572, "grad_norm": 0.1975235641002655, "learning_rate": 3.0328369727823216e-06, "log_odds_chosen": 0.9465770721435547, "log_odds_ratio": -0.3347564935684204, "logits/chosen": -0.32763075828552246, "logits/rejected": -1.1353988647460938, "logps/chosen": -1.4781655073165894, "logps/rejected": -2.276811122894287, "loss": 1.5176, "nll_loss": 1.4840781688690186, "rewards/accuracies": 1.0, "rewards/chosen": -0.14781653881072998, "rewards/margins": 0.07986456155776978, "rewards/rejected": -0.22768111526966095, "step": 533 }, { "epoch": 1.472212633759061, "grad_norm": 0.19953002035617828, "learning_rate": 3.0250041267321234e-06, "log_odds_chosen": 1.0042380094528198, "log_odds_ratio": -0.3157652020454407, "logits/chosen": -0.40572917461395264, "logits/rejected": -1.2156774997711182, "logps/chosen": -1.3842873573303223, "logps/rejected": -2.2157645225524902, "loss": 1.4475, "nll_loss": 1.4159327745437622, "rewards/accuracies": 1.0, "rewards/chosen": -0.1384287327528, "rewards/margins": 0.08314771950244904, "rewards/rejected": -0.22157645225524902, "step": 534 }, { "epoch": 1.474974111149465, "grad_norm": 0.20859917998313904, "learning_rate": 3.0171658854522274e-06, "log_odds_chosen": 1.2152209281921387, "log_odds_ratio": -0.2747996747493744, "logits/chosen": -0.433580219745636, "logits/rejected": -1.2437716722488403, "logps/chosen": -1.4281524419784546, "logps/rejected": -2.4653677940368652, "loss": 1.465, "nll_loss": 1.437475323677063, "rewards/accuracies": 1.0, "rewards/chosen": -0.1428152322769165, "rewards/margins": 0.10372152924537659, "rewards/rejected": -0.24653677642345428, "step": 535 }, { "epoch": 1.4777355885398689, "grad_norm": 0.19933345913887024, "learning_rate": 3.009322329492689e-06, "log_odds_chosen": 0.8492363095283508, "log_odds_ratio": -0.3588874936103821, "logits/chosen": -0.4448508322238922, "logits/rejected": -1.2199938297271729, "logps/chosen": -1.5185283422470093, "logps/rejected": -2.2324721813201904, "loss": 1.5371, "nll_loss": 1.5011987686157227, "rewards/accuracies": 1.0, "rewards/chosen": -0.15185286104679108, "rewards/margins": 0.07139438390731812, "rewards/rejected": -0.223247230052948, "step": 536 }, { "epoch": 1.4804970659302727, "grad_norm": 0.18872693181037903, "learning_rate": 3.0014735394581824e-06, "log_odds_chosen": 0.9202526211738586, "log_odds_ratio": -0.3476359248161316, "logits/chosen": -0.4314260184764862, "logits/rejected": -1.0960811376571655, "logps/chosen": -1.4040780067443848, "logps/rejected": -2.161449432373047, "loss": 1.4671, "nll_loss": 1.4323309659957886, "rewards/accuracies": 1.0, "rewards/chosen": -0.14040780067443848, "rewards/margins": 0.0757371261715889, "rewards/rejected": -0.21614491939544678, "step": 537 }, { "epoch": 1.4832585433206766, "grad_norm": 0.1901201754808426, "learning_rate": 2.993619596007168e-06, "log_odds_chosen": 1.0711580514907837, "log_odds_ratio": -0.3045736253261566, "logits/chosen": -0.36668646335601807, "logits/rejected": -1.3843791484832764, "logps/chosen": -1.5035892724990845, "logps/rejected": -2.4158196449279785, "loss": 1.5336, "nll_loss": 1.5031917095184326, "rewards/accuracies": 1.0, "rewards/chosen": -0.1503589153289795, "rewards/margins": 0.09122300893068314, "rewards/rejected": -0.2415819615125656, "step": 538 }, { "epoch": 1.4860200207110803, "grad_norm": 0.19086135923862457, "learning_rate": 2.985760579851068e-06, "log_odds_chosen": 1.2659714221954346, "log_odds_ratio": -0.2554362416267395, "logits/chosen": -0.38513892889022827, "logits/rejected": -1.3220562934875488, "logps/chosen": -1.531071424484253, "logps/rejected": -2.6303224563598633, "loss": 1.5539, "nll_loss": 1.528355598449707, "rewards/accuracies": 1.0, "rewards/chosen": -0.15310713648796082, "rewards/margins": 0.10992510616779327, "rewards/rejected": -0.2630322575569153, "step": 539 }, { "epoch": 1.4887814981014844, "grad_norm": 0.18788237869739532, "learning_rate": 2.9778965717534314e-06, "log_odds_chosen": 1.0638117790222168, "log_odds_ratio": -0.31268173456192017, "logits/chosen": -0.4625805914402008, "logits/rejected": -1.360637903213501, "logps/chosen": -1.3956230878829956, "logps/rejected": -2.2779204845428467, "loss": 1.4372, "nll_loss": 1.4059207439422607, "rewards/accuracies": 1.0, "rewards/chosen": -0.13956230878829956, "rewards/margins": 0.08822974562644958, "rewards/rejected": -0.22779206931591034, "step": 540 }, { "epoch": 1.491542975491888, "grad_norm": 0.17800204455852509, "learning_rate": 2.9700276525291096e-06, "log_odds_chosen": 1.2406233549118042, "log_odds_ratio": -0.2622452676296234, "logits/chosen": -0.42659229040145874, "logits/rejected": -1.3596243858337402, "logps/chosen": -1.4014408588409424, "logps/rejected": -2.4498422145843506, "loss": 1.4526, "nll_loss": 1.426424503326416, "rewards/accuracies": 1.0, "rewards/chosen": -0.14014407992362976, "rewards/margins": 0.10484012961387634, "rewards/rejected": -0.2449842095375061, "step": 541 }, { "epoch": 1.494304452882292, "grad_norm": 0.18867123126983643, "learning_rate": 2.9621539030434223e-06, "log_odds_chosen": 1.1968717575073242, "log_odds_ratio": -0.2740924060344696, "logits/chosen": -0.5522366762161255, "logits/rejected": -1.399277925491333, "logps/chosen": -1.427716851234436, "logps/rejected": -2.445732355117798, "loss": 1.4698, "nll_loss": 1.442360520362854, "rewards/accuracies": 1.0, "rewards/chosen": -0.14277169108390808, "rewards/margins": 0.10180152952671051, "rewards/rejected": -0.2445732206106186, "step": 542 }, { "epoch": 1.4970659302726959, "grad_norm": 0.2140800505876541, "learning_rate": 2.954275404211328e-06, "log_odds_chosen": 1.0577278137207031, "log_odds_ratio": -0.3151082992553711, "logits/chosen": -0.37441232800483704, "logits/rejected": -1.2353841066360474, "logps/chosen": -1.3858736753463745, "logps/rejected": -2.2611029148101807, "loss": 1.4471, "nll_loss": 1.4155535697937012, "rewards/accuracies": 1.0, "rewards/chosen": -0.1385873556137085, "rewards/margins": 0.087522953748703, "rewards/rejected": -0.2261103093624115, "step": 543 }, { "epoch": 1.4998274076630997, "grad_norm": 0.20307707786560059, "learning_rate": 2.946392236996592e-06, "log_odds_chosen": 1.240695834159851, "log_odds_ratio": -0.27113664150238037, "logits/chosen": -0.41834384202957153, "logits/rejected": -1.3491978645324707, "logps/chosen": -1.4214861392974854, "logps/rejected": -2.4779489040374756, "loss": 1.4671, "nll_loss": 1.4400283098220825, "rewards/accuracies": 1.0, "rewards/chosen": -0.14214861392974854, "rewards/margins": 0.1056462898850441, "rewards/rejected": -0.24779489636421204, "step": 544 }, { "epoch": 1.5025888850535036, "grad_norm": 0.20308546721935272, "learning_rate": 2.9385044824109544e-06, "log_odds_chosen": 1.1952365636825562, "log_odds_ratio": -0.2849617898464203, "logits/chosen": -0.39065632224082947, "logits/rejected": -1.1724423170089722, "logps/chosen": -1.4500305652618408, "logps/rejected": -2.4722604751586914, "loss": 1.4905, "nll_loss": 1.4620076417922974, "rewards/accuracies": 1.0, "rewards/chosen": -0.1450030505657196, "rewards/margins": 0.10222301632165909, "rewards/rejected": -0.24722608923912048, "step": 545 }, { "epoch": 1.5053503624439075, "grad_norm": 0.19091783463954926, "learning_rate": 2.9306122215132974e-06, "log_odds_chosen": 1.1480547189712524, "log_odds_ratio": -0.2878974676132202, "logits/chosen": -0.3440283536911011, "logits/rejected": -1.1784183979034424, "logps/chosen": -1.4391610622406006, "logps/rejected": -2.4146106243133545, "loss": 1.493, "nll_loss": 1.4642505645751953, "rewards/accuracies": 1.0, "rewards/chosen": -0.14391611516475677, "rewards/margins": 0.09754496067762375, "rewards/rejected": -0.24146109819412231, "step": 546 }, { "epoch": 1.5081118398343114, "grad_norm": 0.20340758562088013, "learning_rate": 2.9227155354088134e-06, "log_odds_chosen": 1.003832459449768, "log_odds_ratio": -0.31678086519241333, "logits/chosen": -0.5404032468795776, "logits/rejected": -1.2029783725738525, "logps/chosen": -1.4751348495483398, "logps/rejected": -2.324713706970215, "loss": 1.4996, "nll_loss": 1.4679594039916992, "rewards/accuracies": 1.0, "rewards/chosen": -0.1475134789943695, "rewards/margins": 0.08495788276195526, "rewards/rejected": -0.23247136175632477, "step": 547 }, { "epoch": 1.510873317224715, "grad_norm": 0.20344741642475128, "learning_rate": 2.91481450524817e-06, "log_odds_chosen": 1.4178953170776367, "log_odds_ratio": -0.2338394820690155, "logits/chosen": -0.47337958216667175, "logits/rejected": -1.19346284866333, "logps/chosen": -1.4051434993743896, "logps/rejected": -2.6211602687835693, "loss": 1.4389, "nll_loss": 1.4155361652374268, "rewards/accuracies": 1.0, "rewards/chosen": -0.14051437377929688, "rewards/margins": 0.12160167843103409, "rewards/rejected": -0.26211604475975037, "step": 548 }, { "epoch": 1.5136347946151192, "grad_norm": 0.18587139248847961, "learning_rate": 2.9069092122266758e-06, "log_odds_chosen": 1.2054380178451538, "log_odds_ratio": -0.26665008068084717, "logits/chosen": -0.32703065872192383, "logits/rejected": -1.3216447830200195, "logps/chosen": -1.4081414937973022, "logps/rejected": -2.42374324798584, "loss": 1.4502, "nll_loss": 1.4235265254974365, "rewards/accuracies": 1.0, "rewards/chosen": -0.1408141553401947, "rewards/margins": 0.10156016796827316, "rewards/rejected": -0.24237433075904846, "step": 549 }, { "epoch": 1.5163962720055228, "grad_norm": 0.18221275508403778, "learning_rate": 2.8989997375834485e-06, "log_odds_chosen": 1.188881754875183, "log_odds_ratio": -0.280866414308548, "logits/chosen": -0.4296250343322754, "logits/rejected": -1.2531516551971436, "logps/chosen": -1.4153172969818115, "logps/rejected": -2.4211583137512207, "loss": 1.4616, "nll_loss": 1.4335592985153198, "rewards/accuracies": 1.0, "rewards/chosen": -0.14153173565864563, "rewards/margins": 0.10058411955833435, "rewards/rejected": -0.24211585521697998, "step": 550 }, { "epoch": 1.519157749395927, "grad_norm": 0.18981464207172394, "learning_rate": 2.8910861626005774e-06, "log_odds_chosen": 1.1493034362792969, "log_odds_ratio": -0.28904151916503906, "logits/chosen": -0.4802493751049042, "logits/rejected": -1.2575275897979736, "logps/chosen": -1.4307961463928223, "logps/rejected": -2.402924060821533, "loss": 1.4782, "nll_loss": 1.4493383169174194, "rewards/accuracies": 1.0, "rewards/chosen": -0.14307962357997894, "rewards/margins": 0.0972127765417099, "rewards/rejected": -0.24029240012168884, "step": 551 }, { "epoch": 1.5219192267863306, "grad_norm": 0.20524165034294128, "learning_rate": 2.8831685686022897e-06, "log_odds_chosen": 1.141466736793518, "log_odds_ratio": -0.28858447074890137, "logits/chosen": -0.3784864842891693, "logits/rejected": -1.2423912286758423, "logps/chosen": -1.4606361389160156, "logps/rejected": -2.4307382106781006, "loss": 1.5012, "nll_loss": 1.472370982170105, "rewards/accuracies": 1.0, "rewards/chosen": -0.14606362581253052, "rewards/margins": 0.09701021760702133, "rewards/rejected": -0.24307383596897125, "step": 552 }, { "epoch": 1.5246807041767345, "grad_norm": 0.2044171839952469, "learning_rate": 2.8752470369541152e-06, "log_odds_chosen": 1.0985397100448608, "log_odds_ratio": -0.30126702785491943, "logits/chosen": -0.481227308511734, "logits/rejected": -1.1952636241912842, "logps/chosen": -1.4951497316360474, "logps/rejected": -2.4360949993133545, "loss": 1.5379, "nll_loss": 1.5077705383300781, "rewards/accuracies": 1.0, "rewards/chosen": -0.14951497316360474, "rewards/margins": 0.09409452974796295, "rewards/rejected": -0.2436094880104065, "step": 553 }, { "epoch": 1.5274421815671384, "grad_norm": 0.18418313562870026, "learning_rate": 2.8673216490620453e-06, "log_odds_chosen": 1.043046236038208, "log_odds_ratio": -0.30748340487480164, "logits/chosen": -0.37187397480010986, "logits/rejected": -1.1451232433319092, "logps/chosen": -1.4433115720748901, "logps/rejected": -2.3221161365509033, "loss": 1.4913, "nll_loss": 1.4605624675750732, "rewards/accuracies": 1.0, "rewards/chosen": -0.144331157207489, "rewards/margins": 0.0878804624080658, "rewards/rejected": -0.232211634516716, "step": 554 }, { "epoch": 1.5302036589575423, "grad_norm": 0.20266424119472504, "learning_rate": 2.859392486371705e-06, "log_odds_chosen": 1.1158493757247925, "log_odds_ratio": -0.3089603781700134, "logits/chosen": -0.4935527443885803, "logits/rejected": -1.1026583909988403, "logps/chosen": -1.4476503133773804, "logps/rejected": -2.402031421661377, "loss": 1.4953, "nll_loss": 1.4643837213516235, "rewards/accuracies": 1.0, "rewards/chosen": -0.14476501941680908, "rewards/margins": 0.09543813019990921, "rewards/rejected": -0.24020317196846008, "step": 555 }, { "epoch": 1.5329651363479462, "grad_norm": 0.20939421653747559, "learning_rate": 2.8514596303675073e-06, "log_odds_chosen": 1.337761640548706, "log_odds_ratio": -0.24065151810646057, "logits/chosen": -0.44341740012168884, "logits/rejected": -1.2423213720321655, "logps/chosen": -1.4196741580963135, "logps/rejected": -2.5635533332824707, "loss": 1.458, "nll_loss": 1.4339191913604736, "rewards/accuracies": 1.0, "rewards/chosen": -0.14196740090847015, "rewards/margins": 0.11438792198896408, "rewards/rejected": -0.256355345249176, "step": 556 }, { "epoch": 1.53572661373835, "grad_norm": 0.19183450937271118, "learning_rate": 2.8435231625718242e-06, "log_odds_chosen": 1.2861474752426147, "log_odds_ratio": -0.2519036829471588, "logits/chosen": -0.3986874222755432, "logits/rejected": -1.4165081977844238, "logps/chosen": -1.4597793817520142, "logps/rejected": -2.5625228881835938, "loss": 1.4835, "nll_loss": 1.4582821130752563, "rewards/accuracies": 1.0, "rewards/chosen": -0.1459779441356659, "rewards/margins": 0.11027435213327408, "rewards/rejected": -0.2562522888183594, "step": 557 }, { "epoch": 1.538488091128754, "grad_norm": 0.1966404765844345, "learning_rate": 2.835583164544139e-06, "log_odds_chosen": 1.1652964353561401, "log_odds_ratio": -0.28210559487342834, "logits/chosen": -0.34424522519111633, "logits/rejected": -1.2208619117736816, "logps/chosen": -1.432441234588623, "logps/rejected": -2.421638250350952, "loss": 1.4784, "nll_loss": 1.4502085447311401, "rewards/accuracies": 1.0, "rewards/chosen": -0.14324413239955902, "rewards/margins": 0.09891966730356216, "rewards/rejected": -0.24216382205486298, "step": 558 }, { "epoch": 1.5412495685191576, "grad_norm": 0.20868726074695587, "learning_rate": 2.827639717880218e-06, "log_odds_chosen": 1.0152394771575928, "log_odds_ratio": -0.3130377233028412, "logits/chosen": -0.4198133945465088, "logits/rejected": -1.3799936771392822, "logps/chosen": -1.483889102935791, "logps/rejected": -2.3396737575531006, "loss": 1.5087, "nll_loss": 1.477385401725769, "rewards/accuracies": 1.0, "rewards/chosen": -0.14838890731334686, "rewards/margins": 0.08557846397161484, "rewards/rejected": -0.2339673787355423, "step": 559 }, { "epoch": 1.5440110459095617, "grad_norm": 0.20906363427639008, "learning_rate": 2.8196929042112652e-06, "log_odds_chosen": 0.9060257077217102, "log_odds_ratio": -0.3446670472621918, "logits/chosen": -0.37944304943084717, "logits/rejected": -1.105902910232544, "logps/chosen": -1.5224838256835938, "logps/rejected": -2.288818836212158, "loss": 1.5646, "nll_loss": 1.530143141746521, "rewards/accuracies": 1.0, "rewards/chosen": -0.15224838256835938, "rewards/margins": 0.07663349062204361, "rewards/rejected": -0.22888188064098358, "step": 560 }, { "epoch": 1.5467725232999654, "grad_norm": 0.1921614706516266, "learning_rate": 2.811742805203087e-06, "log_odds_chosen": 1.2298061847686768, "log_odds_ratio": -0.2589764893054962, "logits/chosen": -0.45182332396507263, "logits/rejected": -1.330810785293579, "logps/chosen": -1.4006352424621582, "logps/rejected": -2.4393558502197266, "loss": 1.4415, "nll_loss": 1.4156479835510254, "rewards/accuracies": 1.0, "rewards/chosen": -0.14006352424621582, "rewards/margins": 0.10387204587459564, "rewards/rejected": -0.24393558502197266, "step": 561 }, { "epoch": 1.5495340006903695, "grad_norm": 0.19811220467090607, "learning_rate": 2.8037895025552513e-06, "log_odds_chosen": 0.9634622931480408, "log_odds_ratio": -0.32694584131240845, "logits/chosen": -0.39885398745536804, "logits/rejected": -1.2232179641723633, "logps/chosen": -1.5278772115707397, "logps/rejected": -2.345613956451416, "loss": 1.5515, "nll_loss": 1.518759846687317, "rewards/accuracies": 1.0, "rewards/chosen": -0.1527877300977707, "rewards/margins": 0.08177367597818375, "rewards/rejected": -0.23456138372421265, "step": 562 }, { "epoch": 1.5522954780807732, "grad_norm": 0.18278725445270538, "learning_rate": 2.7958330780002472e-06, "log_odds_chosen": 1.1775528192520142, "log_odds_ratio": -0.27846190333366394, "logits/chosen": -0.4758724272251129, "logits/rejected": -1.1088275909423828, "logps/chosen": -1.448551893234253, "logps/rejected": -2.4480299949645996, "loss": 1.4855, "nll_loss": 1.4576462507247925, "rewards/accuracies": 1.0, "rewards/chosen": -0.14485520124435425, "rewards/margins": 0.09994781762361526, "rewards/rejected": -0.2448030263185501, "step": 563 }, { "epoch": 1.555056955471177, "grad_norm": 0.2877790033817291, "learning_rate": 2.787873613302649e-06, "log_odds_chosen": 1.0789649486541748, "log_odds_ratio": -0.29869213700294495, "logits/chosen": -0.3708610534667969, "logits/rejected": -1.1095765829086304, "logps/chosen": -1.587816834449768, "logps/rejected": -2.5128860473632812, "loss": 1.6136, "nll_loss": 1.583752989768982, "rewards/accuracies": 1.0, "rewards/chosen": -0.15878169238567352, "rewards/margins": 0.09250693768262863, "rewards/rejected": -0.25128865242004395, "step": 564 }, { "epoch": 1.557818432861581, "grad_norm": 0.18686001002788544, "learning_rate": 2.7799111902582697e-06, "log_odds_chosen": 1.2737551927566528, "log_odds_ratio": -0.25510525703430176, "logits/chosen": -0.4066820740699768, "logits/rejected": -1.183882236480713, "logps/chosen": -1.4138309955596924, "logps/rejected": -2.4978373050689697, "loss": 1.4509, "nll_loss": 1.4253870248794556, "rewards/accuracies": 1.0, "rewards/chosen": -0.1413831114768982, "rewards/margins": 0.1084006279706955, "rewards/rejected": -0.24978375434875488, "step": 565 }, { "epoch": 1.5605799102519848, "grad_norm": 0.1999645084142685, "learning_rate": 2.7719458906933277e-06, "log_odds_chosen": 1.0915559530258179, "log_odds_ratio": -0.29378724098205566, "logits/chosen": -0.4203072190284729, "logits/rejected": -1.252396583557129, "logps/chosen": -1.4182379245758057, "logps/rejected": -2.3360824584960938, "loss": 1.4624, "nll_loss": 1.4330657720565796, "rewards/accuracies": 1.0, "rewards/chosen": -0.14182378351688385, "rewards/margins": 0.09178448468446732, "rewards/rejected": -0.23360827565193176, "step": 566 }, { "epoch": 1.5633413876423887, "grad_norm": 0.1945320963859558, "learning_rate": 2.763977796463599e-06, "log_odds_chosen": 1.1439540386199951, "log_odds_ratio": -0.279013991355896, "logits/chosen": -0.4679332971572876, "logits/rejected": -1.3151532411575317, "logps/chosen": -1.4525378942489624, "logps/rejected": -2.4222702980041504, "loss": 1.4773, "nll_loss": 1.4494402408599854, "rewards/accuracies": 1.0, "rewards/chosen": -0.14525380730628967, "rewards/margins": 0.0969732403755188, "rewards/rejected": -0.24222703278064728, "step": 567 }, { "epoch": 1.5661028650327924, "grad_norm": 0.22834345698356628, "learning_rate": 2.7560069894535783e-06, "log_odds_chosen": 1.0128172636032104, "log_odds_ratio": -0.31582072377204895, "logits/chosen": -0.37588873505592346, "logits/rejected": -1.1757762432098389, "logps/chosen": -1.4910950660705566, "logps/rejected": -2.349849224090576, "loss": 1.5305, "nll_loss": 1.4989250898361206, "rewards/accuracies": 1.0, "rewards/chosen": -0.14910952746868134, "rewards/margins": 0.08587540686130524, "rewards/rejected": -0.23498491942882538, "step": 568 }, { "epoch": 1.5688643424231965, "grad_norm": 0.22205309569835663, "learning_rate": 2.748033551575644e-06, "log_odds_chosen": 0.9333980083465576, "log_odds_ratio": -0.3366999924182892, "logits/chosen": -0.5289470553398132, "logits/rejected": -1.091086983680725, "logps/chosen": -1.5043820142745972, "logps/rejected": -2.2949490547180176, "loss": 1.5466, "nll_loss": 1.5129384994506836, "rewards/accuracies": 1.0, "rewards/chosen": -0.15043820440769196, "rewards/margins": 0.07905671000480652, "rewards/rejected": -0.22949492931365967, "step": 569 }, { "epoch": 1.5716258198136002, "grad_norm": 0.20217594504356384, "learning_rate": 2.7400575647692046e-06, "log_odds_chosen": 1.029040813446045, "log_odds_ratio": -0.3093741536140442, "logits/chosen": -0.45937010645866394, "logits/rejected": -1.3903661966323853, "logps/chosen": -1.519418716430664, "logps/rejected": -2.3968727588653564, "loss": 1.5564, "nll_loss": 1.525468111038208, "rewards/accuracies": 1.0, "rewards/chosen": -0.15194186568260193, "rewards/margins": 0.08774541318416595, "rewards/rejected": -0.2396872639656067, "step": 570 }, { "epoch": 1.5743872972040043, "grad_norm": 0.1940579116344452, "learning_rate": 2.7320791109998655e-06, "log_odds_chosen": 1.1399390697479248, "log_odds_ratio": -0.2861974239349365, "logits/chosen": -0.6214785575866699, "logits/rejected": -1.4372279644012451, "logps/chosen": -1.4143788814544678, "logps/rejected": -2.368788003921509, "loss": 1.4472, "nll_loss": 1.4185714721679688, "rewards/accuracies": 1.0, "rewards/chosen": -0.14143788814544678, "rewards/margins": 0.09544092416763306, "rewards/rejected": -0.23687879741191864, "step": 571 }, { "epoch": 1.577148774594408, "grad_norm": 0.20230019092559814, "learning_rate": 2.724098272258584e-06, "log_odds_chosen": 1.1091217994689941, "log_odds_ratio": -0.29568183422088623, "logits/chosen": -0.47217854857444763, "logits/rejected": -1.1805694103240967, "logps/chosen": -1.407993197441101, "logps/rejected": -2.340590238571167, "loss": 1.4511, "nll_loss": 1.4215528964996338, "rewards/accuracies": 1.0, "rewards/chosen": -0.14079932868480682, "rewards/margins": 0.09325972199440002, "rewards/rejected": -0.23405905067920685, "step": 572 }, { "epoch": 1.5799102519848118, "grad_norm": 0.1986916959285736, "learning_rate": 2.7161151305608258e-06, "log_odds_chosen": 1.1757807731628418, "log_odds_ratio": -0.27835384011268616, "logits/chosen": -0.5039411187171936, "logits/rejected": -1.1578638553619385, "logps/chosen": -1.356804609298706, "logps/rejected": -2.339522123336792, "loss": 1.4047, "nll_loss": 1.3769071102142334, "rewards/accuracies": 1.0, "rewards/chosen": -0.13568046689033508, "rewards/margins": 0.09827174246311188, "rewards/rejected": -0.23395220935344696, "step": 573 }, { "epoch": 1.5826717293752157, "grad_norm": 0.2001020759344101, "learning_rate": 2.7081297679457238e-06, "log_odds_chosen": 1.1079169511795044, "log_odds_ratio": -0.2983851432800293, "logits/chosen": -0.5363353490829468, "logits/rejected": -1.1894265413284302, "logps/chosen": -1.3313634395599365, "logps/rejected": -2.2467293739318848, "loss": 1.3781, "nll_loss": 1.3482534885406494, "rewards/accuracies": 1.0, "rewards/chosen": -0.13313636183738708, "rewards/margins": 0.09153657406568527, "rewards/rejected": -0.22467292845249176, "step": 574 }, { "epoch": 1.5854332067656196, "grad_norm": 0.19239993393421173, "learning_rate": 2.7001422664752338e-06, "log_odds_chosen": 1.1083507537841797, "log_odds_ratio": -0.29225873947143555, "logits/chosen": -0.45637497305870056, "logits/rejected": -1.456113338470459, "logps/chosen": -1.445547342300415, "logps/rejected": -2.381343126296997, "loss": 1.4794, "nll_loss": 1.450154423713684, "rewards/accuracies": 1.0, "rewards/chosen": -0.1445547342300415, "rewards/margins": 0.09357957541942596, "rewards/rejected": -0.23813430964946747, "step": 575 }, { "epoch": 1.5881946841560235, "grad_norm": 0.2084936797618866, "learning_rate": 2.692152708233292e-06, "log_odds_chosen": 1.2013274431228638, "log_odds_ratio": -0.2678019404411316, "logits/chosen": -0.44403111934661865, "logits/rejected": -1.3234542608261108, "logps/chosen": -1.4591835737228394, "logps/rejected": -2.4847757816314697, "loss": 1.4966, "nll_loss": 1.4698439836502075, "rewards/accuracies": 1.0, "rewards/chosen": -0.1459183543920517, "rewards/margins": 0.10255924612283707, "rewards/rejected": -0.24847759306430817, "step": 576 }, { "epoch": 1.5909561615464274, "grad_norm": 0.1804315149784088, "learning_rate": 2.684161175324971e-06, "log_odds_chosen": 1.3307889699935913, "log_odds_ratio": -0.25683870911598206, "logits/chosen": -0.4339476227760315, "logits/rejected": -1.3696343898773193, "logps/chosen": -1.3890607357025146, "logps/rejected": -2.522487163543701, "loss": 1.4172, "nll_loss": 1.3915408849716187, "rewards/accuracies": 1.0, "rewards/chosen": -0.1389060765504837, "rewards/margins": 0.11334265768527985, "rewards/rejected": -0.25224873423576355, "step": 577 }, { "epoch": 1.5937176389368313, "grad_norm": 0.1975688487291336, "learning_rate": 2.676167749875635e-06, "log_odds_chosen": 1.1008861064910889, "log_odds_ratio": -0.2976444661617279, "logits/chosen": -0.4536512494087219, "logits/rejected": -1.1986560821533203, "logps/chosen": -1.4587433338165283, "logps/rejected": -2.3928651809692383, "loss": 1.4979, "nll_loss": 1.4681648015975952, "rewards/accuracies": 1.0, "rewards/chosen": -0.14587433636188507, "rewards/margins": 0.09341219812631607, "rewards/rejected": -0.23928652703762054, "step": 578 }, { "epoch": 1.596479116327235, "grad_norm": 0.20506463944911957, "learning_rate": 2.6681725140300995e-06, "log_odds_chosen": 1.2498944997787476, "log_odds_ratio": -0.26152512431144714, "logits/chosen": -0.5536816716194153, "logits/rejected": -1.1964740753173828, "logps/chosen": -1.3689132928848267, "logps/rejected": -2.422372341156006, "loss": 1.4038, "nll_loss": 1.3776739835739136, "rewards/accuracies": 1.0, "rewards/chosen": -0.13689135015010834, "rewards/margins": 0.10534589737653732, "rewards/rejected": -0.24223724007606506, "step": 579 }, { "epoch": 1.599240593717639, "grad_norm": 0.1859833002090454, "learning_rate": 2.6601755499517826e-06, "log_odds_chosen": 1.075775384902954, "log_odds_ratio": -0.29822129011154175, "logits/chosen": -0.4594959020614624, "logits/rejected": -1.4271994829177856, "logps/chosen": -1.4741156101226807, "logps/rejected": -2.38793683052063, "loss": 1.5153, "nll_loss": 1.4854364395141602, "rewards/accuracies": 1.0, "rewards/chosen": -0.14741156995296478, "rewards/margins": 0.09138211607933044, "rewards/rejected": -0.23879368603229523, "step": 580 }, { "epoch": 1.6020020711080427, "grad_norm": 0.2031044065952301, "learning_rate": 2.6521769398218635e-06, "log_odds_chosen": 1.27201247215271, "log_odds_ratio": -0.2482789307832718, "logits/chosen": -0.4510449767112732, "logits/rejected": -1.3866052627563477, "logps/chosen": -1.4636785984039307, "logps/rejected": -2.5514018535614014, "loss": 1.4995, "nll_loss": 1.4746792316436768, "rewards/accuracies": 1.0, "rewards/chosen": -0.1463678479194641, "rewards/margins": 0.10877235233783722, "rewards/rejected": -0.25514018535614014, "step": 581 }, { "epoch": 1.6047635484984468, "grad_norm": 0.2318173348903656, "learning_rate": 2.6441767658384363e-06, "log_odds_chosen": 1.3877590894699097, "log_odds_ratio": -0.2377496063709259, "logits/chosen": -0.506435751914978, "logits/rejected": -1.2904635667800903, "logps/chosen": -1.4102269411087036, "logps/rejected": -2.5927672386169434, "loss": 1.4431, "nll_loss": 1.419339656829834, "rewards/accuracies": 1.0, "rewards/chosen": -0.1410226821899414, "rewards/margins": 0.11825403571128845, "rewards/rejected": -0.25927671790122986, "step": 582 }, { "epoch": 1.6075250258888505, "grad_norm": 0.1891685426235199, "learning_rate": 2.6361751102156673e-06, "log_odds_chosen": 1.234060525894165, "log_odds_ratio": -0.2588406503200531, "logits/chosen": -0.5742760300636292, "logits/rejected": -1.3154078722000122, "logps/chosen": -1.476389765739441, "logps/rejected": -2.5342161655426025, "loss": 1.5065, "nll_loss": 1.480628490447998, "rewards/accuracies": 1.0, "rewards/chosen": -0.1476389765739441, "rewards/margins": 0.10578266531229019, "rewards/rejected": -0.2534216344356537, "step": 583 }, { "epoch": 1.6102865032792544, "grad_norm": 0.19079244136810303, "learning_rate": 2.628172055182948e-06, "log_odds_chosen": 1.249068021774292, "log_odds_ratio": -0.25688526034355164, "logits/chosen": -0.4866940379142761, "logits/rejected": -1.2861042022705078, "logps/chosen": -1.412126064300537, "logps/rejected": -2.464944839477539, "loss": 1.4506, "nll_loss": 1.4248709678649902, "rewards/accuracies": 1.0, "rewards/chosen": -0.1412126123905182, "rewards/margins": 0.10528188943862915, "rewards/rejected": -0.24649450182914734, "step": 584 }, { "epoch": 1.6130479806696583, "grad_norm": 0.2094712108373642, "learning_rate": 2.620167682984052e-06, "log_odds_chosen": 1.2959085702896118, "log_odds_ratio": -0.24370384216308594, "logits/chosen": -0.4422675669193268, "logits/rejected": -1.3102877140045166, "logps/chosen": -1.4990110397338867, "logps/rejected": -2.614492893218994, "loss": 1.5143, "nll_loss": 1.4899609088897705, "rewards/accuracies": 1.0, "rewards/chosen": -0.1499011069536209, "rewards/margins": 0.11154817044734955, "rewards/rejected": -0.26144927740097046, "step": 585 }, { "epoch": 1.6158094580600622, "grad_norm": 0.20059597492218018, "learning_rate": 2.6121620758762877e-06, "log_odds_chosen": 1.482043743133545, "log_odds_ratio": -0.2227107137441635, "logits/chosen": -0.4936378002166748, "logits/rejected": -1.332862138748169, "logps/chosen": -1.4045792818069458, "logps/rejected": -2.679506778717041, "loss": 1.4353, "nll_loss": 1.4130373001098633, "rewards/accuracies": 1.0, "rewards/chosen": -0.14045794308185577, "rewards/margins": 0.1274927407503128, "rewards/rejected": -0.2679506540298462, "step": 586 }, { "epoch": 1.618570935450466, "grad_norm": 0.19402813911437988, "learning_rate": 2.604155316129654e-06, "log_odds_chosen": 1.3943512439727783, "log_odds_ratio": -0.24305211007595062, "logits/chosen": -0.4123231768608093, "logits/rejected": -1.2003227472305298, "logps/chosen": -1.3726210594177246, "logps/rejected": -2.560669183731079, "loss": 1.4197, "nll_loss": 1.395385503768921, "rewards/accuracies": 1.0, "rewards/chosen": -0.13726209104061127, "rewards/margins": 0.11880481988191605, "rewards/rejected": -0.2560669183731079, "step": 587 }, { "epoch": 1.6213324128408697, "grad_norm": 0.20310352742671967, "learning_rate": 2.596147486025996e-06, "log_odds_chosen": 1.207950472831726, "log_odds_ratio": -0.2783939838409424, "logits/chosen": -0.4812185764312744, "logits/rejected": -1.2721320390701294, "logps/chosen": -1.4551044702529907, "logps/rejected": -2.4887349605560303, "loss": 1.4958, "nll_loss": 1.4679381847381592, "rewards/accuracies": 1.0, "rewards/chosen": -0.1455104649066925, "rewards/margins": 0.1033630222082138, "rewards/rejected": -0.2488734871149063, "step": 588 }, { "epoch": 1.6240938902312738, "grad_norm": 0.1964907944202423, "learning_rate": 2.5881386678581587e-06, "log_odds_chosen": 1.2026597261428833, "log_odds_ratio": -0.2818419635295868, "logits/chosen": -0.4697020351886749, "logits/rejected": -1.3185147047042847, "logps/chosen": -1.4676806926727295, "logps/rejected": -2.4999566078186035, "loss": 1.5004, "nll_loss": 1.4721908569335938, "rewards/accuracies": 1.0, "rewards/chosen": -0.14676807820796967, "rewards/margins": 0.10322756320238113, "rewards/rejected": -0.2499956488609314, "step": 589 }, { "epoch": 1.6268553676216775, "grad_norm": 0.1904708296060562, "learning_rate": 2.580128943929139e-06, "log_odds_chosen": 1.256077527999878, "log_odds_ratio": -0.26542928814888, "logits/chosen": -0.5716696381568909, "logits/rejected": -1.3447500467300415, "logps/chosen": -1.4825561046600342, "logps/rejected": -2.565556049346924, "loss": 1.5033, "nll_loss": 1.4767552614212036, "rewards/accuracies": 1.0, "rewards/chosen": -0.1482556164264679, "rewards/margins": 0.10830000042915344, "rewards/rejected": -0.25655558705329895, "step": 590 }, { "epoch": 1.6296168450120816, "grad_norm": 0.20396575331687927, "learning_rate": 2.5721183965512424e-06, "log_odds_chosen": 1.2327979803085327, "log_odds_ratio": -0.26732996106147766, "logits/chosen": -0.48041045665740967, "logits/rejected": -1.3550949096679688, "logps/chosen": -1.4357949495315552, "logps/rejected": -2.487416982650757, "loss": 1.47, "nll_loss": 1.4432601928710938, "rewards/accuracies": 1.0, "rewards/chosen": -0.14357951283454895, "rewards/margins": 0.10516219586133957, "rewards/rejected": -0.24874168634414673, "step": 591 }, { "epoch": 1.6323783224024853, "grad_norm": 0.2318975031375885, "learning_rate": 2.564107108045239e-06, "log_odds_chosen": 1.1630630493164062, "log_odds_ratio": -0.2828628122806549, "logits/chosen": -0.38048335909843445, "logits/rejected": -1.4143285751342773, "logps/chosen": -1.50632643699646, "logps/rejected": -2.5033631324768066, "loss": 1.5278, "nll_loss": 1.499497890472412, "rewards/accuracies": 1.0, "rewards/chosen": -0.15063263475894928, "rewards/margins": 0.09970366954803467, "rewards/rejected": -0.25033631920814514, "step": 592 }, { "epoch": 1.6351397997928891, "grad_norm": 0.2259538769721985, "learning_rate": 2.556095160739513e-06, "log_odds_chosen": 1.3017429113388062, "log_odds_ratio": -0.24936413764953613, "logits/chosen": -0.48417800664901733, "logits/rejected": -1.272289514541626, "logps/chosen": -1.4881000518798828, "logps/rejected": -2.6138710975646973, "loss": 1.5371, "nll_loss": 1.5121734142303467, "rewards/accuracies": 1.0, "rewards/chosen": -0.1488099992275238, "rewards/margins": 0.11257712543010712, "rewards/rejected": -0.2613871097564697, "step": 593 }, { "epoch": 1.637901277183293, "grad_norm": 0.20119845867156982, "learning_rate": 2.5480826369692178e-06, "log_odds_chosen": 1.1775891780853271, "log_odds_ratio": -0.3018704354763031, "logits/chosen": -0.46723365783691406, "logits/rejected": -1.2464815378189087, "logps/chosen": -1.4256364107131958, "logps/rejected": -2.4372968673706055, "loss": 1.4709, "nll_loss": 1.440727710723877, "rewards/accuracies": 1.0, "rewards/chosen": -0.14256364107131958, "rewards/margins": 0.10116603225469589, "rewards/rejected": -0.24372969567775726, "step": 594 }, { "epoch": 1.640662754573697, "grad_norm": 0.23628610372543335, "learning_rate": 2.5400696190754347e-06, "log_odds_chosen": 1.319360375404358, "log_odds_ratio": -0.2527797818183899, "logits/chosen": -0.5433751344680786, "logits/rejected": -1.3125942945480347, "logps/chosen": -1.410939335823059, "logps/rejected": -2.5374069213867188, "loss": 1.4554, "nll_loss": 1.430107593536377, "rewards/accuracies": 1.0, "rewards/chosen": -0.14109393954277039, "rewards/margins": 0.11264677345752716, "rewards/rejected": -0.25374072790145874, "step": 595 }, { "epoch": 1.6434242319641008, "grad_norm": 0.19188590347766876, "learning_rate": 2.532056189404318e-06, "log_odds_chosen": 1.2003345489501953, "log_odds_ratio": -0.2738706171512604, "logits/chosen": -0.5800256133079529, "logits/rejected": -1.2052563428878784, "logps/chosen": -1.3315454721450806, "logps/rejected": -2.3315844535827637, "loss": 1.3629, "nll_loss": 1.335497498512268, "rewards/accuracies": 1.0, "rewards/chosen": -0.13315454125404358, "rewards/margins": 0.1000039130449295, "rewards/rejected": -0.23315846920013428, "step": 596 }, { "epoch": 1.6461857093545047, "grad_norm": 0.2017136663198471, "learning_rate": 2.524042430306258e-06, "log_odds_chosen": 1.3254406452178955, "log_odds_ratio": -0.24632897973060608, "logits/chosen": -0.5421135425567627, "logits/rejected": -1.2799872159957886, "logps/chosen": -1.4299598932266235, "logps/rejected": -2.5672287940979004, "loss": 1.4611, "nll_loss": 1.4365150928497314, "rewards/accuracies": 1.0, "rewards/chosen": -0.14299599826335907, "rewards/margins": 0.1137268915772438, "rewards/rejected": -0.25672289729118347, "step": 597 }, { "epoch": 1.6489471867449086, "grad_norm": 0.2213965654373169, "learning_rate": 2.516028424135028e-06, "log_odds_chosen": 1.1672590970993042, "log_odds_ratio": -0.2739957273006439, "logits/chosen": -0.4646860957145691, "logits/rejected": -1.2242817878723145, "logps/chosen": -1.4266612529754639, "logps/rejected": -2.4114158153533936, "loss": 1.4773, "nll_loss": 1.449898362159729, "rewards/accuracies": 1.0, "rewards/chosen": -0.14266614615917206, "rewards/margins": 0.09847543388605118, "rewards/rejected": -0.24114155769348145, "step": 598 }, { "epoch": 1.6517086641353123, "grad_norm": 0.19725504517555237, "learning_rate": 2.5080142532469405e-06, "log_odds_chosen": 1.0459624528884888, "log_odds_ratio": -0.3061697483062744, "logits/chosen": -0.48442986607551575, "logits/rejected": -1.2821546792984009, "logps/chosen": -1.3831356763839722, "logps/rejected": -2.2537589073181152, "loss": 1.4229, "nll_loss": 1.3922457695007324, "rewards/accuracies": 1.0, "rewards/chosen": -0.13831356167793274, "rewards/margins": 0.08706232905387878, "rewards/rejected": -0.22537589073181152, "step": 599 }, { "epoch": 1.6544701415257164, "grad_norm": 0.19101086258888245, "learning_rate": 2.5e-06, "log_odds_chosen": 1.2026180028915405, "log_odds_ratio": -0.26589304208755493, "logits/chosen": -0.4705696702003479, "logits/rejected": -1.344178318977356, "logps/chosen": -1.4015612602233887, "logps/rejected": -2.415797472000122, "loss": 1.4376, "nll_loss": 1.4110187292099, "rewards/accuracies": 1.0, "rewards/chosen": -0.1401561200618744, "rewards/margins": 0.10142363607883453, "rewards/rejected": -0.24157977104187012, "step": 600 }, { "epoch": 1.65723161891612, "grad_norm": 0.25608527660369873, "learning_rate": 2.4919857467530608e-06, "log_odds_chosen": 1.3867363929748535, "log_odds_ratio": -0.22609145939350128, "logits/chosen": -0.5900862216949463, "logits/rejected": -1.3379616737365723, "logps/chosen": -1.4174102544784546, "logps/rejected": -2.6021459102630615, "loss": 1.4574, "nll_loss": 1.4347683191299438, "rewards/accuracies": 1.0, "rewards/chosen": -0.14174103736877441, "rewards/margins": 0.11847357451915741, "rewards/rejected": -0.26021459698677063, "step": 601 }, { "epoch": 1.6599930963065241, "grad_norm": 0.2190130352973938, "learning_rate": 2.4839715758649724e-06, "log_odds_chosen": 1.1793828010559082, "log_odds_ratio": -0.2720653712749481, "logits/chosen": -0.46051257848739624, "logits/rejected": -1.3344554901123047, "logps/chosen": -1.5085346698760986, "logps/rejected": -2.5193119049072266, "loss": 1.516, "nll_loss": 1.48884117603302, "rewards/accuracies": 1.0, "rewards/chosen": -0.1508534699678421, "rewards/margins": 0.10107773542404175, "rewards/rejected": -0.25193122029304504, "step": 602 }, { "epoch": 1.6627545736969278, "grad_norm": 0.20978617668151855, "learning_rate": 2.475957569693742e-06, "log_odds_chosen": 1.3839938640594482, "log_odds_ratio": -0.22898712754249573, "logits/chosen": -0.5034874081611633, "logits/rejected": -1.3480463027954102, "logps/chosen": -1.4645452499389648, "logps/rejected": -2.6591544151306152, "loss": 1.4951, "nll_loss": 1.4721866846084595, "rewards/accuracies": 1.0, "rewards/chosen": -0.14645451307296753, "rewards/margins": 0.11946091055870056, "rewards/rejected": -0.2659154534339905, "step": 603 }, { "epoch": 1.6655160510873317, "grad_norm": 0.21475645899772644, "learning_rate": 2.467943810595682e-06, "log_odds_chosen": 0.9173645973205566, "log_odds_ratio": -0.34221386909484863, "logits/chosen": -0.6114155650138855, "logits/rejected": -1.0889524221420288, "logps/chosen": -1.453405737876892, "logps/rejected": -2.221648693084717, "loss": 1.4869, "nll_loss": 1.452643871307373, "rewards/accuracies": 1.0, "rewards/chosen": -0.14534059166908264, "rewards/margins": 0.07682427763938904, "rewards/rejected": -0.22216485440731049, "step": 604 }, { "epoch": 1.6682775284777356, "grad_norm": 0.2003358155488968, "learning_rate": 2.459930380924566e-06, "log_odds_chosen": 1.2651866674423218, "log_odds_ratio": -0.25426802039146423, "logits/chosen": -0.4258677363395691, "logits/rejected": -1.2574172019958496, "logps/chosen": -1.444228172302246, "logps/rejected": -2.523735523223877, "loss": 1.4812, "nll_loss": 1.4557747840881348, "rewards/accuracies": 1.0, "rewards/chosen": -0.14442281424999237, "rewards/margins": 0.10795073211193085, "rewards/rejected": -0.2523735463619232, "step": 605 }, { "epoch": 1.6710390058681395, "grad_norm": 0.21021923422813416, "learning_rate": 2.4519173630307826e-06, "log_odds_chosen": 1.1628345251083374, "log_odds_ratio": -0.2822219133377075, "logits/chosen": -0.5015078783035278, "logits/rejected": -1.1216881275177002, "logps/chosen": -1.3848209381103516, "logps/rejected": -2.359142780303955, "loss": 1.4154, "nll_loss": 1.3871599435806274, "rewards/accuracies": 1.0, "rewards/chosen": -0.13848209381103516, "rewards/margins": 0.09743218868970871, "rewards/rejected": -0.23591428995132446, "step": 606 }, { "epoch": 1.6738004832585434, "grad_norm": 0.1923941969871521, "learning_rate": 2.443904839260488e-06, "log_odds_chosen": 1.3123136758804321, "log_odds_ratio": -0.24494773149490356, "logits/chosen": -0.47081807255744934, "logits/rejected": -1.412470817565918, "logps/chosen": -1.4636693000793457, "logps/rejected": -2.589008331298828, "loss": 1.4923, "nll_loss": 1.4678207635879517, "rewards/accuracies": 1.0, "rewards/chosen": -0.1463669389486313, "rewards/margins": 0.11253391206264496, "rewards/rejected": -0.25890082120895386, "step": 607 }, { "epoch": 1.676561960648947, "grad_norm": 0.22025130689144135, "learning_rate": 2.4358928919547616e-06, "log_odds_chosen": 1.247449517250061, "log_odds_ratio": -0.26593223214149475, "logits/chosen": -0.6333668231964111, "logits/rejected": -1.0267213582992554, "logps/chosen": -1.442392110824585, "logps/rejected": -2.512017011642456, "loss": 1.4899, "nll_loss": 1.4633054733276367, "rewards/accuracies": 1.0, "rewards/chosen": -0.14423921704292297, "rewards/margins": 0.10696247220039368, "rewards/rejected": -0.25120168924331665, "step": 608 }, { "epoch": 1.6793234380393511, "grad_norm": 0.20500634610652924, "learning_rate": 2.427881603448758e-06, "log_odds_chosen": 1.4188507795333862, "log_odds_ratio": -0.22846895456314087, "logits/chosen": -0.4928903579711914, "logits/rejected": -1.30084228515625, "logps/chosen": -1.3443284034729004, "logps/rejected": -2.542612075805664, "loss": 1.3963, "nll_loss": 1.3734071254730225, "rewards/accuracies": 1.0, "rewards/chosen": -0.1344328373670578, "rewards/margins": 0.11982835084199905, "rewards/rejected": -0.25426119565963745, "step": 609 }, { "epoch": 1.6820849154297548, "grad_norm": 0.17917676270008087, "learning_rate": 2.4198710560708623e-06, "log_odds_chosen": 1.4775829315185547, "log_odds_ratio": -0.21497662365436554, "logits/chosen": -0.5062185525894165, "logits/rejected": -1.4588205814361572, "logps/chosen": -1.4291479587554932, "logps/rejected": -2.6995742321014404, "loss": 1.4751, "nll_loss": 1.4535646438598633, "rewards/accuracies": 1.0, "rewards/chosen": -0.142914816737175, "rewards/margins": 0.12704265117645264, "rewards/rejected": -0.26995745301246643, "step": 610 }, { "epoch": 1.684846392820159, "grad_norm": 0.20908141136169434, "learning_rate": 2.411861332141842e-06, "log_odds_chosen": 1.3848119974136353, "log_odds_ratio": -0.2302837073802948, "logits/chosen": -0.5499736666679382, "logits/rejected": -1.3162180185317993, "logps/chosen": -1.345915675163269, "logps/rejected": -2.5131568908691406, "loss": 1.3779, "nll_loss": 1.3548433780670166, "rewards/accuracies": 1.0, "rewards/chosen": -0.13459156453609467, "rewards/margins": 0.11672413349151611, "rewards/rejected": -0.251315712928772, "step": 611 }, { "epoch": 1.6876078702105626, "grad_norm": 0.2531079053878784, "learning_rate": 2.403852513974004e-06, "log_odds_chosen": 1.3185536861419678, "log_odds_ratio": -0.2559550702571869, "logits/chosen": -0.418059378862381, "logits/rejected": -1.0699851512908936, "logps/chosen": -1.5014467239379883, "logps/rejected": -2.6189117431640625, "loss": 1.5026, "nll_loss": 1.4769723415374756, "rewards/accuracies": 1.0, "rewards/chosen": -0.15014466643333435, "rewards/margins": 0.11174651235342026, "rewards/rejected": -0.2618911862373352, "step": 612 }, { "epoch": 1.6903693476009665, "grad_norm": 0.20742389559745789, "learning_rate": 2.3958446838703462e-06, "log_odds_chosen": 1.4952136278152466, "log_odds_ratio": -0.2095656543970108, "logits/chosen": -0.4312644898891449, "logits/rejected": -1.4026414155960083, "logps/chosen": -1.4566928148269653, "logps/rejected": -2.752230167388916, "loss": 1.471, "nll_loss": 1.4500129222869873, "rewards/accuracies": 1.0, "rewards/chosen": -0.14566928148269653, "rewards/margins": 0.12955373525619507, "rewards/rejected": -0.275223046541214, "step": 613 }, { "epoch": 1.6931308249913704, "grad_norm": 0.19029247760772705, "learning_rate": 2.3878379241237136e-06, "log_odds_chosen": 1.1128027439117432, "log_odds_ratio": -0.2920597791671753, "logits/chosen": -0.5362916588783264, "logits/rejected": -1.0783685445785522, "logps/chosen": -1.4788368940353394, "logps/rejected": -2.426758050918579, "loss": 1.5086, "nll_loss": 1.4793455600738525, "rewards/accuracies": 1.0, "rewards/chosen": -0.14788369834423065, "rewards/margins": 0.09479210525751114, "rewards/rejected": -0.2426757961511612, "step": 614 }, { "epoch": 1.6958923023817742, "grad_norm": 0.19316430389881134, "learning_rate": 2.3798323170159487e-06, "log_odds_chosen": 1.215958833694458, "log_odds_ratio": -0.2683961093425751, "logits/chosen": -0.5434881448745728, "logits/rejected": -1.3635523319244385, "logps/chosen": -1.4304609298706055, "logps/rejected": -2.4664320945739746, "loss": 1.4648, "nll_loss": 1.4380061626434326, "rewards/accuracies": 1.0, "rewards/chosen": -0.14304609596729279, "rewards/margins": 0.10359711199998856, "rewards/rejected": -0.24664321541786194, "step": 615 }, { "epoch": 1.6986537797721781, "grad_norm": 0.20579014718532562, "learning_rate": 2.3718279448170527e-06, "log_odds_chosen": 1.2950018644332886, "log_odds_ratio": -0.2555186152458191, "logits/chosen": -0.5487691760063171, "logits/rejected": -1.1629122495651245, "logps/chosen": -1.4710986614227295, "logps/rejected": -2.586869955062866, "loss": 1.507, "nll_loss": 1.4814012050628662, "rewards/accuracies": 1.0, "rewards/chosen": -0.14710988104343414, "rewards/margins": 0.1115771159529686, "rewards/rejected": -0.25868698954582214, "step": 616 }, { "epoch": 1.701415257162582, "grad_norm": 0.1915186047554016, "learning_rate": 2.363824889784333e-06, "log_odds_chosen": 1.3144534826278687, "log_odds_ratio": -0.2478482872247696, "logits/chosen": -0.4563327729701996, "logits/rejected": -1.3170278072357178, "logps/chosen": -1.381361961364746, "logps/rejected": -2.494753122329712, "loss": 1.4223, "nll_loss": 1.3975437879562378, "rewards/accuracies": 1.0, "rewards/chosen": -0.13813620805740356, "rewards/margins": 0.11133913695812225, "rewards/rejected": -0.24947534501552582, "step": 617 }, { "epoch": 1.704176734552986, "grad_norm": 0.2174552083015442, "learning_rate": 2.3558232341615645e-06, "log_odds_chosen": 1.1446022987365723, "log_odds_ratio": -0.28423556685447693, "logits/chosen": -0.4333910346031189, "logits/rejected": -1.2090650796890259, "logps/chosen": -1.4719913005828857, "logps/rejected": -2.4457285404205322, "loss": 1.5174, "nll_loss": 1.4889425039291382, "rewards/accuracies": 1.0, "rewards/chosen": -0.1471991240978241, "rewards/margins": 0.09737375378608704, "rewards/rejected": -0.24457289278507233, "step": 618 }, { "epoch": 1.7069382119433896, "grad_norm": 0.19920134544372559, "learning_rate": 2.3478230601781374e-06, "log_odds_chosen": 1.2477004528045654, "log_odds_ratio": -0.261337548494339, "logits/chosen": -0.6517589688301086, "logits/rejected": -1.3488858938217163, "logps/chosen": -1.452478051185608, "logps/rejected": -2.5188519954681396, "loss": 1.4912, "nll_loss": 1.46504545211792, "rewards/accuracies": 1.0, "rewards/chosen": -0.14524781703948975, "rewards/margins": 0.1066373959183693, "rewards/rejected": -0.25188520550727844, "step": 619 }, { "epoch": 1.7096996893337937, "grad_norm": 0.19521193206310272, "learning_rate": 2.339824450048218e-06, "log_odds_chosen": 1.381776213645935, "log_odds_ratio": -0.22621171176433563, "logits/chosen": -0.4401188790798187, "logits/rejected": -1.3361949920654297, "logps/chosen": -1.4716118574142456, "logps/rejected": -2.6622977256774902, "loss": 1.506, "nll_loss": 1.4834117889404297, "rewards/accuracies": 1.0, "rewards/chosen": -0.14716118574142456, "rewards/margins": 0.11906860768795013, "rewards/rejected": -0.2662297785282135, "step": 620 }, { "epoch": 1.7124611667241973, "grad_norm": 0.21328336000442505, "learning_rate": 2.331827485969901e-06, "log_odds_chosen": 1.3648643493652344, "log_odds_ratio": -0.24040046334266663, "logits/chosen": -0.5407308340072632, "logits/rejected": -1.3733265399932861, "logps/chosen": -1.4444665908813477, "logps/rejected": -2.6159818172454834, "loss": 1.4725, "nll_loss": 1.448486328125, "rewards/accuracies": 1.0, "rewards/chosen": -0.14444665610790253, "rewards/margins": 0.11715152859687805, "rewards/rejected": -0.2615981698036194, "step": 621 }, { "epoch": 1.7152226441146015, "grad_norm": 0.21624958515167236, "learning_rate": 2.323832250124365e-06, "log_odds_chosen": 1.4620325565338135, "log_odds_ratio": -0.21990615129470825, "logits/chosen": -0.5375818014144897, "logits/rejected": -1.3520660400390625, "logps/chosen": -1.4297106266021729, "logps/rejected": -2.691267490386963, "loss": 1.4527, "nll_loss": 1.4307479858398438, "rewards/accuracies": 1.0, "rewards/chosen": -0.14297106862068176, "rewards/margins": 0.12615568935871124, "rewards/rejected": -0.2691267728805542, "step": 622 }, { "epoch": 1.7179841215050051, "grad_norm": 0.2126341015100479, "learning_rate": 2.3158388246750308e-06, "log_odds_chosen": 1.093624234199524, "log_odds_ratio": -0.30036717653274536, "logits/chosen": -0.5867878794670105, "logits/rejected": -1.1208299398422241, "logps/chosen": -1.434640884399414, "logps/rejected": -2.356733560562134, "loss": 1.4736, "nll_loss": 1.443612813949585, "rewards/accuracies": 1.0, "rewards/chosen": -0.1434640735387802, "rewards/margins": 0.09220927953720093, "rewards/rejected": -0.23567335307598114, "step": 623 }, { "epoch": 1.720745598895409, "grad_norm": 0.2003176212310791, "learning_rate": 2.307847291766709e-06, "log_odds_chosen": 1.270575761795044, "log_odds_ratio": -0.2549559473991394, "logits/chosen": -0.5376480221748352, "logits/rejected": -1.3180276155471802, "logps/chosen": -1.4446300268173218, "logps/rejected": -2.5307838916778564, "loss": 1.4805, "nll_loss": 1.4549742937088013, "rewards/accuracies": 1.0, "rewards/chosen": -0.14446301758289337, "rewards/margins": 0.10861539095640182, "rewards/rejected": -0.2530784010887146, "step": 624 }, { "epoch": 1.723507076285813, "grad_norm": 0.19683247804641724, "learning_rate": 2.299857733524767e-06, "log_odds_chosen": 1.130317211151123, "log_odds_ratio": -0.28923070430755615, "logits/chosen": -0.5133277773857117, "logits/rejected": -1.3150393962860107, "logps/chosen": -1.4076216220855713, "logps/rejected": -2.3613972663879395, "loss": 1.4449, "nll_loss": 1.415968894958496, "rewards/accuracies": 1.0, "rewards/chosen": -0.14076215028762817, "rewards/margins": 0.09537756443023682, "rewards/rejected": -0.23613972961902618, "step": 625 }, { "epoch": 1.7262685536762168, "grad_norm": 0.21189947426319122, "learning_rate": 2.291870232054277e-06, "log_odds_chosen": 1.1442424058914185, "log_odds_ratio": -0.2826092541217804, "logits/chosen": -0.4401281774044037, "logits/rejected": -1.4753754138946533, "logps/chosen": -1.4552067518234253, "logps/rejected": -2.4241669178009033, "loss": 1.502, "nll_loss": 1.4737443923950195, "rewards/accuracies": 1.0, "rewards/chosen": -0.14552068710327148, "rewards/margins": 0.09689600020647049, "rewards/rejected": -0.24241667985916138, "step": 626 }, { "epoch": 1.7290300310666207, "grad_norm": 0.22946350276470184, "learning_rate": 2.283884869439175e-06, "log_odds_chosen": 1.4120537042617798, "log_odds_ratio": -0.22159436345100403, "logits/chosen": -0.5116217136383057, "logits/rejected": -1.4520902633666992, "logps/chosen": -1.516663908958435, "logps/rejected": -2.7454750537872314, "loss": 1.5292, "nll_loss": 1.5070664882659912, "rewards/accuracies": 1.0, "rewards/chosen": -0.15166638791561127, "rewards/margins": 0.12288112938404083, "rewards/rejected": -0.2745475172996521, "step": 627 }, { "epoch": 1.7317915084570243, "grad_norm": 0.20952844619750977, "learning_rate": 2.2759017277414165e-06, "log_odds_chosen": 1.6083521842956543, "log_odds_ratio": -0.19025254249572754, "logits/chosen": -0.5128480195999146, "logits/rejected": -1.405259609222412, "logps/chosen": -1.3790092468261719, "logps/rejected": -2.761821985244751, "loss": 1.4027, "nll_loss": 1.3836663961410522, "rewards/accuracies": 1.0, "rewards/chosen": -0.1379009336233139, "rewards/margins": 0.13828125596046448, "rewards/rejected": -0.2761822044849396, "step": 628 }, { "epoch": 1.7345529858474285, "grad_norm": 0.20528221130371094, "learning_rate": 2.267920889000135e-06, "log_odds_chosen": 1.1331124305725098, "log_odds_ratio": -0.2840345501899719, "logits/chosen": -0.48978346586227417, "logits/rejected": -1.3469130992889404, "logps/chosen": -1.4904502630233765, "logps/rejected": -2.4590048789978027, "loss": 1.5178, "nll_loss": 1.4894224405288696, "rewards/accuracies": 1.0, "rewards/chosen": -0.14904503524303436, "rewards/margins": 0.09685546159744263, "rewards/rejected": -0.2459004819393158, "step": 629 }, { "epoch": 1.7373144632378321, "grad_norm": 0.21817800402641296, "learning_rate": 2.2599424352307958e-06, "log_odds_chosen": 1.4032493829727173, "log_odds_ratio": -0.22257739305496216, "logits/chosen": -0.5719871520996094, "logits/rejected": -1.3948708772659302, "logps/chosen": -1.295206904411316, "logps/rejected": -2.467498779296875, "loss": 1.3485, "nll_loss": 1.326221227645874, "rewards/accuracies": 1.0, "rewards/chosen": -0.12952068448066711, "rewards/margins": 0.11722921580076218, "rewards/rejected": -0.2467498928308487, "step": 630 }, { "epoch": 1.7400759406282362, "grad_norm": 0.200408473610878, "learning_rate": 2.2519664484243564e-06, "log_odds_chosen": 1.247944951057434, "log_odds_ratio": -0.26303768157958984, "logits/chosen": -0.5529355406761169, "logits/rejected": -1.2141852378845215, "logps/chosen": -1.4364863634109497, "logps/rejected": -2.4989378452301025, "loss": 1.4599, "nll_loss": 1.4335633516311646, "rewards/accuracies": 1.0, "rewards/chosen": -0.14364862442016602, "rewards/margins": 0.10624517500400543, "rewards/rejected": -0.24989379942417145, "step": 631 }, { "epoch": 1.74283741801864, "grad_norm": 0.1950322389602661, "learning_rate": 2.243993010546422e-06, "log_odds_chosen": 1.312355875968933, "log_odds_ratio": -0.2571851909160614, "logits/chosen": -0.5099678039550781, "logits/rejected": -1.0664350986480713, "logps/chosen": -1.4604507684707642, "logps/rejected": -2.5883612632751465, "loss": 1.4882, "nll_loss": 1.4624412059783936, "rewards/accuracies": 1.0, "rewards/chosen": -0.14604508876800537, "rewards/margins": 0.1127910241484642, "rewards/rejected": -0.25883612036705017, "step": 632 }, { "epoch": 1.745598895409044, "grad_norm": 0.2070559412240982, "learning_rate": 2.2360222035364027e-06, "log_odds_chosen": 1.1783565282821655, "log_odds_ratio": -0.27288612723350525, "logits/chosen": -0.49306997656822205, "logits/rejected": -1.191769003868103, "logps/chosen": -1.4389595985412598, "logps/rejected": -2.4384591579437256, "loss": 1.4741, "nll_loss": 1.4468226432800293, "rewards/accuracies": 1.0, "rewards/chosen": -0.1438959836959839, "rewards/margins": 0.09994994103908539, "rewards/rejected": -0.24384590983390808, "step": 633 }, { "epoch": 1.7483603727994477, "grad_norm": 0.19269593060016632, "learning_rate": 2.228054109306673e-06, "log_odds_chosen": 1.489130973815918, "log_odds_ratio": -0.21353797614574432, "logits/chosen": -0.5342303514480591, "logits/rejected": -1.532766342163086, "logps/chosen": -1.4256529808044434, "logps/rejected": -2.711120128631592, "loss": 1.452, "nll_loss": 1.4306724071502686, "rewards/accuracies": 1.0, "rewards/chosen": -0.1425653100013733, "rewards/margins": 0.12854671478271484, "rewards/rejected": -0.27111202478408813, "step": 634 }, { "epoch": 1.7511218501898516, "grad_norm": 0.2160586714744568, "learning_rate": 2.2200888097417308e-06, "log_odds_chosen": 1.392540454864502, "log_odds_ratio": -0.22759199142456055, "logits/chosen": -0.5508123636245728, "logits/rejected": -1.3161637783050537, "logps/chosen": -1.459984302520752, "logps/rejected": -2.658564329147339, "loss": 1.5019, "nll_loss": 1.4791315793991089, "rewards/accuracies": 1.0, "rewards/chosen": -0.14599843323230743, "rewards/margins": 0.11985800415277481, "rewards/rejected": -0.26585644483566284, "step": 635 }, { "epoch": 1.7538833275802554, "grad_norm": 0.21206967532634735, "learning_rate": 2.212126386697352e-06, "log_odds_chosen": 1.169439435005188, "log_odds_ratio": -0.2783280909061432, "logits/chosen": -0.5745557546615601, "logits/rejected": -1.096634864807129, "logps/chosen": -1.4884278774261475, "logps/rejected": -2.4870123863220215, "loss": 1.5165, "nll_loss": 1.4886353015899658, "rewards/accuracies": 1.0, "rewards/chosen": -0.14884279668331146, "rewards/margins": 0.09985843300819397, "rewards/rejected": -0.24870122969150543, "step": 636 }, { "epoch": 1.7566448049706593, "grad_norm": 0.19732512533664703, "learning_rate": 2.204166921999753e-06, "log_odds_chosen": 1.3438539505004883, "log_odds_ratio": -0.2363303005695343, "logits/chosen": -0.49269139766693115, "logits/rejected": -1.3638474941253662, "logps/chosen": -1.421623706817627, "logps/rejected": -2.560853958129883, "loss": 1.4491, "nll_loss": 1.4254412651062012, "rewards/accuracies": 1.0, "rewards/chosen": -0.14216238260269165, "rewards/margins": 0.11392302811145782, "rewards/rejected": -0.25608542561531067, "step": 637 }, { "epoch": 1.7594062823610632, "grad_norm": 0.2325635850429535, "learning_rate": 2.196210497444749e-06, "log_odds_chosen": 1.4335474967956543, "log_odds_ratio": -0.21837694942951202, "logits/chosen": -0.6258276104927063, "logits/rejected": -1.4965717792510986, "logps/chosen": -1.460412621498108, "logps/rejected": -2.6993117332458496, "loss": 1.4758, "nll_loss": 1.453973412513733, "rewards/accuracies": 1.0, "rewards/chosen": -0.14604127407073975, "rewards/margins": 0.12388992309570312, "rewards/rejected": -0.2699311673641205, "step": 638 }, { "epoch": 1.7621677597514669, "grad_norm": 0.20603640377521515, "learning_rate": 2.1882571947969134e-06, "log_odds_chosen": 1.176297903060913, "log_odds_ratio": -0.2788979709148407, "logits/chosen": -0.5073390603065491, "logits/rejected": -1.2635929584503174, "logps/chosen": -1.4227516651153564, "logps/rejected": -2.4186222553253174, "loss": 1.46, "nll_loss": 1.4320876598358154, "rewards/accuracies": 1.0, "rewards/chosen": -0.14227518439292908, "rewards/margins": 0.09958705306053162, "rewards/rejected": -0.2418622374534607, "step": 639 }, { "epoch": 1.764929237141871, "grad_norm": 0.20436997711658478, "learning_rate": 2.1803070957887348e-06, "log_odds_chosen": 1.1730990409851074, "log_odds_ratio": -0.27653899788856506, "logits/chosen": -0.4932800829410553, "logits/rejected": -1.2079682350158691, "logps/chosen": -1.3943347930908203, "logps/rejected": -2.378185272216797, "loss": 1.4255, "nll_loss": 1.3978185653686523, "rewards/accuracies": 1.0, "rewards/chosen": -0.13943347334861755, "rewards/margins": 0.0983850508928299, "rewards/rejected": -0.23781853914260864, "step": 640 }, { "epoch": 1.7676907145322747, "grad_norm": 0.19971086084842682, "learning_rate": 2.1723602821197835e-06, "log_odds_chosen": 1.2909458875656128, "log_odds_ratio": -0.251300185918808, "logits/chosen": -0.5772767066955566, "logits/rejected": -1.3081845045089722, "logps/chosen": -1.4345896244049072, "logps/rejected": -2.5330145359039307, "loss": 1.4736, "nll_loss": 1.4484513998031616, "rewards/accuracies": 1.0, "rewards/chosen": -0.14345896244049072, "rewards/margins": 0.10984249413013458, "rewards/rejected": -0.2533014416694641, "step": 641 }, { "epoch": 1.7704521919226788, "grad_norm": 0.29978740215301514, "learning_rate": 2.1644168354558623e-06, "log_odds_chosen": 1.3097952604293823, "log_odds_ratio": -0.24568939208984375, "logits/chosen": -0.5743271708488464, "logits/rejected": -1.243380069732666, "logps/chosen": -1.4992752075195312, "logps/rejected": -2.6309869289398193, "loss": 1.5258, "nll_loss": 1.5011857748031616, "rewards/accuracies": 1.0, "rewards/chosen": -0.1499275267124176, "rewards/margins": 0.11317116022109985, "rewards/rejected": -0.26309868693351746, "step": 642 }, { "epoch": 1.7732136693130824, "grad_norm": 0.23188254237174988, "learning_rate": 2.1564768374281774e-06, "log_odds_chosen": 1.5355854034423828, "log_odds_ratio": -0.20826081931591034, "logits/chosen": -0.6844518184661865, "logits/rejected": -1.5084853172302246, "logps/chosen": -1.4327890872955322, "logps/rejected": -2.7608115673065186, "loss": 1.4598, "nll_loss": 1.439016580581665, "rewards/accuracies": 1.0, "rewards/chosen": -0.14327891170978546, "rewards/margins": 0.13280226290225983, "rewards/rejected": -0.2760811448097229, "step": 643 }, { "epoch": 1.7759751467034863, "grad_norm": 0.2001214474439621, "learning_rate": 2.1485403696324935e-06, "log_odds_chosen": 1.2288817167282104, "log_odds_ratio": -0.25941622257232666, "logits/chosen": -0.5647374987602234, "logits/rejected": -1.275839924812317, "logps/chosen": -1.4434062242507935, "logps/rejected": -2.4897499084472656, "loss": 1.4847, "nll_loss": 1.4588059186935425, "rewards/accuracies": 1.0, "rewards/chosen": -0.1443406343460083, "rewards/margins": 0.1046343594789505, "rewards/rejected": -0.2489749938249588, "step": 644 }, { "epoch": 1.7787366240938902, "grad_norm": 0.22045178711414337, "learning_rate": 2.140607513628296e-06, "log_odds_chosen": 1.320005178451538, "log_odds_ratio": -0.24695321917533875, "logits/chosen": -0.5557460784912109, "logits/rejected": -1.3601739406585693, "logps/chosen": -1.4880132675170898, "logps/rejected": -2.6263864040374756, "loss": 1.5197, "nll_loss": 1.4949910640716553, "rewards/accuracies": 1.0, "rewards/chosen": -0.14880132675170898, "rewards/margins": 0.11383730173110962, "rewards/rejected": -0.2626386284828186, "step": 645 }, { "epoch": 1.781498101484294, "grad_norm": 0.18967492878437042, "learning_rate": 2.1326783509379555e-06, "log_odds_chosen": 1.5548627376556396, "log_odds_ratio": -0.21647949516773224, "logits/chosen": -0.6216481924057007, "logits/rejected": -1.3868874311447144, "logps/chosen": -1.336053729057312, "logps/rejected": -2.667104959487915, "loss": 1.3806, "nll_loss": 1.359001874923706, "rewards/accuracies": 1.0, "rewards/chosen": -0.13360537588596344, "rewards/margins": 0.13310512900352478, "rewards/rejected": -0.2667105197906494, "step": 646 }, { "epoch": 1.784259578874698, "grad_norm": 0.21197842061519623, "learning_rate": 2.1247529630458856e-06, "log_odds_chosen": 1.184888482093811, "log_odds_ratio": -0.28200045228004456, "logits/chosen": -0.5845165252685547, "logits/rejected": -1.2176001071929932, "logps/chosen": -1.4788322448730469, "logps/rejected": -2.495992660522461, "loss": 1.5091, "nll_loss": 1.480862021446228, "rewards/accuracies": 1.0, "rewards/chosen": -0.14788323640823364, "rewards/margins": 0.1017160415649414, "rewards/rejected": -0.24959927797317505, "step": 647 }, { "epoch": 1.7870210562651019, "grad_norm": 0.23366686701774597, "learning_rate": 2.11683143139771e-06, "log_odds_chosen": 1.3463926315307617, "log_odds_ratio": -0.24869170784950256, "logits/chosen": -0.545903205871582, "logits/rejected": -1.2253026962280273, "logps/chosen": -1.4214776754379272, "logps/rejected": -2.5768463611602783, "loss": 1.4653, "nll_loss": 1.4404457807540894, "rewards/accuracies": 1.0, "rewards/chosen": -0.14214777946472168, "rewards/margins": 0.11553685367107391, "rewards/rejected": -0.2576846182346344, "step": 648 }, { "epoch": 1.7897825336555058, "grad_norm": 0.42823606729507446, "learning_rate": 2.1089138373994226e-06, "log_odds_chosen": 1.402082920074463, "log_odds_ratio": -0.22581687569618225, "logits/chosen": -0.5818082690238953, "logits/rejected": -1.4480851888656616, "logps/chosen": -1.487385630607605, "logps/rejected": -2.7044169902801514, "loss": 1.5324, "nll_loss": 1.509821891784668, "rewards/accuracies": 1.0, "rewards/chosen": -0.1487385630607605, "rewards/margins": 0.1217031329870224, "rewards/rejected": -0.2704416811466217, "step": 649 }, { "epoch": 1.7925440110459094, "grad_norm": 0.22044222056865692, "learning_rate": 2.1010002624165528e-06, "log_odds_chosen": 1.1276612281799316, "log_odds_ratio": -0.3008562922477722, "logits/chosen": -0.510073721408844, "logits/rejected": -1.1139882802963257, "logps/chosen": -1.516619086265564, "logps/rejected": -2.487675189971924, "loss": 1.5416, "nll_loss": 1.5114856958389282, "rewards/accuracies": 1.0, "rewards/chosen": -0.1516619175672531, "rewards/margins": 0.09710560739040375, "rewards/rejected": -0.24876752495765686, "step": 650 }, { "epoch": 1.7953054884363135, "grad_norm": 0.1925649344921112, "learning_rate": 2.0930907877733255e-06, "log_odds_chosen": 1.3609724044799805, "log_odds_ratio": -0.23300984501838684, "logits/chosen": -0.4456290304660797, "logits/rejected": -1.386549711227417, "logps/chosen": -1.4195668697357178, "logps/rejected": -2.5815658569335938, "loss": 1.4427, "nll_loss": 1.4193679094314575, "rewards/accuracies": 1.0, "rewards/chosen": -0.14195668697357178, "rewards/margins": 0.11619990319013596, "rewards/rejected": -0.25815656781196594, "step": 651 }, { "epoch": 1.7980669658267172, "grad_norm": 0.20307257771492004, "learning_rate": 2.085185494751831e-06, "log_odds_chosen": 1.4135050773620605, "log_odds_ratio": -0.23486249148845673, "logits/chosen": -0.5184406042098999, "logits/rejected": -1.2367839813232422, "logps/chosen": -1.3290120363235474, "logps/rejected": -2.525050640106201, "loss": 1.3869, "nll_loss": 1.3634486198425293, "rewards/accuracies": 1.0, "rewards/chosen": -0.13290120661258698, "rewards/margins": 0.11960387229919434, "rewards/rejected": -0.2525050640106201, "step": 652 }, { "epoch": 1.8008284432171213, "grad_norm": 0.20454271137714386, "learning_rate": 2.077284464591187e-06, "log_odds_chosen": 1.3477123975753784, "log_odds_ratio": -0.2414405345916748, "logits/chosen": -0.4554806351661682, "logits/rejected": -1.3435975313186646, "logps/chosen": -1.4703125953674316, "logps/rejected": -2.6332030296325684, "loss": 1.4933, "nll_loss": 1.469182014465332, "rewards/accuracies": 1.0, "rewards/chosen": -0.1470312774181366, "rewards/margins": 0.11628903448581696, "rewards/rejected": -0.26332032680511475, "step": 653 }, { "epoch": 1.803589920607525, "grad_norm": 0.21406574547290802, "learning_rate": 2.069387778486703e-06, "log_odds_chosen": 1.4436681270599365, "log_odds_ratio": -0.22512835264205933, "logits/chosen": -0.5911950469017029, "logits/rejected": -1.4518043994903564, "logps/chosen": -1.5536481142044067, "logps/rejected": -2.819931983947754, "loss": 1.5765, "nll_loss": 1.5539945363998413, "rewards/accuracies": 1.0, "rewards/chosen": -0.15536481142044067, "rewards/margins": 0.1266283392906189, "rewards/rejected": -0.28199315071105957, "step": 654 }, { "epoch": 1.8063513979979289, "grad_norm": 0.22096370160579681, "learning_rate": 2.0614955175890464e-06, "log_odds_chosen": 1.386144995689392, "log_odds_ratio": -0.24010394513607025, "logits/chosen": -0.5797655582427979, "logits/rejected": -1.404088020324707, "logps/chosen": -1.4838389158248901, "logps/rejected": -2.6851325035095215, "loss": 1.4997, "nll_loss": 1.4756494760513306, "rewards/accuracies": 1.0, "rewards/chosen": -0.14838388562202454, "rewards/margins": 0.12012939155101776, "rewards/rejected": -0.2685132920742035, "step": 655 }, { "epoch": 1.8091128753883328, "grad_norm": 0.20253252983093262, "learning_rate": 2.053607763003409e-06, "log_odds_chosen": 1.56206476688385, "log_odds_ratio": -0.22250880300998688, "logits/chosen": -0.5364841222763062, "logits/rejected": -1.4170889854431152, "logps/chosen": -1.4285337924957275, "logps/rejected": -2.787846088409424, "loss": 1.4693, "nll_loss": 1.4470548629760742, "rewards/accuracies": 1.0, "rewards/chosen": -0.14285339415073395, "rewards/margins": 0.13593122363090515, "rewards/rejected": -0.2787846028804779, "step": 656 }, { "epoch": 1.8118743527787367, "grad_norm": 0.20023828744888306, "learning_rate": 2.045724595788673e-06, "log_odds_chosen": 1.521580696105957, "log_odds_ratio": -0.20243754982948303, "logits/chosen": -0.5525627136230469, "logits/rejected": -1.4261201620101929, "logps/chosen": -1.418937087059021, "logps/rejected": -2.731451988220215, "loss": 1.4506, "nll_loss": 1.4303292036056519, "rewards/accuracies": 1.0, "rewards/chosen": -0.14189371466636658, "rewards/margins": 0.1312515139579773, "rewards/rejected": -0.27314525842666626, "step": 657 }, { "epoch": 1.8146358301691405, "grad_norm": 0.21408717334270477, "learning_rate": 2.037846096956578e-06, "log_odds_chosen": 1.4303852319717407, "log_odds_ratio": -0.2271844446659088, "logits/chosen": -0.5656251311302185, "logits/rejected": -1.2124847173690796, "logps/chosen": -1.4132343530654907, "logps/rejected": -2.644820213317871, "loss": 1.4516, "nll_loss": 1.4288756847381592, "rewards/accuracies": 1.0, "rewards/chosen": -0.14132341742515564, "rewards/margins": 0.12315858900547028, "rewards/rejected": -0.2644820213317871, "step": 658 }, { "epoch": 1.8173973075595442, "grad_norm": 0.19765910506248474, "learning_rate": 2.0299723474708917e-06, "log_odds_chosen": 1.2989411354064941, "log_odds_ratio": -0.25527897477149963, "logits/chosen": -0.5173696875572205, "logits/rejected": -1.2977350950241089, "logps/chosen": -1.4076229333877563, "logps/rejected": -2.513385772705078, "loss": 1.4382, "nll_loss": 1.4126545190811157, "rewards/accuracies": 1.0, "rewards/chosen": -0.14076228439807892, "rewards/margins": 0.11057627201080322, "rewards/rejected": -0.25133857131004333, "step": 659 }, { "epoch": 1.8201587849499483, "grad_norm": 0.22843243181705475, "learning_rate": 2.02210342824657e-06, "log_odds_chosen": 1.5275391340255737, "log_odds_ratio": -0.2096565067768097, "logits/chosen": -0.6402820944786072, "logits/rejected": -1.4909406900405884, "logps/chosen": -1.4253227710723877, "logps/rejected": -2.739055633544922, "loss": 1.4597, "nll_loss": 1.4387259483337402, "rewards/accuracies": 1.0, "rewards/chosen": -0.14253227412700653, "rewards/margins": 0.13137328624725342, "rewards/rejected": -0.27390557527542114, "step": 660 }, { "epoch": 1.822920262340352, "grad_norm": 0.2163650542497635, "learning_rate": 2.0142394201489334e-06, "log_odds_chosen": 1.5283253192901611, "log_odds_ratio": -0.19948820769786835, "logits/chosen": -0.5087510347366333, "logits/rejected": -1.5421096086502075, "logps/chosen": -1.448826551437378, "logps/rejected": -2.7744712829589844, "loss": 1.4652, "nll_loss": 1.4452749490737915, "rewards/accuracies": 1.0, "rewards/chosen": -0.14488264918327332, "rewards/margins": 0.1325644999742508, "rewards/rejected": -0.2774471640586853, "step": 661 }, { "epoch": 1.825681739730756, "grad_norm": 0.19599059224128723, "learning_rate": 2.0063804039928324e-06, "log_odds_chosen": 1.5170414447784424, "log_odds_ratio": -0.21490773558616638, "logits/chosen": -0.5267460942268372, "logits/rejected": -1.3850765228271484, "logps/chosen": -1.4472379684448242, "logps/rejected": -2.7692365646362305, "loss": 1.4741, "nll_loss": 1.4525951147079468, "rewards/accuracies": 1.0, "rewards/chosen": -0.1447238028049469, "rewards/margins": 0.13219986855983734, "rewards/rejected": -0.27692368626594543, "step": 662 }, { "epoch": 1.8284432171211598, "grad_norm": 0.20956429839134216, "learning_rate": 1.9985264605418185e-06, "log_odds_chosen": 1.5385915040969849, "log_odds_ratio": -0.21618826687335968, "logits/chosen": -0.610063374042511, "logits/rejected": -1.4560281038284302, "logps/chosen": -1.4275507926940918, "logps/rejected": -2.757424831390381, "loss": 1.4673, "nll_loss": 1.4456710815429688, "rewards/accuracies": 1.0, "rewards/chosen": -0.14275507628917694, "rewards/margins": 0.1329873949289322, "rewards/rejected": -0.2757425010204315, "step": 663 }, { "epoch": 1.8312046945115636, "grad_norm": 0.22745032608509064, "learning_rate": 1.9906776705073113e-06, "log_odds_chosen": 1.7066198587417603, "log_odds_ratio": -0.18275754153728485, "logits/chosen": -0.6127128005027771, "logits/rejected": -1.5824415683746338, "logps/chosen": -1.4544042348861694, "logps/rejected": -2.9532053470611572, "loss": 1.4755, "nll_loss": 1.45720374584198, "rewards/accuracies": 1.0, "rewards/chosen": -0.14544041454792023, "rewards/margins": 0.14988011121749878, "rewards/rejected": -0.2953205406665802, "step": 664 }, { "epoch": 1.8339661719019675, "grad_norm": 0.2195546180009842, "learning_rate": 1.982834114547773e-06, "log_odds_chosen": 1.419616937637329, "log_odds_ratio": -0.2214689701795578, "logits/chosen": -0.5952095985412598, "logits/rejected": -1.4947268962860107, "logps/chosen": -1.443178653717041, "logps/rejected": -2.6637141704559326, "loss": 1.4802, "nll_loss": 1.458052396774292, "rewards/accuracies": 1.0, "rewards/chosen": -0.1443178653717041, "rewards/margins": 0.12205353379249573, "rewards/rejected": -0.26637139916419983, "step": 665 }, { "epoch": 1.8367276492923714, "grad_norm": 0.21036118268966675, "learning_rate": 1.9749958732678766e-06, "log_odds_chosen": 1.4819594621658325, "log_odds_ratio": -0.21237432956695557, "logits/chosen": -0.587083101272583, "logits/rejected": -1.478203296661377, "logps/chosen": -1.4206076860427856, "logps/rejected": -2.695976972579956, "loss": 1.4598, "nll_loss": 1.438591718673706, "rewards/accuracies": 1.0, "rewards/chosen": -0.1420607715845108, "rewards/margins": 0.12753693759441376, "rewards/rejected": -0.26959770917892456, "step": 666 }, { "epoch": 1.8394891266827753, "grad_norm": 0.22457025945186615, "learning_rate": 1.967163027217679e-06, "log_odds_chosen": 1.3330204486846924, "log_odds_ratio": -0.24896101653575897, "logits/chosen": -0.5952029228210449, "logits/rejected": -1.2656255960464478, "logps/chosen": -1.4437137842178345, "logps/rejected": -2.583770751953125, "loss": 1.4838, "nll_loss": 1.4589177370071411, "rewards/accuracies": 1.0, "rewards/chosen": -0.1443713754415512, "rewards/margins": 0.11400569975376129, "rewards/rejected": -0.2583770751953125, "step": 667 }, { "epoch": 1.8422506040731792, "grad_norm": 0.22367823123931885, "learning_rate": 1.9593356568917916e-06, "log_odds_chosen": 1.2996011972427368, "log_odds_ratio": -0.2595880925655365, "logits/chosen": -0.58436518907547, "logits/rejected": -1.2079554796218872, "logps/chosen": -1.5410425662994385, "logps/rejected": -2.6774637699127197, "loss": 1.5623, "nll_loss": 1.5363686084747314, "rewards/accuracies": 1.0, "rewards/chosen": -0.15410427749156952, "rewards/margins": 0.1136421337723732, "rewards/rejected": -0.2677464187145233, "step": 668 }, { "epoch": 1.845012081463583, "grad_norm": 0.19504238665103912, "learning_rate": 1.951513842728556e-06, "log_odds_chosen": 1.4380453824996948, "log_odds_ratio": -0.21663914620876312, "logits/chosen": -0.565570592880249, "logits/rejected": -1.4562053680419922, "logps/chosen": -1.4308907985687256, "logps/rejected": -2.665372848510742, "loss": 1.4631, "nll_loss": 1.441426396369934, "rewards/accuracies": 1.0, "rewards/chosen": -0.14308908581733704, "rewards/margins": 0.12344817817211151, "rewards/rejected": -0.26653724908828735, "step": 669 }, { "epoch": 1.8477735588539868, "grad_norm": 0.22681987285614014, "learning_rate": 1.9436976651092143e-06, "log_odds_chosen": 1.441846251487732, "log_odds_ratio": -0.21692335605621338, "logits/chosen": -0.6347320675849915, "logits/rejected": -1.2910839319229126, "logps/chosen": -1.4252921342849731, "logps/rejected": -2.665217399597168, "loss": 1.4627, "nll_loss": 1.4409923553466797, "rewards/accuracies": 1.0, "rewards/chosen": -0.1425292193889618, "rewards/margins": 0.12399252504110336, "rewards/rejected": -0.26652175188064575, "step": 670 }, { "epoch": 1.8505350362443909, "grad_norm": 0.2053694874048233, "learning_rate": 1.935887204357085e-06, "log_odds_chosen": 1.627618670463562, "log_odds_ratio": -0.2071387618780136, "logits/chosen": -0.5737074017524719, "logits/rejected": -1.4058080911636353, "logps/chosen": -1.4551550149917603, "logps/rejected": -2.881498098373413, "loss": 1.4788, "nll_loss": 1.4580904245376587, "rewards/accuracies": 1.0, "rewards/chosen": -0.14551550149917603, "rewards/margins": 0.142634317278862, "rewards/rejected": -0.28814980387687683, "step": 671 }, { "epoch": 1.8532965136347945, "grad_norm": 0.2068096548318863, "learning_rate": 1.928082540736737e-06, "log_odds_chosen": 1.6747455596923828, "log_odds_ratio": -0.19803184270858765, "logits/chosen": -0.5947539806365967, "logits/rejected": -1.356528639793396, "logps/chosen": -1.4107545614242554, "logps/rejected": -2.8761708736419678, "loss": 1.4539, "nll_loss": 1.4340720176696777, "rewards/accuracies": 1.0, "rewards/chosen": -0.1410754770040512, "rewards/margins": 0.14654162526130676, "rewards/rejected": -0.2876170873641968, "step": 672 }, { "epoch": 1.8560579910251986, "grad_norm": 0.21715955436229706, "learning_rate": 1.920283754453164e-06, "log_odds_chosen": 1.6058988571166992, "log_odds_ratio": -0.19034096598625183, "logits/chosen": -0.5639494061470032, "logits/rejected": -1.437272548675537, "logps/chosen": -1.371682047843933, "logps/rejected": -2.748628616333008, "loss": 1.3959, "nll_loss": 1.3768653869628906, "rewards/accuracies": 1.0, "rewards/chosen": -0.13716822862625122, "rewards/margins": 0.13769464194774628, "rewards/rejected": -0.2748628854751587, "step": 673 }, { "epoch": 1.8588194684156023, "grad_norm": 0.2249247431755066, "learning_rate": 1.912490925650962e-06, "log_odds_chosen": 1.6531316041946411, "log_odds_ratio": -0.1766660213470459, "logits/chosen": -0.6709973216056824, "logits/rejected": -1.562389612197876, "logps/chosen": -1.4700473546981812, "logps/rejected": -2.9151792526245117, "loss": 1.4777, "nll_loss": 1.4600156545639038, "rewards/accuracies": 1.0, "rewards/chosen": -0.14700475335121155, "rewards/margins": 0.14451317489147186, "rewards/rejected": -0.2915179133415222, "step": 674 }, { "epoch": 1.8615809458060062, "grad_norm": 0.22879739105701447, "learning_rate": 1.9047041344135045e-06, "log_odds_chosen": 1.3171937465667725, "log_odds_ratio": -0.24630063772201538, "logits/chosen": -0.6080371141433716, "logits/rejected": -1.4477941989898682, "logps/chosen": -1.5433239936828613, "logps/rejected": -2.693405866622925, "loss": 1.5573, "nll_loss": 1.5326746702194214, "rewards/accuracies": 1.0, "rewards/chosen": -0.15433238446712494, "rewards/margins": 0.11500819772481918, "rewards/rejected": -0.2693405747413635, "step": 675 }, { "epoch": 1.86434242319641, "grad_norm": 0.19868780672550201, "learning_rate": 1.8969234607621187e-06, "log_odds_chosen": 1.4915571212768555, "log_odds_ratio": -0.21195626258850098, "logits/chosen": -0.6185406446456909, "logits/rejected": -1.4291622638702393, "logps/chosen": -1.3992692232131958, "logps/rejected": -2.673830509185791, "loss": 1.4368, "nll_loss": 1.4156408309936523, "rewards/accuracies": 1.0, "rewards/chosen": -0.13992692530155182, "rewards/margins": 0.12745614349842072, "rewards/rejected": -0.26738306879997253, "step": 676 }, { "epoch": 1.867103900586814, "grad_norm": 0.5184875726699829, "learning_rate": 1.8891489846552645e-06, "log_odds_chosen": 1.4404429197311401, "log_odds_ratio": -0.21510863304138184, "logits/chosen": -0.5530182123184204, "logits/rejected": -1.534920573234558, "logps/chosen": -1.343485713005066, "logps/rejected": -2.5627851486206055, "loss": 1.3929, "nll_loss": 1.3713606595993042, "rewards/accuracies": 1.0, "rewards/chosen": -0.1343485713005066, "rewards/margins": 0.12192997336387634, "rewards/rejected": -0.25627854466438293, "step": 677 }, { "epoch": 1.8698653779772179, "grad_norm": 0.20503979921340942, "learning_rate": 1.8813807859877147e-06, "log_odds_chosen": 1.3210561275482178, "log_odds_ratio": -0.2472156435251236, "logits/chosen": -0.49510863423347473, "logits/rejected": -1.2383815050125122, "logps/chosen": -1.464887022972107, "logps/rejected": -2.601665496826172, "loss": 1.4964, "nll_loss": 1.4716849327087402, "rewards/accuracies": 1.0, "rewards/chosen": -0.14648869633674622, "rewards/margins": 0.11367785930633545, "rewards/rejected": -0.26016658544540405, "step": 678 }, { "epoch": 1.8726268553676215, "grad_norm": 0.19606317579746246, "learning_rate": 1.8736189445897268e-06, "log_odds_chosen": 1.437435507774353, "log_odds_ratio": -0.21829615533351898, "logits/chosen": -0.6662943363189697, "logits/rejected": -1.4489812850952148, "logps/chosen": -1.3239669799804688, "logps/rejected": -2.5263566970825195, "loss": 1.3693, "nll_loss": 1.3474818468093872, "rewards/accuracies": 1.0, "rewards/chosen": -0.13239668309688568, "rewards/margins": 0.12023897469043732, "rewards/rejected": -0.252635657787323, "step": 679 }, { "epoch": 1.8753883327580256, "grad_norm": 0.20712296664714813, "learning_rate": 1.865863540226232e-06, "log_odds_chosen": 1.5857123136520386, "log_odds_ratio": -0.18892855942249298, "logits/chosen": -0.5754145979881287, "logits/rejected": -1.3390053510665894, "logps/chosen": -1.3540685176849365, "logps/rejected": -2.7056896686553955, "loss": 1.394, "nll_loss": 1.3751423358917236, "rewards/accuracies": 1.0, "rewards/chosen": -0.13540685176849365, "rewards/margins": 0.1351621448993683, "rewards/rejected": -0.27056899666786194, "step": 680 }, { "epoch": 1.8781498101484293, "grad_norm": 0.20168127119541168, "learning_rate": 1.8581146525960093e-06, "log_odds_chosen": 1.3839046955108643, "log_odds_ratio": -0.23602686822414398, "logits/chosen": -0.6133604645729065, "logits/rejected": -1.264336347579956, "logps/chosen": -1.3550195693969727, "logps/rejected": -2.5316176414489746, "loss": 1.4055, "nll_loss": 1.3818817138671875, "rewards/accuracies": 1.0, "rewards/chosen": -0.13550196588039398, "rewards/margins": 0.1176597997546196, "rewards/rejected": -0.253161758184433, "step": 681 }, { "epoch": 1.8809112875388334, "grad_norm": 0.2090066522359848, "learning_rate": 1.8503723613308683e-06, "log_odds_chosen": 1.587690830230713, "log_odds_ratio": -0.2084326446056366, "logits/chosen": -0.54063481092453, "logits/rejected": -1.4373234510421753, "logps/chosen": -1.4417269229888916, "logps/rejected": -2.826479196548462, "loss": 1.4565, "nll_loss": 1.4356616735458374, "rewards/accuracies": 1.0, "rewards/chosen": -0.14417271316051483, "rewards/margins": 0.1384752094745636, "rewards/rejected": -0.28264790773391724, "step": 682 }, { "epoch": 1.883672764929237, "grad_norm": 0.1996944546699524, "learning_rate": 1.8426367459948306e-06, "log_odds_chosen": 1.5597014427185059, "log_odds_ratio": -0.20431235432624817, "logits/chosen": -0.5971145629882812, "logits/rejected": -1.4541590213775635, "logps/chosen": -1.4264284372329712, "logps/rejected": -2.7803056240081787, "loss": 1.4571, "nll_loss": 1.4366974830627441, "rewards/accuracies": 1.0, "rewards/chosen": -0.14264284074306488, "rewards/margins": 0.13538771867752075, "rewards/rejected": -0.27803054451942444, "step": 683 }, { "epoch": 1.886434242319641, "grad_norm": 0.25149136781692505, "learning_rate": 1.8349078860833125e-06, "log_odds_chosen": 1.3872942924499512, "log_odds_ratio": -0.22665993869304657, "logits/chosen": -0.6253618597984314, "logits/rejected": -1.4364534616470337, "logps/chosen": -1.4543843269348145, "logps/rejected": -2.6452903747558594, "loss": 1.4739, "nll_loss": 1.451229453086853, "rewards/accuracies": 1.0, "rewards/chosen": -0.14543844759464264, "rewards/margins": 0.11909062415361404, "rewards/rejected": -0.2645290791988373, "step": 684 }, { "epoch": 1.8891957197100449, "grad_norm": 0.21674232184886932, "learning_rate": 1.827185861022308e-06, "log_odds_chosen": 1.5947694778442383, "log_odds_ratio": -0.1962125301361084, "logits/chosen": -0.5783556699752808, "logits/rejected": -1.423490047454834, "logps/chosen": -1.4327027797698975, "logps/rejected": -2.8187496662139893, "loss": 1.4447, "nll_loss": 1.4250459671020508, "rewards/accuracies": 1.0, "rewards/chosen": -0.14327028393745422, "rewards/margins": 0.13860468566417694, "rewards/rejected": -0.28187495470046997, "step": 685 }, { "epoch": 1.8919571971004487, "grad_norm": 0.20934255421161652, "learning_rate": 1.8194707501675724e-06, "log_odds_chosen": 1.6588257551193237, "log_odds_ratio": -0.18640321493148804, "logits/chosen": -0.6773664951324463, "logits/rejected": -1.4079738855361938, "logps/chosen": -1.3775007724761963, "logps/rejected": -2.804997682571411, "loss": 1.4028, "nll_loss": 1.384152889251709, "rewards/accuracies": 1.0, "rewards/chosen": -0.1377500742673874, "rewards/margins": 0.14274966716766357, "rewards/rejected": -0.28049975633621216, "step": 686 }, { "epoch": 1.8947186744908526, "grad_norm": 0.5570221543312073, "learning_rate": 1.8117626328038069e-06, "log_odds_chosen": 1.4294530153274536, "log_odds_ratio": -0.21934457123279572, "logits/chosen": -0.5100858211517334, "logits/rejected": -1.3611512184143066, "logps/chosen": -1.4136455059051514, "logps/rejected": -2.6365740299224854, "loss": 1.4353, "nll_loss": 1.4133678674697876, "rewards/accuracies": 1.0, "rewards/chosen": -0.14136455953121185, "rewards/margins": 0.12229281663894653, "rewards/rejected": -0.2636573910713196, "step": 687 }, { "epoch": 1.8974801518812565, "grad_norm": 0.2056214064359665, "learning_rate": 1.8040615881438425e-06, "log_odds_chosen": 1.3149546384811401, "log_odds_ratio": -0.24417175352573395, "logits/chosen": -0.6643521189689636, "logits/rejected": -1.3342581987380981, "logps/chosen": -1.454761266708374, "logps/rejected": -2.584627866744995, "loss": 1.4984, "nll_loss": 1.4740098714828491, "rewards/accuracies": 1.0, "rewards/chosen": -0.14547613263130188, "rewards/margins": 0.11298668384552002, "rewards/rejected": -0.2584628164768219, "step": 688 }, { "epoch": 1.9002416292716604, "grad_norm": 0.21506081521511078, "learning_rate": 1.7963676953278292e-06, "log_odds_chosen": 1.5690062046051025, "log_odds_ratio": -0.192954882979393, "logits/chosen": -0.6318016052246094, "logits/rejected": -1.4247246980667114, "logps/chosen": -1.4502630233764648, "logps/rejected": -2.814924955368042, "loss": 1.4808, "nll_loss": 1.4614722728729248, "rewards/accuracies": 1.0, "rewards/chosen": -0.1450263112783432, "rewards/margins": 0.13646619021892548, "rewards/rejected": -0.2814925014972687, "step": 689 }, { "epoch": 1.903003106662064, "grad_norm": 0.21637850999832153, "learning_rate": 1.7886810334224192e-06, "log_odds_chosen": 1.4747532606124878, "log_odds_ratio": -0.21638529002666473, "logits/chosen": -0.6685853004455566, "logits/rejected": -1.5218487977981567, "logps/chosen": -1.417362928390503, "logps/rejected": -2.687438488006592, "loss": 1.4488, "nll_loss": 1.4271397590637207, "rewards/accuracies": 1.0, "rewards/chosen": -0.14173629879951477, "rewards/margins": 0.12700755894184113, "rewards/rejected": -0.2687438726425171, "step": 690 }, { "epoch": 1.9057645840524682, "grad_norm": 0.19811324775218964, "learning_rate": 1.781001681419957e-06, "log_odds_chosen": 1.8503344058990479, "log_odds_ratio": -0.18576686084270477, "logits/chosen": -0.5725023746490479, "logits/rejected": -1.3703765869140625, "logps/chosen": -1.355568528175354, "logps/rejected": -2.9761404991149902, "loss": 1.3917, "nll_loss": 1.3731472492218018, "rewards/accuracies": 1.0, "rewards/chosen": -0.1355568766593933, "rewards/margins": 0.16205722093582153, "rewards/rejected": -0.29761409759521484, "step": 691 }, { "epoch": 1.9085260614428718, "grad_norm": 0.21310174465179443, "learning_rate": 1.7733297182376663e-06, "log_odds_chosen": 1.4929585456848145, "log_odds_ratio": -0.21834099292755127, "logits/chosen": -0.6685935854911804, "logits/rejected": -1.358173131942749, "logps/chosen": -1.358945608139038, "logps/rejected": -2.6349611282348633, "loss": 1.4108, "nll_loss": 1.389008641242981, "rewards/accuracies": 1.0, "rewards/chosen": -0.1358945667743683, "rewards/margins": 0.12760156393051147, "rewards/rejected": -0.26349613070487976, "step": 692 }, { "epoch": 1.911287538833276, "grad_norm": 0.27899911999702454, "learning_rate": 1.7656652227168384e-06, "log_odds_chosen": 1.52334463596344, "log_odds_ratio": -0.2038542479276657, "logits/chosen": -0.5900627374649048, "logits/rejected": -1.299729585647583, "logps/chosen": -1.4452786445617676, "logps/rejected": -2.7677462100982666, "loss": 1.4615, "nll_loss": 1.4410879611968994, "rewards/accuracies": 1.0, "rewards/chosen": -0.144527867436409, "rewards/margins": 0.13224676251411438, "rewards/rejected": -0.2767746150493622, "step": 693 }, { "epoch": 1.9140490162236796, "grad_norm": 0.23582376539707184, "learning_rate": 1.7580082736220235e-06, "log_odds_chosen": 1.6884684562683105, "log_odds_ratio": -0.18581616878509521, "logits/chosen": -0.612487256526947, "logits/rejected": -1.3952116966247559, "logps/chosen": -1.4114580154418945, "logps/rejected": -2.873319149017334, "loss": 1.4383, "nll_loss": 1.419751763343811, "rewards/accuracies": 1.0, "rewards/chosen": -0.14114582538604736, "rewards/margins": 0.14618609845638275, "rewards/rejected": -0.2873319089412689, "step": 694 }, { "epoch": 1.9168104936140835, "grad_norm": 0.23876570165157318, "learning_rate": 1.750358949640221e-06, "log_odds_chosen": 1.416676640510559, "log_odds_ratio": -0.2328736037015915, "logits/chosen": -0.6428385376930237, "logits/rejected": -1.0536079406738281, "logps/chosen": -1.3893574476242065, "logps/rejected": -2.605799674987793, "loss": 1.4294, "nll_loss": 1.4061429500579834, "rewards/accuracies": 1.0, "rewards/chosen": -0.13893577456474304, "rewards/margins": 0.12164422869682312, "rewards/rejected": -0.26058000326156616, "step": 695 }, { "epoch": 1.9195719710044874, "grad_norm": 0.19859705865383148, "learning_rate": 1.74271732938007e-06, "log_odds_chosen": 1.3677996397018433, "log_odds_ratio": -0.23403286933898926, "logits/chosen": -0.6282527446746826, "logits/rejected": -1.2883788347244263, "logps/chosen": -1.3656446933746338, "logps/rejected": -2.5232338905334473, "loss": 1.4136, "nll_loss": 1.3902291059494019, "rewards/accuracies": 1.0, "rewards/chosen": -0.1365644633769989, "rewards/margins": 0.11575894057750702, "rewards/rejected": -0.2523233890533447, "step": 696 }, { "epoch": 1.9223334483948913, "grad_norm": 0.2222430557012558, "learning_rate": 1.7350834913710414e-06, "log_odds_chosen": 1.6454081535339355, "log_odds_ratio": -0.18244285881519318, "logits/chosen": -0.6590233445167542, "logits/rejected": -1.3764431476593018, "logps/chosen": -1.3831430673599243, "logps/rejected": -2.799776077270508, "loss": 1.4158, "nll_loss": 1.397556185722351, "rewards/accuracies": 1.0, "rewards/chosen": -0.13831430673599243, "rewards/margins": 0.1416633427143097, "rewards/rejected": -0.2799776494503021, "step": 697 }, { "epoch": 1.9250949257852952, "grad_norm": 0.22371691465377808, "learning_rate": 1.7274575140626318e-06, "log_odds_chosen": 1.7362313270568848, "log_odds_ratio": -0.1699105203151703, "logits/chosen": -0.618502140045166, "logits/rejected": -1.4262280464172363, "logps/chosen": -1.3551156520843506, "logps/rejected": -2.851775884628296, "loss": 1.3825, "nll_loss": 1.3654714822769165, "rewards/accuracies": 1.0, "rewards/chosen": -0.135511577129364, "rewards/margins": 0.14966602623462677, "rewards/rejected": -0.285177618265152, "step": 698 }, { "epoch": 1.9278564031756988, "grad_norm": 0.25917214155197144, "learning_rate": 1.7198394758235576e-06, "log_odds_chosen": 1.2916865348815918, "log_odds_ratio": -0.2471284568309784, "logits/chosen": -0.7011853456497192, "logits/rejected": -1.3199775218963623, "logps/chosen": -1.433401107788086, "logps/rejected": -2.5367701053619385, "loss": 1.4593, "nll_loss": 1.4345725774765015, "rewards/accuracies": 1.0, "rewards/chosen": -0.1433401107788086, "rewards/margins": 0.11033690720796585, "rewards/rejected": -0.25367704033851624, "step": 699 }, { "epoch": 1.930617880566103, "grad_norm": 0.20016534626483917, "learning_rate": 1.7122294549409486e-06, "log_odds_chosen": 1.4515180587768555, "log_odds_ratio": -0.2157304883003235, "logits/chosen": -0.642143726348877, "logits/rejected": -1.4688291549682617, "logps/chosen": -1.424223780632019, "logps/rejected": -2.671492576599121, "loss": 1.4453, "nll_loss": 1.4237247705459595, "rewards/accuracies": 1.0, "rewards/chosen": -0.1424223780632019, "rewards/margins": 0.12472689151763916, "rewards/rejected": -0.26714926958084106, "step": 700 }, { "epoch": 1.9333793579565066, "grad_norm": 0.22846712172031403, "learning_rate": 1.704627529619543e-06, "log_odds_chosen": 1.3405659198760986, "log_odds_ratio": -0.24245545268058777, "logits/chosen": -0.6561258435249329, "logits/rejected": -1.3835619688034058, "logps/chosen": -1.4769917726516724, "logps/rejected": -2.6347930431365967, "loss": 1.5125, "nll_loss": 1.48822021484375, "rewards/accuracies": 1.0, "rewards/chosen": -0.14769917726516724, "rewards/margins": 0.11578013002872467, "rewards/rejected": -0.2634792923927307, "step": 701 }, { "epoch": 1.9361408353469107, "grad_norm": 0.20465999841690063, "learning_rate": 1.6970337779808862e-06, "log_odds_chosen": 1.307023525238037, "log_odds_ratio": -0.2487718164920807, "logits/chosen": -0.6379750370979309, "logits/rejected": -1.3260835409164429, "logps/chosen": -1.4716124534606934, "logps/rejected": -2.5912721157073975, "loss": 1.5028, "nll_loss": 1.4779243469238281, "rewards/accuracies": 1.0, "rewards/chosen": -0.14716124534606934, "rewards/margins": 0.11196595430374146, "rewards/rejected": -0.2591271996498108, "step": 702 }, { "epoch": 1.9389023127373144, "grad_norm": 0.20917516946792603, "learning_rate": 1.689448278062525e-06, "log_odds_chosen": 1.5543051958084106, "log_odds_ratio": -0.19568473100662231, "logits/chosen": -0.661027193069458, "logits/rejected": -1.4874473810195923, "logps/chosen": -1.4061609506607056, "logps/rejected": -2.74558687210083, "loss": 1.4482, "nll_loss": 1.4286404848098755, "rewards/accuracies": 1.0, "rewards/chosen": -0.14061610400676727, "rewards/margins": 0.1339426189661026, "rewards/rejected": -0.2745587229728699, "step": 703 }, { "epoch": 1.9416637901277183, "grad_norm": 0.20062628388404846, "learning_rate": 1.681871107817208e-06, "log_odds_chosen": 1.5656498670578003, "log_odds_ratio": -0.19489561021327972, "logits/chosen": -0.6353734135627747, "logits/rejected": -1.2792272567749023, "logps/chosen": -1.396148920059204, "logps/rejected": -2.741448163986206, "loss": 1.4399, "nll_loss": 1.42039954662323, "rewards/accuracies": 1.0, "rewards/chosen": -0.13961489498615265, "rewards/margins": 0.13452990353107452, "rewards/rejected": -0.2741447985172272, "step": 704 }, { "epoch": 1.9444252675181222, "grad_norm": 0.22097182273864746, "learning_rate": 1.6743023451120831e-06, "log_odds_chosen": 1.5080511569976807, "log_odds_ratio": -0.2081257402896881, "logits/chosen": -0.5294905304908752, "logits/rejected": -1.5317174196243286, "logps/chosen": -1.526644229888916, "logps/rejected": -2.851184129714966, "loss": 1.5487, "nll_loss": 1.5279297828674316, "rewards/accuracies": 1.0, "rewards/chosen": -0.1526644229888916, "rewards/margins": 0.13245397806167603, "rewards/rejected": -0.2851184010505676, "step": 705 }, { "epoch": 1.947186744908526, "grad_norm": 0.20108787715435028, "learning_rate": 1.6667420677278962e-06, "log_odds_chosen": 1.6470845937728882, "log_odds_ratio": -0.19162878394126892, "logits/chosen": -0.5555588006973267, "logits/rejected": -1.5358052253723145, "logps/chosen": -1.4384005069732666, "logps/rejected": -2.876382827758789, "loss": 1.4594, "nll_loss": 1.4402786493301392, "rewards/accuracies": 1.0, "rewards/chosen": -0.14384004473686218, "rewards/margins": 0.14379821717739105, "rewards/rejected": -0.28763827681541443, "step": 706 }, { "epoch": 1.94994822229893, "grad_norm": 0.22594718635082245, "learning_rate": 1.659190353358196e-06, "log_odds_chosen": 1.831480860710144, "log_odds_ratio": -0.15760438144207, "logits/chosen": -0.574060320854187, "logits/rejected": -1.3566135168075562, "logps/chosen": -1.3334236145019531, "logps/rejected": -2.913168430328369, "loss": 1.3612, "nll_loss": 1.3454718589782715, "rewards/accuracies": 1.0, "rewards/chosen": -0.13334235548973083, "rewards/margins": 0.157974511384964, "rewards/rejected": -0.2913168668746948, "step": 707 }, { "epoch": 1.9527096996893338, "grad_norm": 0.21431465446949005, "learning_rate": 1.6516472796085314e-06, "log_odds_chosen": 1.8894720077514648, "log_odds_ratio": -0.14807267487049103, "logits/chosen": -0.6859555244445801, "logits/rejected": -1.5254415273666382, "logps/chosen": -1.3702484369277954, "logps/rejected": -3.016295909881592, "loss": 1.3931, "nll_loss": 1.3783080577850342, "rewards/accuracies": 1.0, "rewards/chosen": -0.13702484965324402, "rewards/margins": 0.16460473835468292, "rewards/rejected": -0.30162957310676575, "step": 708 }, { "epoch": 1.9554711770797377, "grad_norm": 0.28705501556396484, "learning_rate": 1.644112923995656e-06, "log_odds_chosen": 1.7715154886245728, "log_odds_ratio": -0.17979811131954193, "logits/chosen": -0.6508351564407349, "logits/rejected": -1.3315798044204712, "logps/chosen": -1.4158389568328857, "logps/rejected": -2.9660990238189697, "loss": 1.4362, "nll_loss": 1.418235182762146, "rewards/accuracies": 1.0, "rewards/chosen": -0.1415839046239853, "rewards/margins": 0.15502600371837616, "rewards/rejected": -0.29660993814468384, "step": 709 }, { "epoch": 1.9582326544701414, "grad_norm": 0.20421892404556274, "learning_rate": 1.6365873639467314e-06, "log_odds_chosen": 1.4663060903549194, "log_odds_ratio": -0.22599183022975922, "logits/chosen": -0.5488445162773132, "logits/rejected": -1.296336054801941, "logps/chosen": -1.3076554536819458, "logps/rejected": -2.5398788452148438, "loss": 1.3575, "nll_loss": 1.3349173069000244, "rewards/accuracies": 1.0, "rewards/chosen": -0.13076554238796234, "rewards/margins": 0.12322235107421875, "rewards/rejected": -0.2539878785610199, "step": 710 }, { "epoch": 1.9609941318605455, "grad_norm": 0.20583806931972504, "learning_rate": 1.6290706767985299e-06, "log_odds_chosen": 1.5524039268493652, "log_odds_ratio": -0.20679479837417603, "logits/chosen": -0.5826380848884583, "logits/rejected": -1.4210282564163208, "logps/chosen": -1.3821145296096802, "logps/rejected": -2.711606979370117, "loss": 1.4088, "nll_loss": 1.388145923614502, "rewards/accuracies": 1.0, "rewards/chosen": -0.1382114589214325, "rewards/margins": 0.13294921815395355, "rewards/rejected": -0.27116069197654724, "step": 711 }, { "epoch": 1.9637556092509492, "grad_norm": 0.27205660939216614, "learning_rate": 1.6215629397966432e-06, "log_odds_chosen": 1.65108060836792, "log_odds_ratio": -0.18264544010162354, "logits/chosen": -0.6203844547271729, "logits/rejected": -1.4480795860290527, "logps/chosen": -1.3896100521087646, "logps/rejected": -2.818542718887329, "loss": 1.4164, "nll_loss": 1.3981621265411377, "rewards/accuracies": 1.0, "rewards/chosen": -0.13896100223064423, "rewards/margins": 0.1428932547569275, "rewards/rejected": -0.2818542718887329, "step": 712 }, { "epoch": 1.9665170866413533, "grad_norm": 0.21617811918258667, "learning_rate": 1.614064230094684e-06, "log_odds_chosen": 1.5959744453430176, "log_odds_ratio": -0.1887877881526947, "logits/chosen": -0.6716099381446838, "logits/rejected": -1.382897138595581, "logps/chosen": -1.4408637285232544, "logps/rejected": -2.8292531967163086, "loss": 1.4674, "nll_loss": 1.4485435485839844, "rewards/accuracies": 1.0, "rewards/chosen": -0.14408639073371887, "rewards/margins": 0.13883893191814423, "rewards/rejected": -0.2829253375530243, "step": 713 }, { "epoch": 1.969278564031757, "grad_norm": 0.21602113544940948, "learning_rate": 1.6065746247534986e-06, "log_odds_chosen": 1.7377601861953735, "log_odds_ratio": -0.17576658725738525, "logits/chosen": -0.5683955550193787, "logits/rejected": -1.394814372062683, "logps/chosen": -1.3911443948745728, "logps/rejected": -2.89923095703125, "loss": 1.4372, "nll_loss": 1.4196243286132812, "rewards/accuracies": 1.0, "rewards/chosen": -0.13911445438861847, "rewards/margins": 0.1508086621761322, "rewards/rejected": -0.28992313146591187, "step": 714 }, { "epoch": 1.9720400414221608, "grad_norm": 0.2555505037307739, "learning_rate": 1.599094200740367e-06, "log_odds_chosen": 1.455166220664978, "log_odds_ratio": -0.2124192863702774, "logits/chosen": -0.6386806964874268, "logits/rejected": -1.3638241291046143, "logps/chosen": -1.483328938484192, "logps/rejected": -2.7438817024230957, "loss": 1.51, "nll_loss": 1.488732933998108, "rewards/accuracies": 1.0, "rewards/chosen": -0.1483328938484192, "rewards/margins": 0.1260552704334259, "rewards/rejected": -0.2743881940841675, "step": 715 }, { "epoch": 1.9748015188125647, "grad_norm": 0.2530527412891388, "learning_rate": 1.5916230349282215e-06, "log_odds_chosen": 1.661516547203064, "log_odds_ratio": -0.19038884341716766, "logits/chosen": -0.7034871578216553, "logits/rejected": -1.2856659889221191, "logps/chosen": -1.343193769454956, "logps/rejected": -2.766446828842163, "loss": 1.3847, "nll_loss": 1.3656784296035767, "rewards/accuracies": 1.0, "rewards/chosen": -0.13431937992572784, "rewards/margins": 0.142325296998024, "rewards/rejected": -0.27664467692375183, "step": 716 }, { "epoch": 1.9775629962029686, "grad_norm": 0.21432706713676453, "learning_rate": 1.58416120409485e-06, "log_odds_chosen": 1.6423002481460571, "log_odds_ratio": -0.1803400218486786, "logits/chosen": -0.5831408500671387, "logits/rejected": -1.507871389389038, "logps/chosen": -1.463822603225708, "logps/rejected": -2.8999862670898438, "loss": 1.4658, "nll_loss": 1.447767734527588, "rewards/accuracies": 1.0, "rewards/chosen": -0.14638225734233856, "rewards/margins": 0.14361636340618134, "rewards/rejected": -0.2899986505508423, "step": 717 }, { "epoch": 1.9803244735933725, "grad_norm": 0.20858345925807953, "learning_rate": 1.5767087849221097e-06, "log_odds_chosen": 1.5330266952514648, "log_odds_ratio": -0.20617344975471497, "logits/chosen": -0.6192912459373474, "logits/rejected": -1.3662102222442627, "logps/chosen": -1.3841629028320312, "logps/rejected": -2.702185869216919, "loss": 1.4236, "nll_loss": 1.4029996395111084, "rewards/accuracies": 1.0, "rewards/chosen": -0.13841629028320312, "rewards/margins": 0.1318022906780243, "rewards/rejected": -0.2702185809612274, "step": 718 }, { "epoch": 1.9830859509837762, "grad_norm": 0.2193707972764969, "learning_rate": 1.5692658539951371e-06, "log_odds_chosen": 1.693321704864502, "log_odds_ratio": -0.17277748882770538, "logits/chosen": -0.6784827709197998, "logits/rejected": -1.5714738368988037, "logps/chosen": -1.403821587562561, "logps/rejected": -2.8668127059936523, "loss": 1.4393, "nll_loss": 1.4220073223114014, "rewards/accuracies": 1.0, "rewards/chosen": -0.14038215577602386, "rewards/margins": 0.1462991088628769, "rewards/rejected": -0.28668126463890076, "step": 719 }, { "epoch": 1.9858474283741803, "grad_norm": 0.2205357700586319, "learning_rate": 1.561832487801565e-06, "log_odds_chosen": 1.657831072807312, "log_odds_ratio": -0.19422584772109985, "logits/chosen": -0.6187453269958496, "logits/rejected": -1.5218826532363892, "logps/chosen": -1.416845440864563, "logps/rejected": -2.8605427742004395, "loss": 1.4362, "nll_loss": 1.4167304039001465, "rewards/accuracies": 1.0, "rewards/chosen": -0.14168453216552734, "rewards/margins": 0.14436973631381989, "rewards/rejected": -0.2860542833805084, "step": 720 }, { "epoch": 1.988608905764584, "grad_norm": 0.21532008051872253, "learning_rate": 1.5544087627307308e-06, "log_odds_chosen": 1.8189568519592285, "log_odds_ratio": -0.15981584787368774, "logits/chosen": -0.676050066947937, "logits/rejected": -1.4322230815887451, "logps/chosen": -1.4061801433563232, "logps/rejected": -2.997437000274658, "loss": 1.427, "nll_loss": 1.4110301733016968, "rewards/accuracies": 1.0, "rewards/chosen": -0.14061801135540009, "rewards/margins": 0.15912571549415588, "rewards/rejected": -0.2997437119483948, "step": 721 }, { "epoch": 1.991370383154988, "grad_norm": 0.21574527025222778, "learning_rate": 1.546994755072896e-06, "log_odds_chosen": 1.7873562574386597, "log_odds_ratio": -0.16456766426563263, "logits/chosen": -0.5763784646987915, "logits/rejected": -1.486627459526062, "logps/chosen": -1.3662954568862915, "logps/rejected": -2.9178426265716553, "loss": 1.4079, "nll_loss": 1.391477346420288, "rewards/accuracies": 1.0, "rewards/chosen": -0.13662953674793243, "rewards/margins": 0.1551547348499298, "rewards/rejected": -0.29178428649902344, "step": 722 }, { "epoch": 1.9941318605453917, "grad_norm": 0.24729660153388977, "learning_rate": 1.539590541018461e-06, "log_odds_chosen": 1.766893982887268, "log_odds_ratio": -0.17045418918132782, "logits/chosen": -0.6789031028747559, "logits/rejected": -1.571667194366455, "logps/chosen": -1.4501278400421143, "logps/rejected": -3.004589080810547, "loss": 1.4755, "nll_loss": 1.4584672451019287, "rewards/accuracies": 1.0, "rewards/chosen": -0.1450127810239792, "rewards/margins": 0.1554461419582367, "rewards/rejected": -0.3004589080810547, "step": 723 }, { "epoch": 1.9968933379357956, "grad_norm": 0.21343068778514862, "learning_rate": 1.5321961966571801e-06, "log_odds_chosen": 1.5317389965057373, "log_odds_ratio": -0.20564541220664978, "logits/chosen": -0.7062411308288574, "logits/rejected": -1.3788303136825562, "logps/chosen": -1.4267816543579102, "logps/rejected": -2.7562084197998047, "loss": 1.4517, "nll_loss": 1.4311397075653076, "rewards/accuracies": 1.0, "rewards/chosen": -0.1426781713962555, "rewards/margins": 0.13294267654418945, "rewards/rejected": -0.27562084794044495, "step": 724 }, { "epoch": 1.9996548153261995, "grad_norm": 0.21614350378513336, "learning_rate": 1.524811797977383e-06, "log_odds_chosen": 1.4235830307006836, "log_odds_ratio": -0.2235802412033081, "logits/chosen": -0.6447729468345642, "logits/rejected": -1.2856361865997314, "logps/chosen": -1.464742660522461, "logps/rejected": -2.6955158710479736, "loss": 1.4832, "nll_loss": 1.4608020782470703, "rewards/accuracies": 1.0, "rewards/chosen": -0.14647427201271057, "rewards/margins": 0.12307729572057724, "rewards/rejected": -0.2695516049861908, "step": 725 }, { "epoch": 2.0, "grad_norm": 0.48198679089546204, "learning_rate": 1.5174374208651913e-06, "log_odds_chosen": 1.4216891527175903, "log_odds_ratio": -0.21616411209106445, "logits/chosen": -0.5773603320121765, "logits/rejected": -1.5106768608093262, "logps/chosen": -1.404468059539795, "logps/rejected": -2.6200459003448486, "loss": 1.4125, "nll_loss": 1.3908467292785645, "rewards/accuracies": 1.0, "rewards/chosen": -0.14044681191444397, "rewards/margins": 0.12155777215957642, "rewards/rejected": -0.2620045840740204, "step": 726 }, { "epoch": 2.0027614773904037, "grad_norm": 0.200496107339859, "learning_rate": 1.5100731411037395e-06, "log_odds_chosen": 1.8934729099273682, "log_odds_ratio": -0.15082460641860962, "logits/chosen": -0.6470483541488647, "logits/rejected": -1.5429167747497559, "logps/chosen": -1.4309892654418945, "logps/rejected": -3.100017547607422, "loss": 1.4567, "nll_loss": 1.4416483640670776, "rewards/accuracies": 1.0, "rewards/chosen": -0.14309892058372498, "rewards/margins": 0.1669028103351593, "rewards/rejected": -0.31000176072120667, "step": 727 }, { "epoch": 2.0055229547808078, "grad_norm": 0.22209472954273224, "learning_rate": 1.502719034372396e-06, "log_odds_chosen": 1.340651035308838, "log_odds_ratio": -0.24326053261756897, "logits/chosen": -0.7077615857124329, "logits/rejected": -1.2796984910964966, "logps/chosen": -1.3869967460632324, "logps/rejected": -2.5280184745788574, "loss": 1.4105, "nll_loss": 1.386133074760437, "rewards/accuracies": 1.0, "rewards/chosen": -0.1386996954679489, "rewards/margins": 0.11410216987133026, "rewards/rejected": -0.2528018653392792, "step": 728 }, { "epoch": 2.0082844321712114, "grad_norm": 0.2023441046476364, "learning_rate": 1.4953751762459867e-06, "log_odds_chosen": 1.7777669429779053, "log_odds_ratio": -0.18098145723342896, "logits/chosen": -0.6093227863311768, "logits/rejected": -1.4456218481063843, "logps/chosen": -1.3722914457321167, "logps/rejected": -2.9199676513671875, "loss": 1.4084, "nll_loss": 1.390295386314392, "rewards/accuracies": 1.0, "rewards/chosen": -0.13722914457321167, "rewards/margins": 0.15476764738559723, "rewards/rejected": -0.2919967770576477, "step": 729 }, { "epoch": 2.0110459095616156, "grad_norm": 0.23719917237758636, "learning_rate": 1.4880416421940155e-06, "log_odds_chosen": 1.6621570587158203, "log_odds_ratio": -0.19358786940574646, "logits/chosen": -0.6772423386573792, "logits/rejected": -1.3135812282562256, "logps/chosen": -1.4234864711761475, "logps/rejected": -2.8738884925842285, "loss": 1.445, "nll_loss": 1.4256806373596191, "rewards/accuracies": 1.0, "rewards/chosen": -0.14234864711761475, "rewards/margins": 0.14504022896289825, "rewards/rejected": -0.2873888611793518, "step": 730 }, { "epoch": 2.013807386952019, "grad_norm": 0.2236790806055069, "learning_rate": 1.4807185075798919e-06, "log_odds_chosen": 1.8309162855148315, "log_odds_ratio": -0.15777002274990082, "logits/chosen": -0.756325900554657, "logits/rejected": -1.7289044857025146, "logps/chosen": -1.47125244140625, "logps/rejected": -3.086061716079712, "loss": 1.4895, "nll_loss": 1.4737268686294556, "rewards/accuracies": 1.0, "rewards/chosen": -0.147125244140625, "rewards/margins": 0.16148093342781067, "rewards/rejected": -0.30860617756843567, "step": 731 }, { "epoch": 2.0165688643424233, "grad_norm": 0.2069522887468338, "learning_rate": 1.4734058476601553e-06, "log_odds_chosen": 1.715282678604126, "log_odds_ratio": -0.1702621579170227, "logits/chosen": -0.6738074421882629, "logits/rejected": -1.5364606380462646, "logps/chosen": -1.4186843633651733, "logps/rejected": -2.909392833709717, "loss": 1.4426, "nll_loss": 1.425575852394104, "rewards/accuracies": 1.0, "rewards/chosen": -0.141868457198143, "rewards/margins": 0.1490708440542221, "rewards/rejected": -0.2909393012523651, "step": 732 }, { "epoch": 2.019330341732827, "grad_norm": 0.21304333209991455, "learning_rate": 1.466103737583699e-06, "log_odds_chosen": 1.646624207496643, "log_odds_ratio": -0.18460693955421448, "logits/chosen": -0.6634320616722107, "logits/rejected": -1.5399712324142456, "logps/chosen": -1.4346245527267456, "logps/rejected": -2.868168830871582, "loss": 1.4617, "nll_loss": 1.4432533979415894, "rewards/accuracies": 1.0, "rewards/chosen": -0.14346244931221008, "rewards/margins": 0.1433544158935547, "rewards/rejected": -0.28681689500808716, "step": 733 }, { "epoch": 2.022091819123231, "grad_norm": 0.20874540507793427, "learning_rate": 1.4588122523910032e-06, "log_odds_chosen": 1.3142719268798828, "log_odds_ratio": -0.2400466352701187, "logits/chosen": -0.6790512204170227, "logits/rejected": -1.3589048385620117, "logps/chosen": -1.4558688402175903, "logps/rejected": -2.5818986892700195, "loss": 1.4708, "nll_loss": 1.4467977285385132, "rewards/accuracies": 1.0, "rewards/chosen": -0.14558687806129456, "rewards/margins": 0.11260296404361725, "rewards/rejected": -0.258189857006073, "step": 734 }, { "epoch": 2.0248532965136348, "grad_norm": 0.20284749567508698, "learning_rate": 1.4515314670133582e-06, "log_odds_chosen": 1.8735268115997314, "log_odds_ratio": -0.1615622639656067, "logits/chosen": -0.6692161560058594, "logits/rejected": -1.3985953330993652, "logps/chosen": -1.4208303689956665, "logps/rejected": -3.073046922683716, "loss": 1.4351, "nll_loss": 1.418927788734436, "rewards/accuracies": 1.0, "rewards/chosen": -0.1420830488204956, "rewards/margins": 0.16522164642810822, "rewards/rejected": -0.3073046803474426, "step": 735 }, { "epoch": 2.027614773904039, "grad_norm": 0.24936173856258392, "learning_rate": 1.4442614562721011e-06, "log_odds_chosen": 1.671183705329895, "log_odds_ratio": -0.18805576860904694, "logits/chosen": -0.568038821220398, "logits/rejected": -1.3862274885177612, "logps/chosen": -1.3958425521850586, "logps/rejected": -2.837895631790161, "loss": 1.4195, "nll_loss": 1.4006680250167847, "rewards/accuracies": 1.0, "rewards/chosen": -0.1395842432975769, "rewards/margins": 0.14420530200004578, "rewards/rejected": -0.28378957509994507, "step": 736 }, { "epoch": 2.0303762512944425, "grad_norm": 0.2186705619096756, "learning_rate": 1.4370022948778383e-06, "log_odds_chosen": 1.7043132781982422, "log_odds_ratio": -0.17080500721931458, "logits/chosen": -0.5717258453369141, "logits/rejected": -1.6019500494003296, "logps/chosen": -1.4789414405822754, "logps/rejected": -2.9726955890655518, "loss": 1.4969, "nll_loss": 1.4798444509506226, "rewards/accuracies": 1.0, "rewards/chosen": -0.14789414405822754, "rewards/margins": 0.14937540888786316, "rewards/rejected": -0.2972695529460907, "step": 737 }, { "epoch": 2.033137728684846, "grad_norm": 0.21948732435703278, "learning_rate": 1.429754057429687e-06, "log_odds_chosen": 1.6671240329742432, "log_odds_ratio": -0.1748780906200409, "logits/chosen": -0.7602828741073608, "logits/rejected": -1.3787339925765991, "logps/chosen": -1.3885620832443237, "logps/rejected": -2.8282227516174316, "loss": 1.4242, "nll_loss": 1.4067615270614624, "rewards/accuracies": 1.0, "rewards/chosen": -0.13885623216629028, "rewards/margins": 0.14396606385707855, "rewards/rejected": -0.28282228112220764, "step": 738 }, { "epoch": 2.0358992060752503, "grad_norm": 0.25562384724617004, "learning_rate": 1.422516818414499e-06, "log_odds_chosen": 1.4859294891357422, "log_odds_ratio": -0.20781388878822327, "logits/chosen": -0.6864043474197388, "logits/rejected": -1.3941079378128052, "logps/chosen": -1.373292088508606, "logps/rejected": -2.6389758586883545, "loss": 1.4072, "nll_loss": 1.3864004611968994, "rewards/accuracies": 1.0, "rewards/chosen": -0.13732922077178955, "rewards/margins": 0.12656837701797485, "rewards/rejected": -0.2638975977897644, "step": 739 }, { "epoch": 2.038660683465654, "grad_norm": 0.23449690639972687, "learning_rate": 1.415290652206105e-06, "log_odds_chosen": 1.6655925512313843, "log_odds_ratio": -0.17781947553157806, "logits/chosen": -0.7125600576400757, "logits/rejected": -1.3963764905929565, "logps/chosen": -1.473286747932434, "logps/rejected": -2.933544635772705, "loss": 1.4754, "nll_loss": 1.4576480388641357, "rewards/accuracies": 1.0, "rewards/chosen": -0.1473286896944046, "rewards/margins": 0.14602577686309814, "rewards/rejected": -0.29335445165634155, "step": 740 }, { "epoch": 2.041422160856058, "grad_norm": 0.19780917465686798, "learning_rate": 1.4080756330645424e-06, "log_odds_chosen": 1.666169285774231, "log_odds_ratio": -0.18346275389194489, "logits/chosen": -0.6826483607292175, "logits/rejected": -1.41259765625, "logps/chosen": -1.3261446952819824, "logps/rejected": -2.7477099895477295, "loss": 1.3533, "nll_loss": 1.33500337600708, "rewards/accuracies": 1.0, "rewards/chosen": -0.13261446356773376, "rewards/margins": 0.14215652644634247, "rewards/rejected": -0.27477097511291504, "step": 741 }, { "epoch": 2.0441836382464618, "grad_norm": 0.22608482837677002, "learning_rate": 1.400871835135295e-06, "log_odds_chosen": 1.8851103782653809, "log_odds_ratio": -0.14945101737976074, "logits/chosen": -0.7435231804847717, "logits/rejected": -1.4258017539978027, "logps/chosen": -1.395211935043335, "logps/rejected": -3.0477499961853027, "loss": 1.4147, "nll_loss": 1.3997923135757446, "rewards/accuracies": 1.0, "rewards/chosen": -0.13952121138572693, "rewards/margins": 0.16525380313396454, "rewards/rejected": -0.3047749698162079, "step": 742 }, { "epoch": 2.046945115636866, "grad_norm": 0.2287517786026001, "learning_rate": 1.3936793324485343e-06, "log_odds_chosen": 1.6958822011947632, "log_odds_ratio": -0.17418357729911804, "logits/chosen": -0.6303619146347046, "logits/rejected": -1.4302959442138672, "logps/chosen": -1.375686526298523, "logps/rejected": -2.8380520343780518, "loss": 1.4083, "nll_loss": 1.3909112215042114, "rewards/accuracies": 1.0, "rewards/chosen": -0.1375686377286911, "rewards/margins": 0.1462365835905075, "rewards/rejected": -0.2838052213191986, "step": 743 }, { "epoch": 2.0497065930272695, "grad_norm": 0.23087257146835327, "learning_rate": 1.386498198918352e-06, "log_odds_chosen": 1.705764889717102, "log_odds_ratio": -0.17870433628559113, "logits/chosen": -0.6501262187957764, "logits/rejected": -1.4289541244506836, "logps/chosen": -1.4784560203552246, "logps/rejected": -2.9818336963653564, "loss": 1.5013, "nll_loss": 1.4834237098693848, "rewards/accuracies": 1.0, "rewards/chosen": -0.14784559607505798, "rewards/margins": 0.15033775568008423, "rewards/rejected": -0.2981833517551422, "step": 744 }, { "epoch": 2.0524680704176737, "grad_norm": 0.24153321981430054, "learning_rate": 1.3793285083420077e-06, "log_odds_chosen": 1.4899718761444092, "log_odds_ratio": -0.20986853539943695, "logits/chosen": -0.5868789553642273, "logits/rejected": -1.2829419374465942, "logps/chosen": -1.5311976671218872, "logps/rejected": -2.839613199234009, "loss": 1.5426, "nll_loss": 1.5216182470321655, "rewards/accuracies": 1.0, "rewards/chosen": -0.153119757771492, "rewards/margins": 0.13084158301353455, "rewards/rejected": -0.28396135568618774, "step": 745 }, { "epoch": 2.0552295478080773, "grad_norm": 0.210659921169281, "learning_rate": 1.3721703343991634e-06, "log_odds_chosen": 1.8701311349868774, "log_odds_ratio": -0.15025395154953003, "logits/chosen": -0.7139883041381836, "logits/rejected": -1.5870580673217773, "logps/chosen": -1.406786561012268, "logps/rejected": -3.0462093353271484, "loss": 1.4207, "nll_loss": 1.4056353569030762, "rewards/accuracies": 1.0, "rewards/chosen": -0.14067867398262024, "rewards/margins": 0.16394227743148804, "rewards/rejected": -0.3046209216117859, "step": 746 }, { "epoch": 2.057991025198481, "grad_norm": 0.23197799921035767, "learning_rate": 1.3650237506511333e-06, "log_odds_chosen": 1.718932867050171, "log_odds_ratio": -0.18369780480861664, "logits/chosen": -0.6782840490341187, "logits/rejected": -1.370782732963562, "logps/chosen": -1.3876206874847412, "logps/rejected": -2.881727457046509, "loss": 1.43, "nll_loss": 1.411611795425415, "rewards/accuracies": 1.0, "rewards/chosen": -0.13876208662986755, "rewards/margins": 0.1494106650352478, "rewards/rejected": -0.28817272186279297, "step": 747 }, { "epoch": 2.060752502588885, "grad_norm": 0.2240779846906662, "learning_rate": 1.3578888305401208e-06, "log_odds_chosen": 1.6744976043701172, "log_odds_ratio": -0.19240857660770416, "logits/chosen": -0.643547773361206, "logits/rejected": -1.401029109954834, "logps/chosen": -1.3879692554473877, "logps/rejected": -2.843924045562744, "loss": 1.4186, "nll_loss": 1.3994083404541016, "rewards/accuracies": 1.0, "rewards/chosen": -0.13879692554473877, "rewards/margins": 0.1455954909324646, "rewards/rejected": -0.28439241647720337, "step": 748 }, { "epoch": 2.0635139799792888, "grad_norm": 0.21591530740261078, "learning_rate": 1.3507656473884718e-06, "log_odds_chosen": 1.5864033699035645, "log_odds_ratio": -0.19526658952236176, "logits/chosen": -0.5981451272964478, "logits/rejected": -1.3718003034591675, "logps/chosen": -1.3784244060516357, "logps/rejected": -2.7354776859283447, "loss": 1.3961, "nll_loss": 1.3765381574630737, "rewards/accuracies": 1.0, "rewards/chosen": -0.13784244656562805, "rewards/margins": 0.13570532202720642, "rewards/rejected": -0.2735477685928345, "step": 749 }, { "epoch": 2.066275457369693, "grad_norm": 0.22318555414676666, "learning_rate": 1.3436542743979125e-06, "log_odds_chosen": 1.466707468032837, "log_odds_ratio": -0.22797253727912903, "logits/chosen": -0.6937511563301086, "logits/rejected": -1.2689547538757324, "logps/chosen": -1.3751643896102905, "logps/rejected": -2.6363766193389893, "loss": 1.4241, "nll_loss": 1.4012537002563477, "rewards/accuracies": 1.0, "rewards/chosen": -0.13751643896102905, "rewards/margins": 0.12612125277519226, "rewards/rejected": -0.2636376917362213, "step": 750 }, { "epoch": 2.0690369347600965, "grad_norm": 0.21035663783550262, "learning_rate": 1.3365547846488037e-06, "log_odds_chosen": 1.8789607286453247, "log_odds_ratio": -0.14974045753479004, "logits/chosen": -0.6577342748641968, "logits/rejected": -1.4951293468475342, "logps/chosen": -1.420435905456543, "logps/rejected": -3.070344924926758, "loss": 1.442, "nll_loss": 1.4270422458648682, "rewards/accuracies": 1.0, "rewards/chosen": -0.1420435905456543, "rewards/margins": 0.1649908870458603, "rewards/rejected": -0.3070344924926758, "step": 751 }, { "epoch": 2.0717984121505006, "grad_norm": 0.22803649306297302, "learning_rate": 1.3294672510993862e-06, "log_odds_chosen": 1.7924696207046509, "log_odds_ratio": -0.16550493240356445, "logits/chosen": -0.7013769149780273, "logits/rejected": -1.4228062629699707, "logps/chosen": -1.4931784868240356, "logps/rejected": -3.0807228088378906, "loss": 1.5088, "nll_loss": 1.492207646369934, "rewards/accuracies": 1.0, "rewards/chosen": -0.14931784570217133, "rewards/margins": 0.15875445306301117, "rewards/rejected": -0.3080723285675049, "step": 752 }, { "epoch": 2.0745598895409043, "grad_norm": 0.23071783781051636, "learning_rate": 1.3223917465850344e-06, "log_odds_chosen": 1.6650446653366089, "log_odds_ratio": -0.19160096347332, "logits/chosen": -0.601356565952301, "logits/rejected": -1.3828630447387695, "logps/chosen": -1.4673768281936646, "logps/rejected": -2.931558847427368, "loss": 1.4937, "nll_loss": 1.4745417833328247, "rewards/accuracies": 1.0, "rewards/chosen": -0.1467377096414566, "rewards/margins": 0.14641818404197693, "rewards/rejected": -0.29315587878227234, "step": 753 }, { "epoch": 2.0773213669313084, "grad_norm": 0.25171250104904175, "learning_rate": 1.3153283438175036e-06, "log_odds_chosen": 1.9603334665298462, "log_odds_ratio": -0.15906432271003723, "logits/chosen": -0.6608366370201111, "logits/rejected": -1.4777100086212158, "logps/chosen": -1.5009139776229858, "logps/rejected": -3.2551229000091553, "loss": 1.5009, "nll_loss": 1.4849778413772583, "rewards/accuracies": 1.0, "rewards/chosen": -0.15009139478206635, "rewards/margins": 0.17542089521884918, "rewards/rejected": -0.3255122900009155, "step": 754 }, { "epoch": 2.080082844321712, "grad_norm": 0.23521378636360168, "learning_rate": 1.3082771153841872e-06, "log_odds_chosen": 1.688461184501648, "log_odds_ratio": -0.17777958512306213, "logits/chosen": -0.7100945115089417, "logits/rejected": -1.3997730016708374, "logps/chosen": -1.4548128843307495, "logps/rejected": -2.9329257011413574, "loss": 1.4959, "nll_loss": 1.4781546592712402, "rewards/accuracies": 1.0, "rewards/chosen": -0.14548128843307495, "rewards/margins": 0.14781127870082855, "rewards/rejected": -0.2932925820350647, "step": 755 }, { "epoch": 2.082844321712116, "grad_norm": 0.2298353761434555, "learning_rate": 1.3012381337473656e-06, "log_odds_chosen": 2.0814948081970215, "log_odds_ratio": -0.13590532541275024, "logits/chosen": -0.5940237641334534, "logits/rejected": -1.514527440071106, "logps/chosen": -1.2526910305023193, "logps/rejected": -3.027501106262207, "loss": 1.2946, "nll_loss": 1.2809951305389404, "rewards/accuracies": 1.0, "rewards/chosen": -0.1252691149711609, "rewards/margins": 0.1774810403585434, "rewards/rejected": -0.3027501404285431, "step": 756 }, { "epoch": 2.08560579910252, "grad_norm": 0.23549045622348785, "learning_rate": 1.294211471243466e-06, "log_odds_chosen": 1.7326384782791138, "log_odds_ratio": -0.16713333129882812, "logits/chosen": -0.7175915241241455, "logits/rejected": -1.645504117012024, "logps/chosen": -1.3638975620269775, "logps/rejected": -2.85831880569458, "loss": 1.3821, "nll_loss": 1.3654155731201172, "rewards/accuracies": 1.0, "rewards/chosen": -0.13638976216316223, "rewards/margins": 0.14944210648536682, "rewards/rejected": -0.28583186864852905, "step": 757 }, { "epoch": 2.0883672764929235, "grad_norm": 0.22167526185512543, "learning_rate": 1.2871972000823197e-06, "log_odds_chosen": 1.7270795106887817, "log_odds_ratio": -0.1653687059879303, "logits/chosen": -0.6410531997680664, "logits/rejected": -1.518877387046814, "logps/chosen": -1.366107702255249, "logps/rejected": -2.857667922973633, "loss": 1.3955, "nll_loss": 1.3789598941802979, "rewards/accuracies": 1.0, "rewards/chosen": -0.13661079108715057, "rewards/margins": 0.14915600419044495, "rewards/rejected": -0.2857667803764343, "step": 758 }, { "epoch": 2.0911287538833276, "grad_norm": 0.2042517364025116, "learning_rate": 1.2801953923464139e-06, "log_odds_chosen": 1.6421754360198975, "log_odds_ratio": -0.21181078255176544, "logits/chosen": -0.6920580267906189, "logits/rejected": -1.2391947507858276, "logps/chosen": -1.365949034690857, "logps/rejected": -2.7887167930603027, "loss": 1.4111, "nll_loss": 1.3898826837539673, "rewards/accuracies": 1.0, "rewards/chosen": -0.13659492135047913, "rewards/margins": 0.14227676391601562, "rewards/rejected": -0.27887168526649475, "step": 759 }, { "epoch": 2.0938902312737313, "grad_norm": 0.23130136728286743, "learning_rate": 1.2732061199901563e-06, "log_odds_chosen": 1.554824948310852, "log_odds_ratio": -0.2032986879348755, "logits/chosen": -0.638692319393158, "logits/rejected": -1.1528555154800415, "logps/chosen": -1.4463180303573608, "logps/rejected": -2.794060230255127, "loss": 1.475, "nll_loss": 1.4546287059783936, "rewards/accuracies": 1.0, "rewards/chosen": -0.14463180303573608, "rewards/margins": 0.13477420806884766, "rewards/rejected": -0.27940601110458374, "step": 760 }, { "epoch": 2.0966517086641354, "grad_norm": 0.23756970465183258, "learning_rate": 1.266229454839133e-06, "log_odds_chosen": 1.5451769828796387, "log_odds_ratio": -0.2032744586467743, "logits/chosen": -0.725233256816864, "logits/rejected": -1.2763677835464478, "logps/chosen": -1.3985607624053955, "logps/rejected": -2.7310702800750732, "loss": 1.4177, "nll_loss": 1.3973684310913086, "rewards/accuracies": 1.0, "rewards/chosen": -0.13985608518123627, "rewards/margins": 0.13325095176696777, "rewards/rejected": -0.27310702204704285, "step": 761 }, { "epoch": 2.099413186054539, "grad_norm": 0.29439690709114075, "learning_rate": 1.2592654685893757e-06, "log_odds_chosen": 1.8006912469863892, "log_odds_ratio": -0.15753738582134247, "logits/chosen": -0.581764817237854, "logits/rejected": -1.4160394668579102, "logps/chosen": -1.3997222185134888, "logps/rejected": -2.968790054321289, "loss": 1.4238, "nll_loss": 1.4080818891525269, "rewards/accuracies": 1.0, "rewards/chosen": -0.13997222483158112, "rewards/margins": 0.15690679848194122, "rewards/rejected": -0.29687902331352234, "step": 762 }, { "epoch": 2.102174663444943, "grad_norm": 0.28845125436782837, "learning_rate": 1.252314232806615e-06, "log_odds_chosen": 1.7727992534637451, "log_odds_ratio": -0.1816820651292801, "logits/chosen": -0.7264664769172668, "logits/rejected": -1.5648151636123657, "logps/chosen": -1.4663630723953247, "logps/rejected": -3.028686046600342, "loss": 1.4677, "nll_loss": 1.4494929313659668, "rewards/accuracies": 1.0, "rewards/chosen": -0.14663633704185486, "rewards/margins": 0.1562322974205017, "rewards/rejected": -0.3028685748577118, "step": 763 }, { "epoch": 2.104936140835347, "grad_norm": 0.2214813530445099, "learning_rate": 1.2453758189255568e-06, "log_odds_chosen": 1.6121776103973389, "log_odds_ratio": -0.1847487837076187, "logits/chosen": -0.6465283632278442, "logits/rejected": -1.612844467163086, "logps/chosen": -1.381580114364624, "logps/rejected": -2.7677597999572754, "loss": 1.4082, "nll_loss": 1.389732837677002, "rewards/accuracies": 1.0, "rewards/chosen": -0.13815802335739136, "rewards/margins": 0.13861796259880066, "rewards/rejected": -0.276775985956192, "step": 764 }, { "epoch": 2.107697618225751, "grad_norm": 0.22107169032096863, "learning_rate": 1.2384502982491359e-06, "log_odds_chosen": 1.815138578414917, "log_odds_ratio": -0.1663791388273239, "logits/chosen": -0.6659849882125854, "logits/rejected": -1.6195026636123657, "logps/chosen": -1.508223295211792, "logps/rejected": -3.122704267501831, "loss": 1.5323, "nll_loss": 1.5157110691070557, "rewards/accuracies": 1.0, "rewards/chosen": -0.15082234144210815, "rewards/margins": 0.16144809126853943, "rewards/rejected": -0.3122704327106476, "step": 765 }, { "epoch": 2.1104590956161546, "grad_norm": 0.23909150063991547, "learning_rate": 1.231537741947795e-06, "log_odds_chosen": 1.5823622941970825, "log_odds_ratio": -0.19673101603984833, "logits/chosen": -0.6739899516105652, "logits/rejected": -1.3748270273208618, "logps/chosen": -1.4273467063903809, "logps/rejected": -2.796895742416382, "loss": 1.4555, "nll_loss": 1.4358404874801636, "rewards/accuracies": 1.0, "rewards/chosen": -0.14273467659950256, "rewards/margins": 0.13695493340492249, "rewards/rejected": -0.27968961000442505, "step": 766 }, { "epoch": 2.1132205730065583, "grad_norm": 0.21944522857666016, "learning_rate": 1.2246382210587432e-06, "log_odds_chosen": 1.7679436206817627, "log_odds_ratio": -0.16243533790111542, "logits/chosen": -0.6667824387550354, "logits/rejected": -1.563746452331543, "logps/chosen": -1.4124077558517456, "logps/rejected": -2.9556376934051514, "loss": 1.434, "nll_loss": 1.4177988767623901, "rewards/accuracies": 1.0, "rewards/chosen": -0.14124079048633575, "rewards/margins": 0.15432299673557281, "rewards/rejected": -0.2955637574195862, "step": 767 }, { "epoch": 2.1159820503969624, "grad_norm": 0.2131681889295578, "learning_rate": 1.217751806485235e-06, "log_odds_chosen": 1.739852786064148, "log_odds_ratio": -0.17105865478515625, "logits/chosen": -0.688417375087738, "logits/rejected": -1.5048264265060425, "logps/chosen": -1.3845089673995972, "logps/rejected": -2.8921008110046387, "loss": 1.4113, "nll_loss": 1.394192099571228, "rewards/accuracies": 1.0, "rewards/chosen": -0.13845090568065643, "rewards/margins": 0.15075917541980743, "rewards/rejected": -0.28921011090278625, "step": 768 }, { "epoch": 2.118743527787366, "grad_norm": 0.2258172482252121, "learning_rate": 1.2108785689958337e-06, "log_odds_chosen": 1.5787158012390137, "log_odds_ratio": -0.19934047758579254, "logits/chosen": -0.5557816028594971, "logits/rejected": -1.35032057762146, "logps/chosen": -1.4583086967468262, "logps/rejected": -2.8342854976654053, "loss": 1.4793, "nll_loss": 1.4593558311462402, "rewards/accuracies": 1.0, "rewards/chosen": -0.14583086967468262, "rewards/margins": 0.1375976800918579, "rewards/rejected": -0.2834285497665405, "step": 769 }, { "epoch": 2.12150500517777, "grad_norm": 0.21874947845935822, "learning_rate": 1.2040185792236874e-06, "log_odds_chosen": 1.7133225202560425, "log_odds_ratio": -0.1765982210636139, "logits/chosen": -0.6616573333740234, "logits/rejected": -1.369747519493103, "logps/chosen": -1.3503413200378418, "logps/rejected": -2.8248019218444824, "loss": 1.3826, "nll_loss": 1.364945650100708, "rewards/accuracies": 1.0, "rewards/chosen": -0.13503412902355194, "rewards/margins": 0.14744606614112854, "rewards/rejected": -0.2824801802635193, "step": 770 }, { "epoch": 2.124266482568174, "grad_norm": 0.22342216968536377, "learning_rate": 1.197171907665808e-06, "log_odds_chosen": 1.7739996910095215, "log_odds_ratio": -0.16516156494617462, "logits/chosen": -0.726353108882904, "logits/rejected": -1.3804839849472046, "logps/chosen": -1.395891785621643, "logps/rejected": -2.9393043518066406, "loss": 1.4172, "nll_loss": 1.4006787538528442, "rewards/accuracies": 1.0, "rewards/chosen": -0.13958917558193207, "rewards/margins": 0.15434125065803528, "rewards/rejected": -0.29393044114112854, "step": 771 }, { "epoch": 2.127027959958578, "grad_norm": 0.21872061491012573, "learning_rate": 1.1903386246823363e-06, "log_odds_chosen": 2.091187000274658, "log_odds_ratio": -0.1254856139421463, "logits/chosen": -0.6326149702072144, "logits/rejected": -1.5015616416931152, "logps/chosen": -1.4537407159805298, "logps/rejected": -3.3129756450653076, "loss": 1.47, "nll_loss": 1.4574370384216309, "rewards/accuracies": 1.0, "rewards/chosen": -0.1453741043806076, "rewards/margins": 0.1859234869480133, "rewards/rejected": -0.3312975764274597, "step": 772 }, { "epoch": 2.1297894373489816, "grad_norm": 0.2313195914030075, "learning_rate": 1.1835188004958298e-06, "log_odds_chosen": 1.8796064853668213, "log_odds_ratio": -0.1479826271533966, "logits/chosen": -0.7985919713973999, "logits/rejected": -1.536999225616455, "logps/chosen": -1.376255989074707, "logps/rejected": -3.0128173828125, "loss": 1.3963, "nll_loss": 1.381474256515503, "rewards/accuracies": 1.0, "rewards/chosen": -0.137625589966774, "rewards/margins": 0.16365613043308258, "rewards/rejected": -0.30128172039985657, "step": 773 }, { "epoch": 2.1325509147393857, "grad_norm": 0.25985774397850037, "learning_rate": 1.1767125051905314e-06, "log_odds_chosen": 1.7584837675094604, "log_odds_ratio": -0.17202264070510864, "logits/chosen": -0.7673121690750122, "logits/rejected": -1.5591264963150024, "logps/chosen": -1.3487608432769775, "logps/rejected": -2.865849733352661, "loss": 1.3915, "nll_loss": 1.3743022680282593, "rewards/accuracies": 1.0, "rewards/chosen": -0.13487608730793, "rewards/margins": 0.15170887112617493, "rewards/rejected": -0.2865849733352661, "step": 774 }, { "epoch": 2.1353123921297894, "grad_norm": 0.20605385303497314, "learning_rate": 1.169919808711659e-06, "log_odds_chosen": 1.748727560043335, "log_odds_ratio": -0.16438297927379608, "logits/chosen": -0.6482577323913574, "logits/rejected": -1.4626585245132446, "logps/chosen": -1.4660745859146118, "logps/rejected": -3.0015721321105957, "loss": 1.4861, "nll_loss": 1.4696786403656006, "rewards/accuracies": 1.0, "rewards/chosen": -0.14660745859146118, "rewards/margins": 0.15354973077774048, "rewards/rejected": -0.30015721917152405, "step": 775 }, { "epoch": 2.138073869520193, "grad_norm": 0.22998836636543274, "learning_rate": 1.1631407808646758e-06, "log_odds_chosen": 1.8879942893981934, "log_odds_ratio": -0.14921978116035461, "logits/chosen": -0.7377246618270874, "logits/rejected": -1.4987722635269165, "logps/chosen": -1.4832043647766113, "logps/rejected": -3.1577095985412598, "loss": 1.4948, "nll_loss": 1.4799178838729858, "rewards/accuracies": 1.0, "rewards/chosen": -0.14832043647766113, "rewards/margins": 0.16745048761367798, "rewards/rejected": -0.3157709240913391, "step": 776 }, { "epoch": 2.140835346910597, "grad_norm": 0.2312014400959015, "learning_rate": 1.156375491314585e-06, "log_odds_chosen": 1.778292179107666, "log_odds_ratio": -0.16804289817810059, "logits/chosen": -0.7264923453330994, "logits/rejected": -1.5217747688293457, "logps/chosen": -1.417649269104004, "logps/rejected": -2.973391532897949, "loss": 1.4366, "nll_loss": 1.4198280572891235, "rewards/accuracies": 1.0, "rewards/chosen": -0.14176492393016815, "rewards/margins": 0.1555742472410202, "rewards/rejected": -0.29733917117118835, "step": 777 }, { "epoch": 2.143596824301001, "grad_norm": 0.2197488397359848, "learning_rate": 1.1496240095852002e-06, "log_odds_chosen": 1.8204808235168457, "log_odds_ratio": -0.1687086969614029, "logits/chosen": -0.6910520195960999, "logits/rejected": -1.582482933998108, "logps/chosen": -1.4920127391815186, "logps/rejected": -3.107755184173584, "loss": 1.5153, "nll_loss": 1.4984556436538696, "rewards/accuracies": 1.0, "rewards/chosen": -0.14920127391815186, "rewards/margins": 0.16157422959804535, "rewards/rejected": -0.3107755184173584, "step": 778 }, { "epoch": 2.146358301691405, "grad_norm": 0.19428651034832, "learning_rate": 1.142886405058445e-06, "log_odds_chosen": 1.9751733541488647, "log_odds_ratio": -0.15449506044387817, "logits/chosen": -0.6920450329780579, "logits/rejected": -1.6103492975234985, "logps/chosen": -1.4166853427886963, "logps/rejected": -3.164785146713257, "loss": 1.4369, "nll_loss": 1.4214531183242798, "rewards/accuracies": 1.0, "rewards/chosen": -0.14166852831840515, "rewards/margins": 0.17480997741222382, "rewards/rejected": -0.31647852063179016, "step": 779 }, { "epoch": 2.1491197790818086, "grad_norm": 0.2259286642074585, "learning_rate": 1.1361627469736286e-06, "log_odds_chosen": 1.5404680967330933, "log_odds_ratio": -0.20875439047813416, "logits/chosen": -0.6094971299171448, "logits/rejected": -1.4675345420837402, "logps/chosen": -1.4678707122802734, "logps/rejected": -2.813509464263916, "loss": 1.5092, "nll_loss": 1.4883314371109009, "rewards/accuracies": 1.0, "rewards/chosen": -0.14678707718849182, "rewards/margins": 0.1345638781785965, "rewards/rejected": -0.2813509404659271, "step": 780 }, { "epoch": 2.1518812564722127, "grad_norm": 0.22231267392635345, "learning_rate": 1.1294531044267418e-06, "log_odds_chosen": 1.4947420358657837, "log_odds_ratio": -0.215992733836174, "logits/chosen": -0.6394674777984619, "logits/rejected": -1.3283731937408447, "logps/chosen": -1.4447788000106812, "logps/rejected": -2.739471435546875, "loss": 1.4606, "nll_loss": 1.4390475749969482, "rewards/accuracies": 1.0, "rewards/chosen": -0.14447790384292603, "rewards/margins": 0.12946924567222595, "rewards/rejected": -0.273947149515152, "step": 781 }, { "epoch": 2.1546427338626164, "grad_norm": 0.2176862359046936, "learning_rate": 1.122757546369744e-06, "log_odds_chosen": 1.6393433809280396, "log_odds_ratio": -0.1933254450559616, "logits/chosen": -0.7200911045074463, "logits/rejected": -1.2375181913375854, "logps/chosen": -1.447574496269226, "logps/rejected": -2.8759191036224365, "loss": 1.474, "nll_loss": 1.4546246528625488, "rewards/accuracies": 1.0, "rewards/chosen": -0.1447574496269226, "rewards/margins": 0.14283446967601776, "rewards/rejected": -0.28759193420410156, "step": 782 }, { "epoch": 2.1574042112530205, "grad_norm": 0.2970297336578369, "learning_rate": 1.1160761416098518e-06, "log_odds_chosen": 1.7072476148605347, "log_odds_ratio": -0.18795332312583923, "logits/chosen": -0.7140779495239258, "logits/rejected": -1.3339444398880005, "logps/chosen": -1.4725708961486816, "logps/rejected": -2.9791150093078613, "loss": 1.476, "nll_loss": 1.4572080373764038, "rewards/accuracies": 1.0, "rewards/chosen": -0.14725708961486816, "rewards/margins": 0.1506544053554535, "rewards/rejected": -0.29791149497032166, "step": 783 }, { "epoch": 2.160165688643424, "grad_norm": 0.2440439611673355, "learning_rate": 1.1094089588088383e-06, "log_odds_chosen": 2.0397443771362305, "log_odds_ratio": -0.13796721398830414, "logits/chosen": -0.6706158518791199, "logits/rejected": -1.417219877243042, "logps/chosen": -1.4731453657150269, "logps/rejected": -3.2909488677978516, "loss": 1.4898, "nll_loss": 1.4760286808013916, "rewards/accuracies": 1.0, "rewards/chosen": -0.14731454849243164, "rewards/margins": 0.1817803680896759, "rewards/rejected": -0.32909488677978516, "step": 784 }, { "epoch": 2.1629271660338283, "grad_norm": 0.22097818553447723, "learning_rate": 1.1027560664823208e-06, "log_odds_chosen": 1.7268452644348145, "log_odds_ratio": -0.1696825921535492, "logits/chosen": -0.6993489265441895, "logits/rejected": -1.5169973373413086, "logps/chosen": -1.4171922206878662, "logps/rejected": -2.9188015460968018, "loss": 1.4386, "nll_loss": 1.4215881824493408, "rewards/accuracies": 1.0, "rewards/chosen": -0.14171923696994781, "rewards/margins": 0.15016093850135803, "rewards/rejected": -0.29188019037246704, "step": 785 }, { "epoch": 2.165688643424232, "grad_norm": 0.21593864262104034, "learning_rate": 1.0961175329990631e-06, "log_odds_chosen": 2.133613109588623, "log_odds_ratio": -0.12089455127716064, "logits/chosen": -0.6076424717903137, "logits/rejected": -1.537540078163147, "logps/chosen": -1.465749979019165, "logps/rejected": -3.3683509826660156, "loss": 1.4713, "nll_loss": 1.4591611623764038, "rewards/accuracies": 1.0, "rewards/chosen": -0.14657500386238098, "rewards/margins": 0.19026008248329163, "rewards/rejected": -0.3368350863456726, "step": 786 }, { "epoch": 2.1684501208146356, "grad_norm": 0.2135252058506012, "learning_rate": 1.0894934265802675e-06, "log_odds_chosen": 1.7045549154281616, "log_odds_ratio": -0.20010031759738922, "logits/chosen": -0.6813486814498901, "logits/rejected": -1.3304309844970703, "logps/chosen": -1.4754891395568848, "logps/rejected": -2.9836699962615967, "loss": 1.4924, "nll_loss": 1.4723405838012695, "rewards/accuracies": 1.0, "rewards/chosen": -0.14754891395568848, "rewards/margins": 0.15081806480884552, "rewards/rejected": -0.2983669936656952, "step": 787 }, { "epoch": 2.1712115982050397, "grad_norm": 0.20768733322620392, "learning_rate": 1.082883815298876e-06, "log_odds_chosen": 1.5430421829223633, "log_odds_ratio": -0.21051926910877228, "logits/chosen": -0.7255920171737671, "logits/rejected": -1.3659623861312866, "logps/chosen": -1.3575688600540161, "logps/rejected": -2.6764371395111084, "loss": 1.3936, "nll_loss": 1.372506022453308, "rewards/accuracies": 1.0, "rewards/chosen": -0.13575690984725952, "rewards/margins": 0.13188683986663818, "rewards/rejected": -0.2676437199115753, "step": 788 }, { "epoch": 2.1739730755954434, "grad_norm": 0.2235284298658371, "learning_rate": 1.0762887670788702e-06, "log_odds_chosen": 1.5856214761734009, "log_odds_ratio": -0.20439858734607697, "logits/chosen": -0.63809734582901, "logits/rejected": -1.2384825944900513, "logps/chosen": -1.402519941329956, "logps/rejected": -2.773668050765991, "loss": 1.4518, "nll_loss": 1.43135666847229, "rewards/accuracies": 1.0, "rewards/chosen": -0.1402519792318344, "rewards/margins": 0.13711482286453247, "rewards/rejected": -0.2773668169975281, "step": 789 }, { "epoch": 2.1767345529858475, "grad_norm": 0.2250007539987564, "learning_rate": 1.0697083496945766e-06, "log_odds_chosen": 1.6885381937026978, "log_odds_ratio": -0.17479003965854645, "logits/chosen": -0.5647572875022888, "logits/rejected": -1.3688006401062012, "logps/chosen": -1.3765947818756104, "logps/rejected": -2.8349385261535645, "loss": 1.4156, "nll_loss": 1.3981612920761108, "rewards/accuracies": 1.0, "rewards/chosen": -0.13765949010849, "rewards/margins": 0.14583437144756317, "rewards/rejected": -0.28349384665489197, "step": 790 }, { "epoch": 2.179496030376251, "grad_norm": 0.23045428097248077, "learning_rate": 1.063142630769963e-06, "log_odds_chosen": 1.5515148639678955, "log_odds_ratio": -0.20182965695858002, "logits/chosen": -0.6778091192245483, "logits/rejected": -1.4308048486709595, "logps/chosen": -1.3463788032531738, "logps/rejected": -2.666795492172241, "loss": 1.3856, "nll_loss": 1.3654264211654663, "rewards/accuracies": 1.0, "rewards/chosen": -0.13463789224624634, "rewards/margins": 0.13204166293144226, "rewards/rejected": -0.266679584980011, "step": 791 }, { "epoch": 2.1822575077666553, "grad_norm": 0.22130078077316284, "learning_rate": 1.0565916777779519e-06, "log_odds_chosen": 1.693711519241333, "log_odds_ratio": -0.17481404542922974, "logits/chosen": -0.6389051675796509, "logits/rejected": -1.389184594154358, "logps/chosen": -1.4762823581695557, "logps/rejected": -2.961742877960205, "loss": 1.4927, "nll_loss": 1.4752366542816162, "rewards/accuracies": 1.0, "rewards/chosen": -0.14762824773788452, "rewards/margins": 0.14854606986045837, "rewards/rejected": -0.2961743175983429, "step": 792 }, { "epoch": 2.185018985157059, "grad_norm": 0.21255378425121307, "learning_rate": 1.0500555580397193e-06, "log_odds_chosen": 2.018782615661621, "log_odds_ratio": -0.14547225832939148, "logits/chosen": -0.6314041018486023, "logits/rejected": -1.3904365301132202, "logps/chosen": -1.414720892906189, "logps/rejected": -3.201308012008667, "loss": 1.4353, "nll_loss": 1.42076575756073, "rewards/accuracies": 1.0, "rewards/chosen": -0.14147208631038666, "rewards/margins": 0.17865869402885437, "rewards/rejected": -0.32013076543807983, "step": 793 }, { "epoch": 2.187780462547463, "grad_norm": 0.23878920078277588, "learning_rate": 1.04353433872401e-06, "log_odds_chosen": 1.61065673828125, "log_odds_ratio": -0.18626853823661804, "logits/chosen": -0.6909284591674805, "logits/rejected": -1.4615914821624756, "logps/chosen": -1.4604319334030151, "logps/rejected": -2.865694761276245, "loss": 1.4706, "nll_loss": 1.4520207643508911, "rewards/accuracies": 1.0, "rewards/chosen": -0.14604318141937256, "rewards/margins": 0.14052629470825195, "rewards/rejected": -0.2865694761276245, "step": 794 }, { "epoch": 2.1905419399378667, "grad_norm": 0.2130230814218521, "learning_rate": 1.0370280868464405e-06, "log_odds_chosen": 2.15653657913208, "log_odds_ratio": -0.1305469423532486, "logits/chosen": -0.6620782017707825, "logits/rejected": -1.6787021160125732, "logps/chosen": -1.4217039346694946, "logps/rejected": -3.339536666870117, "loss": 1.4306, "nll_loss": 1.4175418615341187, "rewards/accuracies": 1.0, "rewards/chosen": -0.14217039942741394, "rewards/margins": 0.19178327918052673, "rewards/rejected": -0.3339536786079407, "step": 795 }, { "epoch": 2.193303417328271, "grad_norm": 0.23843464255332947, "learning_rate": 1.0305368692688175e-06, "log_odds_chosen": 1.962391972541809, "log_odds_ratio": -0.14453676342964172, "logits/chosen": -0.6452916264533997, "logits/rejected": -1.4301228523254395, "logps/chosen": -1.3313332796096802, "logps/rejected": -3.036635637283325, "loss": 1.3541, "nll_loss": 1.3396140336990356, "rewards/accuracies": 1.0, "rewards/chosen": -0.13313333690166473, "rewards/margins": 0.17053021490573883, "rewards/rejected": -0.30366355180740356, "step": 796 }, { "epoch": 2.1960648947186745, "grad_norm": 0.25129440426826477, "learning_rate": 1.0240607526984437e-06, "log_odds_chosen": 2.080508232116699, "log_odds_ratio": -0.13161601126194, "logits/chosen": -0.735266387462616, "logits/rejected": -1.5201730728149414, "logps/chosen": -1.398453712463379, "logps/rejected": -3.239894151687622, "loss": 1.4129, "nll_loss": 1.399692177772522, "rewards/accuracies": 1.0, "rewards/chosen": -0.1398453712463379, "rewards/margins": 0.18414406478405, "rewards/rejected": -0.3239894509315491, "step": 797 }, { "epoch": 2.198826372109078, "grad_norm": 0.23414210975170135, "learning_rate": 1.0175998036874357e-06, "log_odds_chosen": 1.7237422466278076, "log_odds_ratio": -0.17014260590076447, "logits/chosen": -0.7563005685806274, "logits/rejected": -1.409740924835205, "logps/chosen": -1.3735893964767456, "logps/rejected": -2.8649048805236816, "loss": 1.3966, "nll_loss": 1.3795509338378906, "rewards/accuracies": 1.0, "rewards/chosen": -0.13735894858837128, "rewards/margins": 0.14913155138492584, "rewards/rejected": -0.2864904999732971, "step": 798 }, { "epoch": 2.2015878494994823, "grad_norm": 0.24155975878238678, "learning_rate": 1.0111540886320433e-06, "log_odds_chosen": 1.7105250358581543, "log_odds_ratio": -0.16820327937602997, "logits/chosen": -0.6661118268966675, "logits/rejected": -1.3397963047027588, "logps/chosen": -1.4416940212249756, "logps/rejected": -2.9333674907684326, "loss": 1.4623, "nll_loss": 1.4454357624053955, "rewards/accuracies": 1.0, "rewards/chosen": -0.144169420003891, "rewards/margins": 0.14916735887527466, "rewards/rejected": -0.29333674907684326, "step": 799 }, { "epoch": 2.204349326889886, "grad_norm": 0.25716984272003174, "learning_rate": 1.00472367377196e-06, "log_odds_chosen": 1.631860375404358, "log_odds_ratio": -0.19165007770061493, "logits/chosen": -0.7379617094993591, "logits/rejected": -1.3591991662979126, "logps/chosen": -1.428329586982727, "logps/rejected": -2.8428709506988525, "loss": 1.436, "nll_loss": 1.4168293476104736, "rewards/accuracies": 1.0, "rewards/chosen": -0.14283296465873718, "rewards/margins": 0.14145414531230927, "rewards/rejected": -0.28428712487220764, "step": 800 }, { "epoch": 2.20711080428029, "grad_norm": 0.20880986750125885, "learning_rate": 9.983086251896493e-07, "log_odds_chosen": 1.8200026750564575, "log_odds_ratio": -0.16156738996505737, "logits/chosen": -0.7508988380432129, "logits/rejected": -1.405930995941162, "logps/chosen": -1.4232152700424194, "logps/rejected": -3.019883632659912, "loss": 1.4318, "nll_loss": 1.4156582355499268, "rewards/accuracies": 1.0, "rewards/chosen": -0.14232154190540314, "rewards/margins": 0.15966683626174927, "rewards/rejected": -0.3019883632659912, "step": 801 }, { "epoch": 2.2098722816706937, "grad_norm": 0.22158882021903992, "learning_rate": 9.91909008809659e-07, "log_odds_chosen": 1.7073739767074585, "log_odds_ratio": -0.19637973606586456, "logits/chosen": -0.6305781602859497, "logits/rejected": -1.481411337852478, "logps/chosen": -1.4764958620071411, "logps/rejected": -2.9831013679504395, "loss": 1.4898, "nll_loss": 1.4701651334762573, "rewards/accuracies": 1.0, "rewards/chosen": -0.1476495862007141, "rewards/margins": 0.15066054463386536, "rewards/rejected": -0.2983101010322571, "step": 802 }, { "epoch": 2.212633759061098, "grad_norm": 0.23141610622406006, "learning_rate": 9.855248903979505e-07, "log_odds_chosen": 1.726916790008545, "log_odds_ratio": -0.16445443034172058, "logits/chosen": -0.719879686832428, "logits/rejected": -1.4791364669799805, "logps/chosen": -1.4652245044708252, "logps/rejected": -2.978423833847046, "loss": 1.492, "nll_loss": 1.4755498170852661, "rewards/accuracies": 1.0, "rewards/chosen": -0.14652244746685028, "rewards/margins": 0.1513199359178543, "rewards/rejected": -0.2978423833847046, "step": 803 }, { "epoch": 2.2153952364515015, "grad_norm": 0.2521270215511322, "learning_rate": 9.791563355612171e-07, "log_odds_chosen": 1.5406780242919922, "log_odds_ratio": -0.20107091963291168, "logits/chosen": -0.616162121295929, "logits/rejected": -1.395881175994873, "logps/chosen": -1.5550295114517212, "logps/rejected": -2.9151062965393066, "loss": 1.5668, "nll_loss": 1.5467073917388916, "rewards/accuracies": 1.0, "rewards/chosen": -0.15550296008586884, "rewards/margins": 0.1360076516866684, "rewards/rejected": -0.2915106415748596, "step": 804 }, { "epoch": 2.2181567138419056, "grad_norm": 0.22672006487846375, "learning_rate": 9.728034097462144e-07, "log_odds_chosen": 2.2885305881500244, "log_odds_ratio": -0.11347518116235733, "logits/chosen": -0.6619569063186646, "logits/rejected": -1.5772463083267212, "logps/chosen": -1.3924167156219482, "logps/rejected": -3.4299628734588623, "loss": 1.4265, "nll_loss": 1.4151071310043335, "rewards/accuracies": 1.0, "rewards/chosen": -0.13924165070056915, "rewards/margins": 0.20375463366508484, "rewards/rejected": -0.3429962992668152, "step": 805 }, { "epoch": 2.2209181912323093, "grad_norm": 0.24770788848400116, "learning_rate": 9.66466178239084e-07, "log_odds_chosen": 1.8905153274536133, "log_odds_ratio": -0.16993935406208038, "logits/chosen": -0.6723768711090088, "logits/rejected": -1.6450607776641846, "logps/chosen": -1.443047285079956, "logps/rejected": -3.1152286529541016, "loss": 1.4514, "nll_loss": 1.4343808889389038, "rewards/accuracies": 1.0, "rewards/chosen": -0.14430472254753113, "rewards/margins": 0.1672181338071823, "rewards/rejected": -0.31152284145355225, "step": 806 }, { "epoch": 2.2236796686227134, "grad_norm": 0.2251327782869339, "learning_rate": 9.601447061646838e-07, "log_odds_chosen": 1.5325648784637451, "log_odds_ratio": -0.2000221312046051, "logits/chosen": -0.8097426891326904, "logits/rejected": -1.3274612426757812, "logps/chosen": -1.384391188621521, "logps/rejected": -2.6999096870422363, "loss": 1.4107, "nll_loss": 1.3906692266464233, "rewards/accuracies": 1.0, "rewards/chosen": -0.1384391337633133, "rewards/margins": 0.1315518468618393, "rewards/rejected": -0.2699909806251526, "step": 807 }, { "epoch": 2.226441146013117, "grad_norm": 0.21888528764247894, "learning_rate": 9.538390584859213e-07, "log_odds_chosen": 1.9272725582122803, "log_odds_ratio": -0.1462123990058899, "logits/chosen": -0.6305885314941406, "logits/rejected": -1.3897682428359985, "logps/chosen": -1.3526287078857422, "logps/rejected": -3.029764413833618, "loss": 1.3872, "nll_loss": 1.3726171255111694, "rewards/accuracies": 1.0, "rewards/chosen": -0.1352628618478775, "rewards/margins": 0.16771353781223297, "rewards/rejected": -0.30297642946243286, "step": 808 }, { "epoch": 2.2292026234035207, "grad_norm": 0.22049713134765625, "learning_rate": 9.47549300003082e-07, "log_odds_chosen": 1.9108651876449585, "log_odds_ratio": -0.144037663936615, "logits/chosen": -0.7778617143630981, "logits/rejected": -1.5370293855667114, "logps/chosen": -1.384128451347351, "logps/rejected": -3.0530431270599365, "loss": 1.4054, "nll_loss": 1.391005039215088, "rewards/accuracies": 1.0, "rewards/chosen": -0.13841284811496735, "rewards/margins": 0.16689150035381317, "rewards/rejected": -0.3053043484687805, "step": 809 }, { "epoch": 2.231964100793925, "grad_norm": 0.23895511031150818, "learning_rate": 9.412754953531664e-07, "log_odds_chosen": 2.207277774810791, "log_odds_ratio": -0.11890644580125809, "logits/chosen": -0.6919041275978088, "logits/rejected": -1.4998337030410767, "logps/chosen": -1.368714690208435, "logps/rejected": -3.315192937850952, "loss": 1.3889, "nll_loss": 1.3770052194595337, "rewards/accuracies": 1.0, "rewards/chosen": -0.13687147200107574, "rewards/margins": 0.19464783370494843, "rewards/rejected": -0.33151930570602417, "step": 810 }, { "epoch": 2.2347255781843285, "grad_norm": 0.22447697818279266, "learning_rate": 9.350177090092233e-07, "log_odds_chosen": 1.961172342300415, "log_odds_ratio": -0.14047497510910034, "logits/chosen": -0.5644626617431641, "logits/rejected": -1.4700074195861816, "logps/chosen": -1.4129343032836914, "logps/rejected": -3.1357169151306152, "loss": 1.4287, "nll_loss": 1.4147019386291504, "rewards/accuracies": 1.0, "rewards/chosen": -0.14129343628883362, "rewards/margins": 0.1722782701253891, "rewards/rejected": -0.3135717213153839, "step": 811 }, { "epoch": 2.2374870555747326, "grad_norm": 0.23871681094169617, "learning_rate": 9.28776005279691e-07, "log_odds_chosen": 1.983229160308838, "log_odds_ratio": -0.14253847301006317, "logits/chosen": -0.7053898572921753, "logits/rejected": -1.6556423902511597, "logps/chosen": -1.4783096313476562, "logps/rejected": -3.2417516708374023, "loss": 1.4814, "nll_loss": 1.467186450958252, "rewards/accuracies": 1.0, "rewards/chosen": -0.14783096313476562, "rewards/margins": 0.17634418606758118, "rewards/rejected": -0.3241751492023468, "step": 812 }, { "epoch": 2.2402485329651363, "grad_norm": 0.2291187047958374, "learning_rate": 9.225504483077311e-07, "log_odds_chosen": 2.0333430767059326, "log_odds_ratio": -0.1376514732837677, "logits/chosen": -0.7155540585517883, "logits/rejected": -1.509591817855835, "logps/chosen": -1.3866668939590454, "logps/rejected": -3.1796817779541016, "loss": 1.397, "nll_loss": 1.383190393447876, "rewards/accuracies": 1.0, "rewards/chosen": -0.13866668939590454, "rewards/margins": 0.17930153012275696, "rewards/rejected": -0.3179682195186615, "step": 813 }, { "epoch": 2.2430100103555404, "grad_norm": 0.21340611577033997, "learning_rate": 9.163411020705762e-07, "log_odds_chosen": 1.6701360940933228, "log_odds_ratio": -0.1889922171831131, "logits/chosen": -0.6479472517967224, "logits/rejected": -1.4593391418457031, "logps/chosen": -1.3769521713256836, "logps/rejected": -2.8227381706237793, "loss": 1.4047, "nll_loss": 1.3857942819595337, "rewards/accuracies": 1.0, "rewards/chosen": -0.13769520819187164, "rewards/margins": 0.14457860589027405, "rewards/rejected": -0.2822738289833069, "step": 814 }, { "epoch": 2.245771487745944, "grad_norm": 0.24716490507125854, "learning_rate": 9.101480303788623e-07, "log_odds_chosen": 1.667574167251587, "log_odds_ratio": -0.18151864409446716, "logits/chosen": -0.6538175344467163, "logits/rejected": -1.1881874799728394, "logps/chosen": -1.383458137512207, "logps/rejected": -2.8226757049560547, "loss": 1.4127, "nll_loss": 1.394594430923462, "rewards/accuracies": 1.0, "rewards/chosen": -0.13834582269191742, "rewards/margins": 0.14392174780368805, "rewards/rejected": -0.28226757049560547, "step": 815 }, { "epoch": 2.248532965136348, "grad_norm": 0.21116560697555542, "learning_rate": 9.039712968759864e-07, "log_odds_chosen": 2.080641984939575, "log_odds_ratio": -0.13355468213558197, "logits/chosen": -0.6301463842391968, "logits/rejected": -1.5739259719848633, "logps/chosen": -1.4324538707733154, "logps/rejected": -3.279850482940674, "loss": 1.4646, "nll_loss": 1.4512317180633545, "rewards/accuracies": 1.0, "rewards/chosen": -0.1432453840970993, "rewards/margins": 0.18473967909812927, "rewards/rejected": -0.32798507809638977, "step": 816 }, { "epoch": 2.251294442526752, "grad_norm": 0.21408426761627197, "learning_rate": 8.978109650374398e-07, "log_odds_chosen": 2.019441604614258, "log_odds_ratio": -0.12933209538459778, "logits/chosen": -0.6689860224723816, "logits/rejected": -1.5236905813217163, "logps/chosen": -1.434238314628601, "logps/rejected": -3.2189900875091553, "loss": 1.4553, "nll_loss": 1.4423965215682983, "rewards/accuracies": 1.0, "rewards/chosen": -0.14342382550239563, "rewards/margins": 0.17847517132759094, "rewards/rejected": -0.3218989968299866, "step": 817 }, { "epoch": 2.254055919917156, "grad_norm": 0.33266690373420715, "learning_rate": 8.916670981701656e-07, "log_odds_chosen": 1.722630500793457, "log_odds_ratio": -0.16768687963485718, "logits/chosen": -0.703956127166748, "logits/rejected": -1.5727592706680298, "logps/chosen": -1.5194380283355713, "logps/rejected": -3.042926073074341, "loss": 1.5194, "nll_loss": 1.5026812553405762, "rewards/accuracies": 1.0, "rewards/chosen": -0.15194381773471832, "rewards/margins": 0.15234880149364471, "rewards/rejected": -0.30429261922836304, "step": 818 }, { "epoch": 2.2568173973075596, "grad_norm": 0.22801189124584198, "learning_rate": 8.855397594118995e-07, "log_odds_chosen": 1.7129765748977661, "log_odds_ratio": -0.18919311463832855, "logits/chosen": -0.6381175518035889, "logits/rejected": -1.472898244857788, "logps/chosen": -1.3879361152648926, "logps/rejected": -2.878157615661621, "loss": 1.4185, "nll_loss": 1.3996291160583496, "rewards/accuracies": 1.0, "rewards/chosen": -0.13879360258579254, "rewards/margins": 0.1490221619606018, "rewards/rejected": -0.28781577944755554, "step": 819 }, { "epoch": 2.2595788746979633, "grad_norm": 0.21346208453178406, "learning_rate": 8.794290117305296e-07, "log_odds_chosen": 1.9627821445465088, "log_odds_ratio": -0.1346716284751892, "logits/chosen": -0.7583671808242798, "logits/rejected": -1.527765154838562, "logps/chosen": -1.3527830839157104, "logps/rejected": -3.060767412185669, "loss": 1.3811, "nll_loss": 1.36765718460083, "rewards/accuracies": 1.0, "rewards/chosen": -0.13527831435203552, "rewards/margins": 0.1707984358072281, "rewards/rejected": -0.3060767352581024, "step": 820 }, { "epoch": 2.2623403520883674, "grad_norm": 0.21708491444587708, "learning_rate": 8.733349179234412e-07, "log_odds_chosen": 1.800171971321106, "log_odds_ratio": -0.1747136116027832, "logits/chosen": -0.7816946506500244, "logits/rejected": -1.3457294702529907, "logps/chosen": -1.3653067350387573, "logps/rejected": -2.929758071899414, "loss": 1.4016, "nll_loss": 1.3840950727462769, "rewards/accuracies": 1.0, "rewards/chosen": -0.13653066754341125, "rewards/margins": 0.15644511580467224, "rewards/rejected": -0.2929757833480835, "step": 821 }, { "epoch": 2.265101829478771, "grad_norm": 0.25856104493141174, "learning_rate": 8.672575406168782e-07, "log_odds_chosen": 1.700215458869934, "log_odds_ratio": -0.18735305964946747, "logits/chosen": -0.6814907193183899, "logits/rejected": -1.1507182121276855, "logps/chosen": -1.454673409461975, "logps/rejected": -2.942253351211548, "loss": 1.4712, "nll_loss": 1.4524434804916382, "rewards/accuracies": 1.0, "rewards/chosen": -0.1454673558473587, "rewards/margins": 0.14875799417495728, "rewards/rejected": -0.2942253351211548, "step": 822 }, { "epoch": 2.267863306869175, "grad_norm": 0.22061756253242493, "learning_rate": 8.611969422652966e-07, "log_odds_chosen": 1.9852367639541626, "log_odds_ratio": -0.16037441790103912, "logits/chosen": -0.6748781204223633, "logits/rejected": -1.3667995929718018, "logps/chosen": -1.3745545148849487, "logps/rejected": -3.124960422515869, "loss": 1.4194, "nll_loss": 1.4033291339874268, "rewards/accuracies": 1.0, "rewards/chosen": -0.13745544850826263, "rewards/margins": 0.1750405877828598, "rewards/rejected": -0.31249600648880005, "step": 823 }, { "epoch": 2.270624784259579, "grad_norm": 0.2141323685646057, "learning_rate": 8.551531851507186e-07, "log_odds_chosen": 1.643298864364624, "log_odds_ratio": -0.1879320740699768, "logits/chosen": -0.6681318283081055, "logits/rejected": -1.25947904586792, "logps/chosen": -1.4017415046691895, "logps/rejected": -2.826103448867798, "loss": 1.4386, "nll_loss": 1.419830560684204, "rewards/accuracies": 1.0, "rewards/chosen": -0.14017415046691895, "rewards/margins": 0.1424362063407898, "rewards/rejected": -0.28261035680770874, "step": 824 }, { "epoch": 2.273386261649983, "grad_norm": 0.22332267463207245, "learning_rate": 8.491263313821021e-07, "log_odds_chosen": 1.7793594598770142, "log_odds_ratio": -0.1738714724779129, "logits/chosen": -0.7445805072784424, "logits/rejected": -1.3957643508911133, "logps/chosen": -1.4328621625900269, "logps/rejected": -2.983755111694336, "loss": 1.4588, "nll_loss": 1.441409945487976, "rewards/accuracies": 1.0, "rewards/chosen": -0.14328621327877045, "rewards/margins": 0.15508933365345, "rewards/rejected": -0.29837551712989807, "step": 825 }, { "epoch": 2.2761477390403866, "grad_norm": 0.2836918830871582, "learning_rate": 8.431164428946928e-07, "log_odds_chosen": 1.8783183097839355, "log_odds_ratio": -0.15984989702701569, "logits/chosen": -0.8311631083488464, "logits/rejected": -1.4585754871368408, "logps/chosen": -1.4197965860366821, "logps/rejected": -3.066434860229492, "loss": 1.4406, "nll_loss": 1.4245829582214355, "rewards/accuracies": 1.0, "rewards/chosen": -0.1419796645641327, "rewards/margins": 0.16466382145881653, "rewards/rejected": -0.3066434860229492, "step": 826 }, { "epoch": 2.2789092164307903, "grad_norm": 0.36856019496917725, "learning_rate": 8.371235814493947e-07, "log_odds_chosen": 2.0193748474121094, "log_odds_ratio": -0.13674215972423553, "logits/chosen": -0.7503007054328918, "logits/rejected": -1.6379897594451904, "logps/chosen": -1.4184544086456299, "logps/rejected": -3.204625368118286, "loss": 1.4412, "nll_loss": 1.4275577068328857, "rewards/accuracies": 1.0, "rewards/chosen": -0.14184542000293732, "rewards/margins": 0.17861713469028473, "rewards/rejected": -0.32046258449554443, "step": 827 }, { "epoch": 2.2816706938211944, "grad_norm": 0.23496706783771515, "learning_rate": 8.311478086321301e-07, "log_odds_chosen": 1.7712256908416748, "log_odds_ratio": -0.16040578484535217, "logits/chosen": -0.6742019057273865, "logits/rejected": -1.4523690938949585, "logps/chosen": -1.3749206066131592, "logps/rejected": -2.910532236099243, "loss": 1.3956, "nll_loss": 1.3795607089996338, "rewards/accuracies": 1.0, "rewards/chosen": -0.1374920755624771, "rewards/margins": 0.15356114506721497, "rewards/rejected": -0.29105323553085327, "step": 828 }, { "epoch": 2.284432171211598, "grad_norm": 0.2047111839056015, "learning_rate": 8.251891858532124e-07, "log_odds_chosen": 1.9578138589859009, "log_odds_ratio": -0.13443009555339813, "logits/chosen": -0.7370569705963135, "logits/rejected": -1.5584810972213745, "logps/chosen": -1.3687400817871094, "logps/rejected": -3.0788469314575195, "loss": 1.3918, "nll_loss": 1.3783162832260132, "rewards/accuracies": 1.0, "rewards/chosen": -0.1368740200996399, "rewards/margins": 0.17101068794727325, "rewards/rejected": -0.30788472294807434, "step": 829 }, { "epoch": 2.287193648602002, "grad_norm": 0.34288427233695984, "learning_rate": 8.192477743467078e-07, "log_odds_chosen": 1.7836705446243286, "log_odds_ratio": -0.17603155970573425, "logits/chosen": -0.801503598690033, "logits/rejected": -1.2663257122039795, "logps/chosen": -1.4529765844345093, "logps/rejected": -3.0252788066864014, "loss": 1.4806, "nll_loss": 1.4629716873168945, "rewards/accuracies": 1.0, "rewards/chosen": -0.14529766142368317, "rewards/margins": 0.1572302281856537, "rewards/rejected": -0.30252787470817566, "step": 830 }, { "epoch": 2.289955125992406, "grad_norm": 0.23711544275283813, "learning_rate": 8.133236351698143e-07, "log_odds_chosen": 1.8252547979354858, "log_odds_ratio": -0.1635345220565796, "logits/chosen": -0.6941596269607544, "logits/rejected": -1.4703350067138672, "logps/chosen": -1.4196501970291138, "logps/rejected": -3.019972324371338, "loss": 1.4509, "nll_loss": 1.4345570802688599, "rewards/accuracies": 1.0, "rewards/chosen": -0.14196503162384033, "rewards/margins": 0.1600322127342224, "rewards/rejected": -0.30199727416038513, "step": 831 }, { "epoch": 2.29271660338281, "grad_norm": 0.24412986636161804, "learning_rate": 8.074168292022269e-07, "log_odds_chosen": 1.767791509628296, "log_odds_ratio": -0.16313113272190094, "logits/chosen": -0.7188095450401306, "logits/rejected": -1.4435595273971558, "logps/chosen": -1.4730356931686401, "logps/rejected": -3.027297019958496, "loss": 1.4911, "nll_loss": 1.4747589826583862, "rewards/accuracies": 1.0, "rewards/chosen": -0.14730356633663177, "rewards/margins": 0.15542614459991455, "rewards/rejected": -0.3027297258377075, "step": 832 }, { "epoch": 2.2954780807732136, "grad_norm": 0.21996836364269257, "learning_rate": 8.015274171455151e-07, "log_odds_chosen": 2.0732696056365967, "log_odds_ratio": -0.13683564960956573, "logits/chosen": -0.6584558486938477, "logits/rejected": -1.5454857349395752, "logps/chosen": -1.40179443359375, "logps/rejected": -3.2378737926483154, "loss": 1.4137, "nll_loss": 1.4000232219696045, "rewards/accuracies": 1.0, "rewards/chosen": -0.14017944037914276, "rewards/margins": 0.18360795080661774, "rewards/rejected": -0.3237873911857605, "step": 833 }, { "epoch": 2.2982395581636177, "grad_norm": 0.2522999346256256, "learning_rate": 7.956554595225017e-07, "log_odds_chosen": 1.7723925113677979, "log_odds_ratio": -0.16435307264328003, "logits/chosen": -0.8220636248588562, "logits/rejected": -1.3743396997451782, "logps/chosen": -1.3158262968063354, "logps/rejected": -2.8384509086608887, "loss": 1.3425, "nll_loss": 1.3260283470153809, "rewards/accuracies": 1.0, "rewards/chosen": -0.13158264756202698, "rewards/margins": 0.15226244926452637, "rewards/rejected": -0.28384509682655334, "step": 834 }, { "epoch": 2.3010010355540214, "grad_norm": 0.24403604865074158, "learning_rate": 7.898010166766348e-07, "log_odds_chosen": 1.926077127456665, "log_odds_ratio": -0.16370324790477753, "logits/chosen": -0.7142081260681152, "logits/rejected": -1.3692036867141724, "logps/chosen": -1.4117451906204224, "logps/rejected": -3.110757350921631, "loss": 1.4333, "nll_loss": 1.4169120788574219, "rewards/accuracies": 1.0, "rewards/chosen": -0.1411745250225067, "rewards/margins": 0.16990122199058533, "rewards/rejected": -0.31107574701309204, "step": 835 }, { "epoch": 2.3037625129444255, "grad_norm": 0.2190071940422058, "learning_rate": 7.839641487713745e-07, "log_odds_chosen": 1.7217925786972046, "log_odds_ratio": -0.17948487401008606, "logits/chosen": -0.708673894405365, "logits/rejected": -1.4397025108337402, "logps/chosen": -1.3636999130249023, "logps/rejected": -2.8528902530670166, "loss": 1.3824, "nll_loss": 1.3644510507583618, "rewards/accuracies": 1.0, "rewards/chosen": -0.136369988322258, "rewards/margins": 0.148919016122818, "rewards/rejected": -0.2852889895439148, "step": 836 }, { "epoch": 2.306523990334829, "grad_norm": 0.22402732074260712, "learning_rate": 7.781449157895677e-07, "log_odds_chosen": 1.8288544416427612, "log_odds_ratio": -0.15313786268234253, "logits/chosen": -0.7850973606109619, "logits/rejected": -1.4632296562194824, "logps/chosen": -1.455615520477295, "logps/rejected": -3.0640172958374023, "loss": 1.4745, "nll_loss": 1.4591940641403198, "rewards/accuracies": 1.0, "rewards/chosen": -0.14556154608726501, "rewards/margins": 0.16084018349647522, "rewards/rejected": -0.30640169978141785, "step": 837 }, { "epoch": 2.309285467725233, "grad_norm": 0.21703249216079712, "learning_rate": 7.723433775328385e-07, "log_odds_chosen": 2.595933437347412, "log_odds_ratio": -0.09295584261417389, "logits/chosen": -0.6684934496879578, "logits/rejected": -1.7044930458068848, "logps/chosen": -1.3342933654785156, "logps/rejected": -3.6580612659454346, "loss": 1.3639, "nll_loss": 1.3546359539031982, "rewards/accuracies": 1.0, "rewards/chosen": -0.13342934846878052, "rewards/margins": 0.2323768138885498, "rewards/rejected": -0.3658061623573303, "step": 838 }, { "epoch": 2.312046945115637, "grad_norm": 0.2520872950553894, "learning_rate": 7.665595936209674e-07, "log_odds_chosen": 1.6445107460021973, "log_odds_ratio": -0.18092487752437592, "logits/chosen": -0.6424407362937927, "logits/rejected": -1.4391764402389526, "logps/chosen": -1.5141448974609375, "logps/rejected": -2.9545748233795166, "loss": 1.5245, "nll_loss": 1.506402850151062, "rewards/accuracies": 1.0, "rewards/chosen": -0.15141448378562927, "rewards/margins": 0.14404302835464478, "rewards/rejected": -0.29545751214027405, "step": 839 }, { "epoch": 2.3148084225060406, "grad_norm": 0.23018507659435272, "learning_rate": 7.607936234912841e-07, "log_odds_chosen": 1.8494082689285278, "log_odds_ratio": -0.16123023629188538, "logits/chosen": -0.7451273202896118, "logits/rejected": -1.39532470703125, "logps/chosen": -1.3400492668151855, "logps/rejected": -2.936408281326294, "loss": 1.3611, "nll_loss": 1.345024585723877, "rewards/accuracies": 1.0, "rewards/chosen": -0.13400493562221527, "rewards/margins": 0.15963591635227203, "rewards/rejected": -0.2936408221721649, "step": 840 }, { "epoch": 2.3175698998964447, "grad_norm": 0.21800625324249268, "learning_rate": 7.550455263980525e-07, "log_odds_chosen": 1.822676181793213, "log_odds_ratio": -0.15284201502799988, "logits/chosen": -0.7327954769134521, "logits/rejected": -1.6014782190322876, "logps/chosen": -1.473343849182129, "logps/rejected": -3.082242012023926, "loss": 1.4879, "nll_loss": 1.4726181030273438, "rewards/accuracies": 1.0, "rewards/chosen": -0.14733438193798065, "rewards/margins": 0.16088984906673431, "rewards/rejected": -0.3082242012023926, "step": 841 }, { "epoch": 2.3203313772868484, "grad_norm": 0.23880954086780548, "learning_rate": 7.493153614118634e-07, "log_odds_chosen": 2.012880325317383, "log_odds_ratio": -0.13126929104328156, "logits/chosen": -0.7932169437408447, "logits/rejected": -1.454060673713684, "logps/chosen": -1.346867561340332, "logps/rejected": -3.1008596420288086, "loss": 1.3859, "nll_loss": 1.3727854490280151, "rewards/accuracies": 1.0, "rewards/chosen": -0.13468675315380096, "rewards/margins": 0.17539921402931213, "rewards/rejected": -0.3100859522819519, "step": 842 }, { "epoch": 2.3230928546772525, "grad_norm": 0.2280740588903427, "learning_rate": 7.436031874190272e-07, "log_odds_chosen": 2.012399435043335, "log_odds_ratio": -0.13420476019382477, "logits/chosen": -0.6295364499092102, "logits/rejected": -1.5652472972869873, "logps/chosen": -1.4837929010391235, "logps/rejected": -3.278496026992798, "loss": 1.5017, "nll_loss": 1.4883002042770386, "rewards/accuracies": 1.0, "rewards/chosen": -0.14837929606437683, "rewards/margins": 0.17947034537792206, "rewards/rejected": -0.3278496265411377, "step": 843 }, { "epoch": 2.325854332067656, "grad_norm": 0.22231662273406982, "learning_rate": 7.379090631209712e-07, "log_odds_chosen": 1.9772436618804932, "log_odds_ratio": -0.13941211998462677, "logits/chosen": -0.7257593870162964, "logits/rejected": -1.5163140296936035, "logps/chosen": -1.352843165397644, "logps/rejected": -3.0788145065307617, "loss": 1.3787, "nll_loss": 1.364717721939087, "rewards/accuracies": 1.0, "rewards/chosen": -0.13528433442115784, "rewards/margins": 0.17259712517261505, "rewards/rejected": -0.3078814446926117, "step": 844 }, { "epoch": 2.3286158094580602, "grad_norm": 0.22180134057998657, "learning_rate": 7.322330470336314e-07, "log_odds_chosen": 1.9030696153640747, "log_odds_ratio": -0.15212993323802948, "logits/chosen": -0.7098222374916077, "logits/rejected": -1.4904342889785767, "logps/chosen": -1.318537950515747, "logps/rejected": -2.9626846313476562, "loss": 1.352, "nll_loss": 1.3368245363235474, "rewards/accuracies": 1.0, "rewards/chosen": -0.13185378909111023, "rewards/margins": 0.1644146740436554, "rewards/rejected": -0.2962684631347656, "step": 845 }, { "epoch": 2.331377286848464, "grad_norm": 0.23776382207870483, "learning_rate": 7.265751974868554e-07, "log_odds_chosen": 1.758538007736206, "log_odds_ratio": -0.1724071055650711, "logits/chosen": -0.6992306709289551, "logits/rejected": -1.2794487476348877, "logps/chosen": -1.3556448221206665, "logps/rejected": -2.8772082328796387, "loss": 1.3915, "nll_loss": 1.3742895126342773, "rewards/accuracies": 1.0, "rewards/chosen": -0.13556447625160217, "rewards/margins": 0.15215636789798737, "rewards/rejected": -0.28772082924842834, "step": 846 }, { "epoch": 2.334138764238868, "grad_norm": 0.23745930194854736, "learning_rate": 7.209355726238024e-07, "log_odds_chosen": 2.016737222671509, "log_odds_ratio": -0.14237958192825317, "logits/chosen": -0.7469203472137451, "logits/rejected": -1.4836128950119019, "logps/chosen": -1.4688273668289185, "logps/rejected": -3.266719102859497, "loss": 1.4708, "nll_loss": 1.4565364122390747, "rewards/accuracies": 1.0, "rewards/chosen": -0.14688274264335632, "rewards/margins": 0.179789200425148, "rewards/rejected": -0.32667192816734314, "step": 847 }, { "epoch": 2.3369002416292717, "grad_norm": 0.22686554491519928, "learning_rate": 7.153142304003419e-07, "log_odds_chosen": 1.8804246187210083, "log_odds_ratio": -0.16532699763774872, "logits/chosen": -0.7108600735664368, "logits/rejected": -1.5009112358093262, "logps/chosen": -1.4534810781478882, "logps/rejected": -3.1220381259918213, "loss": 1.4755, "nll_loss": 1.4589351415634155, "rewards/accuracies": 1.0, "rewards/chosen": -0.14534810185432434, "rewards/margins": 0.16685572266578674, "rewards/rejected": -0.31220385432243347, "step": 848 }, { "epoch": 2.3396617190196753, "grad_norm": 0.2192954570055008, "learning_rate": 7.097112285844643e-07, "log_odds_chosen": 1.8429396152496338, "log_odds_ratio": -0.16627661883831024, "logits/chosen": -0.7136497497558594, "logits/rejected": -1.2857369184494019, "logps/chosen": -1.3913533687591553, "logps/rejected": -3.001941680908203, "loss": 1.4131, "nll_loss": 1.3965197801589966, "rewards/accuracies": 1.0, "rewards/chosen": -0.13913533091545105, "rewards/margins": 0.16105885803699493, "rewards/rejected": -0.3001942038536072, "step": 849 }, { "epoch": 2.3424231964100795, "grad_norm": 0.2234192192554474, "learning_rate": 7.041266247556814e-07, "log_odds_chosen": 1.8960402011871338, "log_odds_ratio": -0.15018339455127716, "logits/chosen": -0.6093388795852661, "logits/rejected": -1.4846446514129639, "logps/chosen": -1.3976705074310303, "logps/rejected": -3.058961868286133, "loss": 1.4178, "nll_loss": 1.4027414321899414, "rewards/accuracies": 1.0, "rewards/chosen": -0.13976703584194183, "rewards/margins": 0.16612914204597473, "rewards/rejected": -0.30589616298675537, "step": 850 }, { "epoch": 2.345184673800483, "grad_norm": 0.22210471332073212, "learning_rate": 6.985604763044382e-07, "log_odds_chosen": 1.8614165782928467, "log_odds_ratio": -0.15925967693328857, "logits/chosen": -0.7381261587142944, "logits/rejected": -1.404652714729309, "logps/chosen": -1.3932688236236572, "logps/rejected": -3.0221099853515625, "loss": 1.4129, "nll_loss": 1.3969311714172363, "rewards/accuracies": 1.0, "rewards/chosen": -0.13932688534259796, "rewards/margins": 0.16288413107395172, "rewards/rejected": -0.3022110164165497, "step": 851 }, { "epoch": 2.3479461511908872, "grad_norm": 0.23071973025798798, "learning_rate": 6.930128404315214e-07, "log_odds_chosen": 1.8302394151687622, "log_odds_ratio": -0.15850433707237244, "logits/chosen": -0.7668700814247131, "logits/rejected": -1.4880889654159546, "logps/chosen": -1.446001410484314, "logps/rejected": -3.056840181350708, "loss": 1.4647, "nll_loss": 1.4488654136657715, "rewards/accuracies": 1.0, "rewards/chosen": -0.14460015296936035, "rewards/margins": 0.1610838770866394, "rewards/rejected": -0.30568403005599976, "step": 852 }, { "epoch": 2.350707628581291, "grad_norm": 0.21466240286827087, "learning_rate": 6.874837741474744e-07, "log_odds_chosen": 1.916505217552185, "log_odds_ratio": -0.15309549868106842, "logits/chosen": -0.7764966487884521, "logits/rejected": -1.4326413869857788, "logps/chosen": -1.3638489246368408, "logps/rejected": -3.0410642623901367, "loss": 1.3888, "nll_loss": 1.3734557628631592, "rewards/accuracies": 1.0, "rewards/chosen": -0.13638490438461304, "rewards/margins": 0.16772153973579407, "rewards/rejected": -0.3041064441204071, "step": 853 }, { "epoch": 2.353469105971695, "grad_norm": 0.23499643802642822, "learning_rate": 6.819733342720067e-07, "log_odds_chosen": 1.9612218141555786, "log_odds_ratio": -0.14202916622161865, "logits/chosen": -0.8191208839416504, "logits/rejected": -1.3780124187469482, "logps/chosen": -1.4065805673599243, "logps/rejected": -3.1329569816589355, "loss": 1.4078, "nll_loss": 1.3936467170715332, "rewards/accuracies": 1.0, "rewards/chosen": -0.14065806567668915, "rewards/margins": 0.17263765633106232, "rewards/rejected": -0.3132956922054291, "step": 854 }, { "epoch": 2.3562305833620987, "grad_norm": 0.2415383756160736, "learning_rate": 6.764815774334149e-07, "log_odds_chosen": 1.653936743736267, "log_odds_ratio": -0.18320457637310028, "logits/chosen": -0.7520954608917236, "logits/rejected": -1.4456123113632202, "logps/chosen": -1.4636807441711426, "logps/rejected": -2.9096784591674805, "loss": 1.4744, "nll_loss": 1.4560686349868774, "rewards/accuracies": 1.0, "rewards/chosen": -0.14636807143688202, "rewards/margins": 0.1445997655391693, "rewards/rejected": -0.29096782207489014, "step": 855 }, { "epoch": 2.3589920607525023, "grad_norm": 0.21849149465560913, "learning_rate": 6.710085600679967e-07, "log_odds_chosen": 1.8925392627716064, "log_odds_ratio": -0.15811075270175934, "logits/chosen": -0.6251943111419678, "logits/rejected": -1.3049890995025635, "logps/chosen": -1.3626008033752441, "logps/rejected": -3.0153450965881348, "loss": 1.3946, "nll_loss": 1.3788237571716309, "rewards/accuracies": 1.0, "rewards/chosen": -0.13626009225845337, "rewards/margins": 0.16527444124221802, "rewards/rejected": -0.3015345335006714, "step": 856 }, { "epoch": 2.3617535381429065, "grad_norm": 0.23991793394088745, "learning_rate": 6.65554338419474e-07, "log_odds_chosen": 1.8524774312973022, "log_odds_ratio": -0.1661483198404312, "logits/chosen": -0.7439674139022827, "logits/rejected": -1.289766788482666, "logps/chosen": -1.4877867698669434, "logps/rejected": -3.1363027095794678, "loss": 1.4959, "nll_loss": 1.4793007373809814, "rewards/accuracies": 1.0, "rewards/chosen": -0.14877867698669434, "rewards/margins": 0.1648516058921814, "rewards/rejected": -0.31363028287887573, "step": 857 }, { "epoch": 2.3645150155333106, "grad_norm": 0.22789804637432098, "learning_rate": 6.601189685384127e-07, "log_odds_chosen": 1.8686882257461548, "log_odds_ratio": -0.15300583839416504, "logits/chosen": -0.7133684754371643, "logits/rejected": -1.3966625928878784, "logps/chosen": -1.4279682636260986, "logps/rejected": -3.070241928100586, "loss": 1.4421, "nll_loss": 1.4268466234207153, "rewards/accuracies": 1.0, "rewards/chosen": -0.1427968442440033, "rewards/margins": 0.16422736644744873, "rewards/rejected": -0.30702418088912964, "step": 858 }, { "epoch": 2.3672764929237142, "grad_norm": 0.24170999228954315, "learning_rate": 6.547025062816487e-07, "log_odds_chosen": 2.15585994720459, "log_odds_ratio": -0.1237705647945404, "logits/chosen": -0.7704872488975525, "logits/rejected": -1.4828965663909912, "logps/chosen": -1.4098989963531494, "logps/rejected": -3.3233230113983154, "loss": 1.4192, "nll_loss": 1.4068257808685303, "rewards/accuracies": 1.0, "rewards/chosen": -0.14098989963531494, "rewards/margins": 0.19134239852428436, "rewards/rejected": -0.3323322832584381, "step": 859 }, { "epoch": 2.370037970314118, "grad_norm": 0.23123739659786224, "learning_rate": 6.493050073117115e-07, "log_odds_chosen": 1.7533104419708252, "log_odds_ratio": -0.16749663650989532, "logits/chosen": -0.7206525206565857, "logits/rejected": -1.4456825256347656, "logps/chosen": -1.3810667991638184, "logps/rejected": -2.898430109024048, "loss": 1.4111, "nll_loss": 1.394317626953125, "rewards/accuracies": 1.0, "rewards/chosen": -0.13810668885707855, "rewards/margins": 0.151736319065094, "rewards/rejected": -0.28984299302101135, "step": 860 }, { "epoch": 2.372799447704522, "grad_norm": 0.22431981563568115, "learning_rate": 6.439265270962538e-07, "log_odds_chosen": 1.934770941734314, "log_odds_ratio": -0.14685390889644623, "logits/chosen": -0.6608517169952393, "logits/rejected": -1.512792944908142, "logps/chosen": -1.3392962217330933, "logps/rejected": -3.020613670349121, "loss": 1.3744, "nll_loss": 1.3596735000610352, "rewards/accuracies": 1.0, "rewards/chosen": -0.13392964005470276, "rewards/margins": 0.1681317389011383, "rewards/rejected": -0.30206137895584106, "step": 861 }, { "epoch": 2.3755609250949257, "grad_norm": 0.214019313454628, "learning_rate": 6.385671209074829e-07, "log_odds_chosen": 2.0845863819122314, "log_odds_ratio": -0.12033503502607346, "logits/chosen": -0.6356188058853149, "logits/rejected": -1.4263606071472168, "logps/chosen": -1.3355646133422852, "logps/rejected": -3.151883840560913, "loss": 1.3562, "nll_loss": 1.3441494703292847, "rewards/accuracies": 1.0, "rewards/chosen": -0.13355647027492523, "rewards/margins": 0.1816319227218628, "rewards/rejected": -0.3151884078979492, "step": 862 }, { "epoch": 2.37832240248533, "grad_norm": 0.25246158242225647, "learning_rate": 6.332268438215878e-07, "log_odds_chosen": 2.1039905548095703, "log_odds_ratio": -0.12250887602567673, "logits/chosen": -0.7237115502357483, "logits/rejected": -1.569027066230774, "logps/chosen": -1.4344761371612549, "logps/rejected": -3.3050599098205566, "loss": 1.4482, "nll_loss": 1.4359971284866333, "rewards/accuracies": 1.0, "rewards/chosen": -0.143447607755661, "rewards/margins": 0.18705838918685913, "rewards/rejected": -0.33050599694252014, "step": 863 }, { "epoch": 2.3810838798757334, "grad_norm": 0.3048895001411438, "learning_rate": 6.279057507181796e-07, "log_odds_chosen": 2.0891194343566895, "log_odds_ratio": -0.12143637239933014, "logits/chosen": -0.7660921812057495, "logits/rejected": -1.422662377357483, "logps/chosen": -1.4185000658035278, "logps/rejected": -3.2688467502593994, "loss": 1.4259, "nll_loss": 1.4137659072875977, "rewards/accuracies": 1.0, "rewards/chosen": -0.14185000956058502, "rewards/margins": 0.18503469228744507, "rewards/rejected": -0.3268846869468689, "step": 864 }, { "epoch": 2.3838453572661376, "grad_norm": 0.23266346752643585, "learning_rate": 6.226038962797218e-07, "log_odds_chosen": 1.8690474033355713, "log_odds_ratio": -0.16027548909187317, "logits/chosen": -0.7030742168426514, "logits/rejected": -1.4343698024749756, "logps/chosen": -1.32356595993042, "logps/rejected": -2.9428305625915527, "loss": 1.3624, "nll_loss": 1.3463884592056274, "rewards/accuracies": 1.0, "rewards/chosen": -0.13235659897327423, "rewards/margins": 0.16192646324634552, "rewards/rejected": -0.29428306221961975, "step": 865 }, { "epoch": 2.386606834656541, "grad_norm": 0.23464691638946533, "learning_rate": 6.17321334990973e-07, "log_odds_chosen": 1.8331468105316162, "log_odds_ratio": -0.15031777322292328, "logits/chosen": -0.6735308170318604, "logits/rejected": -1.4742405414581299, "logps/chosen": -1.422837257385254, "logps/rejected": -3.0269343852996826, "loss": 1.4369, "nll_loss": 1.4218212366104126, "rewards/accuracies": 1.0, "rewards/chosen": -0.14228372275829315, "rewards/margins": 0.16040970385074615, "rewards/rejected": -0.3026934266090393, "step": 866 }, { "epoch": 2.389368312046945, "grad_norm": 0.22783829271793365, "learning_rate": 6.120581211384222e-07, "log_odds_chosen": 2.177433490753174, "log_odds_ratio": -0.14788126945495605, "logits/chosen": -0.6796799302101135, "logits/rejected": -1.447847843170166, "logps/chosen": -1.3840019702911377, "logps/rejected": -3.322763442993164, "loss": 1.4173, "nll_loss": 1.4024797677993774, "rewards/accuracies": 1.0, "rewards/chosen": -0.13840022683143616, "rewards/margins": 0.19387611746788025, "rewards/rejected": -0.3322763442993164, "step": 867 }, { "epoch": 2.392129789437349, "grad_norm": 0.2507234513759613, "learning_rate": 6.068143088097372e-07, "log_odds_chosen": 1.9803252220153809, "log_odds_ratio": -0.1346602588891983, "logits/chosen": -0.7259140610694885, "logits/rejected": -1.5199178457260132, "logps/chosen": -1.361494779586792, "logps/rejected": -3.0922255516052246, "loss": 1.4, "nll_loss": 1.3865290880203247, "rewards/accuracies": 1.0, "rewards/chosen": -0.13614948093891144, "rewards/margins": 0.17307308316230774, "rewards/rejected": -0.309222549200058, "step": 868 }, { "epoch": 2.3948912668277527, "grad_norm": 0.2223178893327713, "learning_rate": 6.015899518932005e-07, "log_odds_chosen": 2.0595333576202393, "log_odds_ratio": -0.1295865774154663, "logits/chosen": -0.7543760538101196, "logits/rejected": -1.4544379711151123, "logps/chosen": -1.3678077459335327, "logps/rejected": -3.1737380027770996, "loss": 1.3782, "nll_loss": 1.365261435508728, "rewards/accuracies": 1.0, "rewards/chosen": -0.1367807686328888, "rewards/margins": 0.18059304356575012, "rewards/rejected": -0.31737378239631653, "step": 869 }, { "epoch": 2.3976527442181568, "grad_norm": 0.2590086758136749, "learning_rate": 5.963851040771639e-07, "log_odds_chosen": 1.9339418411254883, "log_odds_ratio": -0.1566099226474762, "logits/chosen": -0.6920959949493408, "logits/rejected": -1.4544062614440918, "logps/chosen": -1.3701428174972534, "logps/rejected": -3.0655672550201416, "loss": 1.3975, "nll_loss": 1.3818511962890625, "rewards/accuracies": 1.0, "rewards/chosen": -0.13701428472995758, "rewards/margins": 0.16954243183135986, "rewards/rejected": -0.30655673146247864, "step": 870 }, { "epoch": 2.4004142216085604, "grad_norm": 0.24093873798847198, "learning_rate": 5.911998188494924e-07, "log_odds_chosen": 2.186475992202759, "log_odds_ratio": -0.12003044039011002, "logits/chosen": -0.7521616220474243, "logits/rejected": -1.651241421699524, "logps/chosen": -1.4376925230026245, "logps/rejected": -3.3883304595947266, "loss": 1.4389, "nll_loss": 1.4268479347229004, "rewards/accuracies": 1.0, "rewards/chosen": -0.1437692642211914, "rewards/margins": 0.19506379961967468, "rewards/rejected": -0.3388330340385437, "step": 871 }, { "epoch": 2.4031756989989645, "grad_norm": 0.2139623761177063, "learning_rate": 5.860341494970132e-07, "log_odds_chosen": 2.038058280944824, "log_odds_ratio": -0.1349775195121765, "logits/chosen": -0.7193518280982971, "logits/rejected": -1.445834755897522, "logps/chosen": -1.3804961442947388, "logps/rejected": -3.1719768047332764, "loss": 1.4083, "nll_loss": 1.3947699069976807, "rewards/accuracies": 1.0, "rewards/chosen": -0.13804961740970612, "rewards/margins": 0.17914807796478271, "rewards/rejected": -0.31719768047332764, "step": 872 }, { "epoch": 2.405937176389368, "grad_norm": 0.23532477021217346, "learning_rate": 5.808881491049723e-07, "log_odds_chosen": 1.8303382396697998, "log_odds_ratio": -0.16767007112503052, "logits/chosen": -0.6835562586784363, "logits/rejected": -1.241523027420044, "logps/chosen": -1.4320024251937866, "logps/rejected": -3.0420501232147217, "loss": 1.4483, "nll_loss": 1.4315794706344604, "rewards/accuracies": 1.0, "rewards/chosen": -0.14320024847984314, "rewards/margins": 0.16100478172302246, "rewards/rejected": -0.3042050302028656, "step": 873 }, { "epoch": 2.4086986537797723, "grad_norm": 0.21597245335578918, "learning_rate": 5.757618705564849e-07, "log_odds_chosen": 1.991483449935913, "log_odds_ratio": -0.13923542201519012, "logits/chosen": -0.667121410369873, "logits/rejected": -1.4004806280136108, "logps/chosen": -1.359999656677246, "logps/rejected": -3.0989041328430176, "loss": 1.3889, "nll_loss": 1.3749873638153076, "rewards/accuracies": 1.0, "rewards/chosen": -0.13599996268749237, "rewards/margins": 0.17389044165611267, "rewards/rejected": -0.30989041924476624, "step": 874 }, { "epoch": 2.411460131170176, "grad_norm": 0.22084733843803406, "learning_rate": 5.706553665319955e-07, "log_odds_chosen": 2.106426239013672, "log_odds_ratio": -0.13185368478298187, "logits/chosen": -0.6541144847869873, "logits/rejected": -1.5660196542739868, "logps/chosen": -1.4047715663909912, "logps/rejected": -3.271341562271118, "loss": 1.4275, "nll_loss": 1.414359211921692, "rewards/accuracies": 1.0, "rewards/chosen": -0.14047716557979584, "rewards/margins": 0.18665701150894165, "rewards/rejected": -0.3271341919898987, "step": 875 }, { "epoch": 2.41422160856058, "grad_norm": 0.2582799792289734, "learning_rate": 5.655686895087328e-07, "log_odds_chosen": 2.125744342803955, "log_odds_ratio": -0.13128440082073212, "logits/chosen": -0.690162718296051, "logits/rejected": -1.4393540620803833, "logps/chosen": -1.4361943006515503, "logps/rejected": -3.330951690673828, "loss": 1.457, "nll_loss": 1.4439157247543335, "rewards/accuracies": 1.0, "rewards/chosen": -0.14361944794654846, "rewards/margins": 0.18947574496269226, "rewards/rejected": -0.3330951929092407, "step": 876 }, { "epoch": 2.4169830859509838, "grad_norm": 0.24690301716327667, "learning_rate": 5.605018917601754e-07, "log_odds_chosen": 1.907195806503296, "log_odds_ratio": -0.15724773705005646, "logits/chosen": -0.617090106010437, "logits/rejected": -1.4507627487182617, "logps/chosen": -1.426545262336731, "logps/rejected": -3.1035234928131104, "loss": 1.451, "nll_loss": 1.4353033304214478, "rewards/accuracies": 1.0, "rewards/chosen": -0.14265453815460205, "rewards/margins": 0.16769783198833466, "rewards/rejected": -0.3103523254394531, "step": 877 }, { "epoch": 2.4197445633413874, "grad_norm": 0.2848946750164032, "learning_rate": 5.554550253555066e-07, "log_odds_chosen": 1.730438232421875, "log_odds_ratio": -0.16927726566791534, "logits/chosen": -0.7669472694396973, "logits/rejected": -1.1995865106582642, "logps/chosen": -1.3500781059265137, "logps/rejected": -2.8405096530914307, "loss": 1.3948, "nll_loss": 1.3778367042541504, "rewards/accuracies": 1.0, "rewards/chosen": -0.1350078135728836, "rewards/margins": 0.14904318749904633, "rewards/rejected": -0.28405100107192993, "step": 878 }, { "epoch": 2.4225060407317915, "grad_norm": 0.22108380496501923, "learning_rate": 5.504281421590898e-07, "log_odds_chosen": 2.0505218505859375, "log_odds_ratio": -0.14654164016246796, "logits/chosen": -0.7096814513206482, "logits/rejected": -1.4525039196014404, "logps/chosen": -1.3486748933792114, "logps/rejected": -3.150135040283203, "loss": 1.3745, "nll_loss": 1.359838843345642, "rewards/accuracies": 1.0, "rewards/chosen": -0.13486750423908234, "rewards/margins": 0.1801460087299347, "rewards/rejected": -0.3150135278701782, "step": 879 }, { "epoch": 2.425267518122195, "grad_norm": 0.24004238843917847, "learning_rate": 5.454212938299256e-07, "log_odds_chosen": 2.0681121349334717, "log_odds_ratio": -0.1318766325712204, "logits/chosen": -0.8047155737876892, "logits/rejected": -1.4712103605270386, "logps/chosen": -1.4120354652404785, "logps/rejected": -3.240454912185669, "loss": 1.4273, "nll_loss": 1.4140803813934326, "rewards/accuracies": 1.0, "rewards/chosen": -0.14120355248451233, "rewards/margins": 0.1828419268131256, "rewards/rejected": -0.3240455090999603, "step": 880 }, { "epoch": 2.4280289955125993, "grad_norm": 0.20914945006370544, "learning_rate": 5.404345318211294e-07, "log_odds_chosen": 1.9783142805099487, "log_odds_ratio": -0.14570730924606323, "logits/chosen": -0.790532112121582, "logits/rejected": -1.5577503442764282, "logps/chosen": -1.4861538410186768, "logps/rejected": -3.248924493789673, "loss": 1.4882, "nll_loss": 1.4735862016677856, "rewards/accuracies": 1.0, "rewards/chosen": -0.14861539006233215, "rewards/margins": 0.1762770563364029, "rewards/rejected": -0.32489246129989624, "step": 881 }, { "epoch": 2.430790472903003, "grad_norm": 0.250592440366745, "learning_rate": 5.354679073793942e-07, "log_odds_chosen": 1.9158984422683716, "log_odds_ratio": -0.15960346162319183, "logits/chosen": -0.7558537721633911, "logits/rejected": -1.2639480829238892, "logps/chosen": -1.3667840957641602, "logps/rejected": -3.0473833084106445, "loss": 1.3937, "nll_loss": 1.377746820449829, "rewards/accuracies": 1.0, "rewards/chosen": -0.13667839765548706, "rewards/margins": 0.16805997490882874, "rewards/rejected": -0.3047383427619934, "step": 882 }, { "epoch": 2.433551950293407, "grad_norm": 0.2307986617088318, "learning_rate": 5.305214715444726e-07, "log_odds_chosen": 1.5907667875289917, "log_odds_ratio": -0.21429482102394104, "logits/chosen": -0.6395963430404663, "logits/rejected": -1.3173006772994995, "logps/chosen": -1.471729040145874, "logps/rejected": -2.8622403144836426, "loss": 1.4893, "nll_loss": 1.4678597450256348, "rewards/accuracies": 1.0, "rewards/chosen": -0.14717289805412292, "rewards/margins": 0.1390511393547058, "rewards/rejected": -0.28622403740882874, "step": 883 }, { "epoch": 2.4363134276838108, "grad_norm": 0.22951407730579376, "learning_rate": 5.255952751486442e-07, "log_odds_chosen": 1.7555440664291382, "log_odds_ratio": -0.17985780537128448, "logits/chosen": -0.6468096971511841, "logits/rejected": -1.3483422994613647, "logps/chosen": -1.4305295944213867, "logps/rejected": -2.9695870876312256, "loss": 1.4549, "nll_loss": 1.4369113445281982, "rewards/accuracies": 1.0, "rewards/chosen": -0.14305296540260315, "rewards/margins": 0.1539057493209839, "rewards/rejected": -0.29695871472358704, "step": 884 }, { "epoch": 2.439074905074215, "grad_norm": 0.2899661064147949, "learning_rate": 5.20689368816201e-07, "log_odds_chosen": 1.976393461227417, "log_odds_ratio": -0.13636034727096558, "logits/chosen": -0.6773353219032288, "logits/rejected": -1.4898436069488525, "logps/chosen": -1.5012954473495483, "logps/rejected": -3.263596296310425, "loss": 1.5075, "nll_loss": 1.4938145875930786, "rewards/accuracies": 1.0, "rewards/chosen": -0.1501295566558838, "rewards/margins": 0.1762300729751587, "rewards/rejected": -0.3263595998287201, "step": 885 }, { "epoch": 2.4418363824646185, "grad_norm": 0.2315601408481598, "learning_rate": 5.158038029629195e-07, "log_odds_chosen": 1.7780689001083374, "log_odds_ratio": -0.1640690416097641, "logits/chosen": -0.7260640859603882, "logits/rejected": -1.4447975158691406, "logps/chosen": -1.4619710445404053, "logps/rejected": -3.0283594131469727, "loss": 1.4667, "nll_loss": 1.4503343105316162, "rewards/accuracies": 1.0, "rewards/chosen": -0.1461970955133438, "rewards/margins": 0.15663886070251465, "rewards/rejected": -0.30283597111701965, "step": 886 }, { "epoch": 2.4445978598550226, "grad_norm": 0.2502164840698242, "learning_rate": 5.109386277955477e-07, "log_odds_chosen": 1.865227222442627, "log_odds_ratio": -0.1598530411720276, "logits/chosen": -0.6746166348457336, "logits/rejected": -1.4709534645080566, "logps/chosen": -1.3897067308425903, "logps/rejected": -3.023939609527588, "loss": 1.4048, "nll_loss": 1.3888545036315918, "rewards/accuracies": 1.0, "rewards/chosen": -0.13897068798542023, "rewards/margins": 0.16342328488826752, "rewards/rejected": -0.30239397287368774, "step": 887 }, { "epoch": 2.4473593372454263, "grad_norm": 0.2393447905778885, "learning_rate": 5.060938933112891e-07, "log_odds_chosen": 1.754583477973938, "log_odds_ratio": -0.1865866631269455, "logits/chosen": -0.8034491539001465, "logits/rejected": -1.3690917491912842, "logps/chosen": -1.410452127456665, "logps/rejected": -2.9483401775360107, "loss": 1.4369, "nll_loss": 1.4182454347610474, "rewards/accuracies": 1.0, "rewards/chosen": -0.1410452127456665, "rewards/margins": 0.15378880500793457, "rewards/rejected": -0.29483404755592346, "step": 888 }, { "epoch": 2.45012081463583, "grad_norm": 0.23287135362625122, "learning_rate": 5.012696492972852e-07, "log_odds_chosen": 1.831277847290039, "log_odds_ratio": -0.17254289984703064, "logits/chosen": -0.7749835252761841, "logits/rejected": -1.3622221946716309, "logps/chosen": -1.3812397718429565, "logps/rejected": -2.9796550273895264, "loss": 1.4061, "nll_loss": 1.3887989521026611, "rewards/accuracies": 1.0, "rewards/chosen": -0.1381239891052246, "rewards/margins": 0.15984152257442474, "rewards/rejected": -0.29796549677848816, "step": 889 }, { "epoch": 2.452882292026234, "grad_norm": 0.20733584463596344, "learning_rate": 4.964659453301088e-07, "log_odds_chosen": 2.3830249309539795, "log_odds_ratio": -0.10768404603004456, "logits/chosen": -0.599168062210083, "logits/rejected": -1.6268471479415894, "logps/chosen": -1.352327823638916, "logps/rejected": -3.4672396183013916, "loss": 1.3757, "nll_loss": 1.3648960590362549, "rewards/accuracies": 1.0, "rewards/chosen": -0.13523279130458832, "rewards/margins": 0.2114911675453186, "rewards/rejected": -0.3467239439487457, "step": 890 }, { "epoch": 2.4556437694166378, "grad_norm": 0.21920622885227203, "learning_rate": 4.916828307752489e-07, "log_odds_chosen": 1.8195345401763916, "log_odds_ratio": -0.16349290311336517, "logits/chosen": -0.7525181770324707, "logits/rejected": -1.4633281230926514, "logps/chosen": -1.415106177330017, "logps/rejected": -3.0068209171295166, "loss": 1.4276, "nll_loss": 1.4112106561660767, "rewards/accuracies": 1.0, "rewards/chosen": -0.14151060581207275, "rewards/margins": 0.159171462059021, "rewards/rejected": -0.30068206787109375, "step": 891 }, { "epoch": 2.458405246807042, "grad_norm": 0.2449709177017212, "learning_rate": 4.869203547866097e-07, "log_odds_chosen": 1.9815382957458496, "log_odds_ratio": -0.15336796641349792, "logits/chosen": -0.6627548933029175, "logits/rejected": -1.4913512468338013, "logps/chosen": -1.4432140588760376, "logps/rejected": -3.2066502571105957, "loss": 1.452, "nll_loss": 1.4366645812988281, "rewards/accuracies": 1.0, "rewards/chosen": -0.14432139694690704, "rewards/margins": 0.176343634724617, "rewards/rejected": -0.32066506147384644, "step": 892 }, { "epoch": 2.4611667241974455, "grad_norm": 0.23717710375785828, "learning_rate": 4.821785663059991e-07, "log_odds_chosen": 1.9567972421646118, "log_odds_ratio": -0.14491046965122223, "logits/chosen": -0.6873578429222107, "logits/rejected": -1.4887856245040894, "logps/chosen": -1.4284566640853882, "logps/rejected": -3.1556148529052734, "loss": 1.4357, "nll_loss": 1.4212026596069336, "rewards/accuracies": 1.0, "rewards/chosen": -0.14284567534923553, "rewards/margins": 0.17271582782268524, "rewards/rejected": -0.3155614733695984, "step": 893 }, { "epoch": 2.4639282015878496, "grad_norm": 0.24953578412532806, "learning_rate": 4.774575140626317e-07, "log_odds_chosen": 2.0727789402008057, "log_odds_ratio": -0.14257028698921204, "logits/chosen": -0.688542902469635, "logits/rejected": -1.5315947532653809, "logps/chosen": -1.496612548828125, "logps/rejected": -3.357083320617676, "loss": 1.5011, "nll_loss": 1.4868371486663818, "rewards/accuracies": 1.0, "rewards/chosen": -0.14966124296188354, "rewards/margins": 0.18604709208011627, "rewards/rejected": -0.3357083201408386, "step": 894 }, { "epoch": 2.4666896789782533, "grad_norm": 0.3211827576160431, "learning_rate": 4.7275724657262293e-07, "log_odds_chosen": 2.1751515865325928, "log_odds_ratio": -0.12539197504520416, "logits/chosen": -0.6506063938140869, "logits/rejected": -1.5646467208862305, "logps/chosen": -1.3849880695343018, "logps/rejected": -3.3103394508361816, "loss": 1.3927, "nll_loss": 1.3801162242889404, "rewards/accuracies": 1.0, "rewards/chosen": -0.13849881291389465, "rewards/margins": 0.1925351768732071, "rewards/rejected": -0.33103397488594055, "step": 895 }, { "epoch": 2.4694511563686574, "grad_norm": 0.2389291673898697, "learning_rate": 4.6807781213849354e-07, "log_odds_chosen": 2.02905535697937, "log_odds_ratio": -0.1466422975063324, "logits/chosen": -0.7351397275924683, "logits/rejected": -1.468057632446289, "logps/chosen": -1.4022436141967773, "logps/rejected": -3.194044828414917, "loss": 1.4313, "nll_loss": 1.4166133403778076, "rewards/accuracies": 1.0, "rewards/chosen": -0.14022435247898102, "rewards/margins": 0.17918014526367188, "rewards/rejected": -0.3194044828414917, "step": 896 }, { "epoch": 2.472212633759061, "grad_norm": 0.29943838715553284, "learning_rate": 4.63419258848673e-07, "log_odds_chosen": 1.904563546180725, "log_odds_ratio": -0.16220958530902863, "logits/chosen": -0.7450888752937317, "logits/rejected": -1.3831934928894043, "logps/chosen": -1.4067978858947754, "logps/rejected": -3.0835928916931152, "loss": 1.4257, "nll_loss": 1.4095184803009033, "rewards/accuracies": 1.0, "rewards/chosen": -0.14067979156970978, "rewards/margins": 0.16767950356006622, "rewards/rejected": -0.3083593249320984, "step": 897 }, { "epoch": 2.474974111149465, "grad_norm": 0.23424910008907318, "learning_rate": 4.5878163457700327e-07, "log_odds_chosen": 2.1382720470428467, "log_odds_ratio": -0.14067284762859344, "logits/chosen": -0.6637120842933655, "logits/rejected": -1.4677492380142212, "logps/chosen": -1.4509437084197998, "logps/rejected": -3.363903760910034, "loss": 1.4458, "nll_loss": 1.4317675828933716, "rewards/accuracies": 1.0, "rewards/chosen": -0.1450943797826767, "rewards/margins": 0.19129599630832672, "rewards/rejected": -0.3363903760910034, "step": 898 }, { "epoch": 2.477735588539869, "grad_norm": 0.21604116261005402, "learning_rate": 4.5416498698225034e-07, "log_odds_chosen": 2.007662057876587, "log_odds_ratio": -0.14554226398468018, "logits/chosen": -0.6356925964355469, "logits/rejected": -1.6265672445297241, "logps/chosen": -1.4042253494262695, "logps/rejected": -3.178027868270874, "loss": 1.4282, "nll_loss": 1.413652777671814, "rewards/accuracies": 1.0, "rewards/chosen": -0.140422523021698, "rewards/margins": 0.1773802489042282, "rewards/rejected": -0.3178027868270874, "step": 899 }, { "epoch": 2.4804970659302725, "grad_norm": 0.23354452848434448, "learning_rate": 4.495693635076101e-07, "log_odds_chosen": 1.8639171123504639, "log_odds_ratio": -0.15216907858848572, "logits/chosen": -0.7952009439468384, "logits/rejected": -1.5274772644042969, "logps/chosen": -1.3769278526306152, "logps/rejected": -3.0038204193115234, "loss": 1.4059, "nll_loss": 1.3907115459442139, "rewards/accuracies": 1.0, "rewards/chosen": -0.13769277930259705, "rewards/margins": 0.1626892387866974, "rewards/rejected": -0.30038201808929443, "step": 900 }, { "epoch": 2.4832585433206766, "grad_norm": 0.2217761129140854, "learning_rate": 4.4499481138022546e-07, "log_odds_chosen": 2.391526222229004, "log_odds_ratio": -0.10179422795772552, "logits/chosen": -0.75086510181427, "logits/rejected": -1.6921579837799072, "logps/chosen": -1.3733224868774414, "logps/rejected": -3.499643325805664, "loss": 1.3784, "nll_loss": 1.3682270050048828, "rewards/accuracies": 1.0, "rewards/chosen": -0.1373322457075119, "rewards/margins": 0.21263210475444794, "rewards/rejected": -0.34996435046195984, "step": 901 }, { "epoch": 2.4860200207110803, "grad_norm": 0.2436271458864212, "learning_rate": 4.4044137761069584e-07, "log_odds_chosen": 1.7281368970870972, "log_odds_ratio": -0.1783587783575058, "logits/chosen": -0.6785058379173279, "logits/rejected": -1.422428846359253, "logps/chosen": -1.4997588396072388, "logps/rejected": -3.030074119567871, "loss": 1.5146, "nll_loss": 1.496799349784851, "rewards/accuracies": 1.0, "rewards/chosen": -0.14997588098049164, "rewards/margins": 0.15303152799606323, "rewards/rejected": -0.3030073940753937, "step": 902 }, { "epoch": 2.4887814981014844, "grad_norm": 0.2390860766172409, "learning_rate": 4.359091089925999e-07, "log_odds_chosen": 2.2289576530456543, "log_odds_ratio": -0.10778172314167023, "logits/chosen": -0.700814962387085, "logits/rejected": -1.6030974388122559, "logps/chosen": -1.4542346000671387, "logps/rejected": -3.4503207206726074, "loss": 1.4504, "nll_loss": 1.4396095275878906, "rewards/accuracies": 1.0, "rewards/chosen": -0.14542347192764282, "rewards/margins": 0.19960863888263702, "rewards/rejected": -0.34503209590911865, "step": 903 }, { "epoch": 2.491542975491888, "grad_norm": 0.2725076675415039, "learning_rate": 4.3139805210200917e-07, "log_odds_chosen": 2.1438865661621094, "log_odds_ratio": -0.1179514229297638, "logits/chosen": -0.6978856325149536, "logits/rejected": -1.513223648071289, "logps/chosen": -1.444118857383728, "logps/rejected": -3.3557324409484863, "loss": 1.4682, "nll_loss": 1.4563794136047363, "rewards/accuracies": 1.0, "rewards/chosen": -0.14441190659999847, "rewards/margins": 0.1911613643169403, "rewards/rejected": -0.3355732858181, "step": 904 }, { "epoch": 2.494304452882292, "grad_norm": 0.2490164041519165, "learning_rate": 4.2690825329701313e-07, "log_odds_chosen": 1.8853440284729004, "log_odds_ratio": -0.1477978676557541, "logits/chosen": -0.6538559794425964, "logits/rejected": -1.5023826360702515, "logps/chosen": -1.3664219379425049, "logps/rejected": -3.0052311420440674, "loss": 1.3901, "nll_loss": 1.3753145933151245, "rewards/accuracies": 1.0, "rewards/chosen": -0.136642187833786, "rewards/margins": 0.16388095915317535, "rewards/rejected": -0.30052316188812256, "step": 905 }, { "epoch": 2.497065930272696, "grad_norm": 0.23536191880702972, "learning_rate": 4.2243975871724017e-07, "log_odds_chosen": 1.70067298412323, "log_odds_ratio": -0.1889999508857727, "logits/chosen": -0.7580082416534424, "logits/rejected": -1.254365086555481, "logps/chosen": -1.4177350997924805, "logps/rejected": -2.906013250350952, "loss": 1.4512, "nll_loss": 1.4322596788406372, "rewards/accuracies": 1.0, "rewards/chosen": -0.141773521900177, "rewards/margins": 0.14882780611515045, "rewards/rejected": -0.29060134291648865, "step": 906 }, { "epoch": 2.4998274076630995, "grad_norm": 0.22875359654426575, "learning_rate": 4.179926142833876e-07, "log_odds_chosen": 1.846494436264038, "log_odds_ratio": -0.14916576445102692, "logits/chosen": -0.6784324645996094, "logits/rejected": -1.5528572797775269, "logps/chosen": -1.4663653373718262, "logps/rejected": -3.0941779613494873, "loss": 1.4772, "nll_loss": 1.462315559387207, "rewards/accuracies": 1.0, "rewards/chosen": -0.14663653075695038, "rewards/margins": 0.1627812534570694, "rewards/rejected": -0.3094177544116974, "step": 907 }, { "epoch": 2.5025888850535036, "grad_norm": 0.24101197719573975, "learning_rate": 4.1356686569674344e-07, "log_odds_chosen": 1.912613034248352, "log_odds_ratio": -0.16061508655548096, "logits/chosen": -0.7745194435119629, "logits/rejected": -1.4570187330245972, "logps/chosen": -1.4302928447723389, "logps/rejected": -3.1224308013916016, "loss": 1.4443, "nll_loss": 1.4282448291778564, "rewards/accuracies": 1.0, "rewards/chosen": -0.14302927255630493, "rewards/margins": 0.16921381652355194, "rewards/rejected": -0.3122430741786957, "step": 908 }, { "epoch": 2.5053503624439077, "grad_norm": 0.24438601732254028, "learning_rate": 4.091625584387238e-07, "log_odds_chosen": 2.0320229530334473, "log_odds_ratio": -0.1365109235048294, "logits/chosen": -0.719463050365448, "logits/rejected": -1.586406946182251, "logps/chosen": -1.4272314310073853, "logps/rejected": -3.225419044494629, "loss": 1.4488, "nll_loss": 1.4351040124893188, "rewards/accuracies": 1.0, "rewards/chosen": -0.14272314310073853, "rewards/margins": 0.17981873452663422, "rewards/rejected": -0.32254189252853394, "step": 909 }, { "epoch": 2.5081118398343114, "grad_norm": 0.2180737406015396, "learning_rate": 4.0477973777039854e-07, "log_odds_chosen": 1.8226643800735474, "log_odds_ratio": -0.16508425772190094, "logits/chosen": -0.7682521343231201, "logits/rejected": -1.4299850463867188, "logps/chosen": -1.4092507362365723, "logps/rejected": -3.000713586807251, "loss": 1.4221, "nll_loss": 1.4056212902069092, "rewards/accuracies": 1.0, "rewards/chosen": -0.1409250795841217, "rewards/margins": 0.1591462790966034, "rewards/rejected": -0.3000713586807251, "step": 910 }, { "epoch": 2.510873317224715, "grad_norm": 0.26936936378479004, "learning_rate": 4.004184487320317e-07, "log_odds_chosen": 2.0352725982666016, "log_odds_ratio": -0.1316160410642624, "logits/chosen": -0.6806354522705078, "logits/rejected": -1.510576844215393, "logps/chosen": -1.4752784967422485, "logps/rejected": -3.2884604930877686, "loss": 1.4792, "nll_loss": 1.4659947156906128, "rewards/accuracies": 1.0, "rewards/chosen": -0.14752785861492157, "rewards/margins": 0.18131817877292633, "rewards/rejected": -0.3288460373878479, "step": 911 }, { "epoch": 2.513634794615119, "grad_norm": 0.2261756807565689, "learning_rate": 3.9607873614261717e-07, "log_odds_chosen": 2.139875650405884, "log_odds_ratio": -0.11657200753688812, "logits/chosen": -0.6413018703460693, "logits/rejected": -1.4591153860092163, "logps/chosen": -1.431268572807312, "logps/rejected": -3.3357994556427, "loss": 1.443, "nll_loss": 1.4313855171203613, "rewards/accuracies": 1.0, "rewards/chosen": -0.14312686026096344, "rewards/margins": 0.19045312702655792, "rewards/rejected": -0.333579957485199, "step": 912 }, { "epoch": 2.516396272005523, "grad_norm": 0.2712996006011963, "learning_rate": 3.917606445994146e-07, "log_odds_chosen": 1.9414974451065063, "log_odds_ratio": -0.14728671312332153, "logits/chosen": -0.7389076948165894, "logits/rejected": -1.4981968402862549, "logps/chosen": -1.4183313846588135, "logps/rejected": -3.130592107772827, "loss": 1.4343, "nll_loss": 1.41960871219635, "rewards/accuracies": 1.0, "rewards/chosen": -0.1418331265449524, "rewards/margins": 0.17122608423233032, "rewards/rejected": -0.3130591809749603, "step": 913 }, { "epoch": 2.519157749395927, "grad_norm": 0.5788634419441223, "learning_rate": 3.874642184774977e-07, "log_odds_chosen": 1.9497270584106445, "log_odds_ratio": -0.14281640946865082, "logits/chosen": -0.7612333297729492, "logits/rejected": -1.5962014198303223, "logps/chosen": -1.4015917778015137, "logps/rejected": -3.114823818206787, "loss": 1.4195, "nll_loss": 1.4051827192306519, "rewards/accuracies": 1.0, "rewards/chosen": -0.14015917479991913, "rewards/margins": 0.17132321000099182, "rewards/rejected": -0.31148236989974976, "step": 914 }, { "epoch": 2.5219192267863306, "grad_norm": 0.2559986114501953, "learning_rate": 3.831895019292897e-07, "log_odds_chosen": 2.3182804584503174, "log_odds_ratio": -0.1324596256017685, "logits/chosen": -0.7431677579879761, "logits/rejected": -1.4666855335235596, "logps/chosen": -1.4558906555175781, "logps/rejected": -3.5455775260925293, "loss": 1.4791, "nll_loss": 1.4658503532409668, "rewards/accuracies": 1.0, "rewards/chosen": -0.14558906853199005, "rewards/margins": 0.20896868407726288, "rewards/rejected": -0.35455775260925293, "step": 915 }, { "epoch": 2.5246807041767347, "grad_norm": 0.23064690828323364, "learning_rate": 3.789365388841193e-07, "log_odds_chosen": 2.1233859062194824, "log_odds_ratio": -0.13091759383678436, "logits/chosen": -0.7361263632774353, "logits/rejected": -1.5945910215377808, "logps/chosen": -1.43380606174469, "logps/rejected": -3.323601007461548, "loss": 1.4562, "nll_loss": 1.4430584907531738, "rewards/accuracies": 1.0, "rewards/chosen": -0.14338059723377228, "rewards/margins": 0.18897950649261475, "rewards/rejected": -0.33236008882522583, "step": 916 }, { "epoch": 2.5274421815671384, "grad_norm": 0.24398201704025269, "learning_rate": 3.7470537304776037e-07, "log_odds_chosen": 1.8010647296905518, "log_odds_ratio": -0.1628636121749878, "logits/chosen": -0.6727527379989624, "logits/rejected": -1.5267282724380493, "logps/chosen": -1.4984031915664673, "logps/rejected": -3.09403920173645, "loss": 1.5113, "nll_loss": 1.4950461387634277, "rewards/accuracies": 1.0, "rewards/chosen": -0.1498403400182724, "rewards/margins": 0.1595636010169983, "rewards/rejected": -0.3094039559364319, "step": 917 }, { "epoch": 2.530203658957542, "grad_norm": 0.2269071340560913, "learning_rate": 3.7049604790198976e-07, "log_odds_chosen": 2.120920181274414, "log_odds_ratio": -0.12410043179988861, "logits/chosen": -0.7179909944534302, "logits/rejected": -1.606101155281067, "logps/chosen": -1.4044445753097534, "logps/rejected": -3.2781612873077393, "loss": 1.429, "nll_loss": 1.4166154861450195, "rewards/accuracies": 1.0, "rewards/chosen": -0.14044447243213654, "rewards/margins": 0.18737170100212097, "rewards/rejected": -0.3278161585330963, "step": 918 }, { "epoch": 2.532965136347946, "grad_norm": 0.22726218402385712, "learning_rate": 3.663086067041352e-07, "log_odds_chosen": 2.1184229850769043, "log_odds_ratio": -0.1318613439798355, "logits/chosen": -0.7021893262863159, "logits/rejected": -1.4056490659713745, "logps/chosen": -1.4406737089157104, "logps/rejected": -3.3295974731445312, "loss": 1.4455, "nll_loss": 1.43229341506958, "rewards/accuracies": 1.0, "rewards/chosen": -0.14406737685203552, "rewards/margins": 0.18889237940311432, "rewards/rejected": -0.33295974135398865, "step": 919 }, { "epoch": 2.5357266137383503, "grad_norm": 0.2331918329000473, "learning_rate": 3.621430924866348e-07, "log_odds_chosen": 1.9700649976730347, "log_odds_ratio": -0.13639576733112335, "logits/chosen": -0.7762292623519897, "logits/rejected": -1.4200176000595093, "logps/chosen": -1.3168025016784668, "logps/rejected": -3.018784999847412, "loss": 1.3587, "nll_loss": 1.345030426979065, "rewards/accuracies": 1.0, "rewards/chosen": -0.13168025016784668, "rewards/margins": 0.1701982468366623, "rewards/rejected": -0.3018784821033478, "step": 920 }, { "epoch": 2.538488091128754, "grad_norm": 0.28572413325309753, "learning_rate": 3.579995480565909e-07, "log_odds_chosen": 1.7836999893188477, "log_odds_ratio": -0.1679093837738037, "logits/chosen": -0.5951927900314331, "logits/rejected": -1.3846195936203003, "logps/chosen": -1.4186499118804932, "logps/rejected": -2.970689535140991, "loss": 1.4413, "nll_loss": 1.4245105981826782, "rewards/accuracies": 1.0, "rewards/chosen": -0.14186498522758484, "rewards/margins": 0.15520399808883667, "rewards/rejected": -0.2970689833164215, "step": 921 }, { "epoch": 2.5412495685191576, "grad_norm": 0.24972712993621826, "learning_rate": 3.538780159953348e-07, "log_odds_chosen": 2.0445008277893066, "log_odds_ratio": -0.15223285555839539, "logits/chosen": -0.6911048889160156, "logits/rejected": -1.3544981479644775, "logps/chosen": -1.3023552894592285, "logps/rejected": -3.063093662261963, "loss": 1.3226, "nll_loss": 1.3074004650115967, "rewards/accuracies": 1.0, "rewards/chosen": -0.13023550808429718, "rewards/margins": 0.1760738044977188, "rewards/rejected": -0.3063093423843384, "step": 922 }, { "epoch": 2.5440110459095617, "grad_norm": 0.32826775312423706, "learning_rate": 3.4977853865798466e-07, "log_odds_chosen": 1.8704793453216553, "log_odds_ratio": -0.1738256961107254, "logits/chosen": -0.694957971572876, "logits/rejected": -1.2733983993530273, "logps/chosen": -1.3503116369247437, "logps/rejected": -2.9744861125946045, "loss": 1.3885, "nll_loss": 1.3711236715316772, "rewards/accuracies": 1.0, "rewards/chosen": -0.13503116369247437, "rewards/margins": 0.162417471408844, "rewards/rejected": -0.29744863510131836, "step": 923 }, { "epoch": 2.5467725232999654, "grad_norm": 0.23226679861545563, "learning_rate": 3.457011581730124e-07, "log_odds_chosen": 2.072930097579956, "log_odds_ratio": -0.14823932945728302, "logits/chosen": -0.704539954662323, "logits/rejected": -1.4955695867538452, "logps/chosen": -1.399446725845337, "logps/rejected": -3.2422397136688232, "loss": 1.4177, "nll_loss": 1.4029247760772705, "rewards/accuracies": 1.0, "rewards/chosen": -0.1399446576833725, "rewards/margins": 0.18427930772304535, "rewards/rejected": -0.32422396540641785, "step": 924 }, { "epoch": 2.5495340006903695, "grad_norm": 0.287977010011673, "learning_rate": 3.4164591644181233e-07, "log_odds_chosen": 1.9011677503585815, "log_odds_ratio": -0.1436043679714203, "logits/chosen": -0.6969839334487915, "logits/rejected": -1.3186784982681274, "logps/chosen": -1.3666952848434448, "logps/rejected": -3.0241379737854004, "loss": 1.3913, "nll_loss": 1.3769299983978271, "rewards/accuracies": 1.0, "rewards/chosen": -0.13666953146457672, "rewards/margins": 0.16574428975582123, "rewards/rejected": -0.30241382122039795, "step": 925 }, { "epoch": 2.552295478080773, "grad_norm": 0.2413380742073059, "learning_rate": 3.3761285513826627e-07, "log_odds_chosen": 1.959080457687378, "log_odds_ratio": -0.14119106531143188, "logits/chosen": -0.6814147233963013, "logits/rejected": -1.3941816091537476, "logps/chosen": -1.3565312623977661, "logps/rejected": -3.0659008026123047, "loss": 1.3666, "nll_loss": 1.3524693250656128, "rewards/accuracies": 1.0, "rewards/chosen": -0.13565312325954437, "rewards/margins": 0.17093698680400848, "rewards/rejected": -0.30659011006355286, "step": 926 }, { "epoch": 2.5550569554711773, "grad_norm": 0.23813655972480774, "learning_rate": 3.3360201570832e-07, "log_odds_chosen": 2.2451252937316895, "log_odds_ratio": -0.13731753826141357, "logits/chosen": -0.6869548559188843, "logits/rejected": -1.4549494981765747, "logps/chosen": -1.3396923542022705, "logps/rejected": -3.3268046379089355, "loss": 1.3624, "nll_loss": 1.3486777544021606, "rewards/accuracies": 1.0, "rewards/chosen": -0.13396921753883362, "rewards/margins": 0.19871124625205994, "rewards/rejected": -0.33268046379089355, "step": 927 }, { "epoch": 2.557818432861581, "grad_norm": 0.2544892132282257, "learning_rate": 3.2961343936955384e-07, "log_odds_chosen": 2.040139675140381, "log_odds_ratio": -0.12957261502742767, "logits/chosen": -0.6556599736213684, "logits/rejected": -1.3787751197814941, "logps/chosen": -1.436802625656128, "logps/rejected": -3.2414321899414062, "loss": 1.4615, "nll_loss": 1.4485217332839966, "rewards/accuracies": 1.0, "rewards/chosen": -0.14368024468421936, "rewards/margins": 0.18046295642852783, "rewards/rejected": -0.3241432309150696, "step": 928 }, { "epoch": 2.5605799102519846, "grad_norm": 0.23739026486873627, "learning_rate": 3.256471671107617e-07, "log_odds_chosen": 1.708619236946106, "log_odds_ratio": -0.17978349328041077, "logits/chosen": -0.6689194440841675, "logits/rejected": -1.3071646690368652, "logps/chosen": -1.396398663520813, "logps/rejected": -2.879096031188965, "loss": 1.4197, "nll_loss": 1.4016958475112915, "rewards/accuracies": 1.0, "rewards/chosen": -0.13963986933231354, "rewards/margins": 0.14826972782611847, "rewards/rejected": -0.287909597158432, "step": 929 }, { "epoch": 2.5633413876423887, "grad_norm": 0.2338370978832245, "learning_rate": 3.217032396915265e-07, "log_odds_chosen": 1.675088882446289, "log_odds_ratio": -0.1810155212879181, "logits/chosen": -0.6721214652061462, "logits/rejected": -1.256127953529358, "logps/chosen": -1.3703727722167969, "logps/rejected": -2.816499710083008, "loss": 1.3895, "nll_loss": 1.3713994026184082, "rewards/accuracies": 1.0, "rewards/chosen": -0.1370372772216797, "rewards/margins": 0.14461272954940796, "rewards/rejected": -0.28165000677108765, "step": 930 }, { "epoch": 2.5661028650327924, "grad_norm": 0.20992733538150787, "learning_rate": 3.1778169764180575e-07, "log_odds_chosen": 1.661775827407837, "log_odds_ratio": -0.18945685029029846, "logits/chosen": -0.645239531993866, "logits/rejected": -1.3507040739059448, "logps/chosen": -1.4091472625732422, "logps/rejected": -2.852393388748169, "loss": 1.428, "nll_loss": 1.4091001749038696, "rewards/accuracies": 1.0, "rewards/chosen": -0.14091472327709198, "rewards/margins": 0.1443246454000473, "rewards/rejected": -0.2852393388748169, "step": 931 }, { "epoch": 2.5688643424231965, "grad_norm": 0.23702718317508698, "learning_rate": 3.1388258126151093e-07, "log_odds_chosen": 2.0044362545013428, "log_odds_ratio": -0.1336042582988739, "logits/chosen": -0.722419261932373, "logits/rejected": -1.5265545845031738, "logps/chosen": -1.3928831815719604, "logps/rejected": -3.1494266986846924, "loss": 1.4255, "nll_loss": 1.4121726751327515, "rewards/accuracies": 1.0, "rewards/chosen": -0.1392883062362671, "rewards/margins": 0.17565438151359558, "rewards/rejected": -0.31494268774986267, "step": 932 }, { "epoch": 2.5716258198136, "grad_norm": 0.21528197824954987, "learning_rate": 3.100059306200959e-07, "log_odds_chosen": 1.985241174697876, "log_odds_ratio": -0.13415706157684326, "logits/chosen": -0.7101287841796875, "logits/rejected": -1.5072249174118042, "logps/chosen": -1.4175748825073242, "logps/rejected": -3.168888568878174, "loss": 1.4406, "nll_loss": 1.427135705947876, "rewards/accuracies": 1.0, "rewards/chosen": -0.14175750315189362, "rewards/margins": 0.17513138055801392, "rewards/rejected": -0.31688886880874634, "step": 933 }, { "epoch": 2.5743872972040043, "grad_norm": 0.23891428112983704, "learning_rate": 3.06151785556143e-07, "log_odds_chosen": 1.8469765186309814, "log_odds_ratio": -0.153033047914505, "logits/chosen": -0.7471803426742554, "logits/rejected": -1.581573724746704, "logps/chosen": -1.4183661937713623, "logps/rejected": -3.037837505340576, "loss": 1.4222, "nll_loss": 1.406899094581604, "rewards/accuracies": 1.0, "rewards/chosen": -0.14183662831783295, "rewards/margins": 0.1619471311569214, "rewards/rejected": -0.30378374457359314, "step": 934 }, { "epoch": 2.577148774594408, "grad_norm": 0.22244679927825928, "learning_rate": 3.02320185676957e-07, "log_odds_chosen": 1.7946596145629883, "log_odds_ratio": -0.16726499795913696, "logits/chosen": -0.6250181794166565, "logits/rejected": -1.3342721462249756, "logps/chosen": -1.4222698211669922, "logps/rejected": -2.994075059890747, "loss": 1.4375, "nll_loss": 1.4207617044448853, "rewards/accuracies": 1.0, "rewards/chosen": -0.14222699403762817, "rewards/margins": 0.15718048810958862, "rewards/rejected": -0.2994074821472168, "step": 935 }, { "epoch": 2.5799102519848116, "grad_norm": 0.2594182789325714, "learning_rate": 2.98511170358155e-07, "log_odds_chosen": 1.966003656387329, "log_odds_ratio": -0.14290575683116913, "logits/chosen": -0.7776312828063965, "logits/rejected": -1.5495796203613281, "logps/chosen": -1.5186223983764648, "logps/rejected": -3.2718241214752197, "loss": 1.5209, "nll_loss": 1.50662362575531, "rewards/accuracies": 1.0, "rewards/chosen": -0.1518622636795044, "rewards/margins": 0.17532019317150116, "rewards/rejected": -0.32718244194984436, "step": 936 }, { "epoch": 2.5826717293752157, "grad_norm": 0.24045488238334656, "learning_rate": 2.947247787432625e-07, "log_odds_chosen": 1.8805721998214722, "log_odds_ratio": -0.1481732279062271, "logits/chosen": -0.6075320243835449, "logits/rejected": -1.3898406028747559, "logps/chosen": -1.459001064300537, "logps/rejected": -3.1222083568573, "loss": 1.486, "nll_loss": 1.4711474180221558, "rewards/accuracies": 1.0, "rewards/chosen": -0.14590011537075043, "rewards/margins": 0.16632072627544403, "rewards/rejected": -0.31222084164619446, "step": 937 }, { "epoch": 2.58543320676562, "grad_norm": 0.2967565059661865, "learning_rate": 2.9096104974331186e-07, "log_odds_chosen": 2.049731731414795, "log_odds_ratio": -0.12587478756904602, "logits/chosen": -0.7390790581703186, "logits/rejected": -1.4449262619018555, "logps/chosen": -1.5110831260681152, "logps/rejected": -3.3400719165802, "loss": 1.5083, "nll_loss": 1.4957406520843506, "rewards/accuracies": 1.0, "rewards/chosen": -0.15110832452774048, "rewards/margins": 0.1828988641500473, "rewards/rejected": -0.334007203578949, "step": 938 }, { "epoch": 2.5881946841560235, "grad_norm": 0.243703693151474, "learning_rate": 2.872200220364413e-07, "log_odds_chosen": 1.9720426797866821, "log_odds_ratio": -0.13713492453098297, "logits/chosen": -0.696630597114563, "logits/rejected": -1.5544451475143433, "logps/chosen": -1.339968204498291, "logps/rejected": -3.0573863983154297, "loss": 1.3611, "nll_loss": 1.3474037647247314, "rewards/accuracies": 1.0, "rewards/chosen": -0.13399682939052582, "rewards/margins": 0.17174184322357178, "rewards/rejected": -0.3057386875152588, "step": 939 }, { "epoch": 2.590956161546427, "grad_norm": 0.2366427183151245, "learning_rate": 2.8350173406749975e-07, "log_odds_chosen": 1.898164987564087, "log_odds_ratio": -0.1474345624446869, "logits/chosen": -0.7421746253967285, "logits/rejected": -1.3548458814620972, "logps/chosen": -1.3803224563598633, "logps/rejected": -3.039956569671631, "loss": 1.4082, "nll_loss": 1.3934555053710938, "rewards/accuracies": 1.0, "rewards/chosen": -0.13803225755691528, "rewards/margins": 0.16596341133117676, "rewards/rejected": -0.30399566888809204, "step": 940 }, { "epoch": 2.5937176389368313, "grad_norm": 0.2338128685951233, "learning_rate": 2.7980622404764876e-07, "log_odds_chosen": 1.9797059297561646, "log_odds_ratio": -0.13334111869335175, "logits/chosen": -0.6873725652694702, "logits/rejected": -1.417863130569458, "logps/chosen": -1.44368314743042, "logps/rejected": -3.193904399871826, "loss": 1.4515, "nll_loss": 1.4382107257843018, "rewards/accuracies": 1.0, "rewards/chosen": -0.14436830580234528, "rewards/margins": 0.17502214014530182, "rewards/rejected": -0.3193904459476471, "step": 941 }, { "epoch": 2.596479116327235, "grad_norm": 0.2700754702091217, "learning_rate": 2.761335299539708e-07, "log_odds_chosen": 1.822613000869751, "log_odds_ratio": -0.16057495772838593, "logits/chosen": -0.6777271628379822, "logits/rejected": -1.466383457183838, "logps/chosen": -1.379992961883545, "logps/rejected": -2.967174530029297, "loss": 1.3969, "nll_loss": 1.3808046579360962, "rewards/accuracies": 1.0, "rewards/chosen": -0.1379992961883545, "rewards/margins": 0.15871816873550415, "rewards/rejected": -0.29671746492385864, "step": 942 }, { "epoch": 2.599240593717639, "grad_norm": 0.3026716709136963, "learning_rate": 2.7248368952908055e-07, "log_odds_chosen": 1.8705741167068481, "log_odds_ratio": -0.1605122983455658, "logits/chosen": -0.7535709738731384, "logits/rejected": -1.325878620147705, "logps/chosen": -1.4469115734100342, "logps/rejected": -3.101172924041748, "loss": 1.4593, "nll_loss": 1.4432421922683716, "rewards/accuracies": 1.0, "rewards/chosen": -0.14469115436077118, "rewards/margins": 0.165426105260849, "rewards/rejected": -0.31011727452278137, "step": 943 }, { "epoch": 2.6020020711080427, "grad_norm": 0.3109094202518463, "learning_rate": 2.688567402807357e-07, "log_odds_chosen": 2.250108242034912, "log_odds_ratio": -0.11274486780166626, "logits/chosen": -0.7702718377113342, "logits/rejected": -1.455101490020752, "logps/chosen": -1.3588594198226929, "logps/rejected": -3.3507652282714844, "loss": 1.3786, "nll_loss": 1.3673537969589233, "rewards/accuracies": 1.0, "rewards/chosen": -0.13588595390319824, "rewards/margins": 0.19919057190418243, "rewards/rejected": -0.33507654070854187, "step": 944 }, { "epoch": 2.604763548498447, "grad_norm": 0.2435607761144638, "learning_rate": 2.652527194814511e-07, "log_odds_chosen": 1.6397302150726318, "log_odds_ratio": -0.1893599033355713, "logits/chosen": -0.7129907608032227, "logits/rejected": -1.3441267013549805, "logps/chosen": -1.3824410438537598, "logps/rejected": -2.796844005584717, "loss": 1.4179, "nll_loss": 1.3990039825439453, "rewards/accuracies": 1.0, "rewards/chosen": -0.13824410736560822, "rewards/margins": 0.141440287232399, "rewards/rejected": -0.2796843945980072, "step": 945 }, { "epoch": 2.6075250258888505, "grad_norm": 0.2249520868062973, "learning_rate": 2.6167166416811745e-07, "log_odds_chosen": 2.0449821949005127, "log_odds_ratio": -0.1300472915172577, "logits/chosen": -0.7219584584236145, "logits/rejected": -1.3695610761642456, "logps/chosen": -1.3880071640014648, "logps/rejected": -3.1895861625671387, "loss": 1.4166, "nll_loss": 1.403563380241394, "rewards/accuracies": 1.0, "rewards/chosen": -0.1388007402420044, "rewards/margins": 0.18015789985656738, "rewards/rejected": -0.3189586400985718, "step": 946 }, { "epoch": 2.610286503279254, "grad_norm": 0.2573613226413727, "learning_rate": 2.5811361114161745e-07, "log_odds_chosen": 2.0810060501098633, "log_odds_ratio": -0.12215421348810196, "logits/chosen": -0.7206903696060181, "logits/rejected": -1.512025237083435, "logps/chosen": -1.421273946762085, "logps/rejected": -3.26515531539917, "loss": 1.4342, "nll_loss": 1.4219614267349243, "rewards/accuracies": 1.0, "rewards/chosen": -0.1421274095773697, "rewards/margins": 0.1843881607055664, "rewards/rejected": -0.3265155553817749, "step": 947 }, { "epoch": 2.6130479806696583, "grad_norm": 0.23119576275348663, "learning_rate": 2.5457859696645245e-07, "log_odds_chosen": 1.802943468093872, "log_odds_ratio": -0.16530674695968628, "logits/chosen": -0.7648516893386841, "logits/rejected": -1.3953312635421753, "logps/chosen": -1.3938331604003906, "logps/rejected": -2.9691152572631836, "loss": 1.411, "nll_loss": 1.3944562673568726, "rewards/accuracies": 1.0, "rewards/chosen": -0.13938331604003906, "rewards/margins": 0.15752822160720825, "rewards/rejected": -0.2969115376472473, "step": 948 }, { "epoch": 2.6158094580600624, "grad_norm": 0.27547046542167664, "learning_rate": 2.510666579703616e-07, "log_odds_chosen": 2.218683958053589, "log_odds_ratio": -0.12011230736970901, "logits/chosen": -0.7947883009910583, "logits/rejected": -1.286645770072937, "logps/chosen": -1.3454455137252808, "logps/rejected": -3.3007609844207764, "loss": 1.3609, "nll_loss": 1.3489000797271729, "rewards/accuracies": 1.0, "rewards/chosen": -0.13454456627368927, "rewards/margins": 0.19553157687187195, "rewards/rejected": -0.3300761282444, "step": 949 }, { "epoch": 2.618570935450466, "grad_norm": 0.2219359576702118, "learning_rate": 2.4757783024395244e-07, "log_odds_chosen": 2.1360833644866943, "log_odds_ratio": -0.12794803082942963, "logits/chosen": -0.6761844158172607, "logits/rejected": -1.64288330078125, "logps/chosen": -1.4937316179275513, "logps/rejected": -3.4092178344726562, "loss": 1.5087, "nll_loss": 1.4959032535552979, "rewards/accuracies": 1.0, "rewards/chosen": -0.1493731588125229, "rewards/margins": 0.19154861569404602, "rewards/rejected": -0.3409217894077301, "step": 950 }, { "epoch": 2.6213324128408697, "grad_norm": 0.24184520542621613, "learning_rate": 2.441121496403273e-07, "log_odds_chosen": 2.0272634029388428, "log_odds_ratio": -0.13392458856105804, "logits/chosen": -0.7640489339828491, "logits/rejected": -1.4910739660263062, "logps/chosen": -1.4390066862106323, "logps/rejected": -3.2367093563079834, "loss": 1.449, "nll_loss": 1.4356297254562378, "rewards/accuracies": 1.0, "rewards/chosen": -0.14390066266059875, "rewards/margins": 0.17977027595043182, "rewards/rejected": -0.3236709535121918, "step": 951 }, { "epoch": 2.624093890231274, "grad_norm": 0.2330784648656845, "learning_rate": 2.4066965177471645e-07, "log_odds_chosen": 2.0284595489501953, "log_odds_ratio": -0.14267298579216003, "logits/chosen": -0.6908578872680664, "logits/rejected": -1.3764220476150513, "logps/chosen": -1.3548219203948975, "logps/rejected": -3.1374692916870117, "loss": 1.3809, "nll_loss": 1.3666284084320068, "rewards/accuracies": 1.0, "rewards/chosen": -0.13548217713832855, "rewards/margins": 0.1782647669315338, "rewards/rejected": -0.31374692916870117, "step": 952 }, { "epoch": 2.6268553676216775, "grad_norm": 0.23255251348018646, "learning_rate": 2.372503720241126e-07, "log_odds_chosen": 1.9383974075317383, "log_odds_ratio": -0.13968880474567413, "logits/chosen": -0.6115953326225281, "logits/rejected": -1.4383209943771362, "logps/chosen": -1.4576678276062012, "logps/rejected": -3.1733152866363525, "loss": 1.4797, "nll_loss": 1.4657305479049683, "rewards/accuracies": 1.0, "rewards/chosen": -0.14576677978038788, "rewards/margins": 0.1715647578239441, "rewards/rejected": -0.3173315227031708, "step": 953 }, { "epoch": 2.6296168450120816, "grad_norm": 0.23408931493759155, "learning_rate": 2.338543455269046e-07, "log_odds_chosen": 2.0486228466033936, "log_odds_ratio": -0.1272156834602356, "logits/chosen": -0.6825540065765381, "logits/rejected": -1.3209443092346191, "logps/chosen": -1.3331632614135742, "logps/rejected": -3.11747407913208, "loss": 1.3754, "nll_loss": 1.3626675605773926, "rewards/accuracies": 1.0, "rewards/chosen": -0.13331632316112518, "rewards/margins": 0.17843109369277954, "rewards/rejected": -0.31174740195274353, "step": 954 }, { "epoch": 2.6323783224024853, "grad_norm": 0.22708265483379364, "learning_rate": 2.3048160718252e-07, "log_odds_chosen": 2.3425686359405518, "log_odds_ratio": -0.09593919664621353, "logits/chosen": -0.8216720819473267, "logits/rejected": -1.6335172653198242, "logps/chosen": -1.3655447959899902, "logps/rejected": -3.4457623958587646, "loss": 1.3953, "nll_loss": 1.3857141733169556, "rewards/accuracies": 1.0, "rewards/chosen": -0.13655449450016022, "rewards/margins": 0.20802175998687744, "rewards/rejected": -0.3445762097835541, "step": 955 }, { "epoch": 2.6351397997928894, "grad_norm": 0.2601434290409088, "learning_rate": 2.271321916510627e-07, "log_odds_chosen": 2.531157970428467, "log_odds_ratio": -0.08654358237981796, "logits/chosen": -0.808270275592804, "logits/rejected": -1.571951150894165, "logps/chosen": -1.3457801342010498, "logps/rejected": -3.604663610458374, "loss": 1.3691, "nll_loss": 1.3604406118392944, "rewards/accuracies": 1.0, "rewards/chosen": -0.13457801938056946, "rewards/margins": 0.22588835656642914, "rewards/rejected": -0.3604663908481598, "step": 956 }, { "epoch": 2.637901277183293, "grad_norm": 0.23196592926979065, "learning_rate": 2.2380613335296037e-07, "log_odds_chosen": 2.195645809173584, "log_odds_ratio": -0.11568791419267654, "logits/chosen": -0.6892609000205994, "logits/rejected": -1.716423749923706, "logps/chosen": -1.475083827972412, "logps/rejected": -3.445784568786621, "loss": 1.474, "nll_loss": 1.4624130725860596, "rewards/accuracies": 1.0, "rewards/chosen": -0.1475083827972412, "rewards/margins": 0.19707010686397552, "rewards/rejected": -0.34457850456237793, "step": 957 }, { "epoch": 2.6406627545736967, "grad_norm": 0.247060626745224, "learning_rate": 2.205034664686076e-07, "log_odds_chosen": 2.134578227996826, "log_odds_ratio": -0.1356714367866516, "logits/chosen": -0.722184419631958, "logits/rejected": -1.424928903579712, "logps/chosen": -1.3981451988220215, "logps/rejected": -3.2957096099853516, "loss": 1.4362, "nll_loss": 1.4225947856903076, "rewards/accuracies": 1.0, "rewards/chosen": -0.13981452584266663, "rewards/margins": 0.18975645303726196, "rewards/rejected": -0.3295709788799286, "step": 958 }, { "epoch": 2.643424231964101, "grad_norm": 0.22230222821235657, "learning_rate": 2.1722422493801716e-07, "log_odds_chosen": 2.3888792991638184, "log_odds_ratio": -0.10574869066476822, "logits/chosen": -0.7165695428848267, "logits/rejected": -1.5392249822616577, "logps/chosen": -1.4026858806610107, "logps/rejected": -3.5417540073394775, "loss": 1.4379, "nll_loss": 1.42732834815979, "rewards/accuracies": 1.0, "rewards/chosen": -0.14026859402656555, "rewards/margins": 0.21390680968761444, "rewards/rejected": -0.3541753888130188, "step": 959 }, { "epoch": 2.646185709354505, "grad_norm": 0.2664812207221985, "learning_rate": 2.1396844246046904e-07, "log_odds_chosen": 2.0528769493103027, "log_odds_ratio": -0.13420844078063965, "logits/chosen": -0.7231025099754333, "logits/rejected": -1.3817631006240845, "logps/chosen": -1.4738715887069702, "logps/rejected": -3.304985284805298, "loss": 1.4941, "nll_loss": 1.48072350025177, "rewards/accuracies": 1.0, "rewards/chosen": -0.14738714694976807, "rewards/margins": 0.18311138451099396, "rewards/rejected": -0.3304985463619232, "step": 960 }, { "epoch": 2.6489471867449086, "grad_norm": 0.22853970527648926, "learning_rate": 2.1073615249416536e-07, "log_odds_chosen": 1.9294743537902832, "log_odds_ratio": -0.14145371317863464, "logits/chosen": -0.6596424579620361, "logits/rejected": -1.4080650806427002, "logps/chosen": -1.3255747556686401, "logps/rejected": -2.986544370651245, "loss": 1.3485, "nll_loss": 1.334335207939148, "rewards/accuracies": 1.0, "rewards/chosen": -0.1325574666261673, "rewards/margins": 0.1660969853401184, "rewards/rejected": -0.2986544370651245, "step": 961 }, { "epoch": 2.6517086641353123, "grad_norm": 0.2289370745420456, "learning_rate": 2.075273882558873e-07, "log_odds_chosen": 2.0377657413482666, "log_odds_ratio": -0.14710485935211182, "logits/chosen": -0.7615177631378174, "logits/rejected": -1.3432258367538452, "logps/chosen": -1.4437446594238281, "logps/rejected": -3.2567975521087646, "loss": 1.4649, "nll_loss": 1.4501651525497437, "rewards/accuracies": 1.0, "rewards/chosen": -0.14437447488307953, "rewards/margins": 0.18130528926849365, "rewards/rejected": -0.325679749250412, "step": 962 }, { "epoch": 2.6544701415257164, "grad_norm": 0.2514185607433319, "learning_rate": 2.0434218272065154e-07, "log_odds_chosen": 2.2104461193084717, "log_odds_ratio": -0.11657778918743134, "logits/chosen": -0.6318598985671997, "logits/rejected": -1.4899601936340332, "logps/chosen": -1.4056737422943115, "logps/rejected": -3.3749942779541016, "loss": 1.4286, "nll_loss": 1.416942834854126, "rewards/accuracies": 1.0, "rewards/chosen": -0.1405673772096634, "rewards/margins": 0.19693204760551453, "rewards/rejected": -0.3374994397163391, "step": 963 }, { "epoch": 2.65723161891612, "grad_norm": 0.3384753167629242, "learning_rate": 2.0118056862137358e-07, "log_odds_chosen": 2.126129388809204, "log_odds_ratio": -0.11987830698490143, "logits/chosen": -0.7707469463348389, "logits/rejected": -1.564520001411438, "logps/chosen": -1.3614217042922974, "logps/rejected": -3.2318568229675293, "loss": 1.363, "nll_loss": 1.3509749174118042, "rewards/accuracies": 1.0, "rewards/chosen": -0.13614217936992645, "rewards/margins": 0.18704350292682648, "rewards/rejected": -0.3231857120990753, "step": 964 }, { "epoch": 2.659993096306524, "grad_norm": 0.29518210887908936, "learning_rate": 1.980425784485293e-07, "log_odds_chosen": 1.963320255279541, "log_odds_ratio": -0.14825530350208282, "logits/chosen": -0.6631380319595337, "logits/rejected": -1.4375109672546387, "logps/chosen": -1.352154016494751, "logps/rejected": -3.060062885284424, "loss": 1.3672, "nll_loss": 1.3524130582809448, "rewards/accuracies": 1.0, "rewards/chosen": -0.1352154016494751, "rewards/margins": 0.1707909107208252, "rewards/rejected": -0.3060063123703003, "step": 965 }, { "epoch": 2.662754573696928, "grad_norm": 0.22125937044620514, "learning_rate": 1.9492824444982378e-07, "log_odds_chosen": 2.1015822887420654, "log_odds_ratio": -0.12708497047424316, "logits/chosen": -0.6685047745704651, "logits/rejected": -1.5997382402420044, "logps/chosen": -1.3968567848205566, "logps/rejected": -3.2564969062805176, "loss": 1.4126, "nll_loss": 1.3999114036560059, "rewards/accuracies": 1.0, "rewards/chosen": -0.13968569040298462, "rewards/margins": 0.18596400320529938, "rewards/rejected": -0.3256496787071228, "step": 966 }, { "epoch": 2.665516051087332, "grad_norm": 0.24795520305633545, "learning_rate": 1.918375986298565e-07, "log_odds_chosen": 2.119626045227051, "log_odds_ratio": -0.1333272010087967, "logits/chosen": -0.6242780089378357, "logits/rejected": -1.4385974407196045, "logps/chosen": -1.4338008165359497, "logps/rejected": -3.3221771717071533, "loss": 1.439, "nll_loss": 1.425707221031189, "rewards/accuracies": 1.0, "rewards/chosen": -0.1433800905942917, "rewards/margins": 0.18883763253688812, "rewards/rejected": -0.3322177231311798, "step": 967 }, { "epoch": 2.6682775284777356, "grad_norm": 0.2591713070869446, "learning_rate": 1.887706727497965e-07, "log_odds_chosen": 1.9582922458648682, "log_odds_ratio": -0.1632286161184311, "logits/chosen": -0.6759252548217773, "logits/rejected": -1.2422153949737549, "logps/chosen": -1.3070485591888428, "logps/rejected": -3.0063652992248535, "loss": 1.3374, "nll_loss": 1.321088194847107, "rewards/accuracies": 1.0, "rewards/chosen": -0.1307048499584198, "rewards/margins": 0.16993169486522675, "rewards/rejected": -0.30063652992248535, "step": 968 }, { "epoch": 2.6710390058681392, "grad_norm": 0.22042769193649292, "learning_rate": 1.8572749832705074e-07, "log_odds_chosen": 2.227886915206909, "log_odds_ratio": -0.11963072419166565, "logits/chosen": -0.7234524488449097, "logits/rejected": -1.704094648361206, "logps/chosen": -1.3736193180084229, "logps/rejected": -3.350835084915161, "loss": 1.3888, "nll_loss": 1.3768812417984009, "rewards/accuracies": 1.0, "rewards/chosen": -0.13736194372177124, "rewards/margins": 0.19772157073020935, "rewards/rejected": -0.3350834548473358, "step": 969 }, { "epoch": 2.6738004832585434, "grad_norm": 0.22718189656734467, "learning_rate": 1.8270810663494591e-07, "log_odds_chosen": 2.0061402320861816, "log_odds_ratio": -0.13173674046993256, "logits/chosen": -0.6896448135375977, "logits/rejected": -1.557668685913086, "logps/chosen": -1.4352554082870483, "logps/rejected": -3.210336685180664, "loss": 1.4567, "nll_loss": 1.443495750427246, "rewards/accuracies": 1.0, "rewards/chosen": -0.14352554082870483, "rewards/margins": 0.1775081604719162, "rewards/rejected": -0.3210337162017822, "step": 970 }, { "epoch": 2.676561960648947, "grad_norm": 0.2464963048696518, "learning_rate": 1.7971252870240292e-07, "log_odds_chosen": 2.144315004348755, "log_odds_ratio": -0.11838545650243759, "logits/chosen": -0.632702648639679, "logits/rejected": -1.7164009809494019, "logps/chosen": -1.4189828634262085, "logps/rejected": -3.323493242263794, "loss": 1.4359, "nll_loss": 1.4240491390228271, "rewards/accuracies": 1.0, "rewards/chosen": -0.1418982893228531, "rewards/margins": 0.19045105576515198, "rewards/rejected": -0.33234933018684387, "step": 971 }, { "epoch": 2.679323438039351, "grad_norm": 0.2409830242395401, "learning_rate": 1.767407953136202e-07, "log_odds_chosen": 1.814550757408142, "log_odds_ratio": -0.16405977308750153, "logits/chosen": -0.7265454530715942, "logits/rejected": -1.4708313941955566, "logps/chosen": -1.4768238067626953, "logps/rejected": -3.0833613872528076, "loss": 1.4958, "nll_loss": 1.479364275932312, "rewards/accuracies": 1.0, "rewards/chosen": -0.14768238365650177, "rewards/margins": 0.1606537401676178, "rewards/rejected": -0.30833613872528076, "step": 972 }, { "epoch": 2.682084915429755, "grad_norm": 0.2497211992740631, "learning_rate": 1.737929370077554e-07, "log_odds_chosen": 1.94965660572052, "log_odds_ratio": -0.15755438804626465, "logits/chosen": -0.6820305585861206, "logits/rejected": -1.1982239484786987, "logps/chosen": -1.369242548942566, "logps/rejected": -3.0800867080688477, "loss": 1.3936, "nll_loss": 1.3778311014175415, "rewards/accuracies": 1.0, "rewards/chosen": -0.13692426681518555, "rewards/margins": 0.17108440399169922, "rewards/rejected": -0.30800867080688477, "step": 973 }, { "epoch": 2.684846392820159, "grad_norm": 0.2439846694469452, "learning_rate": 1.7086898407861486e-07, "log_odds_chosen": 2.3629441261291504, "log_odds_ratio": -0.10013991594314575, "logits/chosen": -0.7143979668617249, "logits/rejected": -1.5864475965499878, "logps/chosen": -1.4378968477249146, "logps/rejected": -3.5587515830993652, "loss": 1.4553, "nll_loss": 1.4453214406967163, "rewards/accuracies": 1.0, "rewards/chosen": -0.14378967881202698, "rewards/margins": 0.21208547055721283, "rewards/rejected": -0.355875164270401, "step": 974 }, { "epoch": 2.6876078702105626, "grad_norm": 0.2228151559829712, "learning_rate": 1.679689665743381e-07, "log_odds_chosen": 1.8965401649475098, "log_odds_ratio": -0.15074481070041656, "logits/chosen": -0.6746334433555603, "logits/rejected": -1.4987746477127075, "logps/chosen": -1.428466796875, "logps/rejected": -3.092202663421631, "loss": 1.435, "nll_loss": 1.419920563697815, "rewards/accuracies": 1.0, "rewards/chosen": -0.14284668862819672, "rewards/margins": 0.1663735806941986, "rewards/rejected": -0.3092202842235565, "step": 975 }, { "epoch": 2.6903693476009662, "grad_norm": 0.252189040184021, "learning_rate": 1.6509291429709224e-07, "log_odds_chosen": 1.8932888507843018, "log_odds_ratio": -0.14494739472866058, "logits/chosen": -0.7434114813804626, "logits/rejected": -1.5368683338165283, "logps/chosen": -1.4556902647018433, "logps/rejected": -3.125835657119751, "loss": 1.466, "nll_loss": 1.4514588117599487, "rewards/accuracies": 1.0, "rewards/chosen": -0.14556902647018433, "rewards/margins": 0.16701450943946838, "rewards/rejected": -0.3125835359096527, "step": 976 }, { "epoch": 2.6931308249913704, "grad_norm": 0.2237035185098648, "learning_rate": 1.6224085680276418e-07, "log_odds_chosen": 2.128500461578369, "log_odds_ratio": -0.11983887106180191, "logits/chosen": -0.8135519027709961, "logits/rejected": -1.6658682823181152, "logps/chosen": -1.3914153575897217, "logps/rejected": -3.274703025817871, "loss": 1.412, "nll_loss": 1.4000346660614014, "rewards/accuracies": 1.0, "rewards/chosen": -0.13914154469966888, "rewards/margins": 0.18832874298095703, "rewards/rejected": -0.3274702727794647, "step": 977 }, { "epoch": 2.6958923023817745, "grad_norm": 0.23502354323863983, "learning_rate": 1.59412823400657e-07, "log_odds_chosen": 1.8072452545166016, "log_odds_ratio": -0.16262619197368622, "logits/chosen": -0.7987112402915955, "logits/rejected": -1.256929874420166, "logps/chosen": -1.3641008138656616, "logps/rejected": -2.933443069458008, "loss": 1.3803, "nll_loss": 1.3640851974487305, "rewards/accuracies": 1.0, "rewards/chosen": -0.13641008734703064, "rewards/margins": 0.1569342315196991, "rewards/rejected": -0.29334431886672974, "step": 978 }, { "epoch": 2.698653779772178, "grad_norm": 0.25098490715026855, "learning_rate": 1.5660884315319036e-07, "log_odds_chosen": 2.040311336517334, "log_odds_ratio": -0.1397894024848938, "logits/chosen": -0.6582561135292053, "logits/rejected": -1.50811767578125, "logps/chosen": -1.4971377849578857, "logps/rejected": -3.3207790851593018, "loss": 1.4785, "nll_loss": 1.4645262956619263, "rewards/accuracies": 1.0, "rewards/chosen": -0.14971376955509186, "rewards/margins": 0.1823641061782837, "rewards/rejected": -0.33207792043685913, "step": 979 }, { "epoch": 2.701415257162582, "grad_norm": 0.35579007863998413, "learning_rate": 1.538289448755989e-07, "log_odds_chosen": 2.3106751441955566, "log_odds_ratio": -0.0976729691028595, "logits/chosen": -0.7363239526748657, "logits/rejected": -1.606247901916504, "logps/chosen": -1.3980400562286377, "logps/rejected": -3.4542083740234375, "loss": 1.4172, "nll_loss": 1.4073951244354248, "rewards/accuracies": 1.0, "rewards/chosen": -0.13980400562286377, "rewards/margins": 0.20561686158180237, "rewards/rejected": -0.3454208970069885, "step": 980 }, { "epoch": 2.704176734552986, "grad_norm": 0.2425658106803894, "learning_rate": 1.5107315713563942e-07, "log_odds_chosen": 1.9736579656600952, "log_odds_ratio": -0.1463673710823059, "logits/chosen": -0.7006841897964478, "logits/rejected": -1.49364173412323, "logps/chosen": -1.396539330482483, "logps/rejected": -3.128598213195801, "loss": 1.4174, "nll_loss": 1.402726173400879, "rewards/accuracies": 1.0, "rewards/chosen": -0.13965392112731934, "rewards/margins": 0.1732059121131897, "rewards/rejected": -0.31285983324050903, "step": 981 }, { "epoch": 2.7069382119433896, "grad_norm": 0.2352331131696701, "learning_rate": 1.483415082532938e-07, "log_odds_chosen": 2.0061511993408203, "log_odds_ratio": -0.1282159984111786, "logits/chosen": -0.7078461647033691, "logits/rejected": -1.5026459693908691, "logps/chosen": -1.4230999946594238, "logps/rejected": -3.195002317428589, "loss": 1.4321, "nll_loss": 1.4193131923675537, "rewards/accuracies": 1.0, "rewards/chosen": -0.1423100084066391, "rewards/margins": 0.17719024419784546, "rewards/rejected": -0.31950026750564575, "step": 982 }, { "epoch": 2.7096996893337937, "grad_norm": 0.2721024751663208, "learning_rate": 1.456340263004813e-07, "log_odds_chosen": 2.10006046295166, "log_odds_ratio": -0.1235492080450058, "logits/chosen": -0.6474711298942566, "logits/rejected": -1.2648261785507202, "logps/chosen": -1.4418141841888428, "logps/rejected": -3.3105947971343994, "loss": 1.461, "nll_loss": 1.4486186504364014, "rewards/accuracies": 1.0, "rewards/chosen": -0.14418143033981323, "rewards/margins": 0.1868780553340912, "rewards/rejected": -0.3310594856739044, "step": 983 }, { "epoch": 2.7124611667241973, "grad_norm": 0.23106303811073303, "learning_rate": 1.4295073910076757e-07, "log_odds_chosen": 2.081786870956421, "log_odds_ratio": -0.1300010085105896, "logits/chosen": -0.6778182983398438, "logits/rejected": -1.3403464555740356, "logps/chosen": -1.2968605756759644, "logps/rejected": -3.1098732948303223, "loss": 1.3436, "nll_loss": 1.3305792808532715, "rewards/accuracies": 1.0, "rewards/chosen": -0.12968607246875763, "rewards/margins": 0.1813012659549713, "rewards/rejected": -0.31098732352256775, "step": 984 }, { "epoch": 2.7152226441146015, "grad_norm": 0.26438474655151367, "learning_rate": 1.4029167422908107e-07, "log_odds_chosen": 1.8471654653549194, "log_odds_ratio": -0.16113737225532532, "logits/chosen": -0.6926684975624084, "logits/rejected": -1.294993281364441, "logps/chosen": -1.384852409362793, "logps/rejected": -3.000605583190918, "loss": 1.4038, "nll_loss": 1.3876756429672241, "rewards/accuracies": 1.0, "rewards/chosen": -0.13848525285720825, "rewards/margins": 0.1615753024816513, "rewards/rejected": -0.30006057024002075, "step": 985 }, { "epoch": 2.717984121505005, "grad_norm": 0.2312583476305008, "learning_rate": 1.3765685901142718e-07, "log_odds_chosen": 1.855090618133545, "log_odds_ratio": -0.15754860639572144, "logits/chosen": -0.7419026494026184, "logits/rejected": -1.3446100950241089, "logps/chosen": -1.40529465675354, "logps/rejected": -3.031799554824829, "loss": 1.435, "nll_loss": 1.419202446937561, "rewards/accuracies": 1.0, "rewards/chosen": -0.14052946865558624, "rewards/margins": 0.1626504808664322, "rewards/rejected": -0.30317994952201843, "step": 986 }, { "epoch": 2.720745598895409, "grad_norm": 0.2685120701789856, "learning_rate": 1.3504632052460803e-07, "log_odds_chosen": 1.8317335844039917, "log_odds_ratio": -0.1557828038930893, "logits/chosen": -0.697287380695343, "logits/rejected": -1.464606523513794, "logps/chosen": -1.3516614437103271, "logps/rejected": -2.938924551010132, "loss": 1.3848, "nll_loss": 1.3692270517349243, "rewards/accuracies": 1.0, "rewards/chosen": -0.13516615331172943, "rewards/margins": 0.15872633457183838, "rewards/rejected": -0.2938924729824066, "step": 987 }, { "epoch": 2.723507076285813, "grad_norm": 0.246241956949234, "learning_rate": 1.3246008559594708e-07, "log_odds_chosen": 1.848312497138977, "log_odds_ratio": -0.1592661589384079, "logits/chosen": -0.6982629299163818, "logits/rejected": -1.4037423133850098, "logps/chosen": -1.3378205299377441, "logps/rejected": -2.9401087760925293, "loss": 1.3589, "nll_loss": 1.342982292175293, "rewards/accuracies": 1.0, "rewards/chosen": -0.1337820589542389, "rewards/margins": 0.16022886335849762, "rewards/rejected": -0.2940109074115753, "step": 988 }, { "epoch": 2.726268553676217, "grad_norm": 0.2550162971019745, "learning_rate": 1.2989818080300836e-07, "log_odds_chosen": 2.1453795433044434, "log_odds_ratio": -0.11675063520669937, "logits/chosen": -0.7104974985122681, "logits/rejected": -1.5367162227630615, "logps/chosen": -1.3658851385116577, "logps/rejected": -3.2529282569885254, "loss": 1.3818, "nll_loss": 1.3700977563858032, "rewards/accuracies": 1.0, "rewards/chosen": -0.13658851385116577, "rewards/margins": 0.18870429694652557, "rewards/rejected": -0.32529282569885254, "step": 989 }, { "epoch": 2.7290300310666207, "grad_norm": 0.24795058369636536, "learning_rate": 1.273606324733284e-07, "log_odds_chosen": 2.152780532836914, "log_odds_ratio": -0.1181095689535141, "logits/chosen": -0.8036555051803589, "logits/rejected": -1.3709052801132202, "logps/chosen": -1.4417835474014282, "logps/rejected": -3.3610963821411133, "loss": 1.4521, "nll_loss": 1.4403274059295654, "rewards/accuracies": 1.0, "rewards/chosen": -0.1441783457994461, "rewards/margins": 0.19193130731582642, "rewards/rejected": -0.33610963821411133, "step": 990 }, { "epoch": 2.7317915084570243, "grad_norm": 0.2421213537454605, "learning_rate": 1.2484746668414176e-07, "log_odds_chosen": 2.0020647048950195, "log_odds_ratio": -0.142862468957901, "logits/chosen": -0.66302889585495, "logits/rejected": -1.4799884557724, "logps/chosen": -1.4031946659088135, "logps/rejected": -3.1717405319213867, "loss": 1.434, "nll_loss": 1.4197522401809692, "rewards/accuracies": 1.0, "rewards/chosen": -0.14031948149204254, "rewards/margins": 0.17685456573963165, "rewards/rejected": -0.3171740174293518, "step": 991 }, { "epoch": 2.7345529858474285, "grad_norm": 0.23377768695354462, "learning_rate": 1.223587092621162e-07, "log_odds_chosen": 1.8977649211883545, "log_odds_ratio": -0.15138304233551025, "logits/chosen": -0.7624326348304749, "logits/rejected": -1.5068018436431885, "logps/chosen": -1.4252549409866333, "logps/rejected": -3.0944790840148926, "loss": 1.4384, "nll_loss": 1.4232513904571533, "rewards/accuracies": 1.0, "rewards/chosen": -0.14252547919750214, "rewards/margins": 0.1669224202632904, "rewards/rejected": -0.30944788455963135, "step": 992 }, { "epoch": 2.737314463237832, "grad_norm": 0.28157711029052734, "learning_rate": 1.1989438578308394e-07, "log_odds_chosen": 2.0616867542266846, "log_odds_ratio": -0.1334911584854126, "logits/chosen": -0.7798738479614258, "logits/rejected": -1.3342580795288086, "logps/chosen": -1.4138479232788086, "logps/rejected": -3.2425341606140137, "loss": 1.4482, "nll_loss": 1.4348976612091064, "rewards/accuracies": 1.0, "rewards/chosen": -0.1413847804069519, "rewards/margins": 0.1828686147928238, "rewards/rejected": -0.3242534101009369, "step": 993 }, { "epoch": 2.7400759406282362, "grad_norm": 0.287128746509552, "learning_rate": 1.1745452157178206e-07, "log_odds_chosen": 2.3712995052337646, "log_odds_ratio": -0.11235539615154266, "logits/chosen": -0.7226740717887878, "logits/rejected": -1.469365119934082, "logps/chosen": -1.3113185167312622, "logps/rejected": -3.4118595123291016, "loss": 1.3325, "nll_loss": 1.321272373199463, "rewards/accuracies": 1.0, "rewards/chosen": -0.1311318576335907, "rewards/margins": 0.21005409955978394, "rewards/rejected": -0.34118595719337463, "step": 994 }, { "epoch": 2.74283741801864, "grad_norm": 0.21673133969306946, "learning_rate": 1.1503914170159058e-07, "log_odds_chosen": 2.1125168800354004, "log_odds_ratio": -0.1178525760769844, "logits/chosen": -0.6429874897003174, "logits/rejected": -1.5546855926513672, "logps/chosen": -1.416844129562378, "logps/rejected": -3.287041425704956, "loss": 1.4304, "nll_loss": 1.4185818433761597, "rewards/accuracies": 1.0, "rewards/chosen": -0.141684427857399, "rewards/margins": 0.1870196908712387, "rewards/rejected": -0.3287041485309601, "step": 995 }, { "epoch": 2.745598895409044, "grad_norm": 0.23649489879608154, "learning_rate": 1.1264827099427417e-07, "log_odds_chosen": 1.740596055984497, "log_odds_ratio": -0.17465397715568542, "logits/chosen": -0.702508807182312, "logits/rejected": -1.3662904500961304, "logps/chosen": -1.3189494609832764, "logps/rejected": -2.808136463165283, "loss": 1.3485, "nll_loss": 1.3309894800186157, "rewards/accuracies": 1.0, "rewards/chosen": -0.13189494609832764, "rewards/margins": 0.1489187330007553, "rewards/rejected": -0.28081366419792175, "step": 996 }, { "epoch": 2.7483603727994477, "grad_norm": 0.24152585864067078, "learning_rate": 1.1028193401972865e-07, "log_odds_chosen": 1.958052396774292, "log_odds_ratio": -0.14754720032215118, "logits/chosen": -0.6607347130775452, "logits/rejected": -1.441666841506958, "logps/chosen": -1.3635063171386719, "logps/rejected": -3.0678017139434814, "loss": 1.3872, "nll_loss": 1.3724085092544556, "rewards/accuracies": 1.0, "rewards/chosen": -0.13635066151618958, "rewards/margins": 0.170429527759552, "rewards/rejected": -0.3067801892757416, "step": 997 }, { "epoch": 2.7511218501898513, "grad_norm": 0.26197075843811035, "learning_rate": 1.0794015509572819e-07, "log_odds_chosen": 2.423408031463623, "log_odds_ratio": -0.11334723234176636, "logits/chosen": -0.6618593335151672, "logits/rejected": -1.5322468280792236, "logps/chosen": -1.4603853225708008, "logps/rejected": -3.649425506591797, "loss": 1.485, "nll_loss": 1.4736342430114746, "rewards/accuracies": 1.0, "rewards/chosen": -0.14603853225708008, "rewards/margins": 0.218904048204422, "rewards/rejected": -0.3649425506591797, "step": 998 }, { "epoch": 2.7538833275802554, "grad_norm": 0.2273317277431488, "learning_rate": 1.0562295828767388e-07, "log_odds_chosen": 1.752290964126587, "log_odds_ratio": -0.16621780395507812, "logits/chosen": -0.7716602087020874, "logits/rejected": -1.4140805006027222, "logps/chosen": -1.4367755651474, "logps/rejected": -2.9715890884399414, "loss": 1.4581, "nll_loss": 1.4414745569229126, "rewards/accuracies": 1.0, "rewards/chosen": -0.14367754757404327, "rewards/margins": 0.15348133444786072, "rewards/rejected": -0.2971589267253876, "step": 999 }, { "epoch": 2.7566448049706596, "grad_norm": 0.2583448588848114, "learning_rate": 1.0333036740834857e-07, "log_odds_chosen": 1.9038358926773071, "log_odds_ratio": -0.14574313163757324, "logits/chosen": -0.6324177384376526, "logits/rejected": -1.3562304973602295, "logps/chosen": -1.471291184425354, "logps/rejected": -3.153928279876709, "loss": 1.4642, "nll_loss": 1.449668526649475, "rewards/accuracies": 1.0, "rewards/chosen": -0.1471291184425354, "rewards/margins": 0.16826367378234863, "rewards/rejected": -0.3153928220272064, "step": 1000 }, { "epoch": 2.7594062823610632, "grad_norm": 0.22535422444343567, "learning_rate": 1.0106240601767042e-07, "log_odds_chosen": 2.648030996322632, "log_odds_ratio": -0.0725451335310936, "logits/chosen": -0.7593368291854858, "logits/rejected": -1.6607447862625122, "logps/chosen": -1.3206822872161865, "logps/rejected": -3.6815028190612793, "loss": 1.3404, "nll_loss": 1.3331003189086914, "rewards/accuracies": 1.0, "rewards/chosen": -0.1320682317018509, "rewards/margins": 0.2360820770263672, "rewards/rejected": -0.3681502938270569, "step": 1001 }, { "epoch": 2.762167759751467, "grad_norm": 0.22319379448890686, "learning_rate": 9.881909742245177e-08, "log_odds_chosen": 2.2190699577331543, "log_odds_ratio": -0.12546557188034058, "logits/chosen": -0.768273115158081, "logits/rejected": -1.4960501194000244, "logps/chosen": -1.3576903343200684, "logps/rejected": -3.3215339183807373, "loss": 1.3761, "nll_loss": 1.3635356426239014, "rewards/accuracies": 1.0, "rewards/chosen": -0.13576900959014893, "rewards/margins": 0.19638435542583466, "rewards/rejected": -0.33215340971946716, "step": 1002 }, { "epoch": 2.764929237141871, "grad_norm": 0.28191980719566345, "learning_rate": 9.66004646761598e-08, "log_odds_chosen": 1.8415871858596802, "log_odds_ratio": -0.17654792964458466, "logits/chosen": -0.6697184443473816, "logits/rejected": -1.30105721950531, "logps/chosen": -1.3981242179870605, "logps/rejected": -3.0165035724639893, "loss": 1.4101, "nll_loss": 1.3924883604049683, "rewards/accuracies": 1.0, "rewards/chosen": -0.1398124396800995, "rewards/margins": 0.16183793544769287, "rewards/rejected": -0.30165037512779236, "step": 1003 }, { "epoch": 2.7676907145322747, "grad_norm": 0.22236281633377075, "learning_rate": 9.440653057867816e-08, "log_odds_chosen": 1.993783712387085, "log_odds_ratio": -0.13520008325576782, "logits/chosen": -0.7383299469947815, "logits/rejected": -1.5257141590118408, "logps/chosen": -1.4308398962020874, "logps/rejected": -3.1934688091278076, "loss": 1.4528, "nll_loss": 1.4392435550689697, "rewards/accuracies": 1.0, "rewards/chosen": -0.14308398962020874, "rewards/margins": 0.17626288533210754, "rewards/rejected": -0.3193468749523163, "step": 1004 }, { "epoch": 2.7704521919226788, "grad_norm": 0.2923729419708252, "learning_rate": 9.223731767607436e-08, "log_odds_chosen": 1.824439287185669, "log_odds_ratio": -0.16186580061912537, "logits/chosen": -0.7450851202011108, "logits/rejected": -1.439494252204895, "logps/chosen": -1.3921793699264526, "logps/rejected": -2.9855105876922607, "loss": 1.4157, "nll_loss": 1.3994739055633545, "rewards/accuracies": 1.0, "rewards/chosen": -0.13921794295310974, "rewards/margins": 0.15933310985565186, "rewards/rejected": -0.2985510528087616, "step": 1005 }, { "epoch": 2.7732136693130824, "grad_norm": 0.41212600469589233, "learning_rate": 9.00928482603669e-08, "log_odds_chosen": 1.8886624574661255, "log_odds_ratio": -0.146849125623703, "logits/chosen": -0.5653700232505798, "logits/rejected": -1.3114237785339355, "logps/chosen": -1.4131641387939453, "logps/rejected": -3.070817470550537, "loss": 1.4294, "nll_loss": 1.4146660566329956, "rewards/accuracies": 1.0, "rewards/chosen": -0.14131641387939453, "rewards/margins": 0.16576533019542694, "rewards/rejected": -0.30708175897598267, "step": 1006 }, { "epoch": 2.7759751467034866, "grad_norm": 0.2313527762889862, "learning_rate": 8.797314436929744e-08, "log_odds_chosen": 1.5332307815551758, "log_odds_ratio": -0.21756960451602936, "logits/chosen": -0.6892613768577576, "logits/rejected": -1.093503713607788, "logps/chosen": -1.4477730989456177, "logps/rejected": -2.7788703441619873, "loss": 1.4684, "nll_loss": 1.4466668367385864, "rewards/accuracies": 1.0, "rewards/chosen": -0.144777312874794, "rewards/margins": 0.13310973346233368, "rewards/rejected": -0.2778870463371277, "step": 1007 }, { "epoch": 2.77873662409389, "grad_norm": 0.22530636191368103, "learning_rate": 8.587822778610284e-08, "log_odds_chosen": 2.186410903930664, "log_odds_ratio": -0.12280432879924774, "logits/chosen": -0.6308997869491577, "logits/rejected": -1.4013787508010864, "logps/chosen": -1.3398391008377075, "logps/rejected": -3.2653181552886963, "loss": 1.3588, "nll_loss": 1.3464809656143188, "rewards/accuracies": 1.0, "rewards/chosen": -0.13398392498493195, "rewards/margins": 0.19254791736602783, "rewards/rejected": -0.3265318274497986, "step": 1008 }, { "epoch": 2.781498101484294, "grad_norm": 0.23101964592933655, "learning_rate": 8.380812003929262e-08, "log_odds_chosen": 1.8533670902252197, "log_odds_ratio": -0.15389712154865265, "logits/chosen": -0.6596164107322693, "logits/rejected": -1.4932183027267456, "logps/chosen": -1.463547945022583, "logps/rejected": -3.100766658782959, "loss": 1.4468, "nll_loss": 1.4314396381378174, "rewards/accuracies": 1.0, "rewards/chosen": -0.1463548094034195, "rewards/margins": 0.16372185945510864, "rewards/rejected": -0.31007668375968933, "step": 1009 }, { "epoch": 2.784259578874698, "grad_norm": 0.22587090730667114, "learning_rate": 8.176284240242638e-08, "log_odds_chosen": 2.2043659687042236, "log_odds_ratio": -0.1306949108839035, "logits/chosen": -0.6479353308677673, "logits/rejected": -1.4205193519592285, "logps/chosen": -1.385279893875122, "logps/rejected": -3.342761516571045, "loss": 1.398, "nll_loss": 1.384945273399353, "rewards/accuracies": 1.0, "rewards/chosen": -0.1385279893875122, "rewards/margins": 0.19574815034866333, "rewards/rejected": -0.33427613973617554, "step": 1010 }, { "epoch": 2.787021056265102, "grad_norm": 0.24764949083328247, "learning_rate": 7.97424158938967e-08, "log_odds_chosen": 2.072882890701294, "log_odds_ratio": -0.12916429340839386, "logits/chosen": -0.6634835004806519, "logits/rejected": -1.5250909328460693, "logps/chosen": -1.4897488355636597, "logps/rejected": -3.3414440155029297, "loss": 1.4955, "nll_loss": 1.4825700521469116, "rewards/accuracies": 1.0, "rewards/chosen": -0.14897489547729492, "rewards/margins": 0.18516948819160461, "rewards/rejected": -0.33414438366889954, "step": 1011 }, { "epoch": 2.7897825336555058, "grad_norm": 0.30778175592422485, "learning_rate": 7.774686127671183e-08, "log_odds_chosen": 2.177363872528076, "log_odds_ratio": -0.11666363477706909, "logits/chosen": -0.7891844511032104, "logits/rejected": -1.4910955429077148, "logps/chosen": -1.360632300376892, "logps/rejected": -3.2791755199432373, "loss": 1.368, "nll_loss": 1.3562960624694824, "rewards/accuracies": 1.0, "rewards/chosen": -0.13606323301792145, "rewards/margins": 0.191854327917099, "rewards/rejected": -0.32791754603385925, "step": 1012 }, { "epoch": 2.7925440110459094, "grad_norm": 0.22408710420131683, "learning_rate": 7.577619905828281e-08, "log_odds_chosen": 1.7647576332092285, "log_odds_ratio": -0.16666775941848755, "logits/chosen": -0.6597376465797424, "logits/rejected": -1.448360562324524, "logps/chosen": -1.4035964012145996, "logps/rejected": -2.9451749324798584, "loss": 1.4328, "nll_loss": 1.4160853624343872, "rewards/accuracies": 1.0, "rewards/chosen": -0.14035965502262115, "rewards/margins": 0.1541578322649002, "rewards/rejected": -0.29451751708984375, "step": 1013 }, { "epoch": 2.7953054884363135, "grad_norm": 0.250111848115921, "learning_rate": 7.383044949021339e-08, "log_odds_chosen": 2.0947892665863037, "log_odds_ratio": -0.13201308250427246, "logits/chosen": -0.6975510120391846, "logits/rejected": -1.5848015546798706, "logps/chosen": -1.4522767066955566, "logps/rejected": -3.3220293521881104, "loss": 1.4619, "nll_loss": 1.4486935138702393, "rewards/accuracies": 1.0, "rewards/chosen": -0.14522767066955566, "rewards/margins": 0.18697525560855865, "rewards/rejected": -0.3322029411792755, "step": 1014 }, { "epoch": 2.798066965826717, "grad_norm": 0.2327219545841217, "learning_rate": 7.190963256809069e-08, "log_odds_chosen": 2.1763741970062256, "log_odds_ratio": -0.11422119289636612, "logits/chosen": -0.7316464781761169, "logits/rejected": -1.4472882747650146, "logps/chosen": -1.285031795501709, "logps/rejected": -3.1746089458465576, "loss": 1.3298, "nll_loss": 1.3183457851409912, "rewards/accuracies": 1.0, "rewards/chosen": -0.12850318849086761, "rewards/margins": 0.18895772099494934, "rewards/rejected": -0.31746089458465576, "step": 1015 }, { "epoch": 2.8008284432171213, "grad_norm": 0.2188517302274704, "learning_rate": 7.001376803128041e-08, "log_odds_chosen": 2.2573673725128174, "log_odds_ratio": -0.10664264857769012, "logits/chosen": -0.7266748547554016, "logits/rejected": -1.5064034461975098, "logps/chosen": -1.3659861087799072, "logps/rejected": -3.3637351989746094, "loss": 1.4081, "nll_loss": 1.3973870277404785, "rewards/accuracies": 1.0, "rewards/chosen": -0.1365986168384552, "rewards/margins": 0.19977490603923798, "rewards/rejected": -0.33637353777885437, "step": 1016 }, { "epoch": 2.803589920607525, "grad_norm": 0.2149265706539154, "learning_rate": 6.81428753627239e-08, "log_odds_chosen": 1.9895561933517456, "log_odds_ratio": -0.14788132905960083, "logits/chosen": -0.6736932992935181, "logits/rejected": -1.5023159980773926, "logps/chosen": -1.428675889968872, "logps/rejected": -3.18630313873291, "loss": 1.4536, "nll_loss": 1.4387853145599365, "rewards/accuracies": 1.0, "rewards/chosen": -0.14286759495735168, "rewards/margins": 0.17576274275779724, "rewards/rejected": -0.3186303377151489, "step": 1017 }, { "epoch": 2.806351397997929, "grad_norm": 0.25016653537750244, "learning_rate": 6.629697378873839e-08, "log_odds_chosen": 1.7520558834075928, "log_odds_ratio": -0.16856959462165833, "logits/chosen": -0.8468390703201294, "logits/rejected": -1.345960259437561, "logps/chosen": -1.37800133228302, "logps/rejected": -2.8997609615325928, "loss": 1.3984, "nll_loss": 1.3815675973892212, "rewards/accuracies": 1.0, "rewards/chosen": -0.1378001570701599, "rewards/margins": 0.15217594802379608, "rewards/rejected": -0.2899760603904724, "step": 1018 }, { "epoch": 2.8091128753883328, "grad_norm": 0.22556914389133453, "learning_rate": 6.447608227881791e-08, "log_odds_chosen": 1.9512134790420532, "log_odds_ratio": -0.14954465627670288, "logits/chosen": -0.7097375988960266, "logits/rejected": -1.5652353763580322, "logps/chosen": -1.3805246353149414, "logps/rejected": -3.0939722061157227, "loss": 1.4026, "nll_loss": 1.3876054286956787, "rewards/accuracies": 1.0, "rewards/chosen": -0.13805246353149414, "rewards/margins": 0.17134475708007812, "rewards/rejected": -0.30939725041389465, "step": 1019 }, { "epoch": 2.8118743527787364, "grad_norm": 0.27078142762184143, "learning_rate": 6.268021954544095e-08, "log_odds_chosen": 2.5117132663726807, "log_odds_ratio": -0.11242151260375977, "logits/chosen": -0.7406948804855347, "logits/rejected": -1.457323670387268, "logps/chosen": -1.3857200145721436, "logps/rejected": -3.6457202434539795, "loss": 1.4133, "nll_loss": 1.402071475982666, "rewards/accuracies": 1.0, "rewards/chosen": -0.13857200741767883, "rewards/margins": 0.22600002586841583, "rewards/rejected": -0.36457204818725586, "step": 1020 }, { "epoch": 2.8146358301691405, "grad_norm": 0.26971858739852905, "learning_rate": 6.090940404387513e-08, "log_odds_chosen": 1.8318248987197876, "log_odds_ratio": -0.15712931752204895, "logits/chosen": -0.6660250425338745, "logits/rejected": -1.3423457145690918, "logps/chosen": -1.4494717121124268, "logps/rejected": -3.058114528656006, "loss": 1.4583, "nll_loss": 1.4425814151763916, "rewards/accuracies": 1.0, "rewards/chosen": -0.14494717121124268, "rewards/margins": 0.16086429357528687, "rewards/rejected": -0.30581146478652954, "step": 1021 }, { "epoch": 2.817397307559544, "grad_norm": 0.24719011783599854, "learning_rate": 5.9163653971989754e-08, "log_odds_chosen": 2.4703967571258545, "log_odds_ratio": -0.08853744715452194, "logits/chosen": -0.702930212020874, "logits/rejected": -1.5817415714263916, "logps/chosen": -1.380924105644226, "logps/rejected": -3.591203212738037, "loss": 1.3963, "nll_loss": 1.3874781131744385, "rewards/accuracies": 1.0, "rewards/chosen": -0.13809241354465485, "rewards/margins": 0.2210279256105423, "rewards/rejected": -0.35912033915519714, "step": 1022 }, { "epoch": 2.8201587849499483, "grad_norm": 0.2507234215736389, "learning_rate": 5.744298727006769e-08, "log_odds_chosen": 1.8166286945343018, "log_odds_ratio": -0.1712740659713745, "logits/chosen": -0.7535643577575684, "logits/rejected": -1.3443477153778076, "logps/chosen": -1.4253993034362793, "logps/rejected": -3.020448684692383, "loss": 1.4355, "nll_loss": 1.418355941772461, "rewards/accuracies": 1.0, "rewards/chosen": -0.14253994822502136, "rewards/margins": 0.1595049500465393, "rewards/rejected": -0.3020448684692383, "step": 1023 }, { "epoch": 2.822920262340352, "grad_norm": 0.2217988669872284, "learning_rate": 5.574742162062163e-08, "log_odds_chosen": 1.956099510192871, "log_odds_ratio": -0.15194211900234222, "logits/chosen": -0.7119158506393433, "logits/rejected": -1.4916445016860962, "logps/chosen": -1.4201419353485107, "logps/rejected": -3.1518564224243164, "loss": 1.4414, "nll_loss": 1.4262229204177856, "rewards/accuracies": 1.0, "rewards/chosen": -0.14201419055461884, "rewards/margins": 0.17317146062850952, "rewards/rejected": -0.3151856064796448, "step": 1024 }, { "epoch": 2.825681739730756, "grad_norm": 0.3136124014854431, "learning_rate": 5.407697444821169e-08, "log_odds_chosen": 2.107809543609619, "log_odds_ratio": -0.12612886726856232, "logits/chosen": -0.716467559337616, "logits/rejected": -1.3916617631912231, "logps/chosen": -1.3584463596343994, "logps/rejected": -3.213313579559326, "loss": 1.391, "nll_loss": 1.3783766031265259, "rewards/accuracies": 1.0, "rewards/chosen": -0.1358446478843689, "rewards/margins": 0.18548670411109924, "rewards/rejected": -0.32133132219314575, "step": 1025 }, { "epoch": 2.8284432171211598, "grad_norm": 0.23052355647087097, "learning_rate": 5.2431662919267825e-08, "log_odds_chosen": 2.232840061187744, "log_odds_ratio": -0.10710738599300385, "logits/chosen": -0.705387532711029, "logits/rejected": -1.5807311534881592, "logps/chosen": -1.4884169101715088, "logps/rejected": -3.4969425201416016, "loss": 1.4893, "nll_loss": 1.4785648584365845, "rewards/accuracies": 1.0, "rewards/chosen": -0.1488417088985443, "rewards/margins": 0.20085251331329346, "rewards/rejected": -0.34969422221183777, "step": 1026 }, { "epoch": 2.8312046945115634, "grad_norm": 0.3301798403263092, "learning_rate": 5.0811503941911314e-08, "log_odds_chosen": 1.8585410118103027, "log_odds_ratio": -0.16253486275672913, "logits/chosen": -0.7029575705528259, "logits/rejected": -1.392695665359497, "logps/chosen": -1.4178050756454468, "logps/rejected": -3.0501108169555664, "loss": 1.4271, "nll_loss": 1.4108389616012573, "rewards/accuracies": 1.0, "rewards/chosen": -0.1417805254459381, "rewards/margins": 0.1632305532693863, "rewards/rejected": -0.3050110936164856, "step": 1027 }, { "epoch": 2.8339661719019675, "grad_norm": 0.24940410256385803, "learning_rate": 4.9216514165781885e-08, "log_odds_chosen": 2.162130832672119, "log_odds_ratio": -0.11459186673164368, "logits/chosen": -0.6942464113235474, "logits/rejected": -1.5830132961273193, "logps/chosen": -1.3941457271575928, "logps/rejected": -3.309208631515503, "loss": 1.4216, "nll_loss": 1.4101405143737793, "rewards/accuracies": 1.0, "rewards/chosen": -0.13941457867622375, "rewards/margins": 0.1915063112974167, "rewards/rejected": -0.33092087507247925, "step": 1028 }, { "epoch": 2.8367276492923716, "grad_norm": 0.24598082900047302, "learning_rate": 4.7646709981868376e-08, "log_odds_chosen": 2.1761245727539062, "log_odds_ratio": -0.12259933352470398, "logits/chosen": -0.7400150895118713, "logits/rejected": -1.492202877998352, "logps/chosen": -1.3855305910110474, "logps/rejected": -3.315711259841919, "loss": 1.4082, "nll_loss": 1.395961046218872, "rewards/accuracies": 1.0, "rewards/chosen": -0.13855305314064026, "rewards/margins": 0.19301806390285492, "rewards/rejected": -0.331571102142334, "step": 1029 }, { "epoch": 2.8394891266827753, "grad_norm": 0.22888736426830292, "learning_rate": 4.61021075223364e-08, "log_odds_chosen": 2.0536413192749023, "log_odds_ratio": -0.12501607835292816, "logits/chosen": -0.8160746097564697, "logits/rejected": -1.4835108518600464, "logps/chosen": -1.321547031402588, "logps/rejected": -3.1074142456054688, "loss": 1.3638, "nll_loss": 1.3513017892837524, "rewards/accuracies": 1.0, "rewards/chosen": -0.1321547031402588, "rewards/margins": 0.17858675122261047, "rewards/rejected": -0.3107414245605469, "step": 1030 }, { "epoch": 2.842250604073179, "grad_norm": 0.26659107208251953, "learning_rate": 4.458272266036706e-08, "log_odds_chosen": 1.9500880241394043, "log_odds_ratio": -0.13760273158550262, "logits/chosen": -0.6511709094047546, "logits/rejected": -1.3417994976043701, "logps/chosen": -1.3395155668258667, "logps/rejected": -3.0242364406585693, "loss": 1.3658, "nll_loss": 1.3520032167434692, "rewards/accuracies": 1.0, "rewards/chosen": -0.1339515596628189, "rewards/margins": 0.1684720814228058, "rewards/rejected": -0.3024236559867859, "step": 1031 }, { "epoch": 2.845012081463583, "grad_norm": 0.23956610262393951, "learning_rate": 4.308857100999042e-08, "log_odds_chosen": 2.424144744873047, "log_odds_ratio": -0.09766561537981033, "logits/chosen": -0.6855623722076416, "logits/rejected": -1.6018201112747192, "logps/chosen": -1.3732887506484985, "logps/rejected": -3.5285115242004395, "loss": 1.3997, "nll_loss": 1.3899810314178467, "rewards/accuracies": 1.0, "rewards/chosen": -0.1373288780450821, "rewards/margins": 0.21552230417728424, "rewards/rejected": -0.35285115242004395, "step": 1032 }, { "epoch": 2.8477735588539868, "grad_norm": 0.21250160038471222, "learning_rate": 4.161966792592592e-08, "log_odds_chosen": 1.6863996982574463, "log_odds_ratio": -0.18338753283023834, "logits/chosen": -0.6565060019493103, "logits/rejected": -1.2505011558532715, "logps/chosen": -1.3500611782073975, "logps/rejected": -2.801731586456299, "loss": 1.3748, "nll_loss": 1.3564273118972778, "rewards/accuracies": 1.0, "rewards/chosen": -0.1350061148405075, "rewards/margins": 0.1451670527458191, "rewards/rejected": -0.2801731526851654, "step": 1033 }, { "epoch": 2.850535036244391, "grad_norm": 0.22789491713047028, "learning_rate": 4.017602850342584e-08, "log_odds_chosen": 2.054748058319092, "log_odds_ratio": -0.1231355220079422, "logits/chosen": -0.6738651990890503, "logits/rejected": -1.4317106008529663, "logps/chosen": -1.4131054878234863, "logps/rejected": -3.2276337146759033, "loss": 1.4282, "nll_loss": 1.415932059288025, "rewards/accuracies": 1.0, "rewards/chosen": -0.14131054282188416, "rewards/margins": 0.18145282566547394, "rewards/rejected": -0.3227633833885193, "step": 1034 }, { "epoch": 2.8532965136347945, "grad_norm": 0.2413378357887268, "learning_rate": 3.8757667578119e-08, "log_odds_chosen": 2.269437551498413, "log_odds_ratio": -0.10816527903079987, "logits/chosen": -0.6924024224281311, "logits/rejected": -1.485842227935791, "logps/chosen": -1.4153399467468262, "logps/rejected": -3.436018466949463, "loss": 1.4335, "nll_loss": 1.4226492643356323, "rewards/accuracies": 1.0, "rewards/chosen": -0.1415340155363083, "rewards/margins": 0.20206783711910248, "rewards/rejected": -0.34360188245773315, "step": 1035 }, { "epoch": 2.8560579910251986, "grad_norm": 0.23704323172569275, "learning_rate": 3.736459972585815e-08, "log_odds_chosen": 1.8103829622268677, "log_odds_ratio": -0.1597844660282135, "logits/chosen": -0.6280055642127991, "logits/rejected": -1.4869545698165894, "logps/chosen": -1.5055298805236816, "logps/rejected": -3.110546112060547, "loss": 1.5181, "nll_loss": 1.5021302700042725, "rewards/accuracies": 1.0, "rewards/chosen": -0.15055298805236816, "rewards/margins": 0.160501629114151, "rewards/rejected": -0.31105464696884155, "step": 1036 }, { "epoch": 2.8588194684156023, "grad_norm": 0.21703602373600006, "learning_rate": 3.5996839262571194e-08, "log_odds_chosen": 1.7736772298812866, "log_odds_ratio": -0.16096659004688263, "logits/chosen": -0.7397993803024292, "logits/rejected": -1.391385555267334, "logps/chosen": -1.4275453090667725, "logps/rejected": -2.9790866374969482, "loss": 1.4486, "nll_loss": 1.4324570894241333, "rewards/accuracies": 1.0, "rewards/chosen": -0.14275452494621277, "rewards/margins": 0.15515413880348206, "rewards/rejected": -0.2979086637496948, "step": 1037 }, { "epoch": 2.861580945806006, "grad_norm": 0.2613905370235443, "learning_rate": 3.4654400244112654e-08, "log_odds_chosen": 2.0045812129974365, "log_odds_ratio": -0.14226770401000977, "logits/chosen": -0.6708114743232727, "logits/rejected": -1.4915835857391357, "logps/chosen": -1.421678066253662, "logps/rejected": -3.1922788619995117, "loss": 1.4444, "nll_loss": 1.4301918745040894, "rewards/accuracies": 1.0, "rewards/chosen": -0.1421678066253662, "rewards/margins": 0.1770600825548172, "rewards/rejected": -0.3192278742790222, "step": 1038 }, { "epoch": 2.86434242319641, "grad_norm": 0.24211828410625458, "learning_rate": 3.333729646612077e-08, "log_odds_chosen": 1.53237783908844, "log_odds_ratio": -0.2161731868982315, "logits/chosen": -0.706529974937439, "logits/rejected": -1.216195821762085, "logps/chosen": -1.4395180940628052, "logps/rejected": -2.7703676223754883, "loss": 1.4635, "nll_loss": 1.4418400526046753, "rewards/accuracies": 1.0, "rewards/chosen": -0.14395181834697723, "rewards/margins": 0.1330849528312683, "rewards/rejected": -0.27703675627708435, "step": 1039 }, { "epoch": 2.867103900586814, "grad_norm": 0.27627381682395935, "learning_rate": 3.204554146387456e-08, "log_odds_chosen": 2.1550707817077637, "log_odds_ratio": -0.11569668352603912, "logits/chosen": -0.7337977886199951, "logits/rejected": -1.7024574279785156, "logps/chosen": -1.3803796768188477, "logps/rejected": -3.2796034812927246, "loss": 1.4007, "nll_loss": 1.389117956161499, "rewards/accuracies": 1.0, "rewards/chosen": -0.13803797960281372, "rewards/margins": 0.18992236256599426, "rewards/rejected": -0.327960342168808, "step": 1040 }, { "epoch": 2.869865377977218, "grad_norm": 0.22937756776809692, "learning_rate": 3.077914851215585e-08, "log_odds_chosen": 2.169558525085449, "log_odds_ratio": -0.11814650893211365, "logits/chosen": -0.6977416276931763, "logits/rejected": -1.6600958108901978, "logps/chosen": -1.4013175964355469, "logps/rejected": -3.324887752532959, "loss": 1.4014, "nll_loss": 1.3896281719207764, "rewards/accuracies": 1.0, "rewards/chosen": -0.14013175666332245, "rewards/margins": 0.19235703349113464, "rewards/rejected": -0.3324888050556183, "step": 1041 }, { "epoch": 2.8726268553676215, "grad_norm": 0.22734908759593964, "learning_rate": 2.9538130625110796e-08, "log_odds_chosen": 2.3204684257507324, "log_odds_ratio": -0.10231014341115952, "logits/chosen": -0.7083406448364258, "logits/rejected": -1.6906741857528687, "logps/chosen": -1.415956735610962, "logps/rejected": -3.4903724193573, "loss": 1.4196, "nll_loss": 1.4093878269195557, "rewards/accuracies": 1.0, "rewards/chosen": -0.14159569144248962, "rewards/margins": 0.20744158327579498, "rewards/rejected": -0.3490372598171234, "step": 1042 }, { "epoch": 2.8753883327580256, "grad_norm": 0.2906034588813782, "learning_rate": 2.8322500556118847e-08, "log_odds_chosen": 2.091008424758911, "log_odds_ratio": -0.11896198242902756, "logits/chosen": -0.6563721895217896, "logits/rejected": -1.3930106163024902, "logps/chosen": -1.4288796186447144, "logps/rejected": -3.2848880290985107, "loss": 1.4461, "nll_loss": 1.4342501163482666, "rewards/accuracies": 1.0, "rewards/chosen": -0.14288794994354248, "rewards/margins": 0.1856008917093277, "rewards/rejected": -0.328488826751709, "step": 1043 }, { "epoch": 2.8781498101484293, "grad_norm": 0.250757098197937, "learning_rate": 2.7132270797659567e-08, "log_odds_chosen": 2.1962289810180664, "log_odds_ratio": -0.10955139994621277, "logits/chosen": -0.5953691601753235, "logits/rejected": -1.5709525346755981, "logps/chosen": -1.3853814601898193, "logps/rejected": -3.3294827938079834, "loss": 1.3895, "nll_loss": 1.3785563707351685, "rewards/accuracies": 1.0, "rewards/chosen": -0.13853813707828522, "rewards/margins": 0.19441016018390656, "rewards/rejected": -0.3329482972621918, "step": 1044 }, { "epoch": 2.8809112875388334, "grad_norm": 0.24565622210502625, "learning_rate": 2.5967453581185187e-08, "log_odds_chosen": 2.2447726726531982, "log_odds_ratio": -0.13254183530807495, "logits/chosen": -0.5632262825965881, "logits/rejected": -1.396523118019104, "logps/chosen": -1.4412177801132202, "logps/rejected": -3.4595718383789062, "loss": 1.4738, "nll_loss": 1.460578203201294, "rewards/accuracies": 1.0, "rewards/chosen": -0.14412179589271545, "rewards/margins": 0.20183539390563965, "rewards/rejected": -0.3459571599960327, "step": 1045 }, { "epoch": 2.883672764929237, "grad_norm": 0.24057286977767944, "learning_rate": 2.4828060876995462e-08, "log_odds_chosen": 1.8118979930877686, "log_odds_ratio": -0.15646016597747803, "logits/chosen": -0.6829215288162231, "logits/rejected": -1.4891915321350098, "logps/chosen": -1.4990609884262085, "logps/rejected": -3.1025707721710205, "loss": 1.5013, "nll_loss": 1.4856256246566772, "rewards/accuracies": 1.0, "rewards/chosen": -0.14990611374378204, "rewards/margins": 0.1603509783744812, "rewards/rejected": -0.31025710701942444, "step": 1046 }, { "epoch": 2.886434242319641, "grad_norm": 0.24169136583805084, "learning_rate": 2.371410439411248e-08, "log_odds_chosen": 2.278245687484741, "log_odds_ratio": -0.10662311315536499, "logits/chosen": -0.7540525197982788, "logits/rejected": -1.5213404893875122, "logps/chosen": -1.3615578413009644, "logps/rejected": -3.378927230834961, "loss": 1.3748, "nll_loss": 1.3641554117202759, "rewards/accuracies": 1.0, "rewards/chosen": -0.13615579903125763, "rewards/margins": 0.2017369419336319, "rewards/rejected": -0.3378927707672119, "step": 1047 }, { "epoch": 2.889195719710045, "grad_norm": 0.22865483164787292, "learning_rate": 2.262559558016325e-08, "log_odds_chosen": 1.9611331224441528, "log_odds_ratio": -0.1431179940700531, "logits/chosen": -0.697033166885376, "logits/rejected": -1.5238416194915771, "logps/chosen": -1.3502399921417236, "logps/rejected": -3.0625009536743164, "loss": 1.3791, "nll_loss": 1.3647691011428833, "rewards/accuracies": 1.0, "rewards/chosen": -0.13502401113510132, "rewards/margins": 0.17122608423233032, "rewards/rejected": -0.30625006556510925, "step": 1048 }, { "epoch": 2.8919571971004485, "grad_norm": 0.2690604329109192, "learning_rate": 2.1562545621259534e-08, "log_odds_chosen": 2.1337342262268066, "log_odds_ratio": -0.11663802713155746, "logits/chosen": -0.6461864113807678, "logits/rejected": -1.3793827295303345, "logps/chosen": -1.3172171115875244, "logps/rejected": -3.1804521083831787, "loss": 1.3564, "nll_loss": 1.344741940498352, "rewards/accuracies": 1.0, "rewards/chosen": -0.13172172009944916, "rewards/margins": 0.18632349371910095, "rewards/rejected": -0.3180451989173889, "step": 1049 }, { "epoch": 2.8947186744908526, "grad_norm": 0.24906614422798157, "learning_rate": 2.052496544188487e-08, "log_odds_chosen": 1.8743669986724854, "log_odds_ratio": -0.15791194140911102, "logits/chosen": -0.6495178937911987, "logits/rejected": -1.519452691078186, "logps/chosen": -1.4032065868377686, "logps/rejected": -3.046907901763916, "loss": 1.4234, "nll_loss": 1.4075757265090942, "rewards/accuracies": 1.0, "rewards/chosen": -0.14032067358493805, "rewards/margins": 0.16437013447284698, "rewards/rejected": -0.30469080805778503, "step": 1050 }, { "epoch": 2.8974801518812567, "grad_norm": 0.25790420174598694, "learning_rate": 1.9512865704780504e-08, "log_odds_chosen": 2.004434823989868, "log_odds_ratio": -0.142928808927536, "logits/chosen": -0.7044804096221924, "logits/rejected": -1.4181129932403564, "logps/chosen": -1.4486281871795654, "logps/rejected": -3.231778383255005, "loss": 1.4623, "nll_loss": 1.4480060338974, "rewards/accuracies": 1.0, "rewards/chosen": -0.1448628157377243, "rewards/margins": 0.17831505835056305, "rewards/rejected": -0.32317784428596497, "step": 1051 }, { "epoch": 2.9002416292716604, "grad_norm": 0.2535795271396637, "learning_rate": 1.852625681083742e-08, "log_odds_chosen": 1.953382968902588, "log_odds_ratio": -0.1467381864786148, "logits/chosen": -0.8644619584083557, "logits/rejected": -1.5003637075424194, "logps/chosen": -1.419054627418518, "logps/rejected": -3.133068799972534, "loss": 1.4487, "nll_loss": 1.4340261220932007, "rewards/accuracies": 1.0, "rewards/chosen": -0.14190545678138733, "rewards/margins": 0.17140141129493713, "rewards/rejected": -0.31330686807632446, "step": 1052 }, { "epoch": 2.903003106662064, "grad_norm": 0.2612758278846741, "learning_rate": 1.7565148898988916e-08, "log_odds_chosen": 1.8363025188446045, "log_odds_ratio": -0.15685833990573883, "logits/chosen": -0.7294274568557739, "logits/rejected": -1.3612031936645508, "logps/chosen": -1.442458987236023, "logps/rejected": -3.0576303005218506, "loss": 1.448, "nll_loss": 1.4323320388793945, "rewards/accuracies": 1.0, "rewards/chosen": -0.144245907664299, "rewards/margins": 0.16151714324951172, "rewards/rejected": -0.3057630658149719, "step": 1053 }, { "epoch": 2.905764584052468, "grad_norm": 0.2650681436061859, "learning_rate": 1.6629551846104874e-08, "log_odds_chosen": 1.7293431758880615, "log_odds_ratio": -0.19584131240844727, "logits/chosen": -0.6814649701118469, "logits/rejected": -1.3605228662490845, "logps/chosen": -1.436675786972046, "logps/rejected": -2.958911418914795, "loss": 1.4487, "nll_loss": 1.4290745258331299, "rewards/accuracies": 1.0, "rewards/chosen": -0.1436675786972046, "rewards/margins": 0.15222355723381042, "rewards/rejected": -0.295891135931015, "step": 1054 }, { "epoch": 2.908526061442872, "grad_norm": 0.24677792191505432, "learning_rate": 1.571947526689349e-08, "log_odds_chosen": 2.0139553546905518, "log_odds_ratio": -0.13746051490306854, "logits/chosen": -0.7538365125656128, "logits/rejected": -1.3094806671142578, "logps/chosen": -1.4475135803222656, "logps/rejected": -3.2367706298828125, "loss": 1.4678, "nll_loss": 1.4540915489196777, "rewards/accuracies": 1.0, "rewards/chosen": -0.14475136995315552, "rewards/margins": 0.17892570793628693, "rewards/rejected": -0.32367706298828125, "step": 1055 }, { "epoch": 2.911287538833276, "grad_norm": 0.2379024177789688, "learning_rate": 1.483492851379914e-08, "log_odds_chosen": 2.2087109088897705, "log_odds_ratio": -0.11113860458135605, "logits/chosen": -0.7356382012367249, "logits/rejected": -1.5890445709228516, "logps/chosen": -1.3960850238800049, "logps/rejected": -3.3559443950653076, "loss": 1.4029, "nll_loss": 1.391798973083496, "rewards/accuracies": 1.0, "rewards/chosen": -0.1396085023880005, "rewards/margins": 0.19598592817783356, "rewards/rejected": -0.33559444546699524, "step": 1056 }, { "epoch": 2.9140490162236796, "grad_norm": 0.2611464262008667, "learning_rate": 1.3975920676908838e-08, "log_odds_chosen": 1.786283016204834, "log_odds_ratio": -0.16793999075889587, "logits/chosen": -0.577727198600769, "logits/rejected": -1.3633073568344116, "logps/chosen": -1.44246244430542, "logps/rejected": -3.0122461318969727, "loss": 1.468, "nll_loss": 1.4512224197387695, "rewards/accuracies": 1.0, "rewards/chosen": -0.14424623548984528, "rewards/margins": 0.15697838366031647, "rewards/rejected": -0.30122464895248413, "step": 1057 }, { "epoch": 2.9168104936140837, "grad_norm": 0.27698665857315063, "learning_rate": 1.3142460583856487e-08, "log_odds_chosen": 2.1011810302734375, "log_odds_ratio": -0.12240156531333923, "logits/chosen": -0.7454802989959717, "logits/rejected": -1.307618498802185, "logps/chosen": -1.4059453010559082, "logps/rejected": -3.2645998001098633, "loss": 1.42, "nll_loss": 1.4077752828598022, "rewards/accuracies": 1.0, "rewards/chosen": -0.14059452712535858, "rewards/margins": 0.18586543202400208, "rewards/rejected": -0.32645997405052185, "step": 1058 }, { "epoch": 2.9195719710044874, "grad_norm": 0.258980929851532, "learning_rate": 1.2334556799734887e-08, "log_odds_chosen": 1.8398014307022095, "log_odds_ratio": -0.15507705509662628, "logits/chosen": -0.753607451915741, "logits/rejected": -1.2447738647460938, "logps/chosen": -1.3793818950653076, "logps/rejected": -2.9822983741760254, "loss": 1.4016, "nll_loss": 1.3861240148544312, "rewards/accuracies": 1.0, "rewards/chosen": -0.13793820142745972, "rewards/margins": 0.1602916270494461, "rewards/rejected": -0.29822981357574463, "step": 1059 }, { "epoch": 2.922333448394891, "grad_norm": 0.22700020670890808, "learning_rate": 1.1552217627004426e-08, "log_odds_chosen": 1.8766063451766968, "log_odds_ratio": -0.15902380645275116, "logits/chosen": -0.671968400478363, "logits/rejected": -1.3500280380249023, "logps/chosen": -1.3683315515518188, "logps/rejected": -3.0047004222869873, "loss": 1.4093, "nll_loss": 1.393393874168396, "rewards/accuracies": 1.0, "rewards/chosen": -0.13683316111564636, "rewards/margins": 0.16363686323165894, "rewards/rejected": -0.3004700243473053, "step": 1060 }, { "epoch": 2.925094925785295, "grad_norm": 0.22656705975532532, "learning_rate": 1.079545110541147e-08, "log_odds_chosen": 2.205451488494873, "log_odds_ratio": -0.11620151251554489, "logits/chosen": -0.6744479537010193, "logits/rejected": -1.4664885997772217, "logps/chosen": -1.3883947134017944, "logps/rejected": -3.344780445098877, "loss": 1.4119, "nll_loss": 1.400273084640503, "rewards/accuracies": 1.0, "rewards/chosen": -0.1388394832611084, "rewards/margins": 0.19563856720924377, "rewards/rejected": -0.3344780206680298, "step": 1061 }, { "epoch": 2.927856403175699, "grad_norm": 0.2419911026954651, "learning_rate": 1.006426501190233e-08, "log_odds_chosen": 2.1936004161834717, "log_odds_ratio": -0.1106862723827362, "logits/chosen": -0.6947401762008667, "logits/rejected": -1.6190247535705566, "logps/chosen": -1.3645473718643188, "logps/rejected": -3.3015763759613037, "loss": 1.3777, "nll_loss": 1.366647720336914, "rewards/accuracies": 1.0, "rewards/chosen": -0.1364547312259674, "rewards/margins": 0.19370289146900177, "rewards/rejected": -0.33015763759613037, "step": 1062 }, { "epoch": 2.930617880566103, "grad_norm": 0.2288430631160736, "learning_rate": 9.358666860545817e-09, "log_odds_chosen": 2.2677648067474365, "log_odds_ratio": -0.10382355749607086, "logits/chosen": -0.6226020455360413, "logits/rejected": -1.4943479299545288, "logps/chosen": -1.3931465148925781, "logps/rejected": -3.410277843475342, "loss": 1.4084, "nll_loss": 1.3979921340942383, "rewards/accuracies": 1.0, "rewards/chosen": -0.139314666390419, "rewards/margins": 0.20171311497688293, "rewards/rejected": -0.34102776646614075, "step": 1063 }, { "epoch": 2.9333793579565066, "grad_norm": 0.2356155663728714, "learning_rate": 8.67866390245442e-09, "log_odds_chosen": 1.8083369731903076, "log_odds_ratio": -0.15998531877994537, "logits/chosen": -0.7477939128875732, "logits/rejected": -1.3109685182571411, "logps/chosen": -1.394054651260376, "logps/rejected": -2.970384120941162, "loss": 1.4274, "nll_loss": 1.4114125967025757, "rewards/accuracies": 1.0, "rewards/chosen": -0.13940547406673431, "rewards/margins": 0.1576329618692398, "rewards/rejected": -0.2970384359359741, "step": 1064 }, { "epoch": 2.9361408353469107, "grad_norm": 0.24004729092121124, "learning_rate": 8.024263125710751e-09, "log_odds_chosen": 2.020559787750244, "log_odds_ratio": -0.1439000368118286, "logits/chosen": -0.7140991687774658, "logits/rejected": -1.4966161251068115, "logps/chosen": -1.4045417308807373, "logps/rejected": -3.189021587371826, "loss": 1.4262, "nll_loss": 1.4117966890335083, "rewards/accuracies": 1.0, "rewards/chosen": -0.14045415818691254, "rewards/margins": 0.17844797670841217, "rewards/rejected": -0.3189021348953247, "step": 1065 }, { "epoch": 2.9389023127373144, "grad_norm": 0.2469998449087143, "learning_rate": 7.3954712552953835e-09, "log_odds_chosen": 2.1445934772491455, "log_odds_ratio": -0.12714563310146332, "logits/chosen": -0.692491352558136, "logits/rejected": -1.4322024583816528, "logps/chosen": -1.4771634340286255, "logps/rejected": -3.40108585357666, "loss": 1.4873, "nll_loss": 1.4746204614639282, "rewards/accuracies": 1.0, "rewards/chosen": -0.14771634340286255, "rewards/margins": 0.1923922747373581, "rewards/rejected": -0.34010857343673706, "step": 1066 }, { "epoch": 2.941663790127718, "grad_norm": 0.2169053703546524, "learning_rate": 6.792294753017181e-09, "log_odds_chosen": 1.9535696506500244, "log_odds_ratio": -0.16166673600673676, "logits/chosen": -0.7169552445411682, "logits/rejected": -1.3493878841400146, "logps/chosen": -1.3685996532440186, "logps/rejected": -3.086214065551758, "loss": 1.3847, "nll_loss": 1.3685096502304077, "rewards/accuracies": 1.0, "rewards/chosen": -0.1368599683046341, "rewards/margins": 0.17176145315170288, "rewards/rejected": -0.3086214065551758, "step": 1067 }, { "epoch": 2.944425267518122, "grad_norm": 0.22649963200092316, "learning_rate": 6.214739817448634e-09, "log_odds_chosen": 1.9070916175842285, "log_odds_ratio": -0.16096433997154236, "logits/chosen": -0.6618258357048035, "logits/rejected": -1.24991774559021, "logps/chosen": -1.4187346696853638, "logps/rejected": -3.100508689880371, "loss": 1.4392, "nll_loss": 1.423121690750122, "rewards/accuracies": 1.0, "rewards/chosen": -0.14187347888946533, "rewards/margins": 0.16817741096019745, "rewards/rejected": -0.310050904750824, "step": 1068 }, { "epoch": 2.9471867449085263, "grad_norm": 0.24362997710704803, "learning_rate": 5.662812383859795e-09, "log_odds_chosen": 2.271414041519165, "log_odds_ratio": -0.11012668162584305, "logits/chosen": -0.6633353233337402, "logits/rejected": -1.4905028343200684, "logps/chosen": -1.3171957731246948, "logps/rejected": -3.3101119995117188, "loss": 1.3428, "nll_loss": 1.3318297863006592, "rewards/accuracies": 1.0, "rewards/chosen": -0.13171958923339844, "rewards/margins": 0.19929160177707672, "rewards/rejected": -0.3310111463069916, "step": 1069 }, { "epoch": 2.94994822229893, "grad_norm": 0.2065637707710266, "learning_rate": 5.136518124159162e-09, "log_odds_chosen": 2.032348394393921, "log_odds_ratio": -0.14922606945037842, "logits/chosen": -0.6291341781616211, "logits/rejected": -1.406713604927063, "logps/chosen": -1.36196768283844, "logps/rejected": -3.1446444988250732, "loss": 1.3926, "nll_loss": 1.3776776790618896, "rewards/accuracies": 1.0, "rewards/chosen": -0.1361967772245407, "rewards/margins": 0.1782676875591278, "rewards/rejected": -0.31446442008018494, "step": 1070 }, { "epoch": 2.9527096996893336, "grad_norm": 0.27329304814338684, "learning_rate": 4.63586244683456e-09, "log_odds_chosen": 1.687107801437378, "log_odds_ratio": -0.17623448371887207, "logits/chosen": -0.6939666271209717, "logits/rejected": -1.3353276252746582, "logps/chosen": -1.451897382736206, "logps/rejected": -2.9258344173431396, "loss": 1.4677, "nll_loss": 1.450042724609375, "rewards/accuracies": 1.0, "rewards/chosen": -0.14518973231315613, "rewards/margins": 0.14739371836185455, "rewards/rejected": -0.2925834655761719, "step": 1071 }, { "epoch": 2.9554711770797377, "grad_norm": 0.27351564168930054, "learning_rate": 4.160850496897906e-09, "log_odds_chosen": 2.0957698822021484, "log_odds_ratio": -0.13688980042934418, "logits/chosen": -0.7693864703178406, "logits/rejected": -1.4246867895126343, "logps/chosen": -1.445740818977356, "logps/rejected": -3.3158981800079346, "loss": 1.4548, "nll_loss": 1.441150188446045, "rewards/accuracies": 1.0, "rewards/chosen": -0.1445740908384323, "rewards/margins": 0.18701574206352234, "rewards/rejected": -0.33158981800079346, "step": 1072 }, { "epoch": 2.9582326544701414, "grad_norm": 0.21946768462657928, "learning_rate": 3.7114871558313614e-09, "log_odds_chosen": 2.1303412914276123, "log_odds_ratio": -0.11308460682630539, "logits/chosen": -0.7500283718109131, "logits/rejected": -1.5380302667617798, "logps/chosen": -1.4256103038787842, "logps/rejected": -3.314450263977051, "loss": 1.4426, "nll_loss": 1.4312454462051392, "rewards/accuracies": 1.0, "rewards/chosen": -0.14256104826927185, "rewards/margins": 0.18888401985168457, "rewards/rejected": -0.3314450979232788, "step": 1073 }, { "epoch": 2.9609941318605455, "grad_norm": 0.4080851376056671, "learning_rate": 3.287777041539042e-09, "log_odds_chosen": 1.771087408065796, "log_odds_ratio": -0.17736610770225525, "logits/chosen": -0.7575640678405762, "logits/rejected": -1.2754969596862793, "logps/chosen": -1.3221291303634644, "logps/rejected": -2.852849006652832, "loss": 1.361, "nll_loss": 1.343238353729248, "rewards/accuracies": 1.0, "rewards/chosen": -0.13221290707588196, "rewards/margins": 0.15307198464870453, "rewards/rejected": -0.2852849066257477, "step": 1074 }, { "epoch": 2.963755609250949, "grad_norm": 0.21535170078277588, "learning_rate": 2.8897245082978863e-09, "log_odds_chosen": 2.3666675090789795, "log_odds_ratio": -0.10730992257595062, "logits/chosen": -0.7090921998023987, "logits/rejected": -1.6503901481628418, "logps/chosen": -1.3907458782196045, "logps/rejected": -3.5077884197235107, "loss": 1.3979, "nll_loss": 1.3871413469314575, "rewards/accuracies": 1.0, "rewards/chosen": -0.13907457888126373, "rewards/margins": 0.2117042988538742, "rewards/rejected": -0.35077887773513794, "step": 1075 }, { "epoch": 2.9665170866413533, "grad_norm": 0.26108136773109436, "learning_rate": 2.5173336467135266e-09, "log_odds_chosen": 2.2342419624328613, "log_odds_ratio": -0.1081056222319603, "logits/chosen": -0.7811664938926697, "logits/rejected": -1.55881929397583, "logps/chosen": -1.35188627243042, "logps/rejected": -3.324615716934204, "loss": 1.3672, "nll_loss": 1.3563470840454102, "rewards/accuracies": 1.0, "rewards/chosen": -0.13518862426280975, "rewards/margins": 0.19727298617362976, "rewards/rejected": -0.3324615955352783, "step": 1076 }, { "epoch": 2.969278564031757, "grad_norm": 0.23990552127361298, "learning_rate": 2.170608283677822e-09, "log_odds_chosen": 2.1579220294952393, "log_odds_ratio": -0.12064395099878311, "logits/chosen": -0.8259487748146057, "logits/rejected": -1.4697637557983398, "logps/chosen": -1.339401125907898, "logps/rejected": -3.2319252490997314, "loss": 1.37, "nll_loss": 1.3579620122909546, "rewards/accuracies": 1.0, "rewards/chosen": -0.13394011557102203, "rewards/margins": 0.18925237655639648, "rewards/rejected": -0.3231925070285797, "step": 1077 }, { "epoch": 2.9720400414221606, "grad_norm": 0.24461615085601807, "learning_rate": 1.8495519823308329e-09, "log_odds_chosen": 2.347550630569458, "log_odds_ratio": -0.09764161705970764, "logits/chosen": -0.7218992114067078, "logits/rejected": -1.6028780937194824, "logps/chosen": -1.4019834995269775, "logps/rejected": -3.496467351913452, "loss": 1.4226, "nll_loss": 1.412825345993042, "rewards/accuracies": 1.0, "rewards/chosen": -0.14019834995269775, "rewards/margins": 0.20944838225841522, "rewards/rejected": -0.34964674711227417, "step": 1078 }, { "epoch": 2.9748015188125647, "grad_norm": 0.23573514819145203, "learning_rate": 1.5541680420227968e-09, "log_odds_chosen": 1.7787182331085205, "log_odds_ratio": -0.15938720107078552, "logits/chosen": -0.7991430759429932, "logits/rejected": -1.4347333908081055, "logps/chosen": -1.376333475112915, "logps/rejected": -2.9125349521636963, "loss": 1.3967, "nll_loss": 1.380737543106079, "rewards/accuracies": 1.0, "rewards/chosen": -0.13763335347175598, "rewards/margins": 0.1536201387643814, "rewards/rejected": -0.2912535071372986, "step": 1079 }, { "epoch": 2.977562996202969, "grad_norm": 0.30059483647346497, "learning_rate": 1.284459498280266e-09, "log_odds_chosen": 2.2086105346679688, "log_odds_ratio": -0.11442863196134567, "logits/chosen": -0.6320897936820984, "logits/rejected": -1.3481775522232056, "logps/chosen": -1.3569841384887695, "logps/rejected": -3.30661940574646, "loss": 1.3688, "nll_loss": 1.3573298454284668, "rewards/accuracies": 1.0, "rewards/chosen": -0.13569840788841248, "rewards/margins": 0.19496355950832367, "rewards/rejected": -0.33066195249557495, "step": 1080 }, { "epoch": 2.9803244735933725, "grad_norm": 0.22881917655467987, "learning_rate": 1.0404291227764097e-09, "log_odds_chosen": 2.115375518798828, "log_odds_ratio": -0.12139271199703217, "logits/chosen": -0.713450014591217, "logits/rejected": -1.55438232421875, "logps/chosen": -1.4363725185394287, "logps/rejected": -3.317627429962158, "loss": 1.4406, "nll_loss": 1.4284732341766357, "rewards/accuracies": 1.0, "rewards/chosen": -0.1436372548341751, "rewards/margins": 0.18812552094459534, "rewards/rejected": -0.33176276087760925, "step": 1081 }, { "epoch": 2.983085950983776, "grad_norm": 0.23508727550506592, "learning_rate": 8.220794233004814e-10, "log_odds_chosen": 2.353053569793701, "log_odds_ratio": -0.11799976229667664, "logits/chosen": -0.7291232943534851, "logits/rejected": -1.4448521137237549, "logps/chosen": -1.3283522129058838, "logps/rejected": -3.41225528717041, "loss": 1.3663, "nll_loss": 1.354485034942627, "rewards/accuracies": 1.0, "rewards/chosen": -0.1328352391719818, "rewards/margins": 0.20839029550552368, "rewards/rejected": -0.3412255346775055, "step": 1082 }, { "epoch": 2.9858474283741803, "grad_norm": 0.23063348233699799, "learning_rate": 6.294126437336734e-10, "log_odds_chosen": 2.2829418182373047, "log_odds_ratio": -0.09965363144874573, "logits/chosen": -0.7663443684577942, "logits/rejected": -1.5230656862258911, "logps/chosen": -1.4195582866668701, "logps/rejected": -3.4559733867645264, "loss": 1.43, "nll_loss": 1.4200561046600342, "rewards/accuracies": 1.0, "rewards/chosen": -0.14195583760738373, "rewards/margins": 0.20364150404930115, "rewards/rejected": -0.3455973267555237, "step": 1083 }, { "epoch": 2.988608905764584, "grad_norm": 0.24012912809848785, "learning_rate": 4.624307640249681e-10, "log_odds_chosen": 2.066162586212158, "log_odds_ratio": -0.13231447339057922, "logits/chosen": -0.773321807384491, "logits/rejected": -1.6320139169692993, "logps/chosen": -1.44185209274292, "logps/rejected": -3.273061990737915, "loss": 1.4529, "nll_loss": 1.4396920204162598, "rewards/accuracies": 1.0, "rewards/chosen": -0.14418521523475647, "rewards/margins": 0.18312101066112518, "rewards/rejected": -0.32730618119239807, "step": 1084 }, { "epoch": 2.991370383154988, "grad_norm": 0.22926434874534607, "learning_rate": 3.2113550017198734e-10, "log_odds_chosen": 2.1194610595703125, "log_odds_ratio": -0.12147892266511917, "logits/chosen": -0.7365501523017883, "logits/rejected": -1.4753718376159668, "logps/chosen": -1.3296489715576172, "logps/rejected": -3.1818292140960693, "loss": 1.3702, "nll_loss": 1.3580681085586548, "rewards/accuracies": 1.0, "rewards/chosen": -0.13296489417552948, "rewards/margins": 0.18521800637245178, "rewards/rejected": -0.31818294525146484, "step": 1085 }, { "epoch": 2.9941318605453917, "grad_norm": 0.2311965674161911, "learning_rate": 2.0552830420184077e-10, "log_odds_chosen": 2.0617785453796387, "log_odds_ratio": -0.15817579627037048, "logits/chosen": -0.6742764115333557, "logits/rejected": -1.2439779043197632, "logps/chosen": -1.2953776121139526, "logps/rejected": -3.095350742340088, "loss": 1.3197, "nll_loss": 1.3038488626480103, "rewards/accuracies": 1.0, "rewards/chosen": -0.12953776121139526, "rewards/margins": 0.17999732494354248, "rewards/rejected": -0.30953508615493774, "step": 1086 }, { "epoch": 2.996893337935796, "grad_norm": 0.22364288568496704, "learning_rate": 1.1561036415752568e-10, "log_odds_chosen": 2.2161312103271484, "log_odds_ratio": -0.13474784791469574, "logits/chosen": -0.6817208528518677, "logits/rejected": -1.3339648246765137, "logps/chosen": -1.3357588052749634, "logps/rejected": -3.2936766147613525, "loss": 1.3758, "nll_loss": 1.3622870445251465, "rewards/accuracies": 1.0, "rewards/chosen": -0.13357587158679962, "rewards/margins": 0.19579178094863892, "rewards/rejected": -0.32936763763427734, "step": 1087 }, { "epoch": 2.9996548153261995, "grad_norm": 0.22428768873214722, "learning_rate": 5.138260408488194e-11, "log_odds_chosen": 2.1880013942718506, "log_odds_ratio": -0.12593357264995575, "logits/chosen": -0.6923436522483826, "logits/rejected": -1.4497140645980835, "logps/chosen": -1.46262788772583, "logps/rejected": -3.4268667697906494, "loss": 1.4771, "nll_loss": 1.4645458459854126, "rewards/accuracies": 1.0, "rewards/chosen": -0.14626279473304749, "rewards/margins": 0.19642390310764313, "rewards/rejected": -0.3426866829395294, "step": 1088 }, { "epoch": 3.0, "grad_norm": 0.5826578736305237, "learning_rate": 1.2845684023432559e-11, "log_odds_chosen": 2.5953662395477295, "log_odds_ratio": -0.07196575403213501, "logits/chosen": -0.773766040802002, "logits/rejected": -1.2849546670913696, "logps/chosen": -1.1473791599273682, "logps/rejected": -3.39491868019104, "loss": 1.1904, "nll_loss": 1.1832088232040405, "rewards/accuracies": 1.0, "rewards/chosen": -0.11473792046308517, "rewards/margins": 0.2247539460659027, "rewards/rejected": -0.3394918739795685, "step": 1089 } ], "logging_steps": 1, "max_steps": 1089, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }