{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.3783224024853298, "eval_steps": 500, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002761477390403866, "grad_norm": 0.6915842294692993, "learning_rate": 0.0, "log_odds_chosen": 0.39300817251205444, "log_odds_ratio": -0.5194607377052307, "logits/chosen": -0.6593188047409058, "logits/rejected": -0.11274649202823639, "logps/chosen": -1.9585602283477783, "logps/rejected": -2.3031246662139893, "loss": 2.2362, "nll_loss": 2.1842355728149414, "rewards/accuracies": 1.0, "rewards/chosen": -0.1958560198545456, "rewards/margins": 0.034456461668014526, "rewards/rejected": -0.23031246662139893, "step": 1 }, { "epoch": 0.005522954780807732, "grad_norm": 0.8511083722114563, "learning_rate": 4.587155963302753e-08, "log_odds_chosen": 0.4412614703178406, "log_odds_ratio": -0.49888327717781067, "logits/chosen": -0.6945286393165588, "logits/rejected": -0.08449751883745193, "logps/chosen": -1.9912033081054688, "logps/rejected": -2.382397413253784, "loss": 2.275, "nll_loss": 2.2251474857330322, "rewards/accuracies": 1.0, "rewards/chosen": -0.19912034273147583, "rewards/margins": 0.0391194187104702, "rewards/rejected": -0.23823975026607513, "step": 2 }, { "epoch": 0.008284432171211598, "grad_norm": 0.6988061666488647, "learning_rate": 9.174311926605506e-08, "log_odds_chosen": 0.49348145723342896, "log_odds_ratio": -0.4824022948741913, "logits/chosen": -0.7197601795196533, "logits/rejected": -0.13051480054855347, "logps/chosen": -1.9484779834747314, "logps/rejected": -2.385488271713257, "loss": 2.1977, "nll_loss": 2.149470567703247, "rewards/accuracies": 1.0, "rewards/chosen": -0.19484779238700867, "rewards/margins": 0.04370103031396866, "rewards/rejected": -0.23854880034923553, "step": 3 }, { "epoch": 0.011045909561615464, "grad_norm": 0.6778914928436279, "learning_rate": 1.376146788990826e-07, "log_odds_chosen": 0.4488891661167145, "log_odds_ratio": -0.5013620853424072, "logits/chosen": -0.6518482565879822, "logits/rejected": -0.08967436850070953, "logps/chosen": -1.9699251651763916, "logps/rejected": -2.3685381412506104, "loss": 2.204, "nll_loss": 2.1538803577423096, "rewards/accuracies": 0.875, "rewards/chosen": -0.19699251651763916, "rewards/margins": 0.0398612916469574, "rewards/rejected": -0.23685382306575775, "step": 4 }, { "epoch": 0.013807386952019331, "grad_norm": 0.7014802694320679, "learning_rate": 1.8348623853211012e-07, "log_odds_chosen": 0.4621143639087677, "log_odds_ratio": -0.5008962750434875, "logits/chosen": -0.6857364177703857, "logits/rejected": -0.15241342782974243, "logps/chosen": -1.9302603006362915, "logps/rejected": -2.335421562194824, "loss": 2.1786, "nll_loss": 2.128532648086548, "rewards/accuracies": 1.0, "rewards/chosen": -0.19302603602409363, "rewards/margins": 0.04051613062620163, "rewards/rejected": -0.23354215919971466, "step": 5 }, { "epoch": 0.016568864342423197, "grad_norm": 0.6745293140411377, "learning_rate": 2.2935779816513764e-07, "log_odds_chosen": 0.37698638439178467, "log_odds_ratio": -0.5254199504852295, "logits/chosen": -0.6411757469177246, "logits/rejected": -0.2817387282848358, "logps/chosen": -1.9658446311950684, "logps/rejected": -2.2975409030914307, "loss": 2.2215, "nll_loss": 2.168914794921875, "rewards/accuracies": 1.0, "rewards/chosen": -0.19658446311950684, "rewards/margins": 0.03316962346434593, "rewards/rejected": -0.22975412011146545, "step": 6 }, { "epoch": 0.019330341732827064, "grad_norm": 0.6645856499671936, "learning_rate": 2.752293577981652e-07, "log_odds_chosen": 0.22311115264892578, "log_odds_ratio": -0.5942606329917908, "logits/chosen": -0.5768107175827026, "logits/rejected": -0.23452866077423096, "logps/chosen": -2.0238585472106934, "logps/rejected": -2.223414421081543, "loss": 2.2796, "nll_loss": 2.2201786041259766, "rewards/accuracies": 0.875, "rewards/chosen": -0.20238585770130157, "rewards/margins": 0.019955584779381752, "rewards/rejected": -0.22234144806861877, "step": 7 }, { "epoch": 0.022091819123230928, "grad_norm": 0.7377482652664185, "learning_rate": 3.211009174311927e-07, "log_odds_chosen": 0.39164018630981445, "log_odds_ratio": -0.5184764266014099, "logits/chosen": -0.566084623336792, "logits/rejected": -0.12580394744873047, "logps/chosen": -2.0374059677124023, "logps/rejected": -2.3862533569335938, "loss": 2.3026, "nll_loss": 2.2507545948028564, "rewards/accuracies": 1.0, "rewards/chosen": -0.20374059677124023, "rewards/margins": 0.0348847433924675, "rewards/rejected": -0.23862534761428833, "step": 8 }, { "epoch": 0.024853296513634795, "grad_norm": 0.7108282446861267, "learning_rate": 3.6697247706422023e-07, "log_odds_chosen": 0.6321737170219421, "log_odds_ratio": -0.4328705072402954, "logits/chosen": -0.7941892743110657, "logits/rejected": -0.22760047018527985, "logps/chosen": -1.9183871746063232, "logps/rejected": -2.4786453247070312, "loss": 2.1856, "nll_loss": 2.142301082611084, "rewards/accuracies": 1.0, "rewards/chosen": -0.19183871150016785, "rewards/margins": 0.05602581799030304, "rewards/rejected": -0.24786454439163208, "step": 9 }, { "epoch": 0.027614773904038662, "grad_norm": 0.8324993252754211, "learning_rate": 4.128440366972478e-07, "log_odds_chosen": 0.39953452348709106, "log_odds_ratio": -0.5179459452629089, "logits/chosen": -0.7621469497680664, "logits/rejected": -0.25276613235473633, "logps/chosen": -2.0076873302459717, "logps/rejected": -2.3631021976470947, "loss": 2.2827, "nll_loss": 2.2309515476226807, "rewards/accuracies": 1.0, "rewards/chosen": -0.20076872408390045, "rewards/margins": 0.03554149717092514, "rewards/rejected": -0.23631024360656738, "step": 10 }, { "epoch": 0.030376251294442526, "grad_norm": 0.7557547092437744, "learning_rate": 4.587155963302753e-07, "log_odds_chosen": 0.5315287113189697, "log_odds_ratio": -0.4713534414768219, "logits/chosen": -0.6893935203552246, "logits/rejected": -0.10655307024717331, "logps/chosen": -1.9816298484802246, "logps/rejected": -2.457181930541992, "loss": 2.257, "nll_loss": 2.209847927093506, "rewards/accuracies": 0.875, "rewards/chosen": -0.1981630027294159, "rewards/margins": 0.04755519703030586, "rewards/rejected": -0.24571821093559265, "step": 11 }, { "epoch": 0.03313772868484639, "grad_norm": 0.7169145941734314, "learning_rate": 5.045871559633028e-07, "log_odds_chosen": 0.38786107301712036, "log_odds_ratio": -0.5263106822967529, "logits/chosen": -0.6816898584365845, "logits/rejected": -0.0623253732919693, "logps/chosen": -1.9207288026809692, "logps/rejected": -2.262866973876953, "loss": 2.1985, "nll_loss": 2.145916223526001, "rewards/accuracies": 0.875, "rewards/chosen": -0.19207286834716797, "rewards/margins": 0.0342138335108757, "rewards/rejected": -0.22628670930862427, "step": 12 }, { "epoch": 0.03589920607525026, "grad_norm": 0.6504296660423279, "learning_rate": 5.504587155963304e-07, "log_odds_chosen": 0.47358548641204834, "log_odds_ratio": -0.48640355467796326, "logits/chosen": -0.6333162784576416, "logits/rejected": -0.1628946214914322, "logps/chosen": -1.867828369140625, "logps/rejected": -2.280285596847534, "loss": 2.1385, "nll_loss": 2.089879035949707, "rewards/accuracies": 1.0, "rewards/chosen": -0.1867828369140625, "rewards/margins": 0.0412457212805748, "rewards/rejected": -0.2280285507440567, "step": 13 }, { "epoch": 0.03866068346565413, "grad_norm": 0.83363276720047, "learning_rate": 5.963302752293579e-07, "log_odds_chosen": 0.4213864207267761, "log_odds_ratio": -0.521136999130249, "logits/chosen": -0.7305208444595337, "logits/rejected": -0.08356200903654099, "logps/chosen": -2.022002935409546, "logps/rejected": -2.3972954750061035, "loss": 2.2795, "nll_loss": 2.2273383140563965, "rewards/accuracies": 0.75, "rewards/chosen": -0.2022002935409546, "rewards/margins": 0.03752923756837845, "rewards/rejected": -0.23972955346107483, "step": 14 }, { "epoch": 0.04142216085605799, "grad_norm": 0.8258711099624634, "learning_rate": 6.422018348623854e-07, "log_odds_chosen": 0.5449070930480957, "log_odds_ratio": -0.4581969678401947, "logits/chosen": -0.8105592131614685, "logits/rejected": 0.08067083358764648, "logps/chosen": -1.9474968910217285, "logps/rejected": -2.428217649459839, "loss": 2.2093, "nll_loss": 2.163527488708496, "rewards/accuracies": 1.0, "rewards/chosen": -0.19474971294403076, "rewards/margins": 0.04807208105921745, "rewards/rejected": -0.24282175302505493, "step": 15 }, { "epoch": 0.044183638246461855, "grad_norm": 0.7511261701583862, "learning_rate": 6.880733944954129e-07, "log_odds_chosen": 0.4355267882347107, "log_odds_ratio": -0.5088008046150208, "logits/chosen": -0.7654774188995361, "logits/rejected": 0.09955525398254395, "logps/chosen": -1.873271107673645, "logps/rejected": -2.249717950820923, "loss": 2.1545, "nll_loss": 2.1036317348480225, "rewards/accuracies": 0.875, "rewards/chosen": -0.18732713162899017, "rewards/margins": 0.03764466941356659, "rewards/rejected": -0.22497178614139557, "step": 16 }, { "epoch": 0.04694511563686572, "grad_norm": 0.7405815124511719, "learning_rate": 7.339449541284405e-07, "log_odds_chosen": 0.49768519401550293, "log_odds_ratio": -0.4804110527038574, "logits/chosen": -0.6771326661109924, "logits/rejected": 0.09095388650894165, "logps/chosen": -1.9401291608810425, "logps/rejected": -2.3801112174987793, "loss": 2.1836, "nll_loss": 2.1355254650115967, "rewards/accuracies": 1.0, "rewards/chosen": -0.19401292502880096, "rewards/margins": 0.04399820417165756, "rewards/rejected": -0.23801112174987793, "step": 17 }, { "epoch": 0.04970659302726959, "grad_norm": 0.7064201831817627, "learning_rate": 7.79816513761468e-07, "log_odds_chosen": 0.6101190447807312, "log_odds_ratio": -0.43982943892478943, "logits/chosen": -0.6823788285255432, "logits/rejected": -0.1550363153219223, "logps/chosen": -1.9634783267974854, "logps/rejected": -2.509146213531494, "loss": 2.2062, "nll_loss": 2.162249803543091, "rewards/accuracies": 1.0, "rewards/chosen": -0.19634784758090973, "rewards/margins": 0.05456679314374924, "rewards/rejected": -0.25091463327407837, "step": 18 }, { "epoch": 0.05246807041767346, "grad_norm": 0.709898829460144, "learning_rate": 8.256880733944956e-07, "log_odds_chosen": 0.22375424206256866, "log_odds_ratio": -0.5918958783149719, "logits/chosen": -0.6519614458084106, "logits/rejected": -0.030727151781320572, "logps/chosen": -1.9863125085830688, "logps/rejected": -2.1811602115631104, "loss": 2.2528, "nll_loss": 2.193619966506958, "rewards/accuracies": 0.875, "rewards/chosen": -0.19863125681877136, "rewards/margins": 0.01948476769030094, "rewards/rejected": -0.21811603009700775, "step": 19 }, { "epoch": 0.055229547808077324, "grad_norm": 0.7068823575973511, "learning_rate": 8.71559633027523e-07, "log_odds_chosen": 0.4315674602985382, "log_odds_ratio": -0.5065562129020691, "logits/chosen": -0.6818419694900513, "logits/rejected": -0.010788477957248688, "logps/chosen": -1.9227614402770996, "logps/rejected": -2.3006279468536377, "loss": 2.205, "nll_loss": 2.1543147563934326, "rewards/accuracies": 1.0, "rewards/chosen": -0.19227616488933563, "rewards/margins": 0.03778664767742157, "rewards/rejected": -0.23006278276443481, "step": 20 }, { "epoch": 0.057991025198481184, "grad_norm": 0.7781582474708557, "learning_rate": 9.174311926605506e-07, "log_odds_chosen": 0.2822119891643524, "log_odds_ratio": -0.5701829195022583, "logits/chosen": -0.5542811155319214, "logits/rejected": 0.0044986791908741, "logps/chosen": -2.092810869216919, "logps/rejected": -2.3459970951080322, "loss": 2.339, "nll_loss": 2.2819888591766357, "rewards/accuracies": 0.75, "rewards/chosen": -0.2092810869216919, "rewards/margins": 0.025318622589111328, "rewards/rejected": -0.23459972441196442, "step": 21 }, { "epoch": 0.06075250258888505, "grad_norm": 0.7080368399620056, "learning_rate": 9.633027522935782e-07, "log_odds_chosen": 0.46356773376464844, "log_odds_ratio": -0.504152774810791, "logits/chosen": -0.6566605567932129, "logits/rejected": -0.23016926646232605, "logps/chosen": -1.9449937343597412, "logps/rejected": -2.354017972946167, "loss": 2.1914, "nll_loss": 2.1410012245178223, "rewards/accuracies": 0.875, "rewards/chosen": -0.19449937343597412, "rewards/margins": 0.040902428328990936, "rewards/rejected": -0.23540180921554565, "step": 22 }, { "epoch": 0.06351397997928893, "grad_norm": 0.7032366394996643, "learning_rate": 1.0091743119266057e-06, "log_odds_chosen": 0.41283032298088074, "log_odds_ratio": -0.5154061913490295, "logits/chosen": -0.6851422786712646, "logits/rejected": -0.43942469358444214, "logps/chosen": -1.9216886758804321, "logps/rejected": -2.2877392768859863, "loss": 2.2005, "nll_loss": 2.1489334106445312, "rewards/accuracies": 1.0, "rewards/chosen": -0.19216886162757874, "rewards/margins": 0.03660505264997482, "rewards/rejected": -0.22877395153045654, "step": 23 }, { "epoch": 0.06627545736969279, "grad_norm": 0.759819746017456, "learning_rate": 1.055045871559633e-06, "log_odds_chosen": 0.5156121253967285, "log_odds_ratio": -0.46936866641044617, "logits/chosen": -0.6876986622810364, "logits/rejected": -0.031957462430000305, "logps/chosen": -1.9351065158843994, "logps/rejected": -2.390742063522339, "loss": 2.1928, "nll_loss": 2.1458399295806885, "rewards/accuracies": 1.0, "rewards/chosen": -0.19351065158843994, "rewards/margins": 0.04556357115507126, "rewards/rejected": -0.2390742301940918, "step": 24 }, { "epoch": 0.06903693476009665, "grad_norm": 0.8110620379447937, "learning_rate": 1.1009174311926608e-06, "log_odds_chosen": 0.6373166441917419, "log_odds_ratio": -0.4303508996963501, "logits/chosen": -0.6892279982566833, "logits/rejected": -0.09929540753364563, "logps/chosen": -1.9359254837036133, "logps/rejected": -2.5030038356781006, "loss": 2.2035, "nll_loss": 2.1604182720184326, "rewards/accuracies": 1.0, "rewards/chosen": -0.19359253346920013, "rewards/margins": 0.05670783296227455, "rewards/rejected": -0.2503003776073456, "step": 25 }, { "epoch": 0.07179841215050052, "grad_norm": 0.7390461564064026, "learning_rate": 1.1467889908256882e-06, "log_odds_chosen": 0.36132410168647766, "log_odds_ratio": -0.5341425538063049, "logits/chosen": -0.6445504426956177, "logits/rejected": -0.1332886964082718, "logps/chosen": -1.9676666259765625, "logps/rejected": -2.2869884967803955, "loss": 2.2373, "nll_loss": 2.1838440895080566, "rewards/accuracies": 1.0, "rewards/chosen": -0.1967666745185852, "rewards/margins": 0.03193218633532524, "rewards/rejected": -0.22869886457920074, "step": 26 }, { "epoch": 0.07455988954090438, "grad_norm": 0.6214093565940857, "learning_rate": 1.1926605504587159e-06, "log_odds_chosen": 0.4441767632961273, "log_odds_ratio": -0.5069965124130249, "logits/chosen": -0.5817967653274536, "logits/rejected": -0.287628710269928, "logps/chosen": -1.8485782146453857, "logps/rejected": -2.2337605953216553, "loss": 2.1166, "nll_loss": 2.065877914428711, "rewards/accuracies": 1.0, "rewards/chosen": -0.1848578304052353, "rewards/margins": 0.03851822763681412, "rewards/rejected": -0.22337606549263, "step": 27 }, { "epoch": 0.07732136693130826, "grad_norm": 0.6619555950164795, "learning_rate": 1.2385321100917433e-06, "log_odds_chosen": 0.5049865245819092, "log_odds_ratio": -0.47638368606567383, "logits/chosen": -0.6138817071914673, "logits/rejected": -0.2531018853187561, "logps/chosen": -1.9172720909118652, "logps/rejected": -2.361618995666504, "loss": 2.1756, "nll_loss": 2.127944231033325, "rewards/accuracies": 1.0, "rewards/chosen": -0.19172722101211548, "rewards/margins": 0.04443468153476715, "rewards/rejected": -0.23616188764572144, "step": 28 }, { "epoch": 0.08008284432171212, "grad_norm": 0.6942340135574341, "learning_rate": 1.2844036697247707e-06, "log_odds_chosen": 0.4293629825115204, "log_odds_ratio": -0.5098441243171692, "logits/chosen": -0.6483147740364075, "logits/rejected": -0.05790426582098007, "logps/chosen": -2.0040318965911865, "logps/rejected": -2.3848392963409424, "loss": 2.2622, "nll_loss": 2.2111713886260986, "rewards/accuracies": 1.0, "rewards/chosen": -0.20040319859981537, "rewards/margins": 0.03808073699474335, "rewards/rejected": -0.23848393559455872, "step": 29 }, { "epoch": 0.08284432171211598, "grad_norm": 0.7068150043487549, "learning_rate": 1.3302752293577984e-06, "log_odds_chosen": 0.6293392181396484, "log_odds_ratio": -0.43408164381980896, "logits/chosen": -0.5528810024261475, "logits/rejected": -0.038134124130010605, "logps/chosen": -1.9628605842590332, "logps/rejected": -2.524247407913208, "loss": 2.2025, "nll_loss": 2.1590917110443115, "rewards/accuracies": 1.0, "rewards/chosen": -0.19628606736660004, "rewards/margins": 0.05613870173692703, "rewards/rejected": -0.25242477655410767, "step": 30 }, { "epoch": 0.08560579910251985, "grad_norm": 0.7285196185112, "learning_rate": 1.3761467889908258e-06, "log_odds_chosen": 0.5635695457458496, "log_odds_ratio": -0.4573448598384857, "logits/chosen": -0.6580309271812439, "logits/rejected": -0.1388925164937973, "logps/chosen": -1.97215735912323, "logps/rejected": -2.47501540184021, "loss": 2.2267, "nll_loss": 2.1810009479522705, "rewards/accuracies": 1.0, "rewards/chosen": -0.1972157210111618, "rewards/margins": 0.050285838544368744, "rewards/rejected": -0.24750158190727234, "step": 31 }, { "epoch": 0.08836727649292371, "grad_norm": 0.663361132144928, "learning_rate": 1.4220183486238535e-06, "log_odds_chosen": 0.4357861876487732, "log_odds_ratio": -0.5044962763786316, "logits/chosen": -0.5250373482704163, "logits/rejected": -0.026727374643087387, "logps/chosen": -1.9720088243484497, "logps/rejected": -2.3577969074249268, "loss": 2.2324, "nll_loss": 2.1819026470184326, "rewards/accuracies": 1.0, "rewards/chosen": -0.19720089435577393, "rewards/margins": 0.0385788157582283, "rewards/rejected": -0.23577968776226044, "step": 32 }, { "epoch": 0.09112875388332758, "grad_norm": 0.6460317373275757, "learning_rate": 1.467889908256881e-06, "log_odds_chosen": 0.5821229219436646, "log_odds_ratio": -0.45059195160865784, "logits/chosen": -0.5484606027603149, "logits/rejected": -0.17182235419750214, "logps/chosen": -1.8812311887741089, "logps/rejected": -2.394681453704834, "loss": 2.1347, "nll_loss": 2.0896193981170654, "rewards/accuracies": 1.0, "rewards/chosen": -0.18812312185764313, "rewards/margins": 0.05134502053260803, "rewards/rejected": -0.23946812748908997, "step": 33 }, { "epoch": 0.09389023127373144, "grad_norm": 0.6461722254753113, "learning_rate": 1.5137614678899084e-06, "log_odds_chosen": 0.5589693784713745, "log_odds_ratio": -0.466278612613678, "logits/chosen": -0.5394806861877441, "logits/rejected": -0.11535287648439407, "logps/chosen": -1.8948417901992798, "logps/rejected": -2.3906807899475098, "loss": 2.1429, "nll_loss": 2.0962626934051514, "rewards/accuracies": 1.0, "rewards/chosen": -0.18948417901992798, "rewards/margins": 0.04958389326930046, "rewards/rejected": -0.23906809091567993, "step": 34 }, { "epoch": 0.09665170866413532, "grad_norm": 0.6749213933944702, "learning_rate": 1.559633027522936e-06, "log_odds_chosen": 0.4189353883266449, "log_odds_ratio": -0.5096418261528015, "logits/chosen": -0.8145462274551392, "logits/rejected": -0.184108167886734, "logps/chosen": -1.9128409624099731, "logps/rejected": -2.278416395187378, "loss": 2.1804, "nll_loss": 2.12943172454834, "rewards/accuracies": 1.0, "rewards/chosen": -0.19128410518169403, "rewards/margins": 0.036557577550411224, "rewards/rejected": -0.22784166038036346, "step": 35 }, { "epoch": 0.09941318605453918, "grad_norm": 0.7020057439804077, "learning_rate": 1.6055045871559635e-06, "log_odds_chosen": 0.5992317199707031, "log_odds_ratio": -0.4454975128173828, "logits/chosen": -0.755850613117218, "logits/rejected": -0.0925610214471817, "logps/chosen": -1.921108603477478, "logps/rejected": -2.4531819820404053, "loss": 2.1777, "nll_loss": 2.1331448554992676, "rewards/accuracies": 1.0, "rewards/chosen": -0.19211086630821228, "rewards/margins": 0.05320734530687332, "rewards/rejected": -0.245318204164505, "step": 36 }, { "epoch": 0.10217466344494304, "grad_norm": 0.7380937337875366, "learning_rate": 1.6513761467889911e-06, "log_odds_chosen": 0.5436604619026184, "log_odds_ratio": -0.4642760455608368, "logits/chosen": -0.5516951680183411, "logits/rejected": -0.17646433413028717, "logps/chosen": -2.041116952896118, "logps/rejected": -2.531487226486206, "loss": 2.3149, "nll_loss": 2.268436908721924, "rewards/accuracies": 1.0, "rewards/chosen": -0.20411169528961182, "rewards/margins": 0.04903702437877655, "rewards/rejected": -0.25314873456954956, "step": 37 }, { "epoch": 0.10493614083534691, "grad_norm": 0.697014331817627, "learning_rate": 1.6972477064220186e-06, "log_odds_chosen": 0.3754476308822632, "log_odds_ratio": -0.5259881615638733, "logits/chosen": -0.6446620225906372, "logits/rejected": -0.2302635908126831, "logps/chosen": -1.916767954826355, "logps/rejected": -2.246654748916626, "loss": 2.1948, "nll_loss": 2.142183303833008, "rewards/accuracies": 1.0, "rewards/chosen": -0.1916767954826355, "rewards/margins": 0.032988667488098145, "rewards/rejected": -0.22466546297073364, "step": 38 }, { "epoch": 0.10769761822575077, "grad_norm": 0.6962437033653259, "learning_rate": 1.743119266055046e-06, "log_odds_chosen": 0.4513578414916992, "log_odds_ratio": -0.5085355043411255, "logits/chosen": -0.5062450170516968, "logits/rejected": -0.0742938369512558, "logps/chosen": -2.0077242851257324, "logps/rejected": -2.4134199619293213, "loss": 2.2615, "nll_loss": 2.2106730937957764, "rewards/accuracies": 0.875, "rewards/chosen": -0.20077240467071533, "rewards/margins": 0.04056959226727486, "rewards/rejected": -0.24134202301502228, "step": 39 }, { "epoch": 0.11045909561615465, "grad_norm": 0.8404136896133423, "learning_rate": 1.7889908256880737e-06, "log_odds_chosen": 0.26594164967536926, "log_odds_ratio": -0.5785322189331055, "logits/chosen": -0.7206758260726929, "logits/rejected": -0.2084624469280243, "logps/chosen": -2.028856039047241, "logps/rejected": -2.2659192085266113, "loss": 2.3049, "nll_loss": 2.2470221519470215, "rewards/accuracies": 0.75, "rewards/chosen": -0.20288559794425964, "rewards/margins": 0.023706313222646713, "rewards/rejected": -0.22659191489219666, "step": 40 }, { "epoch": 0.11322057300655851, "grad_norm": 0.6305637359619141, "learning_rate": 1.8348623853211011e-06, "log_odds_chosen": 0.4789700508117676, "log_odds_ratio": -0.4894922971725464, "logits/chosen": -0.6578766107559204, "logits/rejected": -0.20148660242557526, "logps/chosen": -1.876691460609436, "logps/rejected": -2.2974884510040283, "loss": 2.1332, "nll_loss": 2.0842790603637695, "rewards/accuracies": 1.0, "rewards/chosen": -0.18766914308071136, "rewards/margins": 0.042079709470272064, "rewards/rejected": -0.22974886000156403, "step": 41 }, { "epoch": 0.11598205039696237, "grad_norm": 0.764166533946991, "learning_rate": 1.8807339449541288e-06, "log_odds_chosen": 0.32667985558509827, "log_odds_ratio": -0.5534327030181885, "logits/chosen": -0.6471579074859619, "logits/rejected": -0.24204234778881073, "logps/chosen": -2.054783582687378, "logps/rejected": -2.3455071449279785, "loss": 2.324, "nll_loss": 2.268658399581909, "rewards/accuracies": 0.75, "rewards/chosen": -0.20547834038734436, "rewards/margins": 0.0290723517537117, "rewards/rejected": -0.23455071449279785, "step": 42 }, { "epoch": 0.11874352778736624, "grad_norm": 0.6828538775444031, "learning_rate": 1.9266055045871564e-06, "log_odds_chosen": 0.39937809109687805, "log_odds_ratio": -0.5205667018890381, "logits/chosen": -0.6188192367553711, "logits/rejected": -0.2733380198478699, "logps/chosen": -1.9134750366210938, "logps/rejected": -2.2601161003112793, "loss": 2.1861, "nll_loss": 2.134024143218994, "rewards/accuracies": 0.875, "rewards/chosen": -0.19134750962257385, "rewards/margins": 0.0346641018986702, "rewards/rejected": -0.22601160407066345, "step": 43 }, { "epoch": 0.1215050051777701, "grad_norm": 0.8159171938896179, "learning_rate": 1.9724770642201837e-06, "log_odds_chosen": 0.32455721497535706, "log_odds_ratio": -0.5553261041641235, "logits/chosen": -0.669414758682251, "logits/rejected": -0.2208831012248993, "logps/chosen": -2.0293149948120117, "logps/rejected": -2.317753314971924, "loss": 2.2895, "nll_loss": 2.233963966369629, "rewards/accuracies": 0.75, "rewards/chosen": -0.20293152332305908, "rewards/margins": 0.028843821957707405, "rewards/rejected": -0.23177534341812134, "step": 44 }, { "epoch": 0.12426648256817398, "grad_norm": 0.6607564091682434, "learning_rate": 2.0183486238532113e-06, "log_odds_chosen": 0.615048885345459, "log_odds_ratio": -0.4402877390384674, "logits/chosen": -0.6855505704879761, "logits/rejected": -0.38655874133110046, "logps/chosen": -1.9126825332641602, "logps/rejected": -2.459097385406494, "loss": 2.1594, "nll_loss": 2.115415096282959, "rewards/accuracies": 1.0, "rewards/chosen": -0.19126826524734497, "rewards/margins": 0.054641470313072205, "rewards/rejected": -0.24590973556041718, "step": 45 }, { "epoch": 0.12702795995857785, "grad_norm": 0.6284722685813904, "learning_rate": 2.064220183486239e-06, "log_odds_chosen": 0.5434573888778687, "log_odds_ratio": -0.467115193605423, "logits/chosen": -0.5798700451850891, "logits/rejected": -0.13538922369480133, "logps/chosen": -1.7848091125488281, "logps/rejected": -2.2502312660217285, "loss": 2.0394, "nll_loss": 1.99272620677948, "rewards/accuracies": 1.0, "rewards/chosen": -0.17848090827465057, "rewards/margins": 0.046542223542928696, "rewards/rejected": -0.22502315044403076, "step": 46 }, { "epoch": 0.1297894373489817, "grad_norm": 0.6467039585113525, "learning_rate": 2.110091743119266e-06, "log_odds_chosen": 0.3773024082183838, "log_odds_ratio": -0.5347570180892944, "logits/chosen": -0.559330403804779, "logits/rejected": -0.16529709100723267, "logps/chosen": -2.0019242763519287, "logps/rejected": -2.339775323867798, "loss": 2.2632, "nll_loss": 2.209690809249878, "rewards/accuracies": 0.875, "rewards/chosen": -0.2001924067735672, "rewards/margins": 0.0337851420044899, "rewards/rejected": -0.2339775562286377, "step": 47 }, { "epoch": 0.13255091473938557, "grad_norm": 0.803294837474823, "learning_rate": 2.155963302752294e-06, "log_odds_chosen": 0.16557860374450684, "log_odds_ratio": -0.6379687190055847, "logits/chosen": -0.6091009378433228, "logits/rejected": -0.18343707919120789, "logps/chosen": -2.083127975463867, "logps/rejected": -2.241791009902954, "loss": 2.3479, "nll_loss": 2.2841379642486572, "rewards/accuracies": 0.875, "rewards/chosen": -0.20831279456615448, "rewards/margins": 0.015866274014115334, "rewards/rejected": -0.22417910397052765, "step": 48 }, { "epoch": 0.13531239212978943, "grad_norm": 0.7350826263427734, "learning_rate": 2.2018348623853215e-06, "log_odds_chosen": 0.663620114326477, "log_odds_ratio": -0.430046409368515, "logits/chosen": -0.6330848336219788, "logits/rejected": -0.12406854331493378, "logps/chosen": -1.9958436489105225, "logps/rejected": -2.595897912979126, "loss": 2.2445, "nll_loss": 2.201472282409668, "rewards/accuracies": 1.0, "rewards/chosen": -0.19958436489105225, "rewards/margins": 0.06000541150569916, "rewards/rejected": -0.2595897912979126, "step": 49 }, { "epoch": 0.1380738695201933, "grad_norm": 0.6953707337379456, "learning_rate": 2.2477064220183487e-06, "log_odds_chosen": 0.558549702167511, "log_odds_ratio": -0.4727778434753418, "logits/chosen": -0.6551685333251953, "logits/rejected": -0.10959107428789139, "logps/chosen": -1.9449548721313477, "logps/rejected": -2.4448442459106445, "loss": 2.1886, "nll_loss": 2.1412806510925293, "rewards/accuracies": 0.875, "rewards/chosen": -0.19449549913406372, "rewards/margins": 0.04998895525932312, "rewards/rejected": -0.24448445439338684, "step": 50 }, { "epoch": 0.14083534691059718, "grad_norm": 0.7255063056945801, "learning_rate": 2.2935779816513764e-06, "log_odds_chosen": 0.439910352230072, "log_odds_ratio": -0.5028793811798096, "logits/chosen": -0.5782126784324646, "logits/rejected": -0.3915577232837677, "logps/chosen": -1.9679124355316162, "logps/rejected": -2.3573005199432373, "loss": 2.2331, "nll_loss": 2.1828465461730957, "rewards/accuracies": 1.0, "rewards/chosen": -0.19679124653339386, "rewards/margins": 0.03893881291151047, "rewards/rejected": -0.23573008179664612, "step": 51 }, { "epoch": 0.14359682430100104, "grad_norm": 0.6826524138450623, "learning_rate": 2.339449541284404e-06, "log_odds_chosen": 0.4098314940929413, "log_odds_ratio": -0.5124779939651489, "logits/chosen": -0.5287759304046631, "logits/rejected": -0.16041654348373413, "logps/chosen": -1.9367855787277222, "logps/rejected": -2.296450614929199, "loss": 2.1899, "nll_loss": 2.1386733055114746, "rewards/accuracies": 1.0, "rewards/chosen": -0.19367855787277222, "rewards/margins": 0.03596651181578636, "rewards/rejected": -0.22964505851268768, "step": 52 }, { "epoch": 0.1463583016914049, "grad_norm": 0.7680190801620483, "learning_rate": 2.3853211009174317e-06, "log_odds_chosen": 0.3706192970275879, "log_odds_ratio": -0.5296811461448669, "logits/chosen": -0.707636833190918, "logits/rejected": -0.3318819999694824, "logps/chosen": -1.9044169187545776, "logps/rejected": -2.228182315826416, "loss": 2.1714, "nll_loss": 2.118480920791626, "rewards/accuracies": 1.0, "rewards/chosen": -0.19044168293476105, "rewards/margins": 0.03237656503915787, "rewards/rejected": -0.2228182554244995, "step": 53 }, { "epoch": 0.14911977908180876, "grad_norm": 0.6361218690872192, "learning_rate": 2.431192660550459e-06, "log_odds_chosen": 0.37140461802482605, "log_odds_ratio": -0.5330734848976135, "logits/chosen": -0.5706905126571655, "logits/rejected": -0.2818271815776825, "logps/chosen": -1.8717925548553467, "logps/rejected": -2.1973752975463867, "loss": 2.1391, "nll_loss": 2.0858311653137207, "rewards/accuracies": 0.875, "rewards/chosen": -0.18717925250530243, "rewards/margins": 0.03255828469991684, "rewards/rejected": -0.21973752975463867, "step": 54 }, { "epoch": 0.15188125647221262, "grad_norm": 0.6758925914764404, "learning_rate": 2.4770642201834866e-06, "log_odds_chosen": 0.351237416267395, "log_odds_ratio": -0.5363888740539551, "logits/chosen": -0.6902980804443359, "logits/rejected": -0.1499267816543579, "logps/chosen": -1.9404265880584717, "logps/rejected": -2.248072624206543, "loss": 2.2093, "nll_loss": 2.155627965927124, "rewards/accuracies": 1.0, "rewards/chosen": -0.19404268264770508, "rewards/margins": 0.030764613300561905, "rewards/rejected": -0.2248072773218155, "step": 55 }, { "epoch": 0.1546427338626165, "grad_norm": 0.7609654664993286, "learning_rate": 2.522935779816514e-06, "log_odds_chosen": 0.6203876733779907, "log_odds_ratio": -0.4391648769378662, "logits/chosen": -0.6777594685554504, "logits/rejected": -0.12744097411632538, "logps/chosen": -1.920168161392212, "logps/rejected": -2.470646381378174, "loss": 2.2005, "nll_loss": 2.1565370559692383, "rewards/accuracies": 1.0, "rewards/chosen": -0.1920168101787567, "rewards/margins": 0.055047836154699326, "rewards/rejected": -0.24706465005874634, "step": 56 }, { "epoch": 0.15740421125302037, "grad_norm": 0.6103418469429016, "learning_rate": 2.5688073394495415e-06, "log_odds_chosen": 0.3935878276824951, "log_odds_ratio": -0.52540522813797, "logits/chosen": -0.5779827833175659, "logits/rejected": -0.07134772837162018, "logps/chosen": -1.8075436353683472, "logps/rejected": -2.1494383811950684, "loss": 2.0685, "nll_loss": 2.0159270763397217, "rewards/accuracies": 0.875, "rewards/chosen": -0.18075433373451233, "rewards/margins": 0.03418949246406555, "rewards/rejected": -0.21494384109973907, "step": 57 }, { "epoch": 0.16016568864342423, "grad_norm": 0.6918542981147766, "learning_rate": 2.6146788990825687e-06, "log_odds_chosen": 0.508127748966217, "log_odds_ratio": -0.48639971017837524, "logits/chosen": -0.5526314377784729, "logits/rejected": -0.17470352351665497, "logps/chosen": -1.9131710529327393, "logps/rejected": -2.359304904937744, "loss": 2.1784, "nll_loss": 2.1297237873077393, "rewards/accuracies": 0.875, "rewards/chosen": -0.1913171112537384, "rewards/margins": 0.044613372534513474, "rewards/rejected": -0.23593048751354218, "step": 58 }, { "epoch": 0.1629271660338281, "grad_norm": 0.6938620209693909, "learning_rate": 2.6605504587155968e-06, "log_odds_chosen": 0.2925473153591156, "log_odds_ratio": -0.563460111618042, "logits/chosen": -0.5527081489562988, "logits/rejected": -0.2188190072774887, "logps/chosen": -1.9204390048980713, "logps/rejected": -2.175715684890747, "loss": 2.1995, "nll_loss": 2.1431734561920166, "rewards/accuracies": 1.0, "rewards/chosen": -0.19204390048980713, "rewards/margins": 0.025527678430080414, "rewards/rejected": -0.21757157146930695, "step": 59 }, { "epoch": 0.16568864342423195, "grad_norm": 0.6408482193946838, "learning_rate": 2.706422018348624e-06, "log_odds_chosen": 0.3286433815956116, "log_odds_ratio": -0.5471742749214172, "logits/chosen": -0.5031524300575256, "logits/rejected": -0.016629882156848907, "logps/chosen": -1.9514718055725098, "logps/rejected": -2.240278720855713, "loss": 2.2124, "nll_loss": 2.157729387283325, "rewards/accuracies": 0.875, "rewards/chosen": -0.19514718651771545, "rewards/margins": 0.028880706056952477, "rewards/rejected": -0.22402788698673248, "step": 60 }, { "epoch": 0.16845012081463584, "grad_norm": 0.5977794528007507, "learning_rate": 2.7522935779816517e-06, "log_odds_chosen": 0.6263030767440796, "log_odds_ratio": -0.43304184079170227, "logits/chosen": -0.5808561444282532, "logits/rejected": -0.16479042172431946, "logps/chosen": -1.874776840209961, "logps/rejected": -2.425903797149658, "loss": 2.1062, "nll_loss": 2.0628535747528076, "rewards/accuracies": 1.0, "rewards/chosen": -0.18747767806053162, "rewards/margins": 0.055112697184085846, "rewards/rejected": -0.24259036779403687, "step": 61 }, { "epoch": 0.1712115982050397, "grad_norm": 0.7107025980949402, "learning_rate": 2.798165137614679e-06, "log_odds_chosen": 0.38438883423805237, "log_odds_ratio": -0.5274062752723694, "logits/chosen": -0.5282328724861145, "logits/rejected": 0.017122909426689148, "logps/chosen": -2.0265655517578125, "logps/rejected": -2.3674638271331787, "loss": 2.2774, "nll_loss": 2.224700927734375, "rewards/accuracies": 1.0, "rewards/chosen": -0.202656552195549, "rewards/margins": 0.034089840948581696, "rewards/rejected": -0.2367464005947113, "step": 62 }, { "epoch": 0.17397307559544356, "grad_norm": 0.6914030313491821, "learning_rate": 2.844036697247707e-06, "log_odds_chosen": 0.4569448232650757, "log_odds_ratio": -0.4947759211063385, "logits/chosen": -0.564735472202301, "logits/rejected": -0.17074617743492126, "logps/chosen": -1.9550468921661377, "logps/rejected": -2.357658624649048, "loss": 2.1959, "nll_loss": 2.14642596244812, "rewards/accuracies": 1.0, "rewards/chosen": -0.19550471007823944, "rewards/margins": 0.040261153131723404, "rewards/rejected": -0.23576584458351135, "step": 63 }, { "epoch": 0.17673455298584742, "grad_norm": 0.6147691011428833, "learning_rate": 2.8899082568807342e-06, "log_odds_chosen": 0.5831525325775146, "log_odds_ratio": -0.4512834846973419, "logits/chosen": -0.6652324199676514, "logits/rejected": -0.30751606822013855, "logps/chosen": -1.925264835357666, "logps/rejected": -2.4442431926727295, "loss": 2.154, "nll_loss": 2.1088905334472656, "rewards/accuracies": 1.0, "rewards/chosen": -0.1925264596939087, "rewards/margins": 0.05189783126115799, "rewards/rejected": -0.24442431330680847, "step": 64 }, { "epoch": 0.17949603037625128, "grad_norm": 0.6069121360778809, "learning_rate": 2.935779816513762e-06, "log_odds_chosen": 0.3449505567550659, "log_odds_ratio": -0.5405150055885315, "logits/chosen": -0.46805867552757263, "logits/rejected": -0.031980015337467194, "logps/chosen": -1.8449336290359497, "logps/rejected": -2.141822576522827, "loss": 2.104, "nll_loss": 2.049985885620117, "rewards/accuracies": 1.0, "rewards/chosen": -0.18449336290359497, "rewards/margins": 0.02968890592455864, "rewards/rejected": -0.2141822725534439, "step": 65 }, { "epoch": 0.18225750776665517, "grad_norm": 0.5975824594497681, "learning_rate": 2.981651376146789e-06, "log_odds_chosen": 0.5348482728004456, "log_odds_ratio": -0.48265504837036133, "logits/chosen": -0.6339913606643677, "logits/rejected": -0.19266335666179657, "logps/chosen": -1.7976055145263672, "logps/rejected": -2.2617228031158447, "loss": 2.0549, "nll_loss": 2.006657600402832, "rewards/accuracies": 1.0, "rewards/chosen": -0.17976056039333344, "rewards/margins": 0.04641173034906387, "rewards/rejected": -0.2261722981929779, "step": 66 }, { "epoch": 0.18501898515705903, "grad_norm": 0.6583871245384216, "learning_rate": 3.0275229357798168e-06, "log_odds_chosen": 0.2934231162071228, "log_odds_ratio": -0.5610607862472534, "logits/chosen": -0.49614468216896057, "logits/rejected": -0.02480134554207325, "logps/chosen": -1.9567402601242065, "logps/rejected": -2.21272349357605, "loss": 2.1884, "nll_loss": 2.1323015689849854, "rewards/accuracies": 1.0, "rewards/chosen": -0.19567403197288513, "rewards/margins": 0.025598343461751938, "rewards/rejected": -0.22127236425876617, "step": 67 }, { "epoch": 0.1877804625474629, "grad_norm": 0.6593952775001526, "learning_rate": 3.073394495412844e-06, "log_odds_chosen": 0.39623624086380005, "log_odds_ratio": -0.5267960429191589, "logits/chosen": -0.32998380064964294, "logits/rejected": 0.019692357629537582, "logps/chosen": -1.9624451398849487, "logps/rejected": -2.3119962215423584, "loss": 2.2478, "nll_loss": 2.195082902908325, "rewards/accuracies": 0.875, "rewards/chosen": -0.1962445080280304, "rewards/margins": 0.03495512157678604, "rewards/rejected": -0.23119963705539703, "step": 68 }, { "epoch": 0.19054193993786675, "grad_norm": 0.7153931856155396, "learning_rate": 3.119266055045872e-06, "log_odds_chosen": 0.5069383382797241, "log_odds_ratio": -0.4813777804374695, "logits/chosen": -0.505609393119812, "logits/rejected": -0.05926704406738281, "logps/chosen": -2.0001449584960938, "logps/rejected": -2.453965187072754, "loss": 2.2604, "nll_loss": 2.212214231491089, "rewards/accuracies": 1.0, "rewards/chosen": -0.20001448690891266, "rewards/margins": 0.04538201540708542, "rewards/rejected": -0.24539650976657867, "step": 69 }, { "epoch": 0.19330341732827064, "grad_norm": 0.5841740965843201, "learning_rate": 3.1651376146788993e-06, "log_odds_chosen": 0.48365646600723267, "log_odds_ratio": -0.4906890094280243, "logits/chosen": -0.4503505527973175, "logits/rejected": -0.24621139466762543, "logps/chosen": -1.9794856309890747, "logps/rejected": -2.4092578887939453, "loss": 2.2046, "nll_loss": 2.155548095703125, "rewards/accuracies": 0.875, "rewards/chosen": -0.19794857501983643, "rewards/margins": 0.0429772287607193, "rewards/rejected": -0.24092580378055573, "step": 70 }, { "epoch": 0.1960648947186745, "grad_norm": 0.6693902015686035, "learning_rate": 3.211009174311927e-06, "log_odds_chosen": 0.4308607876300812, "log_odds_ratio": -0.5067814588546753, "logits/chosen": -0.47226929664611816, "logits/rejected": -0.22168010473251343, "logps/chosen": -1.9241232872009277, "logps/rejected": -2.3047096729278564, "loss": 2.191, "nll_loss": 2.14032244682312, "rewards/accuracies": 1.0, "rewards/chosen": -0.192412331700325, "rewards/margins": 0.038058653473854065, "rewards/rejected": -0.23047097027301788, "step": 71 }, { "epoch": 0.19882637210907836, "grad_norm": 0.6415713429450989, "learning_rate": 3.256880733944954e-06, "log_odds_chosen": 0.5713039636611938, "log_odds_ratio": -0.46779656410217285, "logits/chosen": -0.4027179479598999, "logits/rejected": -0.20848074555397034, "logps/chosen": -1.8328369855880737, "logps/rejected": -2.313088893890381, "loss": 2.0868, "nll_loss": 2.0400490760803223, "rewards/accuracies": 0.875, "rewards/chosen": -0.18328368663787842, "rewards/margins": 0.048025187104940414, "rewards/rejected": -0.23130889236927032, "step": 72 }, { "epoch": 0.20158784949948222, "grad_norm": 0.6714993715286255, "learning_rate": 3.3027522935779823e-06, "log_odds_chosen": 0.35765087604522705, "log_odds_ratio": -0.5315056443214417, "logits/chosen": -0.387838214635849, "logits/rejected": -0.10332845896482468, "logps/chosen": -2.011086940765381, "logps/rejected": -2.328169107437134, "loss": 2.2627, "nll_loss": 2.209596872329712, "rewards/accuracies": 1.0, "rewards/chosen": -0.20110869407653809, "rewards/margins": 0.03170822933316231, "rewards/rejected": -0.2328169345855713, "step": 73 }, { "epoch": 0.20434932688988608, "grad_norm": 0.6035195589065552, "learning_rate": 3.3486238532110095e-06, "log_odds_chosen": 0.37045401334762573, "log_odds_ratio": -0.528258740901947, "logits/chosen": -0.4163488447666168, "logits/rejected": -0.20020049810409546, "logps/chosen": -1.927206039428711, "logps/rejected": -2.2507896423339844, "loss": 2.1911, "nll_loss": 2.1382455825805664, "rewards/accuracies": 1.0, "rewards/chosen": -0.19272059202194214, "rewards/margins": 0.03235836327075958, "rewards/rejected": -0.22507895529270172, "step": 74 }, { "epoch": 0.20711080428028997, "grad_norm": 0.6681767702102661, "learning_rate": 3.394495412844037e-06, "log_odds_chosen": 0.4545160233974457, "log_odds_ratio": -0.4990374743938446, "logits/chosen": -0.4695533514022827, "logits/rejected": -0.14828726649284363, "logps/chosen": -1.9052660465240479, "logps/rejected": -2.3053171634674072, "loss": 2.171, "nll_loss": 2.121072769165039, "rewards/accuracies": 0.875, "rewards/chosen": -0.19052661955356598, "rewards/margins": 0.04000508412718773, "rewards/rejected": -0.2305317223072052, "step": 75 }, { "epoch": 0.20987228167069383, "grad_norm": 0.5848620533943176, "learning_rate": 3.4403669724770644e-06, "log_odds_chosen": 0.4162794351577759, "log_odds_ratio": -0.5107027292251587, "logits/chosen": -0.3651241362094879, "logits/rejected": -0.29924386739730835, "logps/chosen": -1.9842548370361328, "logps/rejected": -2.354065418243408, "loss": 2.2131, "nll_loss": 2.1620750427246094, "rewards/accuracies": 1.0, "rewards/chosen": -0.19842548668384552, "rewards/margins": 0.03698106110095978, "rewards/rejected": -0.2354065328836441, "step": 76 }, { "epoch": 0.2126337590610977, "grad_norm": 0.6766842007637024, "learning_rate": 3.486238532110092e-06, "log_odds_chosen": 0.3890829086303711, "log_odds_ratio": -0.5331251621246338, "logits/chosen": -0.5041220784187317, "logits/rejected": -0.256991982460022, "logps/chosen": -1.9675860404968262, "logps/rejected": -2.3138177394866943, "loss": 2.2335, "nll_loss": 2.1801676750183105, "rewards/accuracies": 0.875, "rewards/chosen": -0.19675858318805695, "rewards/margins": 0.03462318331003189, "rewards/rejected": -0.23138177394866943, "step": 77 }, { "epoch": 0.21539523645150155, "grad_norm": 0.6400946974754333, "learning_rate": 3.5321100917431193e-06, "log_odds_chosen": 0.3764447569847107, "log_odds_ratio": -0.5406405329704285, "logits/chosen": -0.30762779712677, "logits/rejected": -0.010896757245063782, "logps/chosen": -1.9988197088241577, "logps/rejected": -2.331996202468872, "loss": 2.246, "nll_loss": 2.191932201385498, "rewards/accuracies": 0.875, "rewards/chosen": -0.19988197088241577, "rewards/margins": 0.03331765532493591, "rewards/rejected": -0.23319962620735168, "step": 78 }, { "epoch": 0.2181567138419054, "grad_norm": 0.5379537343978882, "learning_rate": 3.5779816513761473e-06, "log_odds_chosen": 0.4429885745048523, "log_odds_ratio": -0.49778997898101807, "logits/chosen": -0.32647988200187683, "logits/rejected": -0.19796019792556763, "logps/chosen": -1.8951160907745361, "logps/rejected": -2.282485246658325, "loss": 2.1565, "nll_loss": 2.1067562103271484, "rewards/accuracies": 1.0, "rewards/chosen": -0.18951159715652466, "rewards/margins": 0.038736920803785324, "rewards/rejected": -0.22824853658676147, "step": 79 }, { "epoch": 0.2209181912323093, "grad_norm": 0.5976657867431641, "learning_rate": 3.6238532110091746e-06, "log_odds_chosen": 0.3772650957107544, "log_odds_ratio": -0.5323787927627563, "logits/chosen": -0.396470308303833, "logits/rejected": -0.1672373265028, "logps/chosen": -1.8558274507522583, "logps/rejected": -2.1821842193603516, "loss": 2.119, "nll_loss": 2.065802812576294, "rewards/accuracies": 1.0, "rewards/chosen": -0.1855827420949936, "rewards/margins": 0.032635681331157684, "rewards/rejected": -0.21821841597557068, "step": 80 }, { "epoch": 0.22367966862271316, "grad_norm": 0.5696468353271484, "learning_rate": 3.6697247706422022e-06, "log_odds_chosen": 0.3498988747596741, "log_odds_ratio": -0.5373408198356628, "logits/chosen": -0.32143616676330566, "logits/rejected": -0.08278146386146545, "logps/chosen": -1.867413878440857, "logps/rejected": -2.1695525646209717, "loss": 2.1158, "nll_loss": 2.062082052230835, "rewards/accuracies": 1.0, "rewards/chosen": -0.1867414116859436, "rewards/margins": 0.0302138552069664, "rewards/rejected": -0.2169552594423294, "step": 81 }, { "epoch": 0.22644114601311702, "grad_norm": 0.579322874546051, "learning_rate": 3.7155963302752295e-06, "log_odds_chosen": 0.3590673804283142, "log_odds_ratio": -0.5345906019210815, "logits/chosen": -0.3917956054210663, "logits/rejected": -0.1658124327659607, "logps/chosen": -1.9473357200622559, "logps/rejected": -2.2625551223754883, "loss": 2.2152, "nll_loss": 2.1617040634155273, "rewards/accuracies": 1.0, "rewards/chosen": -0.19473356008529663, "rewards/margins": 0.03152196481823921, "rewards/rejected": -0.22625553607940674, "step": 82 }, { "epoch": 0.22920262340352088, "grad_norm": 0.5707481503486633, "learning_rate": 3.7614678899082575e-06, "log_odds_chosen": 0.601745069026947, "log_odds_ratio": -0.44251349568367004, "logits/chosen": -0.35622307658195496, "logits/rejected": -0.2330722063779831, "logps/chosen": -1.8970295190811157, "logps/rejected": -2.4300365447998047, "loss": 2.141, "nll_loss": 2.096763849258423, "rewards/accuracies": 1.0, "rewards/chosen": -0.18970295786857605, "rewards/margins": 0.053300708532333374, "rewards/rejected": -0.24300366640090942, "step": 83 }, { "epoch": 0.23196410079392474, "grad_norm": 0.6338332891464233, "learning_rate": 3.8073394495412848e-06, "log_odds_chosen": 0.4194182753562927, "log_odds_ratio": -0.5094286799430847, "logits/chosen": -0.38890349864959717, "logits/rejected": -0.1751769334077835, "logps/chosen": -1.8893790245056152, "logps/rejected": -2.2541043758392334, "loss": 2.1687, "nll_loss": 2.1177978515625, "rewards/accuracies": 1.0, "rewards/chosen": -0.18893790245056152, "rewards/margins": 0.036472536623477936, "rewards/rejected": -0.22541043162345886, "step": 84 }, { "epoch": 0.23472557818432863, "grad_norm": 0.514668881893158, "learning_rate": 3.853211009174313e-06, "log_odds_chosen": 0.5391043424606323, "log_odds_ratio": -0.46963804960250854, "logits/chosen": -0.42746701836586, "logits/rejected": -0.19726645946502686, "logps/chosen": -1.8130801916122437, "logps/rejected": -2.285198211669922, "loss": 2.0648, "nll_loss": 2.017812490463257, "rewards/accuracies": 1.0, "rewards/chosen": -0.18130803108215332, "rewards/margins": 0.047211792320013046, "rewards/rejected": -0.22851979732513428, "step": 85 }, { "epoch": 0.2374870555747325, "grad_norm": 0.5241694450378418, "learning_rate": 3.89908256880734e-06, "log_odds_chosen": 0.5370965003967285, "log_odds_ratio": -0.47515052556991577, "logits/chosen": -0.18979063630104065, "logits/rejected": -0.3587040901184082, "logps/chosen": -1.9306432008743286, "logps/rejected": -2.4106080532073975, "loss": 2.1744, "nll_loss": 2.1269240379333496, "rewards/accuracies": 1.0, "rewards/chosen": -0.19306430220603943, "rewards/margins": 0.04799651354551315, "rewards/rejected": -0.24106080830097198, "step": 86 }, { "epoch": 0.24024853296513635, "grad_norm": 0.5388301610946655, "learning_rate": 3.944954128440367e-06, "log_odds_chosen": 0.47338563203811646, "log_odds_ratio": -0.4902949929237366, "logits/chosen": -0.2526980936527252, "logits/rejected": -0.27716919779777527, "logps/chosen": -1.8183417320251465, "logps/rejected": -2.227705955505371, "loss": 2.0679, "nll_loss": 2.0188400745391846, "rewards/accuracies": 0.875, "rewards/chosen": -0.1818341761827469, "rewards/margins": 0.0409364253282547, "rewards/rejected": -0.2227705866098404, "step": 87 }, { "epoch": 0.2430100103555402, "grad_norm": 0.5072680115699768, "learning_rate": 3.9908256880733945e-06, "log_odds_chosen": 0.5098553895950317, "log_odds_ratio": -0.4994601309299469, "logits/chosen": -0.4232753813266754, "logits/rejected": -0.2069588601589203, "logps/chosen": -1.7478512525558472, "logps/rejected": -2.1878132820129395, "loss": 2.0084, "nll_loss": 1.9584167003631592, "rewards/accuracies": 0.75, "rewards/chosen": -0.17478513717651367, "rewards/margins": 0.043996214866638184, "rewards/rejected": -0.21878135204315186, "step": 88 }, { "epoch": 0.24577148774594407, "grad_norm": 0.4936067461967468, "learning_rate": 4.036697247706423e-06, "log_odds_chosen": 0.5959604382514954, "log_odds_ratio": -0.4470658600330353, "logits/chosen": -0.26887303590774536, "logits/rejected": -0.2717263400554657, "logps/chosen": -1.8666045665740967, "logps/rejected": -2.3904991149902344, "loss": 2.1004, "nll_loss": 2.0557374954223633, "rewards/accuracies": 1.0, "rewards/chosen": -0.18666045367717743, "rewards/margins": 0.05238945782184601, "rewards/rejected": -0.23904991149902344, "step": 89 }, { "epoch": 0.24853296513634796, "grad_norm": 0.5457214713096619, "learning_rate": 4.08256880733945e-06, "log_odds_chosen": 0.5490537285804749, "log_odds_ratio": -0.45841851830482483, "logits/chosen": -0.34721043705940247, "logits/rejected": -0.10521189868450165, "logps/chosen": -1.8499623537063599, "logps/rejected": -2.329256296157837, "loss": 2.0961, "nll_loss": 2.050299644470215, "rewards/accuracies": 1.0, "rewards/chosen": -0.18499624729156494, "rewards/margins": 0.04792938381433487, "rewards/rejected": -0.2329256385564804, "step": 90 }, { "epoch": 0.2512944425267518, "grad_norm": 0.5049707293510437, "learning_rate": 4.128440366972478e-06, "log_odds_chosen": 0.4017772674560547, "log_odds_ratio": -0.5216570496559143, "logits/chosen": -0.26840728521347046, "logits/rejected": -0.19604754447937012, "logps/chosen": -1.8395251035690308, "logps/rejected": -2.1878998279571533, "loss": 2.0732, "nll_loss": 2.021005868911743, "rewards/accuracies": 0.875, "rewards/chosen": -0.18395252525806427, "rewards/margins": 0.034837473183870316, "rewards/rejected": -0.2187899798154831, "step": 91 }, { "epoch": 0.2540559199171557, "grad_norm": 0.4878016710281372, "learning_rate": 4.174311926605505e-06, "log_odds_chosen": 0.40200310945510864, "log_odds_ratio": -0.5181869268417358, "logits/chosen": -0.17307503521442413, "logits/rejected": -0.5424618721008301, "logps/chosen": -1.8756885528564453, "logps/rejected": -2.2274668216705322, "loss": 2.1098, "nll_loss": 2.0579941272735596, "rewards/accuracies": 1.0, "rewards/chosen": -0.18756884336471558, "rewards/margins": 0.035177819430828094, "rewards/rejected": -0.22274667024612427, "step": 92 }, { "epoch": 0.25681739730755954, "grad_norm": 0.5148739218711853, "learning_rate": 4.220183486238532e-06, "log_odds_chosen": 0.4867369532585144, "log_odds_ratio": -0.4866551160812378, "logits/chosen": -0.20501409471035004, "logits/rejected": -0.3177582919597626, "logps/chosen": -1.8571135997772217, "logps/rejected": -2.2809691429138184, "loss": 2.1117, "nll_loss": 2.0629868507385254, "rewards/accuracies": 0.875, "rewards/chosen": -0.1857113391160965, "rewards/margins": 0.04238557443022728, "rewards/rejected": -0.22809693217277527, "step": 93 }, { "epoch": 0.2595788746979634, "grad_norm": 0.48695462942123413, "learning_rate": 4.26605504587156e-06, "log_odds_chosen": 0.4320299029350281, "log_odds_ratio": -0.5084698796272278, "logits/chosen": -0.33521950244903564, "logits/rejected": -0.22524963319301605, "logps/chosen": -1.759456753730774, "logps/rejected": -2.130563974380493, "loss": 1.9918, "nll_loss": 1.9409611225128174, "rewards/accuracies": 0.875, "rewards/chosen": -0.17594566941261292, "rewards/margins": 0.03711073845624924, "rewards/rejected": -0.21305640041828156, "step": 94 }, { "epoch": 0.26234035208836726, "grad_norm": 0.4444352984428406, "learning_rate": 4.311926605504588e-06, "log_odds_chosen": 0.3173726201057434, "log_odds_ratio": -0.5506779551506042, "logits/chosen": -0.15516842901706696, "logits/rejected": -0.30819153785705566, "logps/chosen": -1.7852771282196045, "logps/rejected": -2.0567989349365234, "loss": 2.0242, "nll_loss": 1.969139814376831, "rewards/accuracies": 1.0, "rewards/chosen": -0.17852769792079926, "rewards/margins": 0.027152204886078835, "rewards/rejected": -0.20567990839481354, "step": 95 }, { "epoch": 0.26510182947877114, "grad_norm": 0.508222758769989, "learning_rate": 4.357798165137615e-06, "log_odds_chosen": 0.26060953736305237, "log_odds_ratio": -0.5766869187355042, "logits/chosen": -0.21468104422092438, "logits/rejected": -0.17443214356899261, "logps/chosen": -1.9513554573059082, "logps/rejected": -2.179452419281006, "loss": 2.1941, "nll_loss": 2.1364006996154785, "rewards/accuracies": 0.875, "rewards/chosen": -0.19513554871082306, "rewards/margins": 0.02280968800187111, "rewards/rejected": -0.21794524788856506, "step": 96 }, { "epoch": 0.26786330686917503, "grad_norm": 0.4931764602661133, "learning_rate": 4.403669724770643e-06, "log_odds_chosen": 0.5352882146835327, "log_odds_ratio": -0.4726255536079407, "logits/chosen": -0.10641247034072876, "logits/rejected": -0.164643332362175, "logps/chosen": -1.7959346771240234, "logps/rejected": -2.266484022140503, "loss": 2.0405, "nll_loss": 1.9932180643081665, "rewards/accuracies": 1.0, "rewards/chosen": -0.17959347367286682, "rewards/margins": 0.047054924070835114, "rewards/rejected": -0.22664840519428253, "step": 97 }, { "epoch": 0.27062478425957887, "grad_norm": 0.49030837416648865, "learning_rate": 4.44954128440367e-06, "log_odds_chosen": 0.38322845101356506, "log_odds_ratio": -0.5242493748664856, "logits/chosen": -0.15826918184757233, "logits/rejected": -0.20204174518585205, "logps/chosen": -1.8669074773788452, "logps/rejected": -2.199532985687256, "loss": 2.1032, "nll_loss": 2.0507290363311768, "rewards/accuracies": 1.0, "rewards/chosen": -0.18669073283672333, "rewards/margins": 0.03326254338026047, "rewards/rejected": -0.2199532836675644, "step": 98 }, { "epoch": 0.27338626164998275, "grad_norm": 0.49382349848747253, "learning_rate": 4.4954128440366975e-06, "log_odds_chosen": 0.5044716000556946, "log_odds_ratio": -0.4766765236854553, "logits/chosen": -0.1681555211544037, "logits/rejected": -0.3671070337295532, "logps/chosen": -1.9003745317459106, "logps/rejected": -2.344512939453125, "loss": 2.1245, "nll_loss": 2.076801061630249, "rewards/accuracies": 1.0, "rewards/chosen": -0.19003747403621674, "rewards/margins": 0.04441382735967636, "rewards/rejected": -0.2344512790441513, "step": 99 }, { "epoch": 0.2761477390403866, "grad_norm": 0.507435142993927, "learning_rate": 4.541284403669725e-06, "log_odds_chosen": 0.2962280511856079, "log_odds_ratio": -0.5599422454833984, "logits/chosen": -0.26080605387687683, "logits/rejected": -0.24701349437236786, "logps/chosen": -1.798446774482727, "logps/rejected": -2.050736427307129, "loss": 2.0576, "nll_loss": 2.001603841781616, "rewards/accuracies": 0.875, "rewards/chosen": -0.1798446923494339, "rewards/margins": 0.0252289529889822, "rewards/rejected": -0.20507365465164185, "step": 100 }, { "epoch": 0.2789092164307905, "grad_norm": 0.461052268743515, "learning_rate": 4.587155963302753e-06, "log_odds_chosen": 0.29842671751976013, "log_odds_ratio": -0.5633093118667603, "logits/chosen": -0.11747467517852783, "logits/rejected": -0.2521146833896637, "logps/chosen": -1.913146734237671, "logps/rejected": -2.1769111156463623, "loss": 2.1524, "nll_loss": 2.0960795879364014, "rewards/accuracies": 0.875, "rewards/chosen": -0.1913146674633026, "rewards/margins": 0.02637643553316593, "rewards/rejected": -0.21769112348556519, "step": 101 }, { "epoch": 0.28167069382119436, "grad_norm": 0.42991501092910767, "learning_rate": 4.63302752293578e-06, "log_odds_chosen": 0.5397889614105225, "log_odds_ratio": -0.4643154740333557, "logits/chosen": -0.17212505638599396, "logits/rejected": -0.3217812776565552, "logps/chosen": -1.7475972175598145, "logps/rejected": -2.209057331085205, "loss": 2.0036, "nll_loss": 1.9572076797485352, "rewards/accuracies": 1.0, "rewards/chosen": -0.17475973069667816, "rewards/margins": 0.046146005392074585, "rewards/rejected": -0.22090573608875275, "step": 102 }, { "epoch": 0.2844321712115982, "grad_norm": 0.47230181097984314, "learning_rate": 4.678899082568808e-06, "log_odds_chosen": 0.29523512721061707, "log_odds_ratio": -0.5644804239273071, "logits/chosen": -0.22446005046367645, "logits/rejected": -0.26140278577804565, "logps/chosen": -1.7972923517227173, "logps/rejected": -2.051152229309082, "loss": 2.0406, "nll_loss": 1.9841551780700684, "rewards/accuracies": 0.875, "rewards/chosen": -0.17972922325134277, "rewards/margins": 0.025385981425642967, "rewards/rejected": -0.20511522889137268, "step": 103 }, { "epoch": 0.2871936486020021, "grad_norm": 0.4233863651752472, "learning_rate": 4.724770642201835e-06, "log_odds_chosen": 0.3453969359397888, "log_odds_ratio": -0.5395044684410095, "logits/chosen": -0.18183927237987518, "logits/rejected": -0.3289150297641754, "logps/chosen": -1.7190299034118652, "logps/rejected": -2.0107898712158203, "loss": 1.9637, "nll_loss": 1.9097464084625244, "rewards/accuracies": 1.0, "rewards/chosen": -0.17190299928188324, "rewards/margins": 0.02917599491775036, "rewards/rejected": -0.20107899606227875, "step": 104 }, { "epoch": 0.2899551259924059, "grad_norm": 0.4307953417301178, "learning_rate": 4.770642201834863e-06, "log_odds_chosen": 0.46213921904563904, "log_odds_ratio": -0.4912022650241852, "logits/chosen": -0.1190250962972641, "logits/rejected": -0.19093452394008636, "logps/chosen": -1.7502868175506592, "logps/rejected": -2.1464428901672363, "loss": 1.9758, "nll_loss": 1.9266620874404907, "rewards/accuracies": 1.0, "rewards/chosen": -0.17502868175506592, "rewards/margins": 0.03961558640003204, "rewards/rejected": -0.21464428305625916, "step": 105 }, { "epoch": 0.2927166033828098, "grad_norm": 0.43505242466926575, "learning_rate": 4.816513761467891e-06, "log_odds_chosen": 0.4836673140525818, "log_odds_ratio": -0.49270570278167725, "logits/chosen": -0.08418025076389313, "logits/rejected": -0.21315333247184753, "logps/chosen": -1.7327262163162231, "logps/rejected": -2.145224094390869, "loss": 1.9644, "nll_loss": 1.9150831699371338, "rewards/accuracies": 0.875, "rewards/chosen": -0.17327262461185455, "rewards/margins": 0.041249774396419525, "rewards/rejected": -0.21452240645885468, "step": 106 }, { "epoch": 0.2954780807732137, "grad_norm": 0.4454819858074188, "learning_rate": 4.862385321100918e-06, "log_odds_chosen": 0.3297927975654602, "log_odds_ratio": -0.5459839701652527, "logits/chosen": -0.08551089465618134, "logits/rejected": -0.3492465913295746, "logps/chosen": -1.8108294010162354, "logps/rejected": -2.0928804874420166, "loss": 2.0393, "nll_loss": 1.9846614599227905, "rewards/accuracies": 0.875, "rewards/chosen": -0.18108293414115906, "rewards/margins": 0.02820511721074581, "rewards/rejected": -0.20928806066513062, "step": 107 }, { "epoch": 0.2982395581636175, "grad_norm": 0.4228799641132355, "learning_rate": 4.908256880733945e-06, "log_odds_chosen": 0.4139629602432251, "log_odds_ratio": -0.5125847458839417, "logits/chosen": -0.042439091950654984, "logits/rejected": -0.3392437696456909, "logps/chosen": -1.842297077178955, "logps/rejected": -2.203068971633911, "loss": 2.0532, "nll_loss": 2.00195050239563, "rewards/accuracies": 0.875, "rewards/chosen": -0.18422970175743103, "rewards/margins": 0.03607717901468277, "rewards/rejected": -0.2203068882226944, "step": 108 }, { "epoch": 0.3010010355540214, "grad_norm": 0.4132618308067322, "learning_rate": 4.954128440366973e-06, "log_odds_chosen": 0.5509217977523804, "log_odds_ratio": -0.4667437970638275, "logits/chosen": -0.047298721969127655, "logits/rejected": -0.492242693901062, "logps/chosen": -1.8542200326919556, "logps/rejected": -2.3400068283081055, "loss": 2.0568, "nll_loss": 2.0100908279418945, "rewards/accuracies": 1.0, "rewards/chosen": -0.18542201817035675, "rewards/margins": 0.04857867211103439, "rewards/rejected": -0.23400066792964935, "step": 109 }, { "epoch": 0.30376251294442524, "grad_norm": 0.40540555119514465, "learning_rate": 5e-06, "log_odds_chosen": 0.26367828249931335, "log_odds_ratio": -0.5857654213905334, "logits/chosen": 0.020869266241788864, "logits/rejected": -0.3348216116428375, "logps/chosen": -1.8042923212051392, "logps/rejected": -2.0284535884857178, "loss": 2.0564, "nll_loss": 1.997856616973877, "rewards/accuracies": 0.75, "rewards/chosen": -0.18042920529842377, "rewards/margins": 0.02241615764796734, "rewards/rejected": -0.20284539461135864, "step": 110 }, { "epoch": 0.30652399033482913, "grad_norm": 0.3964520990848541, "learning_rate": 4.999987154315977e-06, "log_odds_chosen": 0.39854103326797485, "log_odds_ratio": -0.5201252698898315, "logits/chosen": -0.07075213640928268, "logits/rejected": -0.26321282982826233, "logps/chosen": -1.7023441791534424, "logps/rejected": -2.03615665435791, "loss": 1.9356, "nll_loss": 1.883634090423584, "rewards/accuracies": 1.0, "rewards/chosen": -0.17023441195487976, "rewards/margins": 0.03338123857975006, "rewards/rejected": -0.20361566543579102, "step": 111 }, { "epoch": 0.309285467725233, "grad_norm": 0.4122212827205658, "learning_rate": 4.999948617395916e-06, "log_odds_chosen": 0.3810994029045105, "log_odds_ratio": -0.528425931930542, "logits/chosen": 0.04292220622301102, "logits/rejected": -0.24851810932159424, "logps/chosen": -1.7573786973953247, "logps/rejected": -2.084685802459717, "loss": 1.9844, "nll_loss": 1.931592583656311, "rewards/accuracies": 1.0, "rewards/chosen": -0.17573785781860352, "rewards/margins": 0.03273071348667145, "rewards/rejected": -0.20846858620643616, "step": 112 }, { "epoch": 0.31204694511563685, "grad_norm": 0.4462045729160309, "learning_rate": 4.999884389635843e-06, "log_odds_chosen": 0.392294704914093, "log_odds_ratio": -0.520103394985199, "logits/chosen": -0.046707406640052795, "logits/rejected": -0.22425656020641327, "logps/chosen": -1.8034018278121948, "logps/rejected": -2.140183687210083, "loss": 2.0382, "nll_loss": 1.9861979484558105, "rewards/accuracies": 1.0, "rewards/chosen": -0.18034018576145172, "rewards/margins": 0.03367818892002106, "rewards/rejected": -0.21401838958263397, "step": 113 }, { "epoch": 0.31480842250604074, "grad_norm": 0.40587544441223145, "learning_rate": 4.9997944716957985e-06, "log_odds_chosen": 0.48522624373435974, "log_odds_ratio": -0.49063414335250854, "logits/chosen": 0.049725521355867386, "logits/rejected": -0.3027104139328003, "logps/chosen": -1.8043849468231201, "logps/rejected": -2.224425792694092, "loss": 2.0299, "nll_loss": 1.9808719158172607, "rewards/accuracies": 0.875, "rewards/chosen": -0.18043850362300873, "rewards/margins": 0.04200407862663269, "rewards/rejected": -0.22244258224964142, "step": 114 }, { "epoch": 0.3175698998964446, "grad_norm": 0.38116294145584106, "learning_rate": 4.999678864499828e-06, "log_odds_chosen": 0.35890865325927734, "log_odds_ratio": -0.5324774384498596, "logits/chosen": 0.1463293731212616, "logits/rejected": -0.3165544271469116, "logps/chosen": -1.83231782913208, "logps/rejected": -2.1404545307159424, "loss": 2.0335, "nll_loss": 1.9802701473236084, "rewards/accuracies": 1.0, "rewards/chosen": -0.18323180079460144, "rewards/margins": 0.030813684687018394, "rewards/rejected": -0.21404549479484558, "step": 115 }, { "epoch": 0.32033137728684846, "grad_norm": 0.3846777081489563, "learning_rate": 4.999537569235975e-06, "log_odds_chosen": 0.31865543127059937, "log_odds_ratio": -0.5562014579772949, "logits/chosen": 0.06778667122125626, "logits/rejected": -0.2945130169391632, "logps/chosen": -1.8204419612884521, "logps/rejected": -2.0919411182403564, "loss": 2.0362, "nll_loss": 1.9805940389633179, "rewards/accuracies": 0.875, "rewards/chosen": -0.18204417824745178, "rewards/margins": 0.02714991755783558, "rewards/rejected": -0.2091941088438034, "step": 116 }, { "epoch": 0.32309285467725235, "grad_norm": 0.3859681189060211, "learning_rate": 4.999370587356267e-06, "log_odds_chosen": 0.2864968180656433, "log_odds_ratio": -0.5685579180717468, "logits/chosen": 0.021781759336590767, "logits/rejected": -0.41891583800315857, "logps/chosen": -1.8915698528289795, "logps/rejected": -2.1414055824279785, "loss": 2.1035, "nll_loss": 2.0466203689575195, "rewards/accuracies": 0.875, "rewards/chosen": -0.18915697932243347, "rewards/margins": 0.02498357556760311, "rewards/rejected": -0.21414057910442352, "step": 117 }, { "epoch": 0.3258543320676562, "grad_norm": 0.4066268801689148, "learning_rate": 4.9991779205767e-06, "log_odds_chosen": 0.2647791802883148, "log_odds_ratio": -0.5728141069412231, "logits/chosen": -0.04139568656682968, "logits/rejected": -0.6460073590278625, "logps/chosen": -1.757622480392456, "logps/rejected": -1.9820458889007568, "loss": 1.984, "nll_loss": 1.9267468452453613, "rewards/accuracies": 1.0, "rewards/chosen": -0.17576223611831665, "rewards/margins": 0.022442325949668884, "rewards/rejected": -0.19820457696914673, "step": 118 }, { "epoch": 0.32861580945806007, "grad_norm": 0.4128899574279785, "learning_rate": 4.998959570877224e-06, "log_odds_chosen": 0.3499869406223297, "log_odds_ratio": -0.5363956093788147, "logits/chosen": 0.16672371327877045, "logits/rejected": -0.34432125091552734, "logps/chosen": -1.9091435670852661, "logps/rejected": -2.2108755111694336, "loss": 2.1306, "nll_loss": 2.0769805908203125, "rewards/accuracies": 0.875, "rewards/chosen": -0.19091437757015228, "rewards/margins": 0.0301731638610363, "rewards/rejected": -0.2210875302553177, "step": 119 }, { "epoch": 0.3313772868484639, "grad_norm": 0.358536958694458, "learning_rate": 4.99871554050172e-06, "log_odds_chosen": 0.42555758357048035, "log_odds_ratio": -0.5050589442253113, "logits/chosen": 0.02474893629550934, "logits/rejected": -0.25200581550598145, "logps/chosen": -1.7013750076293945, "logps/rejected": -2.060541868209839, "loss": 1.9062, "nll_loss": 1.855684757232666, "rewards/accuracies": 1.0, "rewards/chosen": -0.17013752460479736, "rewards/margins": 0.03591667488217354, "rewards/rejected": -0.2060541808605194, "step": 120 }, { "epoch": 0.3341387642388678, "grad_norm": 0.394452840089798, "learning_rate": 4.9984458319579775e-06, "log_odds_chosen": 0.423836886882782, "log_odds_ratio": -0.5092670321464539, "logits/chosen": -0.0022555014584213495, "logits/rejected": -0.3431664705276489, "logps/chosen": -1.7344661951065063, "logps/rejected": -2.0950186252593994, "loss": 1.9335, "nll_loss": 1.8825750350952148, "rewards/accuracies": 1.0, "rewards/chosen": -0.17344661056995392, "rewards/margins": 0.03605526313185692, "rewards/rejected": -0.20950186252593994, "step": 121 }, { "epoch": 0.3369002416292717, "grad_norm": 0.3641405999660492, "learning_rate": 4.99815044801767e-06, "log_odds_chosen": 0.31487271189689636, "log_odds_ratio": -0.5565272569656372, "logits/chosen": 0.12278047204017639, "logits/rejected": -0.3144981861114502, "logps/chosen": -1.7826478481292725, "logps/rejected": -2.0509307384490967, "loss": 2.0075, "nll_loss": 1.951832890510559, "rewards/accuracies": 0.875, "rewards/chosen": -0.1782647967338562, "rewards/margins": 0.026828289031982422, "rewards/rejected": -0.20509308576583862, "step": 122 }, { "epoch": 0.3396617190196755, "grad_norm": 0.4089741110801697, "learning_rate": 4.9978293917163225e-06, "log_odds_chosen": 0.1852492243051529, "log_odds_ratio": -0.6086881756782532, "logits/chosen": 0.059370554983615875, "logits/rejected": -0.2047065794467926, "logps/chosen": -1.9199140071868896, "logps/rejected": -2.0819344520568848, "loss": 2.1197, "nll_loss": 2.0588343143463135, "rewards/accuracies": 0.75, "rewards/chosen": -0.1919914036989212, "rewards/margins": 0.016202054917812347, "rewards/rejected": -0.20819345116615295, "step": 123 }, { "epoch": 0.3424231964100794, "grad_norm": 0.39316678047180176, "learning_rate": 4.997482666353287e-06, "log_odds_chosen": 0.3156554102897644, "log_odds_ratio": -0.5562319755554199, "logits/chosen": -0.036051761358976364, "logits/rejected": -0.20177382230758667, "logps/chosen": -1.8153338432312012, "logps/rejected": -2.0875864028930664, "loss": 2.0271, "nll_loss": 1.9714655876159668, "rewards/accuracies": 0.75, "rewards/chosen": -0.18153339624404907, "rewards/margins": 0.027225244790315628, "rewards/rejected": -0.2087586373090744, "step": 124 }, { "epoch": 0.34518467380048323, "grad_norm": 0.37397369742393494, "learning_rate": 4.997110275491702e-06, "log_odds_chosen": 0.47729045152664185, "log_odds_ratio": -0.4998089373111725, "logits/chosen": 0.06823548674583435, "logits/rejected": -0.6389302015304565, "logps/chosen": -1.725682258605957, "logps/rejected": -2.133211612701416, "loss": 1.9424, "nll_loss": 1.8924020528793335, "rewards/accuracies": 1.0, "rewards/chosen": -0.17256823182106018, "rewards/margins": 0.04075293242931366, "rewards/rejected": -0.21332114934921265, "step": 125 }, { "epoch": 0.3479461511908871, "grad_norm": 0.36338478326797485, "learning_rate": 4.9967122229584614e-06, "log_odds_chosen": 0.46506431698799133, "log_odds_ratio": -0.49022355675697327, "logits/chosen": 0.062216617166996, "logits/rejected": -0.4744764268398285, "logps/chosen": -1.7237508296966553, "logps/rejected": -2.1211488246917725, "loss": 1.9404, "nll_loss": 1.891422152519226, "rewards/accuracies": 1.0, "rewards/chosen": -0.17237509787082672, "rewards/margins": 0.03973980247974396, "rewards/rejected": -0.2121148705482483, "step": 126 }, { "epoch": 0.350707628581291, "grad_norm": 0.3949665427207947, "learning_rate": 4.996288512844169e-06, "log_odds_chosen": 0.208975687623024, "log_odds_ratio": -0.603169322013855, "logits/chosen": 0.15239077806472778, "logits/rejected": -0.14533445239067078, "logps/chosen": -1.8812259435653687, "logps/rejected": -2.058507204055786, "loss": 2.1066, "nll_loss": 2.0462498664855957, "rewards/accuracies": 0.625, "rewards/chosen": -0.18812260031700134, "rewards/margins": 0.01772812381386757, "rewards/rejected": -0.2058507204055786, "step": 127 }, { "epoch": 0.35346910597169484, "grad_norm": 0.3274211585521698, "learning_rate": 4.995839149503103e-06, "log_odds_chosen": 0.3612444996833801, "log_odds_ratio": -0.5432137250900269, "logits/chosen": 0.15867066383361816, "logits/rejected": -0.37713369727134705, "logps/chosen": -1.7504801750183105, "logps/rejected": -2.060889959335327, "loss": 1.9636, "nll_loss": 1.9092310667037964, "rewards/accuracies": 0.875, "rewards/chosen": -0.17504799365997314, "rewards/margins": 0.03104100562632084, "rewards/rejected": -0.20608901977539062, "step": 128 }, { "epoch": 0.35623058336209873, "grad_norm": 0.3613954186439514, "learning_rate": 4.995364137553166e-06, "log_odds_chosen": 0.2536008656024933, "log_odds_ratio": -0.5839525461196899, "logits/chosen": 0.025492653250694275, "logits/rejected": -0.21177223324775696, "logps/chosen": -1.7983472347259521, "logps/rejected": -2.013514995574951, "loss": 2.0121, "nll_loss": 1.953747034072876, "rewards/accuracies": 0.875, "rewards/chosen": -0.1798347383737564, "rewards/margins": 0.021516768261790276, "rewards/rejected": -0.20135147869586945, "step": 129 }, { "epoch": 0.35899206075250256, "grad_norm": 0.33361607789993286, "learning_rate": 4.994863481875842e-06, "log_odds_chosen": 0.4904846251010895, "log_odds_ratio": -0.4827660620212555, "logits/chosen": 0.06080925092101097, "logits/rejected": -0.42839962244033813, "logps/chosen": -1.7444632053375244, "logps/rejected": -2.1671104431152344, "loss": 1.9554, "nll_loss": 1.9071358442306519, "rewards/accuracies": 1.0, "rewards/chosen": -0.17444632947444916, "rewards/margins": 0.04226472228765488, "rewards/rejected": -0.21671104431152344, "step": 130 }, { "epoch": 0.36175353814290645, "grad_norm": 0.38626936078071594, "learning_rate": 4.99433718761614e-06, "log_odds_chosen": 0.4231716990470886, "log_odds_ratio": -0.5083524584770203, "logits/chosen": 0.15689942240715027, "logits/rejected": -0.27488887310028076, "logps/chosen": -1.7832603454589844, "logps/rejected": -2.1480154991149902, "loss": 1.9872, "nll_loss": 1.9363242387771606, "rewards/accuracies": 1.0, "rewards/chosen": -0.1783260554075241, "rewards/margins": 0.036475520581007004, "rewards/rejected": -0.21480156481266022, "step": 131 }, { "epoch": 0.36451501553331034, "grad_norm": 0.3283027410507202, "learning_rate": 4.993785260182552e-06, "log_odds_chosen": 0.4390679895877838, "log_odds_ratio": -0.5016705393791199, "logits/chosen": 0.13413819670677185, "logits/rejected": -0.5499831438064575, "logps/chosen": -1.7239453792572021, "logps/rejected": -2.099557399749756, "loss": 1.9329, "nll_loss": 1.882704734802246, "rewards/accuracies": 1.0, "rewards/chosen": -0.1723945438861847, "rewards/margins": 0.037561215460300446, "rewards/rejected": -0.20995575189590454, "step": 132 }, { "epoch": 0.36727649292371417, "grad_norm": 0.29404011368751526, "learning_rate": 4.993207705246983e-06, "log_odds_chosen": 0.33607491850852966, "log_odds_ratio": -0.5448395609855652, "logits/chosen": 0.09872237592935562, "logits/rejected": -0.3414973318576813, "logps/chosen": -1.7783253192901611, "logps/rejected": -2.0684056282043457, "loss": 1.9796, "nll_loss": 1.925091028213501, "rewards/accuracies": 1.0, "rewards/chosen": -0.17783252894878387, "rewards/margins": 0.029008038341999054, "rewards/rejected": -0.20684055984020233, "step": 133 }, { "epoch": 0.37003797031411806, "grad_norm": 0.303219199180603, "learning_rate": 4.992604528744705e-06, "log_odds_chosen": 0.37202000617980957, "log_odds_ratio": -0.5278716087341309, "logits/chosen": 0.0883011743426323, "logits/rejected": -0.6590756177902222, "logps/chosen": -1.7620971202850342, "logps/rejected": -2.0787768363952637, "loss": 1.9649, "nll_loss": 1.9120779037475586, "rewards/accuracies": 1.0, "rewards/chosen": -0.1762097179889679, "rewards/margins": 0.03166797757148743, "rewards/rejected": -0.20787768065929413, "step": 134 }, { "epoch": 0.37279944770452195, "grad_norm": 0.3061475455760956, "learning_rate": 4.9919757368742895e-06, "log_odds_chosen": 0.43486303091049194, "log_odds_ratio": -0.5079333782196045, "logits/chosen": 0.13486391305923462, "logits/rejected": -0.44202011823654175, "logps/chosen": -1.683558464050293, "logps/rejected": -2.0501785278320312, "loss": 1.8894, "nll_loss": 1.8386157751083374, "rewards/accuracies": 1.0, "rewards/chosen": -0.16835585236549377, "rewards/margins": 0.0366620309650898, "rewards/rejected": -0.20501787960529327, "step": 135 }, { "epoch": 0.3755609250949258, "grad_norm": 0.3027716279029846, "learning_rate": 4.991321336097546e-06, "log_odds_chosen": 0.3602009117603302, "log_odds_ratio": -0.5340627431869507, "logits/chosen": 0.09272217750549316, "logits/rejected": -0.37184590101242065, "logps/chosen": -1.8042678833007812, "logps/rejected": -2.1151676177978516, "loss": 1.9799, "nll_loss": 1.9264534711837769, "rewards/accuracies": 1.0, "rewards/chosen": -0.18042679131031036, "rewards/margins": 0.031089982017874718, "rewards/rejected": -0.21151678264141083, "step": 136 }, { "epoch": 0.37832240248532967, "grad_norm": 0.2968030273914337, "learning_rate": 4.990641333139455e-06, "log_odds_chosen": 0.3864700496196747, "log_odds_ratio": -0.5240495204925537, "logits/chosen": 0.15802569687366486, "logits/rejected": -0.40965431928634644, "logps/chosen": -1.6786091327667236, "logps/rejected": -2.002851724624634, "loss": 1.8816, "nll_loss": 1.8292038440704346, "rewards/accuracies": 1.0, "rewards/chosen": -0.16786089539527893, "rewards/margins": 0.032424286007881165, "rewards/rejected": -0.2002851963043213, "step": 137 }, { "epoch": 0.3810838798757335, "grad_norm": 0.2909601330757141, "learning_rate": 4.989935734988098e-06, "log_odds_chosen": 0.2820569574832916, "log_odds_ratio": -0.5659909844398499, "logits/chosen": 0.20683330297470093, "logits/rejected": -0.5098833441734314, "logps/chosen": -1.7661350965499878, "logps/rejected": -2.0048882961273193, "loss": 1.9544, "nll_loss": 1.8978168964385986, "rewards/accuracies": 0.875, "rewards/chosen": -0.17661352455615997, "rewards/margins": 0.023875314742326736, "rewards/rejected": -0.2004888355731964, "step": 138 }, { "epoch": 0.3838453572661374, "grad_norm": 0.3169582188129425, "learning_rate": 4.989204548894589e-06, "log_odds_chosen": 0.27746957540512085, "log_odds_ratio": -0.5728934407234192, "logits/chosen": 0.26577499508857727, "logits/rejected": -0.32127267122268677, "logps/chosen": -1.846369743347168, "logps/rejected": -2.0836005210876465, "loss": 2.0312, "nll_loss": 1.973919153213501, "rewards/accuracies": 0.75, "rewards/chosen": -0.18463698029518127, "rewards/margins": 0.023723063990473747, "rewards/rejected": -0.20836003124713898, "step": 139 }, { "epoch": 0.3866068346565413, "grad_norm": 0.2856157720088959, "learning_rate": 4.988447782372996e-06, "log_odds_chosen": 0.2677723169326782, "log_odds_ratio": -0.5744235515594482, "logits/chosen": 0.08042767643928528, "logits/rejected": -0.489827036857605, "logps/chosen": -1.6772124767303467, "logps/rejected": -1.9024029970169067, "loss": 1.8706, "nll_loss": 1.8131682872772217, "rewards/accuracies": 0.875, "rewards/chosen": -0.1677212417125702, "rewards/margins": 0.022519057616591454, "rewards/rejected": -0.1902403086423874, "step": 140 }, { "epoch": 0.3893683120469451, "grad_norm": 0.26832813024520874, "learning_rate": 4.9876654432002655e-06, "log_odds_chosen": 0.3760983943939209, "log_odds_ratio": -0.5314284563064575, "logits/chosen": 0.22744062542915344, "logits/rejected": -0.6199472546577454, "logps/chosen": -1.7124302387237549, "logps/rejected": -2.035111665725708, "loss": 1.8915, "nll_loss": 1.838379979133606, "rewards/accuracies": 0.875, "rewards/chosen": -0.17124304175376892, "rewards/margins": 0.032268136739730835, "rewards/rejected": -0.20351116359233856, "step": 141 }, { "epoch": 0.392129789437349, "grad_norm": 0.2787982225418091, "learning_rate": 4.986857539416144e-06, "log_odds_chosen": 0.4650447368621826, "log_odds_ratio": -0.49311909079551697, "logits/chosen": 0.13020291924476624, "logits/rejected": -0.6509877443313599, "logps/chosen": -1.6808263063430786, "logps/rejected": -2.0721030235290527, "loss": 1.8625, "nll_loss": 1.813225269317627, "rewards/accuracies": 0.875, "rewards/chosen": -0.16808265447616577, "rewards/margins": 0.039127662777900696, "rewards/rejected": -0.20721031725406647, "step": 142 }, { "epoch": 0.39489126682775283, "grad_norm": 0.2971290349960327, "learning_rate": 4.986024079323092e-06, "log_odds_chosen": 0.20813080668449402, "log_odds_ratio": -0.5995200276374817, "logits/chosen": 0.1712779998779297, "logits/rejected": -0.2880682647228241, "logps/chosen": -1.6474640369415283, "logps/rejected": -1.8199793100357056, "loss": 1.873, "nll_loss": 1.8130154609680176, "rewards/accuracies": 0.875, "rewards/chosen": -0.16474640369415283, "rewards/margins": 0.017251526936888695, "rewards/rejected": -0.18199792504310608, "step": 143 }, { "epoch": 0.3976527442181567, "grad_norm": 0.28045693039894104, "learning_rate": 4.985165071486201e-06, "log_odds_chosen": 0.3738395869731903, "log_odds_ratio": -0.5252775549888611, "logits/chosen": 0.10901468247175217, "logits/rejected": -0.5548363924026489, "logps/chosen": -1.7671691179275513, "logps/rejected": -2.086446762084961, "loss": 1.9397, "nll_loss": 1.8872014284133911, "rewards/accuracies": 1.0, "rewards/chosen": -0.1767169088125229, "rewards/margins": 0.031927771866321564, "rewards/rejected": -0.20864468812942505, "step": 144 }, { "epoch": 0.4004142216085606, "grad_norm": 0.28425726294517517, "learning_rate": 4.984280524733107e-06, "log_odds_chosen": 0.44026607275009155, "log_odds_ratio": -0.5015271902084351, "logits/chosen": 0.08948800712823868, "logits/rejected": -0.4244663119316101, "logps/chosen": -1.698014736175537, "logps/rejected": -2.068047285079956, "loss": 1.8842, "nll_loss": 1.8340739011764526, "rewards/accuracies": 1.0, "rewards/chosen": -0.1698014885187149, "rewards/margins": 0.037003256380558014, "rewards/rejected": -0.20680472254753113, "step": 145 }, { "epoch": 0.40317569899896444, "grad_norm": 0.27275997400283813, "learning_rate": 4.983370448153896e-06, "log_odds_chosen": 0.34414318203926086, "log_odds_ratio": -0.53827965259552, "logits/chosen": 0.12077900767326355, "logits/rejected": -0.45666831731796265, "logps/chosen": -1.7140839099884033, "logps/rejected": -2.004852056503296, "loss": 1.8944, "nll_loss": 1.8405368328094482, "rewards/accuracies": 1.0, "rewards/chosen": -0.17140838503837585, "rewards/margins": 0.029076814651489258, "rewards/rejected": -0.2004851996898651, "step": 146 }, { "epoch": 0.4059371763893683, "grad_norm": 0.2800770699977875, "learning_rate": 4.9824348511010115e-06, "log_odds_chosen": 0.33458200097084045, "log_odds_ratio": -0.5480896830558777, "logits/chosen": 0.24399758875370026, "logits/rejected": -0.5033762454986572, "logps/chosen": -1.8108501434326172, "logps/rejected": -2.099644422531128, "loss": 1.983, "nll_loss": 1.9281907081604004, "rewards/accuracies": 1.0, "rewards/chosen": -0.1810850203037262, "rewards/margins": 0.02887941710650921, "rewards/rejected": -0.20996443927288055, "step": 147 }, { "epoch": 0.40869865377977216, "grad_norm": 0.25797146558761597, "learning_rate": 4.981473743189163e-06, "log_odds_chosen": 0.5845157504081726, "log_odds_ratio": -0.45159903168678284, "logits/chosen": 0.005943242460489273, "logits/rejected": -0.7690349221229553, "logps/chosen": -1.5877741575241089, "logps/rejected": -2.0803065299987793, "loss": 1.7773, "nll_loss": 1.73209547996521, "rewards/accuracies": 1.0, "rewards/chosen": -0.1587774157524109, "rewards/margins": 0.04925324767827988, "rewards/rejected": -0.20803067088127136, "step": 148 }, { "epoch": 0.41146013117017605, "grad_norm": 0.27324819564819336, "learning_rate": 4.98048713429522e-06, "log_odds_chosen": 0.3932475745677948, "log_odds_ratio": -0.5176883339881897, "logits/chosen": 0.20163178443908691, "logits/rejected": -0.283429354429245, "logps/chosen": -1.7028628587722778, "logps/rejected": -2.03279709815979, "loss": 1.8839, "nll_loss": 1.8321328163146973, "rewards/accuracies": 1.0, "rewards/chosen": -0.17028628289699554, "rewards/margins": 0.032993413507938385, "rewards/rejected": -0.20327968895435333, "step": 149 }, { "epoch": 0.41422160856057993, "grad_norm": 0.30329883098602295, "learning_rate": 4.979475034558115e-06, "log_odds_chosen": 0.3415054678916931, "log_odds_ratio": -0.5400842428207397, "logits/chosen": 0.27814820408821106, "logits/rejected": -0.420722633600235, "logps/chosen": -1.834177017211914, "logps/rejected": -2.1293845176696777, "loss": 2.0005, "nll_loss": 1.9464527368545532, "rewards/accuracies": 1.0, "rewards/chosen": -0.18341770768165588, "rewards/margins": 0.029520753771066666, "rewards/rejected": -0.21293845772743225, "step": 150 }, { "epoch": 0.41698308595098377, "grad_norm": 0.2631519138813019, "learning_rate": 4.978437454378741e-06, "log_odds_chosen": 0.31423458456993103, "log_odds_ratio": -0.5544165968894958, "logits/chosen": 0.25030598044395447, "logits/rejected": -0.45834630727767944, "logps/chosen": -1.7350115776062012, "logps/rejected": -1.998989462852478, "loss": 1.9237, "nll_loss": 1.8682823181152344, "rewards/accuracies": 0.875, "rewards/chosen": -0.1735011488199234, "rewards/margins": 0.026397792622447014, "rewards/rejected": -0.19989895820617676, "step": 151 }, { "epoch": 0.41974456334138766, "grad_norm": 0.28724023699760437, "learning_rate": 4.977374404419838e-06, "log_odds_chosen": 0.34610986709594727, "log_odds_ratio": -0.5409384965896606, "logits/chosen": 0.23072689771652222, "logits/rejected": -0.4912022054195404, "logps/chosen": -1.7317731380462646, "logps/rejected": -2.0284037590026855, "loss": 1.9099, "nll_loss": 1.8557640314102173, "rewards/accuracies": 1.0, "rewards/chosen": -0.1731773018836975, "rewards/margins": 0.029663076624274254, "rewards/rejected": -0.2028403878211975, "step": 152 }, { "epoch": 0.4225060407317915, "grad_norm": 0.2612636089324951, "learning_rate": 4.976285895605888e-06, "log_odds_chosen": 0.4580684304237366, "log_odds_ratio": -0.4960138499736786, "logits/chosen": 0.18824972212314606, "logits/rejected": -0.5351172685623169, "logps/chosen": -1.6807944774627686, "logps/rejected": -2.0613059997558594, "loss": 1.8584, "nll_loss": 1.8087894916534424, "rewards/accuracies": 1.0, "rewards/chosen": -0.1680794358253479, "rewards/margins": 0.038051165640354156, "rewards/rejected": -0.20613062381744385, "step": 153 }, { "epoch": 0.4252675181221954, "grad_norm": 0.2724829614162445, "learning_rate": 4.9751719391230055e-06, "log_odds_chosen": 0.24739423394203186, "log_odds_ratio": -0.5839247703552246, "logits/chosen": 0.24238519370555878, "logits/rejected": -0.4675644040107727, "logps/chosen": -1.6522984504699707, "logps/rejected": -1.8578873872756958, "loss": 1.8598, "nll_loss": 1.8013783693313599, "rewards/accuracies": 0.875, "rewards/chosen": -0.16522985696792603, "rewards/margins": 0.02055889368057251, "rewards/rejected": -0.18578873574733734, "step": 154 }, { "epoch": 0.42802899551259926, "grad_norm": 0.27581965923309326, "learning_rate": 4.974032546418816e-06, "log_odds_chosen": 0.42455926537513733, "log_odds_ratio": -0.5114243030548096, "logits/chosen": 0.2685522735118866, "logits/rejected": -0.49730056524276733, "logps/chosen": -1.6981408596038818, "logps/rejected": -2.056694984436035, "loss": 1.8954, "nll_loss": 1.8442238569259644, "rewards/accuracies": 1.0, "rewards/chosen": -0.1698141098022461, "rewards/margins": 0.035855405032634735, "rewards/rejected": -0.20566949248313904, "step": 155 }, { "epoch": 0.4307904729030031, "grad_norm": 0.2632681131362915, "learning_rate": 4.9728677292023405e-06, "log_odds_chosen": 0.17779600620269775, "log_odds_ratio": -0.6146007776260376, "logits/chosen": 0.3470558226108551, "logits/rejected": -0.47237658500671387, "logps/chosen": -1.8082042932510376, "logps/rejected": -1.9588541984558105, "loss": 1.9785, "nll_loss": 1.9170793294906616, "rewards/accuracies": 0.875, "rewards/chosen": -0.18082045018672943, "rewards/margins": 0.015064971521496773, "rewards/rejected": -0.19588540494441986, "step": 156 }, { "epoch": 0.433551950293407, "grad_norm": 0.25831714272499084, "learning_rate": 4.971677499443882e-06, "log_odds_chosen": 0.30535322427749634, "log_odds_ratio": -0.5562926530838013, "logits/chosen": 0.22543753683567047, "logits/rejected": -0.5224329829216003, "logps/chosen": -1.7110515832901, "logps/rejected": -1.9678313732147217, "loss": 1.8946, "nll_loss": 1.838951587677002, "rewards/accuracies": 0.875, "rewards/chosen": -0.17110514640808105, "rewards/margins": 0.025677980855107307, "rewards/rejected": -0.1967831254005432, "step": 157 }, { "epoch": 0.4363134276838108, "grad_norm": 0.24926158785820007, "learning_rate": 4.97046186937489e-06, "log_odds_chosen": 0.2828327715396881, "log_odds_ratio": -0.5760643482208252, "logits/chosen": 0.1486169695854187, "logits/rejected": -0.5880769491195679, "logps/chosen": -1.690523386001587, "logps/rejected": -1.9291006326675415, "loss": 1.8625, "nll_loss": 1.804898977279663, "rewards/accuracies": 0.875, "rewards/chosen": -0.1690523475408554, "rewards/margins": 0.023857703432440758, "rewards/rejected": -0.1929100602865219, "step": 158 }, { "epoch": 0.4390749050742147, "grad_norm": 0.26394572854042053, "learning_rate": 4.9692208514878445e-06, "log_odds_chosen": 0.21585100889205933, "log_odds_ratio": -0.593828558921814, "logits/chosen": 0.3207181692123413, "logits/rejected": -0.3474537134170532, "logps/chosen": -1.8141443729400635, "logps/rejected": -1.9961789846420288, "loss": 1.9958, "nll_loss": 1.9364500045776367, "rewards/accuracies": 1.0, "rewards/chosen": -0.1814144253730774, "rewards/margins": 0.01820346899330616, "rewards/rejected": -0.1996179074048996, "step": 159 }, { "epoch": 0.4418363824646186, "grad_norm": 0.26699379086494446, "learning_rate": 4.967954458536126e-06, "log_odds_chosen": 0.34597551822662354, "log_odds_ratio": -0.5411776900291443, "logits/chosen": 0.29670047760009766, "logits/rejected": -0.25073421001434326, "logps/chosen": -1.6443804502487183, "logps/rejected": -1.9350707530975342, "loss": 1.833, "nll_loss": 1.7788795232772827, "rewards/accuracies": 1.0, "rewards/chosen": -0.16443803906440735, "rewards/margins": 0.029069023206830025, "rewards/rejected": -0.19350707530975342, "step": 160 }, { "epoch": 0.4445978598550224, "grad_norm": 0.2796018719673157, "learning_rate": 4.96666270353388e-06, "log_odds_chosen": 0.3187919855117798, "log_odds_ratio": -0.5501160025596619, "logits/chosen": 0.24211375415325165, "logits/rejected": -0.5377556681632996, "logps/chosen": -1.704611897468567, "logps/rejected": -1.9735724925994873, "loss": 1.8668, "nll_loss": 1.8117769956588745, "rewards/accuracies": 1.0, "rewards/chosen": -0.17046120762825012, "rewards/margins": 0.02689606510102749, "rewards/rejected": -0.19735725224018097, "step": 161 }, { "epoch": 0.4473593372454263, "grad_norm": 0.2614901065826416, "learning_rate": 4.965345599755888e-06, "log_odds_chosen": 0.3870270848274231, "log_odds_ratio": -0.5230943560600281, "logits/chosen": 0.20484450459480286, "logits/rejected": -0.6116334199905396, "logps/chosen": -1.7666045427322388, "logps/rejected": -2.0972108840942383, "loss": 1.9248, "nll_loss": 1.8725322484970093, "rewards/accuracies": 1.0, "rewards/chosen": -0.1766604632139206, "rewards/margins": 0.03306063264608383, "rewards/rejected": -0.20972107350826263, "step": 162 }, { "epoch": 0.45012081463583015, "grad_norm": 0.253493994474411, "learning_rate": 4.964003160737429e-06, "log_odds_chosen": 0.35205185413360596, "log_odds_ratio": -0.5439150333404541, "logits/chosen": 0.24726349115371704, "logits/rejected": -0.7317219972610474, "logps/chosen": -1.6719884872436523, "logps/rejected": -1.9640897512435913, "loss": 1.8488, "nll_loss": 1.7943804264068604, "rewards/accuracies": 0.875, "rewards/chosen": -0.16719885170459747, "rewards/margins": 0.029210133478045464, "rewards/rejected": -0.19640898704528809, "step": 163 }, { "epoch": 0.45288229202623403, "grad_norm": 0.23761652410030365, "learning_rate": 4.9626354002741424e-06, "log_odds_chosen": 0.38894832134246826, "log_odds_ratio": -0.5217959880828857, "logits/chosen": 0.2658887505531311, "logits/rejected": -0.6289904117584229, "logps/chosen": -1.6253769397735596, "logps/rejected": -1.9467687606811523, "loss": 1.7998, "nll_loss": 1.747636318206787, "rewards/accuracies": 1.0, "rewards/chosen": -0.16253769397735596, "rewards/margins": 0.03213919699192047, "rewards/rejected": -0.19467687606811523, "step": 164 }, { "epoch": 0.4556437694166379, "grad_norm": 0.2694617509841919, "learning_rate": 4.9612423324218816e-06, "log_odds_chosen": 0.383113294839859, "log_odds_ratio": -0.523147463798523, "logits/chosen": 0.29493993520736694, "logits/rejected": -0.8007028102874756, "logps/chosen": -1.798284649848938, "logps/rejected": -2.128204584121704, "loss": 1.9598, "nll_loss": 1.907509684562683, "rewards/accuracies": 1.0, "rewards/chosen": -0.1798284649848938, "rewards/margins": 0.03299199044704437, "rewards/rejected": -0.21282047033309937, "step": 165 }, { "epoch": 0.45840524680704176, "grad_norm": 0.27424952387809753, "learning_rate": 4.959823971496575e-06, "log_odds_chosen": 0.3006168007850647, "log_odds_ratio": -0.5665806531906128, "logits/chosen": 0.21390017867088318, "logits/rejected": -0.4561406373977661, "logps/chosen": -1.7170822620391846, "logps/rejected": -1.9759535789489746, "loss": 1.8963, "nll_loss": 1.8396110534667969, "rewards/accuracies": 0.75, "rewards/chosen": -0.17170822620391846, "rewards/margins": 0.025887131690979004, "rewards/rejected": -0.19759535789489746, "step": 166 }, { "epoch": 0.46116672419744564, "grad_norm": 0.28332701325416565, "learning_rate": 4.958380332074074e-06, "log_odds_chosen": 0.3598293364048004, "log_odds_ratio": -0.5315272212028503, "logits/chosen": 0.3352913558483124, "logits/rejected": -0.34136950969696045, "logps/chosen": -1.6895995140075684, "logps/rejected": -1.9940197467803955, "loss": 1.8818, "nll_loss": 1.8286209106445312, "rewards/accuracies": 1.0, "rewards/chosen": -0.16895994544029236, "rewards/margins": 0.030442016199231148, "rewards/rejected": -0.19940195977687836, "step": 167 }, { "epoch": 0.4639282015878495, "grad_norm": 0.2582882344722748, "learning_rate": 4.95691142899001e-06, "log_odds_chosen": 0.4415035843849182, "log_odds_ratio": -0.5072202682495117, "logits/chosen": 0.23933230340480804, "logits/rejected": -0.5575604438781738, "logps/chosen": -1.6840240955352783, "logps/rejected": -2.0607402324676514, "loss": 1.8592, "nll_loss": 1.8084406852722168, "rewards/accuracies": 1.0, "rewards/chosen": -0.16840240359306335, "rewards/margins": 0.037671614438295364, "rewards/rejected": -0.20607402920722961, "step": 168 }, { "epoch": 0.46668967897825336, "grad_norm": 0.2700425982475281, "learning_rate": 4.955417277339633e-06, "log_odds_chosen": 0.33803892135620117, "log_odds_ratio": -0.54216468334198, "logits/chosen": 0.2614552974700928, "logits/rejected": -0.5953667163848877, "logps/chosen": -1.7099006175994873, "logps/rejected": -1.994602918624878, "loss": 1.8657, "nll_loss": 1.8114964962005615, "rewards/accuracies": 0.875, "rewards/chosen": -0.17099007964134216, "rewards/margins": 0.028470242395997047, "rewards/rejected": -0.19946029782295227, "step": 169 }, { "epoch": 0.46945115636865725, "grad_norm": 0.2598535716533661, "learning_rate": 4.953897892477664e-06, "log_odds_chosen": 0.37775442004203796, "log_odds_ratio": -0.5379059910774231, "logits/chosen": 0.19298075139522552, "logits/rejected": -0.7334005832672119, "logps/chosen": -1.6427866220474243, "logps/rejected": -1.9675416946411133, "loss": 1.8149, "nll_loss": 1.7611249685287476, "rewards/accuracies": 0.75, "rewards/chosen": -0.16427867114543915, "rewards/margins": 0.032475508749485016, "rewards/rejected": -0.19675418734550476, "step": 170 }, { "epoch": 0.4722126337590611, "grad_norm": 0.26744216680526733, "learning_rate": 4.952353290018132e-06, "log_odds_chosen": 0.37955784797668457, "log_odds_ratio": -0.5295799374580383, "logits/chosen": 0.27365049719810486, "logits/rejected": -0.16527330875396729, "logps/chosen": -1.7106455564498901, "logps/rejected": -2.0328187942504883, "loss": 1.8951, "nll_loss": 1.842104196548462, "rewards/accuracies": 1.0, "rewards/chosen": -0.171064555644989, "rewards/margins": 0.032217323780059814, "rewards/rejected": -0.20328189432621002, "step": 171 }, { "epoch": 0.474974111149465, "grad_norm": 0.298951655626297, "learning_rate": 4.950783485834218e-06, "log_odds_chosen": 0.4051695168018341, "log_odds_ratio": -0.5118554830551147, "logits/chosen": 0.23584626615047455, "logits/rejected": -0.5780523419380188, "logps/chosen": -1.6387077569961548, "logps/rejected": -1.975178599357605, "loss": 1.8228, "nll_loss": 1.7716267108917236, "rewards/accuracies": 1.0, "rewards/chosen": -0.16387078166007996, "rewards/margins": 0.0336470901966095, "rewards/rejected": -0.19751787185668945, "step": 172 }, { "epoch": 0.4777355885398688, "grad_norm": 0.23760944604873657, "learning_rate": 4.949188496058089e-06, "log_odds_chosen": 0.308282732963562, "log_odds_ratio": -0.5617873668670654, "logits/chosen": 0.2496112585067749, "logits/rejected": -0.595442533493042, "logps/chosen": -1.6498539447784424, "logps/rejected": -1.9084365367889404, "loss": 1.8249, "nll_loss": 1.768703818321228, "rewards/accuracies": 0.875, "rewards/chosen": -0.16498540341854095, "rewards/margins": 0.02585826814174652, "rewards/rejected": -0.19084367156028748, "step": 173 }, { "epoch": 0.4804970659302727, "grad_norm": 0.27792298793792725, "learning_rate": 4.947568337080733e-06, "log_odds_chosen": 0.12510845065116882, "log_odds_ratio": -0.6408029198646545, "logits/chosen": 0.19703012704849243, "logits/rejected": -0.5250687599182129, "logps/chosen": -1.7701200246810913, "logps/rejected": -1.877974033355713, "loss": 1.9402, "nll_loss": 1.876126766204834, "rewards/accuracies": 0.625, "rewards/chosen": -0.17701202630996704, "rewards/margins": 0.010785380378365517, "rewards/rejected": -0.1877973973751068, "step": 174 }, { "epoch": 0.4832585433206766, "grad_norm": 0.26834768056869507, "learning_rate": 4.945923025551789e-06, "log_odds_chosen": 0.3476155996322632, "log_odds_ratio": -0.535990834236145, "logits/chosen": 0.25689467787742615, "logits/rejected": -0.616847813129425, "logps/chosen": -1.7164323329925537, "logps/rejected": -2.0102245807647705, "loss": 1.8654, "nll_loss": 1.8118302822113037, "rewards/accuracies": 1.0, "rewards/chosen": -0.1716432273387909, "rewards/margins": 0.029379239305853844, "rewards/rejected": -0.2010224610567093, "step": 175 }, { "epoch": 0.4860200207110804, "grad_norm": 0.2374398410320282, "learning_rate": 4.944252578379379e-06, "log_odds_chosen": 0.2779533267021179, "log_odds_ratio": -0.5700355172157288, "logits/chosen": 0.19578704237937927, "logits/rejected": -0.759753942489624, "logps/chosen": -1.6441093683242798, "logps/rejected": -1.8757362365722656, "loss": 1.8093, "nll_loss": 1.752274990081787, "rewards/accuracies": 0.875, "rewards/chosen": -0.16441094875335693, "rewards/margins": 0.023162685334682465, "rewards/rejected": -0.1875736117362976, "step": 176 }, { "epoch": 0.4887814981014843, "grad_norm": 0.259815514087677, "learning_rate": 4.942557012729933e-06, "log_odds_chosen": 0.3541259169578552, "log_odds_ratio": -0.5466777086257935, "logits/chosen": 0.26096147298812866, "logits/rejected": -0.6699164509773254, "logps/chosen": -1.7012748718261719, "logps/rejected": -1.9999008178710938, "loss": 1.8742, "nll_loss": 1.819519281387329, "rewards/accuracies": 0.875, "rewards/chosen": -0.1701274961233139, "rewards/margins": 0.02986258640885353, "rewards/rejected": -0.19999009370803833, "step": 177 }, { "epoch": 0.49154297549188813, "grad_norm": 0.2553425133228302, "learning_rate": 4.940836346028011e-06, "log_odds_chosen": 0.5353389978408813, "log_odds_ratio": -0.46747249364852905, "logits/chosen": 0.19104404747486115, "logits/rejected": -0.7688809633255005, "logps/chosen": -1.6563342809677124, "logps/rejected": -2.104980945587158, "loss": 1.8241, "nll_loss": 1.7773433923721313, "rewards/accuracies": 1.0, "rewards/chosen": -0.165633425116539, "rewards/margins": 0.04486468806862831, "rewards/rejected": -0.21049810945987701, "step": 178 }, { "epoch": 0.494304452882292, "grad_norm": 0.2521730661392212, "learning_rate": 4.9390905959561254e-06, "log_odds_chosen": 0.37528765201568604, "log_odds_ratio": -0.5245440602302551, "logits/chosen": 0.32407164573669434, "logits/rejected": -0.7876682281494141, "logps/chosen": -1.7645964622497559, "logps/rejected": -2.0821878910064697, "loss": 1.9004, "nll_loss": 1.847988247871399, "rewards/accuracies": 1.0, "rewards/chosen": -0.1764596402645111, "rewards/margins": 0.03175915777683258, "rewards/rejected": -0.2082187980413437, "step": 179 }, { "epoch": 0.4970659302726959, "grad_norm": 0.263175904750824, "learning_rate": 4.937319780454559e-06, "log_odds_chosen": 0.2128462940454483, "log_odds_ratio": -0.5969148874282837, "logits/chosen": 0.281113862991333, "logits/rejected": -0.6618841290473938, "logps/chosen": -1.717787504196167, "logps/rejected": -1.8978548049926758, "loss": 1.8852, "nll_loss": 1.8254907131195068, "rewards/accuracies": 0.75, "rewards/chosen": -0.17177876830101013, "rewards/margins": 0.018006734549999237, "rewards/rejected": -0.18978549540042877, "step": 180 }, { "epoch": 0.49982740766309974, "grad_norm": 0.2627811133861542, "learning_rate": 4.935523917721182e-06, "log_odds_chosen": 0.32824423909187317, "log_odds_ratio": -0.5476853847503662, "logits/chosen": 0.3165915608406067, "logits/rejected": -0.5085580945014954, "logps/chosen": -1.7242554426193237, "logps/rejected": -1.9997884035110474, "loss": 1.9077, "nll_loss": 1.852901816368103, "rewards/accuracies": 0.875, "rewards/chosen": -0.1724255532026291, "rewards/margins": 0.027553284540772438, "rewards/rejected": -0.19997884333133698, "step": 181 }, { "epoch": 0.5025888850535036, "grad_norm": 0.2430126965045929, "learning_rate": 4.933703026211262e-06, "log_odds_chosen": 0.37905246019363403, "log_odds_ratio": -0.528121292591095, "logits/chosen": 0.3522907495498657, "logits/rejected": -0.9080434441566467, "logps/chosen": -1.7351850271224976, "logps/rejected": -2.059718608856201, "loss": 1.8866, "nll_loss": 1.8337947130203247, "rewards/accuracies": 1.0, "rewards/chosen": -0.17351850867271423, "rewards/margins": 0.03245338052511215, "rewards/rejected": -0.2059718817472458, "step": 182 }, { "epoch": 0.5053503624439075, "grad_norm": 0.2511756420135498, "learning_rate": 4.931857124637276e-06, "log_odds_chosen": 0.3801102638244629, "log_odds_ratio": -0.5302396416664124, "logits/chosen": 0.29899683594703674, "logits/rejected": -0.5272728204727173, "logps/chosen": -1.6140385866165161, "logps/rejected": -1.9330276250839233, "loss": 1.7807, "nll_loss": 1.7276661396026611, "rewards/accuracies": 0.875, "rewards/chosen": -0.16140387952327728, "rewards/margins": 0.031898900866508484, "rewards/rejected": -0.19330278038978577, "step": 183 }, { "epoch": 0.5081118398343114, "grad_norm": 0.33855873346328735, "learning_rate": 4.92998623196872e-06, "log_odds_chosen": 0.3525073528289795, "log_odds_ratio": -0.5346016883850098, "logits/chosen": 0.18561817705631256, "logits/rejected": -0.9478727579116821, "logps/chosen": -1.6743947267532349, "logps/rejected": -1.9694938659667969, "loss": 1.8278, "nll_loss": 1.7743027210235596, "rewards/accuracies": 1.0, "rewards/chosen": -0.16743947565555573, "rewards/margins": 0.029509922489523888, "rewards/rejected": -0.19694939255714417, "step": 184 }, { "epoch": 0.5108733172247152, "grad_norm": 0.24746204912662506, "learning_rate": 4.92809036743191e-06, "log_odds_chosen": 0.4261741042137146, "log_odds_ratio": -0.5064104199409485, "logits/chosen": 0.21586617827415466, "logits/rejected": -0.7548010349273682, "logps/chosen": -1.5876227617263794, "logps/rejected": -1.9392573833465576, "loss": 1.7364, "nll_loss": 1.6857661008834839, "rewards/accuracies": 1.0, "rewards/chosen": -0.15876229107379913, "rewards/margins": 0.035163454711437225, "rewards/rejected": -0.19392573833465576, "step": 185 }, { "epoch": 0.5136347946151191, "grad_norm": 0.256515771150589, "learning_rate": 4.926169550509787e-06, "log_odds_chosen": 0.2986356019973755, "log_odds_ratio": -0.5593284964561462, "logits/chosen": 0.3137073516845703, "logits/rejected": -0.5803498029708862, "logps/chosen": -1.7011560201644897, "logps/rejected": -1.9512310028076172, "loss": 1.8413, "nll_loss": 1.7853296995162964, "rewards/accuracies": 0.875, "rewards/chosen": -0.1701156049966812, "rewards/margins": 0.025007493793964386, "rewards/rejected": -0.1951231062412262, "step": 186 }, { "epoch": 0.516396272005523, "grad_norm": 0.26078805327415466, "learning_rate": 4.924223800941718e-06, "log_odds_chosen": 0.17883820831775665, "log_odds_ratio": -0.6130943894386292, "logits/chosen": 0.2678496241569519, "logits/rejected": -0.6333581209182739, "logps/chosen": -1.6809213161468506, "logps/rejected": -1.829279899597168, "loss": 1.8416, "nll_loss": 1.7803112268447876, "rewards/accuracies": 0.75, "rewards/chosen": -0.16809213161468506, "rewards/margins": 0.014835860580205917, "rewards/rejected": -0.18292801082134247, "step": 187 }, { "epoch": 0.5191577493959268, "grad_norm": 0.3295902609825134, "learning_rate": 4.9222531387232885e-06, "log_odds_chosen": 0.2605365216732025, "log_odds_ratio": -0.5762373208999634, "logits/chosen": 0.3522469103336334, "logits/rejected": -0.6649764180183411, "logps/chosen": -1.7675739526748657, "logps/rejected": -1.9888498783111572, "loss": 1.9141, "nll_loss": 1.8564814329147339, "rewards/accuracies": 0.875, "rewards/chosen": -0.17675741016864777, "rewards/margins": 0.022127564996480942, "rewards/rejected": -0.1988849639892578, "step": 188 }, { "epoch": 0.5219192267863307, "grad_norm": 0.2640776038169861, "learning_rate": 4.920257584106104e-06, "log_odds_chosen": 0.26505714654922485, "log_odds_ratio": -0.5721904039382935, "logits/chosen": 0.2708177864551544, "logits/rejected": -0.7849910855293274, "logps/chosen": -1.7207906246185303, "logps/rejected": -1.9439318180084229, "loss": 1.8705, "nll_loss": 1.8132869005203247, "rewards/accuracies": 1.0, "rewards/chosen": -0.17207907140254974, "rewards/margins": 0.022314125671982765, "rewards/rejected": -0.19439318776130676, "step": 189 }, { "epoch": 0.5246807041767345, "grad_norm": 0.2825080156326294, "learning_rate": 4.918237157597574e-06, "log_odds_chosen": 0.343722403049469, "log_odds_ratio": -0.5388997197151184, "logits/chosen": 0.17086902260780334, "logits/rejected": -0.6501764059066772, "logps/chosen": -1.6524600982666016, "logps/rejected": -1.9383505582809448, "loss": 1.8225, "nll_loss": 1.768616795539856, "rewards/accuracies": 1.0, "rewards/chosen": -0.16524602472782135, "rewards/margins": 0.0285890344530344, "rewards/rejected": -0.1938350349664688, "step": 190 }, { "epoch": 0.5274421815671384, "grad_norm": 0.27104640007019043, "learning_rate": 4.916191879960708e-06, "log_odds_chosen": 0.38741055130958557, "log_odds_ratio": -0.5215877294540405, "logits/chosen": 0.2510019838809967, "logits/rejected": -0.7692076563835144, "logps/chosen": -1.7732622623443604, "logps/rejected": -2.102111577987671, "loss": 1.9158, "nll_loss": 1.8636667728424072, "rewards/accuracies": 1.0, "rewards/chosen": -0.17732621729373932, "rewards/margins": 0.03288493677973747, "rewards/rejected": -0.2102111577987671, "step": 191 }, { "epoch": 0.5302036589575423, "grad_norm": 0.23343509435653687, "learning_rate": 4.914121772213898e-06, "log_odds_chosen": 0.36985254287719727, "log_odds_ratio": -0.5267210006713867, "logits/chosen": 0.28792741894721985, "logits/rejected": -0.6279172301292419, "logps/chosen": -1.6510688066482544, "logps/rejected": -1.9592763185501099, "loss": 1.7875, "nll_loss": 1.7348387241363525, "rewards/accuracies": 1.0, "rewards/chosen": -0.1651068925857544, "rewards/margins": 0.03082074038684368, "rewards/rejected": -0.19592763483524323, "step": 192 }, { "epoch": 0.5329651363479462, "grad_norm": 0.2630840837955475, "learning_rate": 4.912026855630703e-06, "log_odds_chosen": 0.4057270884513855, "log_odds_ratio": -0.5138529539108276, "logits/chosen": 0.2938230633735657, "logits/rejected": -0.5338449478149414, "logps/chosen": -1.6583514213562012, "logps/rejected": -1.9981603622436523, "loss": 1.804, "nll_loss": 1.7526516914367676, "rewards/accuracies": 1.0, "rewards/chosen": -0.1658351570367813, "rewards/margins": 0.03398089110851288, "rewards/rejected": -0.1998160481452942, "step": 193 }, { "epoch": 0.5357266137383501, "grad_norm": 0.32278531789779663, "learning_rate": 4.909907151739634e-06, "log_odds_chosen": 0.36290958523750305, "log_odds_ratio": -0.5369880795478821, "logits/chosen": 0.2148250937461853, "logits/rejected": -0.6797182559967041, "logps/chosen": -1.6431670188903809, "logps/rejected": -1.9445240497589111, "loss": 1.7982, "nll_loss": 1.7444565296173096, "rewards/accuracies": 0.875, "rewards/chosen": -0.16431671380996704, "rewards/margins": 0.030135709792375565, "rewards/rejected": -0.1944524198770523, "step": 194 }, { "epoch": 0.5384880911287538, "grad_norm": 0.27246755361557007, "learning_rate": 4.907762682323926e-06, "log_odds_chosen": 0.5421338081359863, "log_odds_ratio": -0.5049780011177063, "logits/chosen": 0.2726617455482483, "logits/rejected": -0.6698200702667236, "logps/chosen": -1.649804711341858, "logps/rejected": -2.089289426803589, "loss": 1.8247, "nll_loss": 1.774214506149292, "rewards/accuracies": 0.875, "rewards/chosen": -0.1649804711341858, "rewards/margins": 0.04394847899675369, "rewards/rejected": -0.2089289426803589, "step": 195 }, { "epoch": 0.5412495685191577, "grad_norm": 0.25183457136154175, "learning_rate": 4.905593469421323e-06, "log_odds_chosen": 0.23532448709011078, "log_odds_ratio": -0.5886967778205872, "logits/chosen": 0.26476889848709106, "logits/rejected": -1.049080729484558, "logps/chosen": -1.6535186767578125, "logps/rejected": -1.8511910438537598, "loss": 1.8103, "nll_loss": 1.7514796257019043, "rewards/accuracies": 0.75, "rewards/chosen": -0.16535183787345886, "rewards/margins": 0.01976725272834301, "rewards/rejected": -0.18511910736560822, "step": 196 }, { "epoch": 0.5440110459095616, "grad_norm": 0.4395514130592346, "learning_rate": 4.90339953532384e-06, "log_odds_chosen": 0.3145284354686737, "log_odds_ratio": -0.5530616641044617, "logits/chosen": 0.2953311502933502, "logits/rejected": -0.7025068402290344, "logps/chosen": -1.677915096282959, "logps/rejected": -1.9411128759384155, "loss": 1.8327, "nll_loss": 1.7774040699005127, "rewards/accuracies": 0.875, "rewards/chosen": -0.16779151558876038, "rewards/margins": 0.02631976269185543, "rewards/rejected": -0.19411127269268036, "step": 197 }, { "epoch": 0.5467725232999655, "grad_norm": 0.34831514954566956, "learning_rate": 4.901180902577549e-06, "log_odds_chosen": 0.3553253412246704, "log_odds_ratio": -0.5363969206809998, "logits/chosen": 0.32768774032592773, "logits/rejected": -0.8499129414558411, "logps/chosen": -1.6655187606811523, "logps/rejected": -1.9629868268966675, "loss": 1.8077, "nll_loss": 1.7540751695632935, "rewards/accuracies": 1.0, "rewards/chosen": -0.1665518581867218, "rewards/margins": 0.02974681742489338, "rewards/rejected": -0.19629870355129242, "step": 198 }, { "epoch": 0.5495340006903694, "grad_norm": 0.2638852894306183, "learning_rate": 4.8989375939823305e-06, "log_odds_chosen": 0.2459593415260315, "log_odds_ratio": -0.5942656993865967, "logits/chosen": 0.28430262207984924, "logits/rejected": -0.9709011316299438, "logps/chosen": -1.6272318363189697, "logps/rejected": -1.8355400562286377, "loss": 1.7824, "nll_loss": 1.7229478359222412, "rewards/accuracies": 0.875, "rewards/chosen": -0.16272318363189697, "rewards/margins": 0.020830810070037842, "rewards/rejected": -0.183554008603096, "step": 199 }, { "epoch": 0.5522954780807732, "grad_norm": 0.25321170687675476, "learning_rate": 4.896669632591652e-06, "log_odds_chosen": 0.4053685665130615, "log_odds_ratio": -0.5148746371269226, "logits/chosen": 0.26351821422576904, "logits/rejected": -0.6612182259559631, "logps/chosen": -1.6538773775100708, "logps/rejected": -1.9941370487213135, "loss": 1.8067, "nll_loss": 1.7551857233047485, "rewards/accuracies": 1.0, "rewards/chosen": -0.16538773477077484, "rewards/margins": 0.03402596712112427, "rewards/rejected": -0.1994137167930603, "step": 200 }, { "epoch": 0.5550569554711771, "grad_norm": 0.25087103247642517, "learning_rate": 4.894377041712327e-06, "log_odds_chosen": 0.2785293161869049, "log_odds_ratio": -0.5666120052337646, "logits/chosen": 0.31340447068214417, "logits/rejected": -0.8899152278900146, "logps/chosen": -1.7040454149246216, "logps/rejected": -1.9381954669952393, "loss": 1.8502, "nll_loss": 1.7935274839401245, "rewards/accuracies": 0.875, "rewards/chosen": -0.17040453851222992, "rewards/margins": 0.023415017873048782, "rewards/rejected": -0.1938195526599884, "step": 201 }, { "epoch": 0.557818432861581, "grad_norm": 0.25045764446258545, "learning_rate": 4.892059844904273e-06, "log_odds_chosen": 0.3241802752017975, "log_odds_ratio": -0.5491434335708618, "logits/chosen": 0.21476463973522186, "logits/rejected": -0.7644988894462585, "logps/chosen": -1.6328078508377075, "logps/rejected": -1.9039433002471924, "loss": 1.7911, "nll_loss": 1.7361786365509033, "rewards/accuracies": 1.0, "rewards/chosen": -0.16328078508377075, "rewards/margins": 0.027113551273941994, "rewards/rejected": -0.190394327044487, "step": 202 }, { "epoch": 0.5605799102519848, "grad_norm": 0.23429127037525177, "learning_rate": 4.889718065980272e-06, "log_odds_chosen": 0.3891683518886566, "log_odds_ratio": -0.5242507457733154, "logits/chosen": 0.25736698508262634, "logits/rejected": -0.9445868730545044, "logps/chosen": -1.7043213844299316, "logps/rejected": -2.0325913429260254, "loss": 1.8321, "nll_loss": 1.7796692848205566, "rewards/accuracies": 1.0, "rewards/chosen": -0.17043213546276093, "rewards/margins": 0.032826997339725494, "rewards/rejected": -0.20325914025306702, "step": 203 }, { "epoch": 0.5633413876423887, "grad_norm": 0.23629231750965118, "learning_rate": 4.8873517290057265e-06, "log_odds_chosen": 0.5146141052246094, "log_odds_ratio": -0.470738023519516, "logits/chosen": 0.21375544369220734, "logits/rejected": -0.9923663139343262, "logps/chosen": -1.6708019971847534, "logps/rejected": -2.108680248260498, "loss": 1.8157, "nll_loss": 1.7685816287994385, "rewards/accuracies": 1.0, "rewards/chosen": -0.16708020865917206, "rewards/margins": 0.04378781467676163, "rewards/rejected": -0.2108680158853531, "step": 204 }, { "epoch": 0.5661028650327925, "grad_norm": 0.2618404030799866, "learning_rate": 4.88496085829841e-06, "log_odds_chosen": 0.3119449019432068, "log_odds_ratio": -0.5510252714157104, "logits/chosen": 0.24412189424037933, "logits/rejected": -0.44529297947883606, "logps/chosen": -1.6430044174194336, "logps/rejected": -1.902493953704834, "loss": 1.787, "nll_loss": 1.73191237449646, "rewards/accuracies": 1.0, "rewards/chosen": -0.16430042684078217, "rewards/margins": 0.02594897150993347, "rewards/rejected": -0.19024939835071564, "step": 205 }, { "epoch": 0.5688643424231964, "grad_norm": 0.2525455057621002, "learning_rate": 4.882545478428219e-06, "log_odds_chosen": 0.4682312607765198, "log_odds_ratio": -0.5022274851799011, "logits/chosen": 0.31566041707992554, "logits/rejected": -0.9525866508483887, "logps/chosen": -1.62907075881958, "logps/rejected": -2.020961046218872, "loss": 1.7875, "nll_loss": 1.7372711896896362, "rewards/accuracies": 0.875, "rewards/chosen": -0.16290709376335144, "rewards/margins": 0.039189018309116364, "rewards/rejected": -0.2020961046218872, "step": 206 }, { "epoch": 0.5716258198136003, "grad_norm": 0.29324284195899963, "learning_rate": 4.880105614216917e-06, "log_odds_chosen": 0.3941385746002197, "log_odds_ratio": -0.5201365947723389, "logits/chosen": 0.3187817335128784, "logits/rejected": -0.8300870656967163, "logps/chosen": -1.800098180770874, "logps/rejected": -2.1378254890441895, "loss": 1.9198, "nll_loss": 1.8678245544433594, "rewards/accuracies": 1.0, "rewards/chosen": -0.18000982701778412, "rewards/margins": 0.033772725611925125, "rewards/rejected": -0.21378254890441895, "step": 207 }, { "epoch": 0.5743872972040042, "grad_norm": 0.2698252499103546, "learning_rate": 4.8776412907378845e-06, "log_odds_chosen": 0.32584843039512634, "log_odds_ratio": -0.5465385913848877, "logits/chosen": 0.34170591831207275, "logits/rejected": -0.9024736285209656, "logps/chosen": -1.7152948379516602, "logps/rejected": -1.9904066324234009, "loss": 1.8523, "nll_loss": 1.7976171970367432, "rewards/accuracies": 1.0, "rewards/chosen": -0.17152947187423706, "rewards/margins": 0.02751118130981922, "rewards/rejected": -0.19904065132141113, "step": 208 }, { "epoch": 0.577148774594408, "grad_norm": 0.24523906409740448, "learning_rate": 4.875152533315859e-06, "log_odds_chosen": 0.3781871199607849, "log_odds_ratio": -0.5249034762382507, "logits/chosen": 0.22711794078350067, "logits/rejected": -0.8147018551826477, "logps/chosen": -1.5907987356185913, "logps/rejected": -1.901723861694336, "loss": 1.7362, "nll_loss": 1.6837434768676758, "rewards/accuracies": 1.0, "rewards/chosen": -0.15907986462116241, "rewards/margins": 0.031092505902051926, "rewards/rejected": -0.19017238914966583, "step": 209 }, { "epoch": 0.5799102519848118, "grad_norm": 0.25919055938720703, "learning_rate": 4.872639367526672e-06, "log_odds_chosen": 0.39655864238739014, "log_odds_ratio": -0.5207105875015259, "logits/chosen": 0.26906242966651917, "logits/rejected": -0.7278945446014404, "logps/chosen": -1.6198142766952515, "logps/rejected": -1.9519634246826172, "loss": 1.7641, "nll_loss": 1.7119861841201782, "rewards/accuracies": 1.0, "rewards/chosen": -0.16198144853115082, "rewards/margins": 0.03321490436792374, "rewards/rejected": -0.19519636034965515, "step": 210 }, { "epoch": 0.5826717293752157, "grad_norm": 0.2771226763725281, "learning_rate": 4.870101819196992e-06, "log_odds_chosen": 0.33355456590652466, "log_odds_ratio": -0.5614390969276428, "logits/chosen": 0.23215994238853455, "logits/rejected": -0.7148041129112244, "logps/chosen": -1.6759493350982666, "logps/rejected": -1.9613184928894043, "loss": 1.8249, "nll_loss": 1.7687922716140747, "rewards/accuracies": 0.875, "rewards/chosen": -0.16759493947029114, "rewards/margins": 0.028536932542920113, "rewards/rejected": -0.1961318552494049, "step": 211 }, { "epoch": 0.5854332067656196, "grad_norm": 0.24981357157230377, "learning_rate": 4.8675399144040535e-06, "log_odds_chosen": 0.44918757677078247, "log_odds_ratio": -0.5007237792015076, "logits/chosen": 0.2401760071516037, "logits/rejected": -1.0861955881118774, "logps/chosen": -1.5890363454818726, "logps/rejected": -1.9614393711090088, "loss": 1.7223, "nll_loss": 1.6722639799118042, "rewards/accuracies": 1.0, "rewards/chosen": -0.15890364348888397, "rewards/margins": 0.03724028915166855, "rewards/rejected": -0.19614392518997192, "step": 212 }, { "epoch": 0.5881946841560235, "grad_norm": 0.2642802894115448, "learning_rate": 4.864953679475392e-06, "log_odds_chosen": 0.35757988691329956, "log_odds_ratio": -0.5399623513221741, "logits/chosen": 0.2965427339076996, "logits/rejected": -1.087183952331543, "logps/chosen": -1.645439863204956, "logps/rejected": -1.9395672082901, "loss": 1.7654, "nll_loss": 1.7114168405532837, "rewards/accuracies": 0.75, "rewards/chosen": -0.1645439863204956, "rewards/margins": 0.02941274270415306, "rewards/rejected": -0.19395673274993896, "step": 213 }, { "epoch": 0.5909561615464274, "grad_norm": 0.3119313418865204, "learning_rate": 4.862343140988573e-06, "log_odds_chosen": 0.3508715331554413, "log_odds_ratio": -0.5419243574142456, "logits/chosen": 0.2440934181213379, "logits/rejected": -0.9715545773506165, "logps/chosen": -1.691695213317871, "logps/rejected": -1.9900875091552734, "loss": 1.8129, "nll_loss": 1.75870680809021, "rewards/accuracies": 0.875, "rewards/chosen": -0.16916951537132263, "rewards/margins": 0.029839247465133667, "rewards/rejected": -0.1990087330341339, "step": 214 }, { "epoch": 0.5937176389368312, "grad_norm": 0.2919383943080902, "learning_rate": 4.859708325770919e-06, "log_odds_chosen": 0.32950735092163086, "log_odds_ratio": -0.5485372543334961, "logits/chosen": 0.314483106136322, "logits/rejected": -0.6885538697242737, "logps/chosen": -1.6899092197418213, "logps/rejected": -1.96530020236969, "loss": 1.8463, "nll_loss": 1.791460633277893, "rewards/accuracies": 0.875, "rewards/chosen": -0.16899092495441437, "rewards/margins": 0.02753911167383194, "rewards/rejected": -0.1965300291776657, "step": 215 }, { "epoch": 0.596479116327235, "grad_norm": 0.28394845128059387, "learning_rate": 4.857049260899233e-06, "log_odds_chosen": 0.37926068902015686, "log_odds_ratio": -0.5276373624801636, "logits/chosen": 0.21555066108703613, "logits/rejected": -0.8712125420570374, "logps/chosen": -1.565875768661499, "logps/rejected": -1.873572826385498, "loss": 1.7163, "nll_loss": 1.6635123491287231, "rewards/accuracies": 0.875, "rewards/chosen": -0.15658757090568542, "rewards/margins": 0.030769716948270798, "rewards/rejected": -0.18735727667808533, "step": 216 }, { "epoch": 0.5992405937176389, "grad_norm": 0.2573164105415344, "learning_rate": 4.854365973699519e-06, "log_odds_chosen": 0.26477721333503723, "log_odds_ratio": -0.5851221680641174, "logits/chosen": 0.2476685345172882, "logits/rejected": -0.7407121658325195, "logps/chosen": -1.657826542854309, "logps/rejected": -1.8721026182174683, "loss": 1.8129, "nll_loss": 1.7544105052947998, "rewards/accuracies": 0.625, "rewards/chosen": -0.16578267514705658, "rewards/margins": 0.021427594125270844, "rewards/rejected": -0.18721026182174683, "step": 217 }, { "epoch": 0.6020020711080428, "grad_norm": 0.2776831388473511, "learning_rate": 4.851658491746707e-06, "log_odds_chosen": 0.3577430248260498, "log_odds_ratio": -0.5342705249786377, "logits/chosen": 0.17239025235176086, "logits/rejected": -0.9049993753433228, "logps/chosen": -1.6406420469284058, "logps/rejected": -1.9406909942626953, "loss": 1.773, "nll_loss": 1.719523549079895, "rewards/accuracies": 0.875, "rewards/chosen": -0.1640642285346985, "rewards/margins": 0.030004894360899925, "rewards/rejected": -0.19406910240650177, "step": 218 }, { "epoch": 0.6047635484984467, "grad_norm": 0.2520454227924347, "learning_rate": 4.848926842864361e-06, "log_odds_chosen": 0.4466186463832855, "log_odds_ratio": -0.4973817467689514, "logits/chosen": 0.17908504605293274, "logits/rejected": -1.1680846214294434, "logps/chosen": -1.6161878108978271, "logps/rejected": -1.989192247390747, "loss": 1.7485, "nll_loss": 1.6987885236740112, "rewards/accuracies": 1.0, "rewards/chosen": -0.16161878407001495, "rewards/margins": 0.03730044513940811, "rewards/rejected": -0.19891922175884247, "step": 219 }, { "epoch": 0.6075250258888505, "grad_norm": 0.2567070722579956, "learning_rate": 4.846171055124401e-06, "log_odds_chosen": 0.28004616498947144, "log_odds_ratio": -0.5727553963661194, "logits/chosen": 0.2623513638973236, "logits/rejected": -0.7589821815490723, "logps/chosen": -1.692448377609253, "logps/rejected": -1.9293184280395508, "loss": 1.8107, "nll_loss": 1.7533810138702393, "rewards/accuracies": 1.0, "rewards/chosen": -0.16924485564231873, "rewards/margins": 0.023687003180384636, "rewards/rejected": -0.19293184578418732, "step": 220 }, { "epoch": 0.6102865032792544, "grad_norm": 0.2756041884422302, "learning_rate": 4.843391156846811e-06, "log_odds_chosen": 0.4713705778121948, "log_odds_ratio": -0.4867982268333435, "logits/chosen": 0.29524314403533936, "logits/rejected": -0.8037136793136597, "logps/chosen": -1.6270312070846558, "logps/rejected": -2.019744396209717, "loss": 1.7549, "nll_loss": 1.7062143087387085, "rewards/accuracies": 1.0, "rewards/chosen": -0.16270311176776886, "rewards/margins": 0.039271317422389984, "rewards/rejected": -0.20197445154190063, "step": 221 }, { "epoch": 0.6130479806696583, "grad_norm": 0.2521502375602722, "learning_rate": 4.8405871765993435e-06, "log_odds_chosen": 0.5144533514976501, "log_odds_ratio": -0.478085458278656, "logits/chosen": 0.1983458399772644, "logits/rejected": -0.9259991645812988, "logps/chosen": -1.681555151939392, "logps/rejected": -2.111989974975586, "loss": 1.8047, "nll_loss": 1.7569189071655273, "rewards/accuracies": 1.0, "rewards/chosen": -0.1681555211544037, "rewards/margins": 0.043043479323387146, "rewards/rejected": -0.21119900047779083, "step": 222 }, { "epoch": 0.6158094580600622, "grad_norm": 0.2711774706840515, "learning_rate": 4.837759143197237e-06, "log_odds_chosen": 0.29049259424209595, "log_odds_ratio": -0.5618168711662292, "logits/chosen": 0.18880629539489746, "logits/rejected": -1.0936201810836792, "logps/chosen": -1.6453745365142822, "logps/rejected": -1.8913843631744385, "loss": 1.7765, "nll_loss": 1.7203593254089355, "rewards/accuracies": 1.0, "rewards/chosen": -0.1645374447107315, "rewards/margins": 0.02460099384188652, "rewards/rejected": -0.18913844227790833, "step": 223 }, { "epoch": 0.618570935450466, "grad_norm": 0.2559034824371338, "learning_rate": 4.834907085702909e-06, "log_odds_chosen": 0.39943575859069824, "log_odds_ratio": -0.516412615776062, "logits/chosen": 0.22049731016159058, "logits/rejected": -0.741055428981781, "logps/chosen": -1.5809171199798584, "logps/rejected": -1.9103319644927979, "loss": 1.7174, "nll_loss": 1.6657910346984863, "rewards/accuracies": 1.0, "rewards/chosen": -0.1580917090177536, "rewards/margins": 0.032941486686468124, "rewards/rejected": -0.19103318452835083, "step": 224 }, { "epoch": 0.6213324128408698, "grad_norm": 0.2551354169845581, "learning_rate": 4.832031033425663e-06, "log_odds_chosen": 0.45995259284973145, "log_odds_ratio": -0.4912871718406677, "logits/chosen": 0.14968952536582947, "logits/rejected": -1.1501446962356567, "logps/chosen": -1.5380923748016357, "logps/rejected": -1.914685845375061, "loss": 1.6661, "nll_loss": 1.6169726848602295, "rewards/accuracies": 1.0, "rewards/chosen": -0.15380924940109253, "rewards/margins": 0.03765934333205223, "rewards/rejected": -0.19146858155727386, "step": 225 }, { "epoch": 0.6240938902312737, "grad_norm": 0.2664376199245453, "learning_rate": 4.829131015921386e-06, "log_odds_chosen": 0.3153410255908966, "log_odds_ratio": -0.5497522950172424, "logits/chosen": 0.353019654750824, "logits/rejected": -0.7054316997528076, "logps/chosen": -1.708608865737915, "logps/rejected": -1.975217342376709, "loss": 1.8398, "nll_loss": 1.784870982170105, "rewards/accuracies": 1.0, "rewards/chosen": -0.1708608865737915, "rewards/margins": 0.02666083350777626, "rewards/rejected": -0.19752173125743866, "step": 226 }, { "epoch": 0.6268553676216776, "grad_norm": 0.25821202993392944, "learning_rate": 4.826207062992245e-06, "log_odds_chosen": 0.44117486476898193, "log_odds_ratio": -0.5039249062538147, "logits/chosen": 0.22692835330963135, "logits/rejected": -0.9342468976974487, "logps/chosen": -1.7786613702774048, "logps/rejected": -2.1593728065490723, "loss": 1.8966, "nll_loss": 1.846253752708435, "rewards/accuracies": 1.0, "rewards/chosen": -0.1778661459684372, "rewards/margins": 0.038071148097515106, "rewards/rejected": -0.2159372866153717, "step": 227 }, { "epoch": 0.6296168450120815, "grad_norm": 0.28877097368240356, "learning_rate": 4.82325920468638e-06, "log_odds_chosen": 0.2263374626636505, "log_odds_ratio": -0.5921541452407837, "logits/chosen": 0.26566582918167114, "logits/rejected": -0.8801894187927246, "logps/chosen": -1.7435269355773926, "logps/rejected": -1.9335991144180298, "loss": 1.8735, "nll_loss": 1.8142577409744263, "rewards/accuracies": 0.875, "rewards/chosen": -0.17435269057750702, "rewards/margins": 0.01900722086429596, "rewards/rejected": -0.19335991144180298, "step": 228 }, { "epoch": 0.6323783224024854, "grad_norm": 0.286994069814682, "learning_rate": 4.820287471297598e-06, "log_odds_chosen": 0.34056615829467773, "log_odds_ratio": -0.5455049872398376, "logits/chosen": 0.15769648551940918, "logits/rejected": -0.9189969897270203, "logps/chosen": -1.612159252166748, "logps/rejected": -1.8956931829452515, "loss": 1.7497, "nll_loss": 1.6951847076416016, "rewards/accuracies": 0.875, "rewards/chosen": -0.1612159162759781, "rewards/margins": 0.028353393077850342, "rewards/rejected": -0.18956929445266724, "step": 229 }, { "epoch": 0.6351397997928891, "grad_norm": 0.25925716757774353, "learning_rate": 4.817291893365055e-06, "log_odds_chosen": 0.4459936022758484, "log_odds_ratio": -0.50335294008255, "logits/chosen": 0.03054118901491165, "logits/rejected": -1.0507367849349976, "logps/chosen": -1.4849401712417603, "logps/rejected": -1.8426207304000854, "loss": 1.6416, "nll_loss": 1.591292142868042, "rewards/accuracies": 1.0, "rewards/chosen": -0.14849400520324707, "rewards/margins": 0.035768061876297, "rewards/rejected": -0.18426206707954407, "step": 230 }, { "epoch": 0.637901277183293, "grad_norm": 0.2912357449531555, "learning_rate": 4.81427250167295e-06, "log_odds_chosen": 0.20216652750968933, "log_odds_ratio": -0.6004579663276672, "logits/chosen": 0.10540774464607239, "logits/rejected": -1.1034070253372192, "logps/chosen": -1.6417537927627563, "logps/rejected": -1.8084138631820679, "loss": 1.7758, "nll_loss": 1.715754508972168, "rewards/accuracies": 0.75, "rewards/chosen": -0.1641753911972046, "rewards/margins": 0.01666601374745369, "rewards/rejected": -0.1808413863182068, "step": 231 }, { "epoch": 0.6406627545736969, "grad_norm": 0.28148511052131653, "learning_rate": 4.811229327250204e-06, "log_odds_chosen": 0.40823695063591003, "log_odds_ratio": -0.5194427967071533, "logits/chosen": 0.19764533638954163, "logits/rejected": -1.0543047189712524, "logps/chosen": -1.68423330783844, "logps/rejected": -2.0337371826171875, "loss": 1.8128, "nll_loss": 1.760822057723999, "rewards/accuracies": 0.875, "rewards/chosen": -0.1684233546257019, "rewards/margins": 0.034950368106365204, "rewards/rejected": -0.20337370038032532, "step": 232 }, { "epoch": 0.6434242319641008, "grad_norm": 0.26084744930267334, "learning_rate": 4.8081624013701435e-06, "log_odds_chosen": 0.38514161109924316, "log_odds_ratio": -0.5289287567138672, "logits/chosen": 0.16184748709201813, "logits/rejected": -1.2881039381027222, "logps/chosen": -1.6542613506317139, "logps/rejected": -1.9764081239700317, "loss": 1.7775, "nll_loss": 1.7246254682540894, "rewards/accuracies": 1.0, "rewards/chosen": -0.1654261350631714, "rewards/margins": 0.03221467137336731, "rewards/rejected": -0.1976408213376999, "step": 233 }, { "epoch": 0.6461857093545047, "grad_norm": 0.26910293102264404, "learning_rate": 4.805071755550177e-06, "log_odds_chosen": 0.37266138195991516, "log_odds_ratio": -0.5266379117965698, "logits/chosen": 0.2432277947664261, "logits/rejected": -1.2453206777572632, "logps/chosen": -1.676849603652954, "logps/rejected": -1.9903044700622559, "loss": 1.7904, "nll_loss": 1.7377511262893677, "rewards/accuracies": 1.0, "rewards/chosen": -0.16768495738506317, "rewards/margins": 0.031345486640930176, "rewards/rejected": -0.19903044402599335, "step": 234 }, { "epoch": 0.6489471867449085, "grad_norm": 0.26596587896347046, "learning_rate": 4.8019574215514705e-06, "log_odds_chosen": 0.320311963558197, "log_odds_ratio": -0.5477744340896606, "logits/chosen": 0.1768263727426529, "logits/rejected": -0.8458906412124634, "logps/chosen": -1.6462191343307495, "logps/rejected": -1.9113428592681885, "loss": 1.7877, "nll_loss": 1.7328777313232422, "rewards/accuracies": 1.0, "rewards/chosen": -0.16462190449237823, "rewards/margins": 0.026512393727898598, "rewards/rejected": -0.19113430380821228, "step": 235 }, { "epoch": 0.6517086641353124, "grad_norm": 0.23039527237415314, "learning_rate": 4.7988194313786275e-06, "log_odds_chosen": 0.3400653004646301, "log_odds_ratio": -0.5446557998657227, "logits/chosen": 0.17479516565799713, "logits/rejected": -0.9590482711791992, "logps/chosen": -1.5938714742660522, "logps/rejected": -1.8736517429351807, "loss": 1.706, "nll_loss": 1.6515262126922607, "rewards/accuracies": 1.0, "rewards/chosen": -0.15938714146614075, "rewards/margins": 0.027978049591183662, "rewards/rejected": -0.18736517429351807, "step": 236 }, { "epoch": 0.6544701415257163, "grad_norm": 0.25597652792930603, "learning_rate": 4.795657817279349e-06, "log_odds_chosen": 0.35750117897987366, "log_odds_ratio": -0.5432247519493103, "logits/chosen": 0.10542917996644974, "logits/rejected": -1.0094666481018066, "logps/chosen": -1.5345871448516846, "logps/rejected": -1.8278987407684326, "loss": 1.6735, "nll_loss": 1.619143009185791, "rewards/accuracies": 0.875, "rewards/chosen": -0.15345871448516846, "rewards/margins": 0.02933114767074585, "rewards/rejected": -0.1827898770570755, "step": 237 }, { "epoch": 0.6572316189161201, "grad_norm": 0.27119508385658264, "learning_rate": 4.7924726117441135e-06, "log_odds_chosen": 0.4193371534347534, "log_odds_ratio": -0.5230008363723755, "logits/chosen": 0.08888234198093414, "logits/rejected": -1.0837329626083374, "logps/chosen": -1.6472194194793701, "logps/rejected": -1.9990077018737793, "loss": 1.7734, "nll_loss": 1.7211425304412842, "rewards/accuracies": 0.875, "rewards/chosen": -0.16472195088863373, "rewards/margins": 0.035178832709789276, "rewards/rejected": -0.1999007910490036, "step": 238 }, { "epoch": 0.659993096306524, "grad_norm": 0.26720085740089417, "learning_rate": 4.789263847505835e-06, "log_odds_chosen": 0.4661468267440796, "log_odds_ratio": -0.49137797951698303, "logits/chosen": 0.14774960279464722, "logits/rejected": -0.9653093814849854, "logps/chosen": -1.634422779083252, "logps/rejected": -2.0227272510528564, "loss": 1.7557, "nll_loss": 1.706547737121582, "rewards/accuracies": 1.0, "rewards/chosen": -0.16344228386878967, "rewards/margins": 0.038830459117889404, "rewards/rejected": -0.20227274298667908, "step": 239 }, { "epoch": 0.6627545736969278, "grad_norm": 0.26344889402389526, "learning_rate": 4.786031557539532e-06, "log_odds_chosen": 0.620901882648468, "log_odds_ratio": -0.43870899081230164, "logits/chosen": 0.1005333811044693, "logits/rejected": -1.179240345954895, "logps/chosen": -1.5813204050064087, "logps/rejected": -2.10603666305542, "loss": 1.7039, "nll_loss": 1.66001558303833, "rewards/accuracies": 1.0, "rewards/chosen": -0.15813204646110535, "rewards/margins": 0.05247163027524948, "rewards/rejected": -0.21060366928577423, "step": 240 }, { "epoch": 0.6655160510873317, "grad_norm": 0.2564525306224823, "learning_rate": 4.782775775061983e-06, "log_odds_chosen": 0.3930521011352539, "log_odds_ratio": -0.5189710855484009, "logits/chosen": 0.17282657325267792, "logits/rejected": -0.9231711626052856, "logps/chosen": -1.5853371620178223, "logps/rejected": -1.9094042778015137, "loss": 1.704, "nll_loss": 1.6520962715148926, "rewards/accuracies": 1.0, "rewards/chosen": -0.1585337519645691, "rewards/margins": 0.03240669518709183, "rewards/rejected": -0.19094042479991913, "step": 241 }, { "epoch": 0.6682775284777356, "grad_norm": 0.23007996380329132, "learning_rate": 4.779496533531393e-06, "log_odds_chosen": 0.49815472960472107, "log_odds_ratio": -0.47664713859558105, "logits/chosen": 0.16657163202762604, "logits/rejected": -1.0556104183197021, "logps/chosen": -1.5395145416259766, "logps/rejected": -1.9480966329574585, "loss": 1.6541, "nll_loss": 1.6063947677612305, "rewards/accuracies": 1.0, "rewards/chosen": -0.15395145118236542, "rewards/margins": 0.04085820913314819, "rewards/rejected": -0.19480964541435242, "step": 242 }, { "epoch": 0.6710390058681395, "grad_norm": 0.2536675035953522, "learning_rate": 4.7761938666470405e-06, "log_odds_chosen": 0.3562057316303253, "log_odds_ratio": -0.5365646481513977, "logits/chosen": 0.18961578607559204, "logits/rejected": -1.129898190498352, "logps/chosen": -1.6646479368209839, "logps/rejected": -1.964529037475586, "loss": 1.7902, "nll_loss": 1.736580729484558, "rewards/accuracies": 1.0, "rewards/chosen": -0.16646480560302734, "rewards/margins": 0.029988115653395653, "rewards/rejected": -0.19645293056964874, "step": 243 }, { "epoch": 0.6738004832585434, "grad_norm": 0.241102397441864, "learning_rate": 4.7728678083489375e-06, "log_odds_chosen": 0.287707656621933, "log_odds_ratio": -0.5653382539749146, "logits/chosen": 0.18390116095542908, "logits/rejected": -1.0134061574935913, "logps/chosen": -1.5824017524719238, "logps/rejected": -1.8214263916015625, "loss": 1.707, "nll_loss": 1.6505118608474731, "rewards/accuracies": 0.875, "rewards/chosen": -0.1582401841878891, "rewards/margins": 0.02390245348215103, "rewards/rejected": -0.18214263021945953, "step": 244 }, { "epoch": 0.6765619606489471, "grad_norm": 0.24698680639266968, "learning_rate": 4.7695183928174804e-06, "log_odds_chosen": 0.426491916179657, "log_odds_ratio": -0.5090312361717224, "logits/chosen": 0.09436272829771042, "logits/rejected": -1.081894040107727, "logps/chosen": -1.614341378211975, "logps/rejected": -1.9695953130722046, "loss": 1.7422, "nll_loss": 1.6912728548049927, "rewards/accuracies": 0.875, "rewards/chosen": -0.161434143781662, "rewards/margins": 0.03552539646625519, "rewards/rejected": -0.19695955514907837, "step": 245 }, { "epoch": 0.679323438039351, "grad_norm": 0.25812989473342896, "learning_rate": 4.766145654473096e-06, "log_odds_chosen": 0.33238697052001953, "log_odds_ratio": -0.5477334260940552, "logits/chosen": 0.17383219301700592, "logits/rejected": -1.1996134519577026, "logps/chosen": -1.6853795051574707, "logps/rejected": -1.9657583236694336, "loss": 1.797, "nll_loss": 1.7422523498535156, "rewards/accuracies": 0.875, "rewards/chosen": -0.1685379445552826, "rewards/margins": 0.028037890791893005, "rewards/rejected": -0.1965758353471756, "step": 246 }, { "epoch": 0.6820849154297549, "grad_norm": 0.25600266456604004, "learning_rate": 4.762749627975888e-06, "log_odds_chosen": 0.3794538974761963, "log_odds_ratio": -0.5263408422470093, "logits/chosen": 0.07988805323839188, "logits/rejected": -1.091023325920105, "logps/chosen": -1.5263158082962036, "logps/rejected": -1.8329474925994873, "loss": 1.6541, "nll_loss": 1.6014518737792969, "rewards/accuracies": 1.0, "rewards/chosen": -0.15263158082962036, "rewards/margins": 0.030663155019283295, "rewards/rejected": -0.18329472839832306, "step": 247 }, { "epoch": 0.6848463928201588, "grad_norm": 0.24254651367664337, "learning_rate": 4.7593303482252835e-06, "log_odds_chosen": 0.34609463810920715, "log_odds_ratio": -0.541566014289856, "logits/chosen": 0.14708423614501953, "logits/rejected": -0.7518667578697205, "logps/chosen": -1.6179258823394775, "logps/rejected": -1.905765414237976, "loss": 1.7327, "nll_loss": 1.6784985065460205, "rewards/accuracies": 1.0, "rewards/chosen": -0.1617926061153412, "rewards/margins": 0.02878393605351448, "rewards/rejected": -0.19057652354240417, "step": 248 }, { "epoch": 0.6876078702105627, "grad_norm": 0.22665560245513916, "learning_rate": 4.755887850359673e-06, "log_odds_chosen": 0.5504661798477173, "log_odds_ratio": -0.4719165861606598, "logits/chosen": 0.09460557997226715, "logits/rejected": -1.225687861442566, "logps/chosen": -1.4793003797531128, "logps/rejected": -1.9250984191894531, "loss": 1.6118, "nll_loss": 1.5646475553512573, "rewards/accuracies": 0.875, "rewards/chosen": -0.1479300558567047, "rewards/margins": 0.04457978904247284, "rewards/rejected": -0.19250985980033875, "step": 249 }, { "epoch": 0.6903693476009665, "grad_norm": 0.2510414719581604, "learning_rate": 4.752422169756048e-06, "log_odds_chosen": 0.3988419771194458, "log_odds_ratio": -0.5172514915466309, "logits/chosen": 0.16052858531475067, "logits/rejected": -1.2513781785964966, "logps/chosen": -1.6576679944992065, "logps/rejected": -1.9923875331878662, "loss": 1.7851, "nll_loss": 1.7334227561950684, "rewards/accuracies": 1.0, "rewards/chosen": -0.16576679050922394, "rewards/margins": 0.033471960574388504, "rewards/rejected": -0.19923876225948334, "step": 250 }, { "epoch": 0.6931308249913704, "grad_norm": 0.2423536330461502, "learning_rate": 4.748933342029639e-06, "log_odds_chosen": 0.5282813310623169, "log_odds_ratio": -0.4706994891166687, "logits/chosen": 0.10225804150104523, "logits/rejected": -1.2281630039215088, "logps/chosen": -1.5280183553695679, "logps/rejected": -1.9655184745788574, "loss": 1.6654, "nll_loss": 1.6183708906173706, "rewards/accuracies": 1.0, "rewards/chosen": -0.15280182659626007, "rewards/margins": 0.04375001788139343, "rewards/rejected": -0.1965518444776535, "step": 251 }, { "epoch": 0.6958923023817742, "grad_norm": 0.23662064969539642, "learning_rate": 4.745421403033548e-06, "log_odds_chosen": 0.36522558331489563, "log_odds_ratio": -0.5290880799293518, "logits/chosen": 0.10924719274044037, "logits/rejected": -1.0550241470336914, "logps/chosen": -1.6013975143432617, "logps/rejected": -1.901705026626587, "loss": 1.7128, "nll_loss": 1.6598960161209106, "rewards/accuracies": 1.0, "rewards/chosen": -0.1601397693157196, "rewards/margins": 0.0300307534635067, "rewards/rejected": -0.1901705265045166, "step": 252 }, { "epoch": 0.6986537797721781, "grad_norm": 0.2725629508495331, "learning_rate": 4.741886388858384e-06, "log_odds_chosen": 0.31865060329437256, "log_odds_ratio": -0.5531626343727112, "logits/chosen": 0.15934640169143677, "logits/rejected": -1.1095128059387207, "logps/chosen": -1.6118751764297485, "logps/rejected": -1.871192455291748, "loss": 1.7355, "nll_loss": 1.6801382303237915, "rewards/accuracies": 1.0, "rewards/chosen": -0.1611875295639038, "rewards/margins": 0.02593172900378704, "rewards/rejected": -0.187119260430336, "step": 253 }, { "epoch": 0.701415257162582, "grad_norm": 0.2601236402988434, "learning_rate": 4.738328335831883e-06, "log_odds_chosen": 0.3496936559677124, "log_odds_ratio": -0.5388615131378174, "logits/chosen": 0.12353205680847168, "logits/rejected": -1.3011374473571777, "logps/chosen": -1.5815935134887695, "logps/rejected": -1.8707093000411987, "loss": 1.7024, "nll_loss": 1.6484651565551758, "rewards/accuracies": 1.0, "rewards/chosen": -0.15815936028957367, "rewards/margins": 0.028911583125591278, "rewards/rejected": -0.18707093596458435, "step": 254 }, { "epoch": 0.7041767345529858, "grad_norm": 0.2376219779253006, "learning_rate": 4.734747280518549e-06, "log_odds_chosen": 0.4198550879955292, "log_odds_ratio": -0.5078388452529907, "logits/chosen": 0.030534474179148674, "logits/rejected": -1.399457573890686, "logps/chosen": -1.6249712705612183, "logps/rejected": -1.9735503196716309, "loss": 1.7447, "nll_loss": 1.693869948387146, "rewards/accuracies": 1.0, "rewards/chosen": -0.1624971330165863, "rewards/margins": 0.034857891499996185, "rewards/rejected": -0.19735503196716309, "step": 255 }, { "epoch": 0.7069382119433897, "grad_norm": 0.23082926869392395, "learning_rate": 4.7311432597192655e-06, "log_odds_chosen": 0.368667870759964, "log_odds_ratio": -0.5342724323272705, "logits/chosen": 0.03557804599404335, "logits/rejected": -1.3495283126831055, "logps/chosen": -1.6255284547805786, "logps/rejected": -1.9313668012619019, "loss": 1.7347, "nll_loss": 1.6812456846237183, "rewards/accuracies": 0.875, "rewards/chosen": -0.1625528484582901, "rewards/margins": 0.030583834275603294, "rewards/rejected": -0.19313669204711914, "step": 256 }, { "epoch": 0.7096996893337936, "grad_norm": 0.2494223415851593, "learning_rate": 4.72751631047092e-06, "log_odds_chosen": 0.5292459726333618, "log_odds_ratio": -0.4693402051925659, "logits/chosen": -0.008031047880649567, "logits/rejected": -1.0098199844360352, "logps/chosen": -1.6526001691818237, "logps/rejected": -2.0972676277160645, "loss": 1.7807, "nll_loss": 1.7337857484817505, "rewards/accuracies": 1.0, "rewards/chosen": -0.16526003181934357, "rewards/margins": 0.044466737657785416, "rewards/rejected": -0.2097267508506775, "step": 257 }, { "epoch": 0.7124611667241975, "grad_norm": 0.24077369272708893, "learning_rate": 4.72386647004603e-06, "log_odds_chosen": 0.37760159373283386, "log_odds_ratio": -0.5266671180725098, "logits/chosen": 0.1133025735616684, "logits/rejected": -1.0056575536727905, "logps/chosen": -1.6364243030548096, "logps/rejected": -1.9491901397705078, "loss": 1.7621, "nll_loss": 1.7094427347183228, "rewards/accuracies": 1.0, "rewards/chosen": -0.16364243626594543, "rewards/margins": 0.03127656877040863, "rewards/rejected": -0.19491900503635406, "step": 258 }, { "epoch": 0.7152226441146013, "grad_norm": 0.24284473061561584, "learning_rate": 4.720193775952352e-06, "log_odds_chosen": 0.2508072853088379, "log_odds_ratio": -0.5792805552482605, "logits/chosen": 0.11287318915128708, "logits/rejected": -1.0705785751342773, "logps/chosen": -1.6278916597366333, "logps/rejected": -1.833707332611084, "loss": 1.7407, "nll_loss": 1.6827445030212402, "rewards/accuracies": 1.0, "rewards/chosen": -0.16278916597366333, "rewards/margins": 0.02058156207203865, "rewards/rejected": -0.18337073922157288, "step": 259 }, { "epoch": 0.7179841215050051, "grad_norm": 0.2317313551902771, "learning_rate": 4.716498265932501e-06, "log_odds_chosen": 0.5105437636375427, "log_odds_ratio": -0.4731099009513855, "logits/chosen": -0.00561100710183382, "logits/rejected": -1.0285159349441528, "logps/chosen": -1.4508588314056396, "logps/rejected": -1.8631662130355835, "loss": 1.5697, "nll_loss": 1.5224125385284424, "rewards/accuracies": 1.0, "rewards/chosen": -0.1450859010219574, "rewards/margins": 0.04123072326183319, "rewards/rejected": -0.1863166093826294, "step": 260 }, { "epoch": 0.720745598895409, "grad_norm": 0.25082987546920776, "learning_rate": 4.712779977963559e-06, "log_odds_chosen": 0.3830725848674774, "log_odds_ratio": -0.5252867937088013, "logits/chosen": 0.09645551443099976, "logits/rejected": -0.733791172504425, "logps/chosen": -1.56308114528656, "logps/rejected": -1.8771851062774658, "loss": 1.695, "nll_loss": 1.642515778541565, "rewards/accuracies": 0.875, "rewards/chosen": -0.156308114528656, "rewards/margins": 0.031410399824380875, "rewards/rejected": -0.18771851062774658, "step": 261 }, { "epoch": 0.7235070762858129, "grad_norm": 0.22368477284908295, "learning_rate": 4.7090389502566884e-06, "log_odds_chosen": 0.48179319500923157, "log_odds_ratio": -0.4859163165092468, "logits/chosen": 0.042292360216379166, "logits/rejected": -1.0789254903793335, "logps/chosen": -1.5697331428527832, "logps/rejected": -1.9673210382461548, "loss": 1.6673, "nll_loss": 1.61872136592865, "rewards/accuracies": 1.0, "rewards/chosen": -0.15697331726551056, "rewards/margins": 0.039758771657943726, "rewards/rejected": -0.19673210382461548, "step": 262 }, { "epoch": 0.7262685536762168, "grad_norm": 0.22603513300418854, "learning_rate": 4.705275221256738e-06, "log_odds_chosen": 0.3837694227695465, "log_odds_ratio": -0.5233887434005737, "logits/chosen": 0.11829128116369247, "logits/rejected": -1.0814515352249146, "logps/chosen": -1.612618088722229, "logps/rejected": -1.9323992729187012, "loss": 1.7336, "nll_loss": 1.6812975406646729, "rewards/accuracies": 0.875, "rewards/chosen": -0.16126182675361633, "rewards/margins": 0.03197810798883438, "rewards/rejected": -0.1932399421930313, "step": 263 }, { "epoch": 0.7290300310666207, "grad_norm": 0.23297946155071259, "learning_rate": 4.701488829641845e-06, "log_odds_chosen": 0.3633509874343872, "log_odds_ratio": -0.5302541255950928, "logits/chosen": 0.08365479856729507, "logits/rejected": -1.0365325212478638, "logps/chosen": -1.5728099346160889, "logps/rejected": -1.870044231414795, "loss": 1.6965, "nll_loss": 1.6434751749038696, "rewards/accuracies": 1.0, "rewards/chosen": -0.15728099644184113, "rewards/margins": 0.02972342073917389, "rewards/rejected": -0.18700441718101501, "step": 264 }, { "epoch": 0.7317915084570245, "grad_norm": 0.215961292386055, "learning_rate": 4.697679814323044e-06, "log_odds_chosen": 0.35131096839904785, "log_odds_ratio": -0.540593147277832, "logits/chosen": 0.052778296172618866, "logits/rejected": -1.2138348817825317, "logps/chosen": -1.6011123657226562, "logps/rejected": -1.8878912925720215, "loss": 1.7054, "nll_loss": 1.6513090133666992, "rewards/accuracies": 1.0, "rewards/chosen": -0.16011124849319458, "rewards/margins": 0.028677886351943016, "rewards/rejected": -0.18878912925720215, "step": 265 }, { "epoch": 0.7345529858474283, "grad_norm": 0.2192918211221695, "learning_rate": 4.693848214443858e-06, "log_odds_chosen": 0.3489964008331299, "log_odds_ratio": -0.538882315158844, "logits/chosen": 0.0374111570417881, "logits/rejected": -1.4135199785232544, "logps/chosen": -1.66756010055542, "logps/rejected": -1.9600346088409424, "loss": 1.7678, "nll_loss": 1.713918924331665, "rewards/accuracies": 0.875, "rewards/chosen": -0.1667560189962387, "rewards/margins": 0.029247449710965157, "rewards/rejected": -0.19600346684455872, "step": 266 }, { "epoch": 0.7373144632378322, "grad_norm": 0.23613475263118744, "learning_rate": 4.689994069379905e-06, "log_odds_chosen": 0.6149622797966003, "log_odds_ratio": -0.43481168150901794, "logits/chosen": 0.08960134536027908, "logits/rejected": -1.5427201986312866, "logps/chosen": -1.5492388010025024, "logps/rejected": -2.061069965362549, "loss": 1.6539, "nll_loss": 1.6104521751403809, "rewards/accuracies": 1.0, "rewards/chosen": -0.15492388606071472, "rewards/margins": 0.05118309706449509, "rewards/rejected": -0.2061069756746292, "step": 267 }, { "epoch": 0.7400759406282361, "grad_norm": 0.22785541415214539, "learning_rate": 4.686117418738489e-06, "log_odds_chosen": 0.46340838074684143, "log_odds_ratio": -0.4908517003059387, "logits/chosen": 0.09018822759389877, "logits/rejected": -1.188814640045166, "logps/chosen": -1.607946753501892, "logps/rejected": -1.9937546253204346, "loss": 1.7214, "nll_loss": 1.6723337173461914, "rewards/accuracies": 1.0, "rewards/chosen": -0.1607946753501892, "rewards/margins": 0.03858078643679619, "rewards/rejected": -0.1993754655122757, "step": 268 }, { "epoch": 0.74283741801864, "grad_norm": 0.20759011805057526, "learning_rate": 4.6822183023581945e-06, "log_odds_chosen": 0.39548274874687195, "log_odds_ratio": -0.5299872756004333, "logits/chosen": 0.08991736173629761, "logits/rejected": -1.5129450559616089, "logps/chosen": -1.5805068016052246, "logps/rejected": -1.9086334705352783, "loss": 1.705, "nll_loss": 1.6519887447357178, "rewards/accuracies": 0.875, "rewards/chosen": -0.15805068612098694, "rewards/margins": 0.03281266242265701, "rewards/rejected": -0.19086334109306335, "step": 269 }, { "epoch": 0.7455988954090439, "grad_norm": 0.21977882087230682, "learning_rate": 4.678296760308474e-06, "log_odds_chosen": 0.2966475486755371, "log_odds_ratio": -0.5605870485305786, "logits/chosen": -0.026812348514795303, "logits/rejected": -1.3540985584259033, "logps/chosen": -1.5566442012786865, "logps/rejected": -1.7983969449996948, "loss": 1.6709, "nll_loss": 1.6148244142532349, "rewards/accuracies": 1.0, "rewards/chosen": -0.15566441416740417, "rewards/margins": 0.024175278842449188, "rewards/rejected": -0.17983968555927277, "step": 270 }, { "epoch": 0.7483603727994477, "grad_norm": 0.24105672538280487, "learning_rate": 4.674352832889239e-06, "log_odds_chosen": 0.5675957798957825, "log_odds_ratio": -0.4524817168712616, "logits/chosen": 0.017950108274817467, "logits/rejected": -1.1157424449920654, "logps/chosen": -1.617455005645752, "logps/rejected": -2.094006299972534, "loss": 1.7444, "nll_loss": 1.6991225481033325, "rewards/accuracies": 1.0, "rewards/chosen": -0.16174551844596863, "rewards/margins": 0.0476551279425621, "rewards/rejected": -0.20940065383911133, "step": 271 }, { "epoch": 0.7511218501898516, "grad_norm": 0.20193441212177277, "learning_rate": 4.670386560630446e-06, "log_odds_chosen": 0.4167816638946533, "log_odds_ratio": -0.5149449110031128, "logits/chosen": -0.06302893161773682, "logits/rejected": -1.2852458953857422, "logps/chosen": -1.538407325744629, "logps/rejected": -1.8780553340911865, "loss": 1.6523, "nll_loss": 1.600759744644165, "rewards/accuracies": 0.875, "rewards/chosen": -0.15384072065353394, "rewards/margins": 0.03396480157971382, "rewards/rejected": -0.18780553340911865, "step": 272 }, { "epoch": 0.7538833275802554, "grad_norm": 0.2135000228881836, "learning_rate": 4.66639798429168e-06, "log_odds_chosen": 0.5502100586891174, "log_odds_ratio": -0.45903506875038147, "logits/chosen": 0.02270820550620556, "logits/rejected": -1.4955902099609375, "logps/chosen": -1.6026809215545654, "logps/rejected": -2.0662648677825928, "loss": 1.6999, "nll_loss": 1.6539511680603027, "rewards/accuracies": 1.0, "rewards/chosen": -0.1602681279182434, "rewards/margins": 0.0463583879172802, "rewards/rejected": -0.2066265046596527, "step": 273 }, { "epoch": 0.7566448049706593, "grad_norm": 0.23999927937984467, "learning_rate": 4.6623871448617345e-06, "log_odds_chosen": 0.3460523784160614, "log_odds_ratio": -0.5406045913696289, "logits/chosen": -0.06589086353778839, "logits/rejected": -1.3238555192947388, "logps/chosen": -1.5511431694030762, "logps/rejected": -1.8326784372329712, "loss": 1.6633, "nll_loss": 1.6092641353607178, "rewards/accuracies": 0.875, "rewards/chosen": -0.1551143229007721, "rewards/margins": 0.028153544291853905, "rewards/rejected": -0.18326784670352936, "step": 274 }, { "epoch": 0.7594062823610632, "grad_norm": 0.2286817878484726, "learning_rate": 4.6583540835581885e-06, "log_odds_chosen": 0.4262576103210449, "log_odds_ratio": -0.5114239454269409, "logits/chosen": -0.04603245109319687, "logits/rejected": -1.176690936088562, "logps/chosen": -1.551270842552185, "logps/rejected": -1.902989387512207, "loss": 1.6683, "nll_loss": 1.6171845197677612, "rewards/accuracies": 1.0, "rewards/chosen": -0.15512707829475403, "rewards/margins": 0.03517187386751175, "rewards/rejected": -0.19029895961284637, "step": 275 }, { "epoch": 0.762167759751467, "grad_norm": 0.20076729357242584, "learning_rate": 4.654298841826988e-06, "log_odds_chosen": 0.35074901580810547, "log_odds_ratio": -0.5370126962661743, "logits/chosen": -0.04673296585679054, "logits/rejected": -1.0621964931488037, "logps/chosen": -1.4891098737716675, "logps/rejected": -1.7715747356414795, "loss": 1.5999, "nll_loss": 1.5461980104446411, "rewards/accuracies": 1.0, "rewards/chosen": -0.1489109843969345, "rewards/margins": 0.02824649028480053, "rewards/rejected": -0.1771574765443802, "step": 276 }, { "epoch": 0.7649292371418709, "grad_norm": 0.2110532820224762, "learning_rate": 4.6502214613420164e-06, "log_odds_chosen": 0.6417544484138489, "log_odds_ratio": -0.4257214069366455, "logits/chosen": -0.04618730768561363, "logits/rejected": -1.1318330764770508, "logps/chosen": -1.457794189453125, "logps/rejected": -1.9827320575714111, "loss": 1.5723, "nll_loss": 1.5297592878341675, "rewards/accuracies": 1.0, "rewards/chosen": -0.14577943086624146, "rewards/margins": 0.052493780851364136, "rewards/rejected": -0.1982732117176056, "step": 277 }, { "epoch": 0.7676907145322748, "grad_norm": 0.23329485952854156, "learning_rate": 4.646121984004666e-06, "log_odds_chosen": 0.5122417211532593, "log_odds_ratio": -0.47840094566345215, "logits/chosen": -0.014363419264554977, "logits/rejected": -1.0884288549423218, "logps/chosen": -1.582539677619934, "logps/rejected": -2.0093653202056885, "loss": 1.7056, "nll_loss": 1.6577521562576294, "rewards/accuracies": 1.0, "rewards/chosen": -0.15825395286083221, "rewards/margins": 0.042682573199272156, "rewards/rejected": -0.20093654096126556, "step": 278 }, { "epoch": 0.7704521919226787, "grad_norm": 0.21924971044063568, "learning_rate": 4.642000451943409e-06, "log_odds_chosen": 0.4158315062522888, "log_odds_ratio": -0.5117031335830688, "logits/chosen": 0.08326566219329834, "logits/rejected": -1.0792198181152344, "logps/chosen": -1.6467266082763672, "logps/rejected": -1.9971718788146973, "loss": 1.7532, "nll_loss": 1.7020772695541382, "rewards/accuracies": 1.0, "rewards/chosen": -0.16467267274856567, "rewards/margins": 0.03504452481865883, "rewards/rejected": -0.199717178940773, "step": 279 }, { "epoch": 0.7732136693130826, "grad_norm": 0.2058788388967514, "learning_rate": 4.637856907513366e-06, "log_odds_chosen": 0.5298818945884705, "log_odds_ratio": -0.4738863706588745, "logits/chosen": -0.052150044590234756, "logits/rejected": -1.148370623588562, "logps/chosen": -1.6041690111160278, "logps/rejected": -2.045445680618286, "loss": 1.697, "nll_loss": 1.6495710611343384, "rewards/accuracies": 1.0, "rewards/chosen": -0.16041693091392517, "rewards/margins": 0.04412766546010971, "rewards/rejected": -0.2045445740222931, "step": 280 }, { "epoch": 0.7759751467034863, "grad_norm": 0.19594821333885193, "learning_rate": 4.633691393295865e-06, "log_odds_chosen": 0.33417803049087524, "log_odds_ratio": -0.5438077449798584, "logits/chosen": -0.02928408607840538, "logits/rejected": -1.2465791702270508, "logps/chosen": -1.6147441864013672, "logps/rejected": -1.8932818174362183, "loss": 1.7171, "nll_loss": 1.6626923084259033, "rewards/accuracies": 0.875, "rewards/chosen": -0.16147442162036896, "rewards/margins": 0.027853764593601227, "rewards/rejected": -0.18932819366455078, "step": 281 }, { "epoch": 0.7787366240938902, "grad_norm": 0.2078380137681961, "learning_rate": 4.629503952098011e-06, "log_odds_chosen": 0.6047598123550415, "log_odds_ratio": -0.4436852037906647, "logits/chosen": -0.0367959700524807, "logits/rejected": -1.4540506601333618, "logps/chosen": -1.5447849035263062, "logps/rejected": -2.0499391555786133, "loss": 1.6528, "nll_loss": 1.6084190607070923, "rewards/accuracies": 1.0, "rewards/chosen": -0.1544785052537918, "rewards/margins": 0.050515398383140564, "rewards/rejected": -0.20499390363693237, "step": 282 }, { "epoch": 0.7814981014842941, "grad_norm": 0.20601390302181244, "learning_rate": 4.6252946269522406e-06, "log_odds_chosen": 0.38651254773139954, "log_odds_ratio": -0.5361747741699219, "logits/chosen": -0.06736142188310623, "logits/rejected": -1.498019814491272, "logps/chosen": -1.5710289478302002, "logps/rejected": -1.8959977626800537, "loss": 1.6776, "nll_loss": 1.6239646673202515, "rewards/accuracies": 0.75, "rewards/chosen": -0.15710289776325226, "rewards/margins": 0.03249688073992729, "rewards/rejected": -0.18959978222846985, "step": 283 }, { "epoch": 0.784259578874698, "grad_norm": 0.20410317182540894, "learning_rate": 4.621063461115882e-06, "log_odds_chosen": 0.36583784222602844, "log_odds_ratio": -0.5439432263374329, "logits/chosen": -0.001009071245789528, "logits/rejected": -1.3512407541275024, "logps/chosen": -1.6412224769592285, "logps/rejected": -1.9503740072250366, "loss": 1.7443, "nll_loss": 1.6899352073669434, "rewards/accuracies": 0.875, "rewards/chosen": -0.16412223875522614, "rewards/margins": 0.030915159732103348, "rewards/rejected": -0.19503742456436157, "step": 284 }, { "epoch": 0.7870210562651019, "grad_norm": 0.2131362408399582, "learning_rate": 4.6168104980707105e-06, "log_odds_chosen": 0.44328027963638306, "log_odds_ratio": -0.49936914443969727, "logits/chosen": -0.058265671133995056, "logits/rejected": -1.3667389154434204, "logps/chosen": -1.5707849264144897, "logps/rejected": -1.9372496604919434, "loss": 1.6771, "nll_loss": 1.6271687746047974, "rewards/accuracies": 1.0, "rewards/chosen": -0.15707850456237793, "rewards/margins": 0.03664647787809372, "rewards/rejected": -0.19372497498989105, "step": 285 }, { "epoch": 0.7897825336555057, "grad_norm": 0.1902783066034317, "learning_rate": 4.612535781522504e-06, "log_odds_chosen": 0.34869980812072754, "log_odds_ratio": -0.5359938144683838, "logits/chosen": -0.04153807833790779, "logits/rejected": -1.3222988843917847, "logps/chosen": -1.537316918373108, "logps/rejected": -1.8218798637390137, "loss": 1.6401, "nll_loss": 1.5864717960357666, "rewards/accuracies": 1.0, "rewards/chosen": -0.15373168885707855, "rewards/margins": 0.02845628745853901, "rewards/rejected": -0.1821879893541336, "step": 286 }, { "epoch": 0.7925440110459095, "grad_norm": 0.20296384394168854, "learning_rate": 4.6082393554005855e-06, "log_odds_chosen": 0.5364274978637695, "log_odds_ratio": -0.4683885872364044, "logits/chosen": 0.05323365330696106, "logits/rejected": -1.2694860696792603, "logps/chosen": -1.5045408010482788, "logps/rejected": -1.949967384338379, "loss": 1.6241, "nll_loss": 1.5772613286972046, "rewards/accuracies": 0.875, "rewards/chosen": -0.1504540890455246, "rewards/margins": 0.04454266279935837, "rewards/rejected": -0.19499675929546356, "step": 287 }, { "epoch": 0.7953054884363134, "grad_norm": 0.2286815494298935, "learning_rate": 4.6039212638573835e-06, "log_odds_chosen": 0.2805465757846832, "log_odds_ratio": -0.5681173205375671, "logits/chosen": -0.09262797981500626, "logits/rejected": -1.4461908340454102, "logps/chosen": -1.680091381072998, "logps/rejected": -1.9138922691345215, "loss": 1.7686, "nll_loss": 1.7117558717727661, "rewards/accuracies": 0.875, "rewards/chosen": -0.16800916194915771, "rewards/margins": 0.02338009513914585, "rewards/rejected": -0.19138923287391663, "step": 288 }, { "epoch": 0.7980669658267173, "grad_norm": 0.21770860254764557, "learning_rate": 4.599581551267969e-06, "log_odds_chosen": 0.5229898691177368, "log_odds_ratio": -0.47206243872642517, "logits/chosen": 0.0574706494808197, "logits/rejected": -1.1304949522018433, "logps/chosen": -1.6498278379440308, "logps/rejected": -2.090548038482666, "loss": 1.7491, "nll_loss": 1.701880931854248, "rewards/accuracies": 1.0, "rewards/chosen": -0.16498279571533203, "rewards/margins": 0.04407202824950218, "rewards/rejected": -0.20905481278896332, "step": 289 }, { "epoch": 0.8008284432171212, "grad_norm": 0.2001868039369583, "learning_rate": 4.5952202622296015e-06, "log_odds_chosen": 0.33771640062332153, "log_odds_ratio": -0.5422191619873047, "logits/chosen": -0.08145216107368469, "logits/rejected": -1.286673903465271, "logps/chosen": -1.5776478052139282, "logps/rejected": -1.857193946838379, "loss": 1.6829, "nll_loss": 1.6286838054656982, "rewards/accuracies": 1.0, "rewards/chosen": -0.15776477754116058, "rewards/margins": 0.027954626828432083, "rewards/rejected": -0.18571940064430237, "step": 290 }, { "epoch": 0.803589920607525, "grad_norm": 0.19392110407352448, "learning_rate": 4.590837441561277e-06, "log_odds_chosen": 0.4482704699039459, "log_odds_ratio": -0.504833459854126, "logits/chosen": -0.05314670130610466, "logits/rejected": -1.3596844673156738, "logps/chosen": -1.596375584602356, "logps/rejected": -1.9696097373962402, "loss": 1.6868, "nll_loss": 1.6362988948822021, "rewards/accuracies": 1.0, "rewards/chosen": -0.15963755548000336, "rewards/margins": 0.037323422729969025, "rewards/rejected": -0.19696098566055298, "step": 291 }, { "epoch": 0.8063513979979289, "grad_norm": 0.18595081567764282, "learning_rate": 4.586433134303257e-06, "log_odds_chosen": 0.5394923090934753, "log_odds_ratio": -0.4622240662574768, "logits/chosen": -0.05867187678813934, "logits/rejected": -1.1816052198410034, "logps/chosen": -1.4675815105438232, "logps/rejected": -1.904748797416687, "loss": 1.5911, "nll_loss": 1.5448615550994873, "rewards/accuracies": 1.0, "rewards/chosen": -0.14675816893577576, "rewards/margins": 0.04371672868728638, "rewards/rejected": -0.19047488272190094, "step": 292 }, { "epoch": 0.8091128753883328, "grad_norm": 0.2025194615125656, "learning_rate": 4.582007385716614e-06, "log_odds_chosen": 0.3834781348705292, "log_odds_ratio": -0.5252506732940674, "logits/chosen": -0.03208540380001068, "logits/rejected": -1.2696198225021362, "logps/chosen": -1.5880941152572632, "logps/rejected": -1.9049972295761108, "loss": 1.7106, "nll_loss": 1.6580520868301392, "rewards/accuracies": 1.0, "rewards/chosen": -0.15880942344665527, "rewards/margins": 0.0316903218626976, "rewards/rejected": -0.19049973785877228, "step": 293 }, { "epoch": 0.8118743527787367, "grad_norm": 0.21143269538879395, "learning_rate": 4.57756024128276e-06, "log_odds_chosen": 0.5385761260986328, "log_odds_ratio": -0.4631249010562897, "logits/chosen": -0.12074042856693268, "logits/rejected": -1.2522529363632202, "logps/chosen": -1.5002479553222656, "logps/rejected": -1.9420486688613892, "loss": 1.6132, "nll_loss": 1.566838026046753, "rewards/accuracies": 1.0, "rewards/chosen": -0.15002478659152985, "rewards/margins": 0.044180065393447876, "rewards/rejected": -0.19420485198497772, "step": 294 }, { "epoch": 0.8146358301691405, "grad_norm": 0.1943599432706833, "learning_rate": 4.573091746702988e-06, "log_odds_chosen": 0.5673821568489075, "log_odds_ratio": -0.45537808537483215, "logits/chosen": -0.1322491317987442, "logits/rejected": -1.209002137184143, "logps/chosen": -1.5007110834121704, "logps/rejected": -1.9674400091171265, "loss": 1.6187, "nll_loss": 1.573127031326294, "rewards/accuracies": 1.0, "rewards/chosen": -0.15007109940052032, "rewards/margins": 0.04667289927601814, "rewards/rejected": -0.19674400985240936, "step": 295 }, { "epoch": 0.8173973075595443, "grad_norm": 0.17433036863803864, "learning_rate": 4.5686019478979915e-06, "log_odds_chosen": 0.6013860702514648, "log_odds_ratio": -0.4401004910469055, "logits/chosen": 0.0062029119580984116, "logits/rejected": -1.6436456441879272, "logps/chosen": -1.534691572189331, "logps/rejected": -2.034040927886963, "loss": 1.6251, "nll_loss": 1.5810506343841553, "rewards/accuracies": 1.0, "rewards/chosen": -0.15346917510032654, "rewards/margins": 0.049934931099414825, "rewards/rejected": -0.20340411365032196, "step": 296 }, { "epoch": 0.8201587849499482, "grad_norm": 0.20673449337482452, "learning_rate": 4.564090891007401e-06, "log_odds_chosen": 0.5072333812713623, "log_odds_ratio": -0.47898009419441223, "logits/chosen": -0.12156552821397781, "logits/rejected": -1.4179987907409668, "logps/chosen": -1.63102388381958, "logps/rejected": -2.0585782527923584, "loss": 1.7285, "nll_loss": 1.6806358098983765, "rewards/accuracies": 1.0, "rewards/chosen": -0.1631024032831192, "rewards/margins": 0.04275544360280037, "rewards/rejected": -0.20585784316062927, "step": 297 }, { "epoch": 0.8229202623403521, "grad_norm": 0.1835467368364334, "learning_rate": 4.559558622389304e-06, "log_odds_chosen": 0.5705875754356384, "log_odds_ratio": -0.45824331045150757, "logits/chosen": -0.08186288177967072, "logits/rejected": -1.502951979637146, "logps/chosen": -1.5210040807724, "logps/rejected": -1.987526774406433, "loss": 1.6191, "nll_loss": 1.5732712745666504, "rewards/accuracies": 1.0, "rewards/chosen": -0.15210041403770447, "rewards/margins": 0.046652257442474365, "rewards/rejected": -0.19875267148017883, "step": 298 }, { "epoch": 0.825681739730756, "grad_norm": 0.19064722955226898, "learning_rate": 4.555005188619776e-06, "log_odds_chosen": 0.5287789106369019, "log_odds_ratio": -0.473359614610672, "logits/chosen": -0.1356291025876999, "logits/rejected": -1.412203073501587, "logps/chosen": -1.5497990846633911, "logps/rejected": -1.9918665885925293, "loss": 1.6437, "nll_loss": 1.5964010953903198, "rewards/accuracies": 0.875, "rewards/chosen": -0.1549799144268036, "rewards/margins": 0.04420673847198486, "rewards/rejected": -0.19918665289878845, "step": 299 }, { "epoch": 0.8284432171211599, "grad_norm": 0.1883833110332489, "learning_rate": 4.55043063649239e-06, "log_odds_chosen": 0.6203457117080688, "log_odds_ratio": -0.4358068108558655, "logits/chosen": -0.1520073264837265, "logits/rejected": -1.6608895063400269, "logps/chosen": -1.5645240545272827, "logps/rejected": -2.0832083225250244, "loss": 1.6559, "nll_loss": 1.6122933626174927, "rewards/accuracies": 1.0, "rewards/chosen": -0.15645241737365723, "rewards/margins": 0.05186842754483223, "rewards/rejected": -0.20832082629203796, "step": 300 }, { "epoch": 0.8312046945115636, "grad_norm": 0.18228144943714142, "learning_rate": 4.54583501301775e-06, "log_odds_chosen": 0.5522174835205078, "log_odds_ratio": -0.46315470337867737, "logits/chosen": -0.10763005167245865, "logits/rejected": -1.706877589225769, "logps/chosen": -1.5923174619674683, "logps/rejected": -2.0555925369262695, "loss": 1.6766, "nll_loss": 1.630320429801941, "rewards/accuracies": 1.0, "rewards/chosen": -0.1592317670583725, "rewards/margins": 0.046327486634254456, "rewards/rejected": -0.20555922389030457, "step": 301 }, { "epoch": 0.8339661719019675, "grad_norm": 0.18666355311870575, "learning_rate": 4.541218365422997e-06, "log_odds_chosen": 0.6547673344612122, "log_odds_ratio": -0.4269846975803375, "logits/chosen": -0.1366514414548874, "logits/rejected": -1.3694720268249512, "logps/chosen": -1.4531749486923218, "logps/rejected": -1.9791964292526245, "loss": 1.5613, "nll_loss": 1.5185647010803223, "rewards/accuracies": 1.0, "rewards/chosen": -0.14531749486923218, "rewards/margins": 0.05260216072201729, "rewards/rejected": -0.19791966676712036, "step": 302 }, { "epoch": 0.8367276492923714, "grad_norm": 0.19289466738700867, "learning_rate": 4.536580741151328e-06, "log_odds_chosen": 0.42363226413726807, "log_odds_ratio": -0.5060065984725952, "logits/chosen": -0.1389242559671402, "logits/rejected": -1.133392095565796, "logps/chosen": -1.5939542055130005, "logps/rejected": -1.9430160522460938, "loss": 1.6864, "nll_loss": 1.63578200340271, "rewards/accuracies": 1.0, "rewards/chosen": -0.15939541161060333, "rewards/margins": 0.034906186163425446, "rewards/rejected": -0.19430160522460938, "step": 303 }, { "epoch": 0.8394891266827753, "grad_norm": 0.20372274518013, "learning_rate": 4.531922187861507e-06, "log_odds_chosen": 0.6614566445350647, "log_odds_ratio": -0.41916701197624207, "logits/chosen": -0.11737221479415894, "logits/rejected": -1.2571710348129272, "logps/chosen": -1.4728827476501465, "logps/rejected": -2.0177319049835205, "loss": 1.5909, "nll_loss": 1.548986792564392, "rewards/accuracies": 1.0, "rewards/chosen": -0.1472882777452469, "rewards/margins": 0.05448490381240845, "rewards/rejected": -0.20177316665649414, "step": 304 }, { "epoch": 0.8422506040731792, "grad_norm": 0.20864824950695038, "learning_rate": 4.527242753427378e-06, "log_odds_chosen": 0.4375610649585724, "log_odds_ratio": -0.5043913722038269, "logits/chosen": -0.14712439477443695, "logits/rejected": -1.1074727773666382, "logps/chosen": -1.6707913875579834, "logps/rejected": -2.040156126022339, "loss": 1.7741, "nll_loss": 1.723702073097229, "rewards/accuracies": 1.0, "rewards/chosen": -0.1670791655778885, "rewards/margins": 0.036936454474925995, "rewards/rejected": -0.20401562750339508, "step": 305 }, { "epoch": 0.845012081463583, "grad_norm": 0.1923041045665741, "learning_rate": 4.522542485937369e-06, "log_odds_chosen": 0.6749688982963562, "log_odds_ratio": -0.4191080629825592, "logits/chosen": -0.11244256049394608, "logits/rejected": -1.4451961517333984, "logps/chosen": -1.528746485710144, "logps/rejected": -2.0891406536102295, "loss": 1.644, "nll_loss": 1.602088212966919, "rewards/accuracies": 1.0, "rewards/chosen": -0.1528746634721756, "rewards/margins": 0.05603940039873123, "rewards/rejected": -0.20891407132148743, "step": 306 }, { "epoch": 0.8477735588539869, "grad_norm": 0.19692490994930267, "learning_rate": 4.5178214336940015e-06, "log_odds_chosen": 0.536999523639679, "log_odds_ratio": -0.4662824273109436, "logits/chosen": -0.12849773466587067, "logits/rejected": -1.3011747598648071, "logps/chosen": -1.537503957748413, "logps/rejected": -1.9821370840072632, "loss": 1.6458, "nll_loss": 1.5992212295532227, "rewards/accuracies": 1.0, "rewards/chosen": -0.15375038981437683, "rewards/margins": 0.044463321566581726, "rewards/rejected": -0.19821371138095856, "step": 307 }, { "epoch": 0.8505350362443908, "grad_norm": 0.19530101120471954, "learning_rate": 4.513079645213391e-06, "log_odds_chosen": 0.5806478261947632, "log_odds_ratio": -0.45124712586402893, "logits/chosen": -0.14092004299163818, "logits/rejected": -1.2499042749404907, "logps/chosen": -1.447067141532898, "logps/rejected": -1.9178260564804077, "loss": 1.543, "nll_loss": 1.4978904724121094, "rewards/accuracies": 1.0, "rewards/chosen": -0.14470671117305756, "rewards/margins": 0.04707589000463486, "rewards/rejected": -0.19178259372711182, "step": 308 }, { "epoch": 0.8532965136347946, "grad_norm": 0.21278323233127594, "learning_rate": 4.508317169224752e-06, "log_odds_chosen": 0.2714424431324005, "log_odds_ratio": -0.5724983215332031, "logits/chosen": -0.23690785467624664, "logits/rejected": -1.2109217643737793, "logps/chosen": -1.568101406097412, "logps/rejected": -1.7880043983459473, "loss": 1.6669, "nll_loss": 1.6096391677856445, "rewards/accuracies": 0.875, "rewards/chosen": -0.15681014955043793, "rewards/margins": 0.021990297362208366, "rewards/rejected": -0.17880043387413025, "step": 309 }, { "epoch": 0.8560579910251985, "grad_norm": 0.3061252534389496, "learning_rate": 4.5035340546698915e-06, "log_odds_chosen": 0.5074098110198975, "log_odds_ratio": -0.47923699021339417, "logits/chosen": -0.08015923947095871, "logits/rejected": -1.4736651182174683, "logps/chosen": -1.5646302700042725, "logps/rejected": -1.9794241189956665, "loss": 1.6599, "nll_loss": 1.6120116710662842, "rewards/accuracies": 1.0, "rewards/chosen": -0.15646302700042725, "rewards/margins": 0.041479405015707016, "rewards/rejected": -0.19794242084026337, "step": 310 }, { "epoch": 0.8588194684156023, "grad_norm": 0.20862670242786407, "learning_rate": 4.4987303507027155e-06, "log_odds_chosen": 0.32739633321762085, "log_odds_ratio": -0.5497077703475952, "logits/chosen": -0.057409606873989105, "logits/rejected": -1.186546802520752, "logps/chosen": -1.6279197931289673, "logps/rejected": -1.8978025913238525, "loss": 1.7296, "nll_loss": 1.6746082305908203, "rewards/accuracies": 0.75, "rewards/chosen": -0.16279199719429016, "rewards/margins": 0.026988260447978973, "rewards/rejected": -0.18978025019168854, "step": 311 }, { "epoch": 0.8615809458060062, "grad_norm": 0.1961701512336731, "learning_rate": 4.493906106688712e-06, "log_odds_chosen": 0.6683472394943237, "log_odds_ratio": -0.4163239598274231, "logits/chosen": -0.08115644752979279, "logits/rejected": -1.3991774320602417, "logps/chosen": -1.5248966217041016, "logps/rejected": -2.0810930728912354, "loss": 1.633, "nll_loss": 1.5913276672363281, "rewards/accuracies": 1.0, "rewards/chosen": -0.15248967707157135, "rewards/margins": 0.05561964958906174, "rewards/rejected": -0.20810934901237488, "step": 312 }, { "epoch": 0.8643424231964101, "grad_norm": 0.4846391975879669, "learning_rate": 4.4890613722044526e-06, "log_odds_chosen": 0.5416931509971619, "log_odds_ratio": -0.46186453104019165, "logits/chosen": -0.13847319781780243, "logits/rejected": -1.388474702835083, "logps/chosen": -1.5097135305404663, "logps/rejected": -1.9569801092147827, "loss": 1.59, "nll_loss": 1.5437886714935303, "rewards/accuracies": 1.0, "rewards/chosen": -0.15097135305404663, "rewards/margins": 0.04472666233778, "rewards/rejected": -0.19569800794124603, "step": 313 }, { "epoch": 0.867103900586814, "grad_norm": 0.2009696215391159, "learning_rate": 4.484196197037082e-06, "log_odds_chosen": 0.5546321868896484, "log_odds_ratio": -0.45888563990592957, "logits/chosen": -0.07164500653743744, "logits/rejected": -1.3182318210601807, "logps/chosen": -1.561537265777588, "logps/rejected": -2.02632212638855, "loss": 1.6542, "nll_loss": 1.6082967519760132, "rewards/accuracies": 1.0, "rewards/chosen": -0.15615373849868774, "rewards/margins": 0.046478480100631714, "rewards/rejected": -0.20263221859931946, "step": 314 }, { "epoch": 0.8698653779772179, "grad_norm": 0.18780747056007385, "learning_rate": 4.4793106311838e-06, "log_odds_chosen": 0.5521085262298584, "log_odds_ratio": -0.46405351161956787, "logits/chosen": -0.17451435327529907, "logits/rejected": -1.2368041276931763, "logps/chosen": -1.547911524772644, "logps/rejected": -2.007674217224121, "loss": 1.6276, "nll_loss": 1.5811690092086792, "rewards/accuracies": 1.0, "rewards/chosen": -0.15479114651679993, "rewards/margins": 0.04597627371549606, "rewards/rejected": -0.2007674127817154, "step": 315 }, { "epoch": 0.8726268553676216, "grad_norm": 0.19249524176120758, "learning_rate": 4.474404724851356e-06, "log_odds_chosen": 0.49972984194755554, "log_odds_ratio": -0.4845547676086426, "logits/chosen": -0.12106968462467194, "logits/rejected": -1.4282002449035645, "logps/chosen": -1.599500060081482, "logps/rejected": -2.0155510902404785, "loss": 1.6902, "nll_loss": 1.641719102859497, "rewards/accuracies": 1.0, "rewards/chosen": -0.15995000302791595, "rewards/margins": 0.04160511493682861, "rewards/rejected": -0.20155511796474457, "step": 316 }, { "epoch": 0.8753883327580255, "grad_norm": 0.2032056301832199, "learning_rate": 4.469478528455529e-06, "log_odds_chosen": 0.5046655535697937, "log_odds_ratio": -0.47710105776786804, "logits/chosen": -0.27712228894233704, "logits/rejected": -1.1458078622817993, "logps/chosen": -1.587306261062622, "logps/rejected": -2.0054972171783447, "loss": 1.6787, "nll_loss": 1.6309565305709839, "rewards/accuracies": 1.0, "rewards/chosen": -0.1587306261062622, "rewards/margins": 0.041819095611572266, "rewards/rejected": -0.20054972171783447, "step": 317 }, { "epoch": 0.8781498101484294, "grad_norm": 0.19460327923297882, "learning_rate": 4.464532092620607e-06, "log_odds_chosen": 0.5168122053146362, "log_odds_ratio": -0.474026620388031, "logits/chosen": -0.1055205836892128, "logits/rejected": -1.2003237009048462, "logps/chosen": -1.6242094039916992, "logps/rejected": -2.0614237785339355, "loss": 1.7035, "nll_loss": 1.6561068296432495, "rewards/accuracies": 1.0, "rewards/chosen": -0.16242094337940216, "rewards/margins": 0.04372143745422363, "rewards/rejected": -0.206142395734787, "step": 318 }, { "epoch": 0.8809112875388333, "grad_norm": 0.17680254578590393, "learning_rate": 4.4595654681788715e-06, "log_odds_chosen": 0.6278887987136841, "log_odds_ratio": -0.43415001034736633, "logits/chosen": -0.15030619502067566, "logits/rejected": -1.5502567291259766, "logps/chosen": -1.5051651000976562, "logps/rejected": -2.024726629257202, "loss": 1.5904, "nll_loss": 1.5469478368759155, "rewards/accuracies": 1.0, "rewards/chosen": -0.15051651000976562, "rewards/margins": 0.051956143230199814, "rewards/rejected": -0.20247265696525574, "step": 319 }, { "epoch": 0.8836727649292372, "grad_norm": 0.20075084269046783, "learning_rate": 4.454578706170075e-06, "log_odds_chosen": 0.3344058692455292, "log_odds_ratio": -0.547435998916626, "logits/chosen": -0.09762324392795563, "logits/rejected": -1.3721954822540283, "logps/chosen": -1.610334873199463, "logps/rejected": -1.8859809637069702, "loss": 1.6984, "nll_loss": 1.6436134576797485, "rewards/accuracies": 1.0, "rewards/chosen": -0.1610334813594818, "rewards/margins": 0.02756461501121521, "rewards/rejected": -0.18859811127185822, "step": 320 }, { "epoch": 0.886434242319641, "grad_norm": 0.1969553381204605, "learning_rate": 4.449571857840911e-06, "log_odds_chosen": 0.5939600467681885, "log_odds_ratio": -0.4538188576698303, "logits/chosen": -0.02511260285973549, "logits/rejected": -1.4034056663513184, "logps/chosen": -1.554402232170105, "logps/rejected": -2.0581822395324707, "loss": 1.647, "nll_loss": 1.6016532182693481, "rewards/accuracies": 0.875, "rewards/chosen": -0.15544021129608154, "rewards/margins": 0.05037800967693329, "rewards/rejected": -0.20581823587417603, "step": 321 }, { "epoch": 0.8891957197100449, "grad_norm": 0.20333482325077057, "learning_rate": 4.444544974644493e-06, "log_odds_chosen": 0.4322926700115204, "log_odds_ratio": -0.5066258311271667, "logits/chosen": -0.1251221001148224, "logits/rejected": -1.2058238983154297, "logps/chosen": -1.5241694450378418, "logps/rejected": -1.878300666809082, "loss": 1.6259, "nll_loss": 1.5751992464065552, "rewards/accuracies": 0.875, "rewards/chosen": -0.15241695940494537, "rewards/margins": 0.035413116216659546, "rewards/rejected": -0.18783007562160492, "step": 322 }, { "epoch": 0.8919571971004487, "grad_norm": 0.19495686888694763, "learning_rate": 4.4394981082398254e-06, "log_odds_chosen": 0.4637213945388794, "log_odds_ratio": -0.5005385875701904, "logits/chosen": -0.19393548369407654, "logits/rejected": -1.3736821413040161, "logps/chosen": -1.5801210403442383, "logps/rejected": -1.9673550128936768, "loss": 1.678, "nll_loss": 1.6279520988464355, "rewards/accuracies": 1.0, "rewards/chosen": -0.15801210701465607, "rewards/margins": 0.0387234091758728, "rewards/rejected": -0.19673550128936768, "step": 323 }, { "epoch": 0.8947186744908526, "grad_norm": 0.21592693030834198, "learning_rate": 4.434431310491267e-06, "log_odds_chosen": 0.6085124015808105, "log_odds_ratio": -0.44234418869018555, "logits/chosen": -0.20342035591602325, "logits/rejected": -1.3250751495361328, "logps/chosen": -1.6450177431106567, "logps/rejected": -2.15885329246521, "loss": 1.7148, "nll_loss": 1.6705245971679688, "rewards/accuracies": 1.0, "rewards/chosen": -0.16450178623199463, "rewards/margins": 0.05138356238603592, "rewards/rejected": -0.21588534116744995, "step": 324 }, { "epoch": 0.8974801518812565, "grad_norm": 0.2041964828968048, "learning_rate": 4.429344633468005e-06, "log_odds_chosen": 0.5157475471496582, "log_odds_ratio": -0.4731763005256653, "logits/chosen": -0.1373744010925293, "logits/rejected": -1.3682070970535278, "logps/chosen": -1.5091853141784668, "logps/rejected": -1.9341413974761963, "loss": 1.5962, "nll_loss": 1.5488858222961426, "rewards/accuracies": 1.0, "rewards/chosen": -0.15091852843761444, "rewards/margins": 0.042495615780353546, "rewards/rejected": -0.19341415166854858, "step": 325 }, { "epoch": 0.9002416292716603, "grad_norm": 0.21775774657726288, "learning_rate": 4.424238129443515e-06, "log_odds_chosen": 0.483593225479126, "log_odds_ratio": -0.48364073038101196, "logits/chosen": -0.09863223880529404, "logits/rejected": -1.0222870111465454, "logps/chosen": -1.6142656803131104, "logps/rejected": -2.01713228225708, "loss": 1.7025, "nll_loss": 1.6541244983673096, "rewards/accuracies": 1.0, "rewards/chosen": -0.1614265739917755, "rewards/margins": 0.04028663784265518, "rewards/rejected": -0.2017132043838501, "step": 326 }, { "epoch": 0.9030031066620642, "grad_norm": 0.20912012457847595, "learning_rate": 4.4191118508950286e-06, "log_odds_chosen": 0.5832557678222656, "log_odds_ratio": -0.44807419180870056, "logits/chosen": -0.18956468999385834, "logits/rejected": -1.2079969644546509, "logps/chosen": -1.5554307699203491, "logps/rejected": -2.04107666015625, "loss": 1.659, "nll_loss": 1.6141505241394043, "rewards/accuracies": 1.0, "rewards/chosen": -0.15554308891296387, "rewards/margins": 0.04856458306312561, "rewards/rejected": -0.20410768687725067, "step": 327 }, { "epoch": 0.9057645840524681, "grad_norm": 0.1959802657365799, "learning_rate": 4.413965850502987e-06, "log_odds_chosen": 0.6536615490913391, "log_odds_ratio": -0.42614060640335083, "logits/chosen": -0.2154429852962494, "logits/rejected": -1.3865175247192383, "logps/chosen": -1.4310425519943237, "logps/rejected": -1.9644105434417725, "loss": 1.4983, "nll_loss": 1.455698013305664, "rewards/accuracies": 1.0, "rewards/chosen": -0.14310424029827118, "rewards/margins": 0.05333680659532547, "rewards/rejected": -0.19644105434417725, "step": 328 }, { "epoch": 0.908526061442872, "grad_norm": 0.19480550289154053, "learning_rate": 4.408800181150509e-06, "log_odds_chosen": 0.6978083252906799, "log_odds_ratio": -0.4107431471347809, "logits/chosen": -0.20347651839256287, "logits/rejected": -1.4490666389465332, "logps/chosen": -1.508465051651001, "logps/rejected": -2.0856833457946777, "loss": 1.5878, "nll_loss": 1.5467225313186646, "rewards/accuracies": 1.0, "rewards/chosen": -0.15084651112556458, "rewards/margins": 0.0577218271791935, "rewards/rejected": -0.20856834948062897, "step": 329 }, { "epoch": 0.9112875388332758, "grad_norm": 0.20806226134300232, "learning_rate": 4.4036148959228365e-06, "log_odds_chosen": 0.5531899333000183, "log_odds_ratio": -0.455844908952713, "logits/chosen": -0.12704087793827057, "logits/rejected": -1.144600510597229, "logps/chosen": -1.5709285736083984, "logps/rejected": -2.0322890281677246, "loss": 1.6525, "nll_loss": 1.6068720817565918, "rewards/accuracies": 1.0, "rewards/chosen": -0.1570928692817688, "rewards/margins": 0.046136029064655304, "rewards/rejected": -0.2032289057970047, "step": 330 }, { "epoch": 0.9140490162236796, "grad_norm": 0.20405174791812897, "learning_rate": 4.3984100481068e-06, "log_odds_chosen": 0.5182926654815674, "log_odds_ratio": -0.4803735911846161, "logits/chosen": -0.05097893998026848, "logits/rejected": -1.530269980430603, "logps/chosen": -1.5223273038864136, "logps/rejected": -1.9506477117538452, "loss": 1.602, "nll_loss": 1.5539740324020386, "rewards/accuracies": 0.875, "rewards/chosen": -0.15223273634910583, "rewards/margins": 0.04283204674720764, "rewards/rejected": -0.19506478309631348, "step": 331 }, { "epoch": 0.9168104936140835, "grad_norm": 0.1879170536994934, "learning_rate": 4.3931856911902635e-06, "log_odds_chosen": 0.6412795782089233, "log_odds_ratio": -0.4325963854789734, "logits/chosen": -0.23800577223300934, "logits/rejected": -1.4617422819137573, "logps/chosen": -1.5373084545135498, "logps/rejected": -2.0728628635406494, "loss": 1.6293, "nll_loss": 1.586013674736023, "rewards/accuracies": 1.0, "rewards/chosen": -0.1537308394908905, "rewards/margins": 0.0535554476082325, "rewards/rejected": -0.2072862982749939, "step": 332 }, { "epoch": 0.9195719710044874, "grad_norm": 0.19721606373786926, "learning_rate": 4.387941878861578e-06, "log_odds_chosen": 0.43159598112106323, "log_odds_ratio": -0.5106831192970276, "logits/chosen": -0.2015897035598755, "logits/rejected": -1.1725409030914307, "logps/chosen": -1.4626684188842773, "logps/rejected": -1.816298246383667, "loss": 1.5551, "nll_loss": 1.5040326118469238, "rewards/accuracies": 0.875, "rewards/chosen": -0.1462668478488922, "rewards/margins": 0.03536297380924225, "rewards/rejected": -0.18162982165813446, "step": 333 }, { "epoch": 0.9223334483948913, "grad_norm": 0.19862483441829681, "learning_rate": 4.382678665009028e-06, "log_odds_chosen": 0.5599039196968079, "log_odds_ratio": -0.45722323656082153, "logits/chosen": -0.1434212028980255, "logits/rejected": -1.4838988780975342, "logps/chosen": -1.5819792747497559, "logps/rejected": -2.048774003982544, "loss": 1.6595, "nll_loss": 1.6138123273849487, "rewards/accuracies": 1.0, "rewards/chosen": -0.15819790959358215, "rewards/margins": 0.04667946696281433, "rewards/rejected": -0.20487739145755768, "step": 334 }, { "epoch": 0.9250949257852952, "grad_norm": 0.20085540413856506, "learning_rate": 4.3773961037202784e-06, "log_odds_chosen": 0.5849855542182922, "log_odds_ratio": -0.4470357596874237, "logits/chosen": -0.16471579670906067, "logits/rejected": -1.48972749710083, "logps/chosen": -1.6449600458145142, "logps/rejected": -2.1387722492218018, "loss": 1.7056, "nll_loss": 1.6609022617340088, "rewards/accuracies": 1.0, "rewards/chosen": -0.16449600458145142, "rewards/margins": 0.04938122630119324, "rewards/rejected": -0.21387723088264465, "step": 335 }, { "epoch": 0.927856403175699, "grad_norm": 0.19542387127876282, "learning_rate": 4.37209424928182e-06, "log_odds_chosen": 0.5759137272834778, "log_odds_ratio": -0.4505816698074341, "logits/chosen": -0.1124081164598465, "logits/rejected": -1.1634758710861206, "logps/chosen": -1.5444155931472778, "logps/rejected": -2.0244972705841064, "loss": 1.6261, "nll_loss": 1.581070065498352, "rewards/accuracies": 1.0, "rewards/chosen": -0.15444158017635345, "rewards/margins": 0.048008158802986145, "rewards/rejected": -0.2024497389793396, "step": 336 }, { "epoch": 0.9306178805661028, "grad_norm": 0.187562957406044, "learning_rate": 4.366773156178413e-06, "log_odds_chosen": 0.40636616945266724, "log_odds_ratio": -0.5145981311798096, "logits/chosen": -0.23331299424171448, "logits/rejected": -1.272404432296753, "logps/chosen": -1.4672964811325073, "logps/rejected": -1.792708396911621, "loss": 1.5577, "nll_loss": 1.5061949491500854, "rewards/accuracies": 1.0, "rewards/chosen": -0.14672963321208954, "rewards/margins": 0.03254120051860809, "rewards/rejected": -0.17927084863185883, "step": 337 }, { "epoch": 0.9333793579565067, "grad_norm": 0.19676648080348969, "learning_rate": 4.361432879092518e-06, "log_odds_chosen": 0.6147382259368896, "log_odds_ratio": -0.444987416267395, "logits/chosen": -0.2584179937839508, "logits/rejected": -1.2025710344314575, "logps/chosen": -1.4777214527130127, "logps/rejected": -1.9875514507293701, "loss": 1.5673, "nll_loss": 1.5228395462036133, "rewards/accuracies": 1.0, "rewards/chosen": -0.1477721631526947, "rewards/margins": 0.0509830042719841, "rewards/rejected": -0.198755145072937, "step": 338 }, { "epoch": 0.9361408353469106, "grad_norm": 0.20113791525363922, "learning_rate": 4.356073472903747e-06, "log_odds_chosen": 0.5940407514572144, "log_odds_ratio": -0.4446149170398712, "logits/chosen": -0.09678924083709717, "logits/rejected": -1.2151576280593872, "logps/chosen": -1.4379996061325073, "logps/rejected": -1.9237509965896606, "loss": 1.5294, "nll_loss": 1.4849097728729248, "rewards/accuracies": 1.0, "rewards/chosen": -0.14379996061325073, "rewards/margins": 0.04857514798641205, "rewards/rejected": -0.19237510859966278, "step": 339 }, { "epoch": 0.9389023127373145, "grad_norm": 0.18967628479003906, "learning_rate": 4.350694992688289e-06, "log_odds_chosen": 0.6668899059295654, "log_odds_ratio": -0.427124559879303, "logits/chosen": -0.08107070624828339, "logits/rejected": -1.2518596649169922, "logps/chosen": -1.471956491470337, "logps/rejected": -2.021543025970459, "loss": 1.5567, "nll_loss": 1.513990879058838, "rewards/accuracies": 1.0, "rewards/chosen": -0.14719566702842712, "rewards/margins": 0.05495864897966385, "rewards/rejected": -0.20215432345867157, "step": 340 }, { "epoch": 0.9416637901277183, "grad_norm": 0.1954769492149353, "learning_rate": 4.345297493718352e-06, "log_odds_chosen": 0.5373238325119019, "log_odds_ratio": -0.46630507707595825, "logits/chosen": -0.24432893097400665, "logits/rejected": -1.0526390075683594, "logps/chosen": -1.472176432609558, "logps/rejected": -1.9091975688934326, "loss": 1.5654, "nll_loss": 1.5187859535217285, "rewards/accuracies": 1.0, "rewards/chosen": -0.14721764624118805, "rewards/margins": 0.04370209947228432, "rewards/rejected": -0.19091975688934326, "step": 341 }, { "epoch": 0.9444252675181222, "grad_norm": 0.18435297906398773, "learning_rate": 4.339881031461588e-06, "log_odds_chosen": 0.46578601002693176, "log_odds_ratio": -0.49112096428871155, "logits/chosen": -0.18604259192943573, "logits/rejected": -1.3026976585388184, "logps/chosen": -1.4346725940704346, "logps/rejected": -1.7989139556884766, "loss": 1.5414, "nll_loss": 1.4923009872436523, "rewards/accuracies": 1.0, "rewards/chosen": -0.1434672772884369, "rewards/margins": 0.036424119025468826, "rewards/rejected": -0.17989139258861542, "step": 342 }, { "epoch": 0.9471867449085261, "grad_norm": 0.19713670015335083, "learning_rate": 4.334445661580527e-06, "log_odds_chosen": 0.5911546945571899, "log_odds_ratio": -0.444367378950119, "logits/chosen": -0.24173688888549805, "logits/rejected": -1.4439072608947754, "logps/chosen": -1.5436309576034546, "logps/rejected": -2.035305976867676, "loss": 1.6258, "nll_loss": 1.5813885927200317, "rewards/accuracies": 1.0, "rewards/chosen": -0.15436309576034546, "rewards/margins": 0.049167513847351074, "rewards/rejected": -0.20353063941001892, "step": 343 }, { "epoch": 0.94994822229893, "grad_norm": 0.19868247210979462, "learning_rate": 4.328991439932003e-06, "log_odds_chosen": 0.646172046661377, "log_odds_ratio": -0.42435139417648315, "logits/chosen": -0.21409977972507477, "logits/rejected": -1.1630572080612183, "logps/chosen": -1.475473165512085, "logps/rejected": -2.007708787918091, "loss": 1.5666, "nll_loss": 1.5241310596466064, "rewards/accuracies": 1.0, "rewards/chosen": -0.14754730463027954, "rewards/margins": 0.053223565220832825, "rewards/rejected": -0.20077086985111237, "step": 344 }, { "epoch": 0.9527096996893338, "grad_norm": 0.2008034884929657, "learning_rate": 4.323518422566586e-06, "log_odds_chosen": 0.7171238660812378, "log_odds_ratio": -0.4020853638648987, "logits/chosen": -0.14678461849689484, "logits/rejected": -1.1799049377441406, "logps/chosen": -1.5431439876556396, "logps/rejected": -2.141486167907715, "loss": 1.639, "nll_loss": 1.5987637042999268, "rewards/accuracies": 1.0, "rewards/chosen": -0.15431438386440277, "rewards/margins": 0.05983421206474304, "rewards/rejected": -0.214148610830307, "step": 345 }, { "epoch": 0.9554711770797376, "grad_norm": 0.18002885580062866, "learning_rate": 4.318026665727993e-06, "log_odds_chosen": 0.7151846885681152, "log_odds_ratio": -0.40609210729599, "logits/chosen": -0.2377801239490509, "logits/rejected": -1.4111603498458862, "logps/chosen": -1.4105151891708374, "logps/rejected": -1.9907076358795166, "loss": 1.4953, "nll_loss": 1.4546504020690918, "rewards/accuracies": 1.0, "rewards/chosen": -0.1410515308380127, "rewards/margins": 0.058019235730171204, "rewards/rejected": -0.1990707665681839, "step": 346 }, { "epoch": 0.9582326544701415, "grad_norm": 0.2088988721370697, "learning_rate": 4.3125162258525265e-06, "log_odds_chosen": 0.37778812646865845, "log_odds_ratio": -0.5246101021766663, "logits/chosen": -0.26066213846206665, "logits/rejected": -1.111747145652771, "logps/chosen": -1.6180295944213867, "logps/rejected": -1.932243824005127, "loss": 1.7004, "nll_loss": 1.6479332447052002, "rewards/accuracies": 1.0, "rewards/chosen": -0.1618029624223709, "rewards/margins": 0.03142143785953522, "rewards/rejected": -0.19322440028190613, "step": 347 }, { "epoch": 0.9609941318605454, "grad_norm": 0.23166809976100922, "learning_rate": 4.3069871595684795e-06, "log_odds_chosen": 0.5429641604423523, "log_odds_ratio": -0.46485579013824463, "logits/chosen": -0.26122069358825684, "logits/rejected": -1.2304470539093018, "logps/chosen": -1.482200026512146, "logps/rejected": -1.9262490272521973, "loss": 1.5751, "nll_loss": 1.5286585092544556, "rewards/accuracies": 1.0, "rewards/chosen": -0.1482200026512146, "rewards/margins": 0.044404882937669754, "rewards/rejected": -0.19262489676475525, "step": 348 }, { "epoch": 0.9637556092509493, "grad_norm": 0.19496473670005798, "learning_rate": 4.3014395236955635e-06, "log_odds_chosen": 0.6688944101333618, "log_odds_ratio": -0.4200526475906372, "logits/chosen": -0.2004820704460144, "logits/rejected": -1.322332501411438, "logps/chosen": -1.4777374267578125, "logps/rejected": -2.0316760540008545, "loss": 1.5606, "nll_loss": 1.5185458660125732, "rewards/accuracies": 1.0, "rewards/chosen": -0.14777372777462006, "rewards/margins": 0.055393870919942856, "rewards/rejected": -0.2031676024198532, "step": 349 }, { "epoch": 0.9665170866413532, "grad_norm": 0.18388359248638153, "learning_rate": 4.295873375244319e-06, "log_odds_chosen": 0.5681695938110352, "log_odds_ratio": -0.4566437602043152, "logits/chosen": -0.21363496780395508, "logits/rejected": -1.1769250631332397, "logps/chosen": -1.439588189125061, "logps/rejected": -1.9001712799072266, "loss": 1.5306, "nll_loss": 1.4849416017532349, "rewards/accuracies": 1.0, "rewards/chosen": -0.14395882189273834, "rewards/margins": 0.04605831205844879, "rewards/rejected": -0.19001714885234833, "step": 350 }, { "epoch": 0.9692785640317569, "grad_norm": 0.20012839138507843, "learning_rate": 4.290288771415536e-06, "log_odds_chosen": 0.5067012906074524, "log_odds_ratio": -0.477801650762558, "logits/chosen": -0.20678414404392242, "logits/rejected": -1.340362787246704, "logps/chosen": -1.5872958898544312, "logps/rejected": -2.0096263885498047, "loss": 1.6634, "nll_loss": 1.6156083345413208, "rewards/accuracies": 1.0, "rewards/chosen": -0.15872959792613983, "rewards/margins": 0.04223306477069855, "rewards/rejected": -0.20096264779567719, "step": 351 }, { "epoch": 0.9720400414221608, "grad_norm": 0.1952664852142334, "learning_rate": 4.284685769599658e-06, "log_odds_chosen": 0.5722883343696594, "log_odds_ratio": -0.45633140206336975, "logits/chosen": -0.2586257755756378, "logits/rejected": -1.3633008003234863, "logps/chosen": -1.495697259902954, "logps/rejected": -1.9731667041778564, "loss": 1.5729, "nll_loss": 1.5272736549377441, "rewards/accuracies": 1.0, "rewards/chosen": -0.14956970512866974, "rewards/margins": 0.04774694889783859, "rewards/rejected": -0.19731666147708893, "step": 352 }, { "epoch": 0.9748015188125647, "grad_norm": 0.194318950176239, "learning_rate": 4.279064427376199e-06, "log_odds_chosen": 0.5692598819732666, "log_odds_ratio": -0.4534481167793274, "logits/chosen": -0.21069735288619995, "logits/rejected": -1.2419835329055786, "logps/chosen": -1.5351362228393555, "logps/rejected": -2.0084264278411865, "loss": 1.6002, "nll_loss": 1.5548111200332642, "rewards/accuracies": 1.0, "rewards/chosen": -0.15351362526416779, "rewards/margins": 0.04732901602983475, "rewards/rejected": -0.20084263384342194, "step": 353 }, { "epoch": 0.9775629962029686, "grad_norm": 0.20712882280349731, "learning_rate": 4.273424802513145e-06, "log_odds_chosen": 0.6014809012413025, "log_odds_ratio": -0.4396442174911499, "logits/chosen": -0.2209128886461258, "logits/rejected": -1.3539609909057617, "logps/chosen": -1.5357590913772583, "logps/rejected": -2.0328547954559326, "loss": 1.6122, "nll_loss": 1.5682051181793213, "rewards/accuracies": 1.0, "rewards/chosen": -0.15357591211795807, "rewards/margins": 0.04970954358577728, "rewards/rejected": -0.20328545570373535, "step": 354 }, { "epoch": 0.9803244735933725, "grad_norm": 0.18310385942459106, "learning_rate": 4.267766952966369e-06, "log_odds_chosen": 0.7377204298973083, "log_odds_ratio": -0.40729206800460815, "logits/chosen": -0.15849949419498444, "logits/rejected": -1.2910916805267334, "logps/chosen": -1.3705458641052246, "logps/rejected": -1.968411922454834, "loss": 1.4587, "nll_loss": 1.4179855585098267, "rewards/accuracies": 1.0, "rewards/chosen": -0.13705459237098694, "rewards/margins": 0.05978662520647049, "rewards/rejected": -0.19684121012687683, "step": 355 }, { "epoch": 0.9830859509837763, "grad_norm": 0.18806028366088867, "learning_rate": 4.26209093687903e-06, "log_odds_chosen": 0.4816596806049347, "log_odds_ratio": -0.48845529556274414, "logits/chosen": -0.2249145656824112, "logits/rejected": -1.4047096967697144, "logps/chosen": -1.5253814458847046, "logps/rejected": -1.9247775077819824, "loss": 1.6102, "nll_loss": 1.5613718032836914, "rewards/accuracies": 1.0, "rewards/chosen": -0.15253815054893494, "rewards/margins": 0.03993961960077286, "rewards/rejected": -0.1924777626991272, "step": 356 }, { "epoch": 0.9858474283741802, "grad_norm": 0.19746609032154083, "learning_rate": 4.2563968125809734e-06, "log_odds_chosen": 0.6187906265258789, "log_odds_ratio": -0.43648919463157654, "logits/chosen": -0.08207077533006668, "logits/rejected": -1.3270372152328491, "logps/chosen": -1.5937108993530273, "logps/rejected": -2.113786458969116, "loss": 1.6592, "nll_loss": 1.615505576133728, "rewards/accuracies": 1.0, "rewards/chosen": -0.15937110781669617, "rewards/margins": 0.05200754106044769, "rewards/rejected": -0.21137863397598267, "step": 357 }, { "epoch": 0.988608905764584, "grad_norm": 0.19708895683288574, "learning_rate": 4.2506846385881375e-06, "log_odds_chosen": 0.6972445845603943, "log_odds_ratio": -0.4114294648170471, "logits/chosen": -0.37635722756385803, "logits/rejected": -1.2826493978500366, "logps/chosen": -1.4048396348953247, "logps/rejected": -1.9649187326431274, "loss": 1.4948, "nll_loss": 1.4536077976226807, "rewards/accuracies": 1.0, "rewards/chosen": -0.14048396050930023, "rewards/margins": 0.056007932871580124, "rewards/rejected": -0.19649189710617065, "step": 358 }, { "epoch": 0.9913703831549879, "grad_norm": 0.19348447024822235, "learning_rate": 4.2449544736019486e-06, "log_odds_chosen": 0.5735771656036377, "log_odds_ratio": -0.4517831802368164, "logits/chosen": -0.15842154622077942, "logits/rejected": -1.160691738128662, "logps/chosen": -1.4741443395614624, "logps/rejected": -1.9411314725875854, "loss": 1.559, "nll_loss": 1.513805866241455, "rewards/accuracies": 1.0, "rewards/chosen": -0.147414430975914, "rewards/margins": 0.04669870436191559, "rewards/rejected": -0.1941131353378296, "step": 359 }, { "epoch": 0.9941318605453918, "grad_norm": 0.18270692229270935, "learning_rate": 4.239206376508716e-06, "log_odds_chosen": 0.7792839407920837, "log_odds_ratio": -0.38477823138237, "logits/chosen": -0.15978127717971802, "logits/rejected": -1.5017080307006836, "logps/chosen": -1.494106411933899, "logps/rejected": -2.147690534591675, "loss": 1.5826, "nll_loss": 1.5441505908966064, "rewards/accuracies": 1.0, "rewards/chosen": -0.14941063523292542, "rewards/margins": 0.06535841524600983, "rewards/rejected": -0.21476906538009644, "step": 360 }, { "epoch": 0.9968933379357956, "grad_norm": 0.18168719112873077, "learning_rate": 4.233440406379032e-06, "log_odds_chosen": 0.5889392495155334, "log_odds_ratio": -0.4472864866256714, "logits/chosen": -0.28017136454582214, "logits/rejected": -1.22226881980896, "logps/chosen": -1.414659857749939, "logps/rejected": -1.88670814037323, "loss": 1.505, "nll_loss": 1.460268497467041, "rewards/accuracies": 1.0, "rewards/chosen": -0.141465961933136, "rewards/margins": 0.047204844653606415, "rewards/rejected": -0.1886708289384842, "step": 361 }, { "epoch": 0.9996548153261995, "grad_norm": 0.19425641000270844, "learning_rate": 4.227656622467162e-06, "log_odds_chosen": 0.6574209928512573, "log_odds_ratio": -0.4261106252670288, "logits/chosen": -0.2706007957458496, "logits/rejected": -1.1785038709640503, "logps/chosen": -1.516379475593567, "logps/rejected": -2.065363883972168, "loss": 1.5932, "nll_loss": 1.550638198852539, "rewards/accuracies": 1.0, "rewards/chosen": -0.1516379415988922, "rewards/margins": 0.054898452013731, "rewards/rejected": -0.2065364122390747, "step": 362 }, { "epoch": 1.0, "grad_norm": 0.46724027395248413, "learning_rate": 4.221855084210433e-06, "log_odds_chosen": 0.3892417550086975, "log_odds_ratio": -0.5173466205596924, "logits/chosen": -0.4694238007068634, "logits/rejected": -1.6226561069488525, "logps/chosen": -1.5516700744628906, "logps/rejected": -1.8701450824737549, "loss": 1.6319, "nll_loss": 1.5801714658737183, "rewards/accuracies": 1.0, "rewards/chosen": -0.15516701340675354, "rewards/margins": 0.03184749186038971, "rewards/rejected": -0.18701450526714325, "step": 363 }, { "epoch": 1.0027614773904039, "grad_norm": 0.19933797419071198, "learning_rate": 4.2160358512286266e-06, "log_odds_chosen": 0.612895667552948, "log_odds_ratio": -0.4389383792877197, "logits/chosen": -0.19393223524093628, "logits/rejected": -1.2420426607131958, "logps/chosen": -1.498990535736084, "logps/rejected": -2.0020782947540283, "loss": 1.5772, "nll_loss": 1.5332666635513306, "rewards/accuracies": 1.0, "rewards/chosen": -0.14989906549453735, "rewards/margins": 0.05030875653028488, "rewards/rejected": -0.20020782947540283, "step": 364 }, { "epoch": 1.0055229547808078, "grad_norm": 0.18562446534633636, "learning_rate": 4.210198983323366e-06, "log_odds_chosen": 0.6299260258674622, "log_odds_ratio": -0.4328676760196686, "logits/chosen": -0.18327558040618896, "logits/rejected": -1.4133291244506836, "logps/chosen": -1.5473169088363647, "logps/rejected": -2.070706605911255, "loss": 1.6099, "nll_loss": 1.5666420459747314, "rewards/accuracies": 1.0, "rewards/chosen": -0.15473169088363647, "rewards/margins": 0.052338968962430954, "rewards/rejected": -0.20707066357135773, "step": 365 }, { "epoch": 1.0082844321712117, "grad_norm": 0.19783955812454224, "learning_rate": 4.204344540477499e-06, "log_odds_chosen": 0.6581704020500183, "log_odds_ratio": -0.42357465624809265, "logits/chosen": -0.21236993372440338, "logits/rejected": -1.2212865352630615, "logps/chosen": -1.5292942523956299, "logps/rejected": -2.0793232917785645, "loss": 1.6086, "nll_loss": 1.5662219524383545, "rewards/accuracies": 1.0, "rewards/chosen": -0.152929425239563, "rewards/margins": 0.055002905428409576, "rewards/rejected": -0.20793233811855316, "step": 366 }, { "epoch": 1.0110459095616156, "grad_norm": 0.18359708786010742, "learning_rate": 4.1984725828544855e-06, "log_odds_chosen": 0.7003411054611206, "log_odds_ratio": -0.40691572427749634, "logits/chosen": -0.18985068798065186, "logits/rejected": -1.434674620628357, "logps/chosen": -1.4541864395141602, "logps/rejected": -2.0293405055999756, "loss": 1.531, "nll_loss": 1.4903078079223633, "rewards/accuracies": 1.0, "rewards/chosen": -0.1454186588525772, "rewards/margins": 0.05751540884375572, "rewards/rejected": -0.20293407142162323, "step": 367 }, { "epoch": 1.0138073869520194, "grad_norm": 0.19799236953258514, "learning_rate": 4.192583170797775e-06, "log_odds_chosen": 0.524876594543457, "log_odds_ratio": -0.4676755368709564, "logits/chosen": -0.2710600793361664, "logits/rejected": -1.0796301364898682, "logps/chosen": -1.492266058921814, "logps/rejected": -1.9222991466522217, "loss": 1.5776, "nll_loss": 1.5308009386062622, "rewards/accuracies": 1.0, "rewards/chosen": -0.1492266058921814, "rewards/margins": 0.04300331324338913, "rewards/rejected": -0.19222994148731232, "step": 368 }, { "epoch": 1.016568864342423, "grad_norm": 0.18693482875823975, "learning_rate": 4.186676364830187e-06, "log_odds_chosen": 0.5753119587898254, "log_odds_ratio": -0.4493744671344757, "logits/chosen": -0.1405227780342102, "logits/rejected": -1.2737160921096802, "logps/chosen": -1.5617775917053223, "logps/rejected": -2.0400755405426025, "loss": 1.6285, "nll_loss": 1.5835627317428589, "rewards/accuracies": 1.0, "rewards/chosen": -0.15617777407169342, "rewards/margins": 0.047829799354076385, "rewards/rejected": -0.2040075659751892, "step": 369 }, { "epoch": 1.019330341732827, "grad_norm": 0.21877062320709229, "learning_rate": 4.1807522256532925e-06, "log_odds_chosen": 0.6495308876037598, "log_odds_ratio": -0.4310915768146515, "logits/chosen": -0.20181499421596527, "logits/rejected": -1.3658957481384277, "logps/chosen": -1.6181979179382324, "logps/rejected": -2.170933485031128, "loss": 1.6861, "nll_loss": 1.6429781913757324, "rewards/accuracies": 1.0, "rewards/chosen": -0.16181980073451996, "rewards/margins": 0.05527355521917343, "rewards/rejected": -0.217093363404274, "step": 370 }, { "epoch": 1.0220918191232309, "grad_norm": 0.18539805710315704, "learning_rate": 4.174810814146789e-06, "log_odds_chosen": 0.6834878921508789, "log_odds_ratio": -0.4169776141643524, "logits/chosen": -0.08832372725009918, "logits/rejected": -1.0964831113815308, "logps/chosen": -1.5441397428512573, "logps/rejected": -2.117156505584717, "loss": 1.6166, "nll_loss": 1.5748991966247559, "rewards/accuracies": 1.0, "rewards/chosen": -0.15441398322582245, "rewards/margins": 0.057301655411720276, "rewards/rejected": -0.21171565353870392, "step": 371 }, { "epoch": 1.0248532965136348, "grad_norm": 0.1914224624633789, "learning_rate": 4.1688521913678706e-06, "log_odds_chosen": 0.7822255492210388, "log_odds_ratio": -0.38675639033317566, "logits/chosen": -0.16042651236057281, "logits/rejected": -1.2148573398590088, "logps/chosen": -1.4989123344421387, "logps/rejected": -2.1540937423706055, "loss": 1.5648, "nll_loss": 1.5261112451553345, "rewards/accuracies": 1.0, "rewards/chosen": -0.14989124238491058, "rewards/margins": 0.06551814079284668, "rewards/rejected": -0.21540936827659607, "step": 372 }, { "epoch": 1.0276147739040387, "grad_norm": 0.1842961460351944, "learning_rate": 4.162876418550606e-06, "log_odds_chosen": 0.5339972376823425, "log_odds_ratio": -0.46532902121543884, "logits/chosen": -0.2638809084892273, "logits/rejected": -1.291076421737671, "logps/chosen": -1.506029486656189, "logps/rejected": -1.944475769996643, "loss": 1.5811, "nll_loss": 1.5345618724822998, "rewards/accuracies": 1.0, "rewards/chosen": -0.15060293674468994, "rewards/margins": 0.043844640254974365, "rewards/rejected": -0.1944475769996643, "step": 373 }, { "epoch": 1.0303762512944425, "grad_norm": 0.19836845993995667, "learning_rate": 4.156883557105308e-06, "log_odds_chosen": 0.5632570385932922, "log_odds_ratio": -0.46161651611328125, "logits/chosen": -0.23987069725990295, "logits/rejected": -1.331959843635559, "logps/chosen": -1.4848788976669312, "logps/rejected": -1.9496028423309326, "loss": 1.5517, "nll_loss": 1.5055512189865112, "rewards/accuracies": 1.0, "rewards/chosen": -0.1484878808259964, "rewards/margins": 0.04647241160273552, "rewards/rejected": -0.19496029615402222, "step": 374 }, { "epoch": 1.0331377286848464, "grad_norm": 0.20077650249004364, "learning_rate": 4.150873668617899e-06, "log_odds_chosen": 0.5416385531425476, "log_odds_ratio": -0.4598068594932556, "logits/chosen": -0.2892989218235016, "logits/rejected": -1.227949619293213, "logps/chosen": -1.5431418418884277, "logps/rejected": -1.9903440475463867, "loss": 1.6064, "nll_loss": 1.5604456663131714, "rewards/accuracies": 1.0, "rewards/chosen": -0.15431420505046844, "rewards/margins": 0.04472021758556366, "rewards/rejected": -0.1990344226360321, "step": 375 }, { "epoch": 1.0358992060752503, "grad_norm": 0.1922207772731781, "learning_rate": 4.144846814849282e-06, "log_odds_chosen": 0.5037797093391418, "log_odds_ratio": -0.4755171537399292, "logits/chosen": -0.10908980667591095, "logits/rejected": -1.1120904684066772, "logps/chosen": -1.5532734394073486, "logps/rejected": -1.9692023992538452, "loss": 1.6357, "nll_loss": 1.5881571769714355, "rewards/accuracies": 1.0, "rewards/chosen": -0.15532734990119934, "rewards/margins": 0.04159289598464966, "rewards/rejected": -0.1969202607870102, "step": 376 }, { "epoch": 1.0386606834656542, "grad_norm": 0.19230647385120392, "learning_rate": 4.138803057734705e-06, "log_odds_chosen": 0.7433000802993774, "log_odds_ratio": -0.3926478624343872, "logits/chosen": -0.16731296479701996, "logits/rejected": -1.154883623123169, "logps/chosen": -1.4572123289108276, "logps/rejected": -2.0705974102020264, "loss": 1.5333, "nll_loss": 1.4939861297607422, "rewards/accuracies": 1.0, "rewards/chosen": -0.14572124183177948, "rewards/margins": 0.06133852154016495, "rewards/rejected": -0.20705974102020264, "step": 377 }, { "epoch": 1.041422160856058, "grad_norm": 0.1747986078262329, "learning_rate": 4.132742459383122e-06, "log_odds_chosen": 0.8132926821708679, "log_odds_ratio": -0.37535253167152405, "logits/chosen": -0.19614934921264648, "logits/rejected": -1.3223804235458374, "logps/chosen": -1.3767224550247192, "logps/rejected": -2.0322914123535156, "loss": 1.4591, "nll_loss": 1.4215335845947266, "rewards/accuracies": 1.0, "rewards/chosen": -0.13767226040363312, "rewards/margins": 0.06555688381195068, "rewards/rejected": -0.2032291442155838, "step": 378 }, { "epoch": 1.0441836382464618, "grad_norm": 0.18519026041030884, "learning_rate": 4.126665082076559e-06, "log_odds_chosen": 0.5479438900947571, "log_odds_ratio": -0.46051234006881714, "logits/chosen": -0.21864745020866394, "logits/rejected": -0.9870752096176147, "logps/chosen": -1.5094008445739746, "logps/rejected": -1.9554524421691895, "loss": 1.5949, "nll_loss": 1.5488578081130981, "rewards/accuracies": 1.0, "rewards/chosen": -0.15094009041786194, "rewards/margins": 0.044605158269405365, "rewards/rejected": -0.1955452561378479, "step": 379 }, { "epoch": 1.0469451156368657, "grad_norm": 0.1884365975856781, "learning_rate": 4.120570988269472e-06, "log_odds_chosen": 0.6451675891876221, "log_odds_ratio": -0.4251843988895416, "logits/chosen": -0.28254202008247375, "logits/rejected": -1.3985097408294678, "logps/chosen": -1.5068163871765137, "logps/rejected": -2.0416600704193115, "loss": 1.5559, "nll_loss": 1.5133352279663086, "rewards/accuracies": 1.0, "rewards/chosen": -0.15068164467811584, "rewards/margins": 0.053484346717596054, "rewards/rejected": -0.2041660100221634, "step": 380 }, { "epoch": 1.0497065930272695, "grad_norm": 0.20361238718032837, "learning_rate": 4.114460240588101e-06, "log_odds_chosen": 0.8017259836196899, "log_odds_ratio": -0.3787066638469696, "logits/chosen": -0.18577685952186584, "logits/rejected": -1.1138646602630615, "logps/chosen": -1.5362298488616943, "logps/rejected": -2.2140719890594482, "loss": 1.6003, "nll_loss": 1.5624499320983887, "rewards/accuracies": 1.0, "rewards/chosen": -0.15362299978733063, "rewards/margins": 0.06778421998023987, "rewards/rejected": -0.2214072048664093, "step": 381 }, { "epoch": 1.0524680704176734, "grad_norm": 0.18294233083724976, "learning_rate": 4.1083329018298356e-06, "log_odds_chosen": 0.7440272569656372, "log_odds_ratio": -0.39435288310050964, "logits/chosen": -0.18778713047504425, "logits/rejected": -1.1963489055633545, "logps/chosen": -1.4442341327667236, "logps/rejected": -2.0572502613067627, "loss": 1.5068, "nll_loss": 1.4673796892166138, "rewards/accuracies": 1.0, "rewards/chosen": -0.14442341029644012, "rewards/margins": 0.06130162253975868, "rewards/rejected": -0.2057250291109085, "step": 382 }, { "epoch": 1.0552295478080773, "grad_norm": 0.19120609760284424, "learning_rate": 4.102189034962561e-06, "log_odds_chosen": 0.6499487161636353, "log_odds_ratio": -0.4234417676925659, "logits/chosen": -0.12440590560436249, "logits/rejected": -1.0700702667236328, "logps/chosen": -1.5787014961242676, "logps/rejected": -2.125746726989746, "loss": 1.6325, "nll_loss": 1.5901223421096802, "rewards/accuracies": 1.0, "rewards/chosen": -0.15787014365196228, "rewards/margins": 0.05470450222492218, "rewards/rejected": -0.21257463097572327, "step": 383 }, { "epoch": 1.0579910251984812, "grad_norm": 0.20611584186553955, "learning_rate": 4.096028703124014e-06, "log_odds_chosen": 0.6581447720527649, "log_odds_ratio": -0.427496999502182, "logits/chosen": -0.243414044380188, "logits/rejected": -1.0197745561599731, "logps/chosen": -1.5716708898544312, "logps/rejected": -2.1266674995422363, "loss": 1.6172, "nll_loss": 1.5744341611862183, "rewards/accuracies": 1.0, "rewards/chosen": -0.15716709196567535, "rewards/margins": 0.055499687790870667, "rewards/rejected": -0.21266677975654602, "step": 384 }, { "epoch": 1.060752502588885, "grad_norm": 0.19265590608119965, "learning_rate": 4.089851969621138e-06, "log_odds_chosen": 0.7516465783119202, "log_odds_ratio": -0.4052088260650635, "logits/chosen": -0.19772981107234955, "logits/rejected": -1.2006863355636597, "logps/chosen": -1.4199714660644531, "logps/rejected": -2.0437204837799072, "loss": 1.5074, "nll_loss": 1.466860055923462, "rewards/accuracies": 1.0, "rewards/chosen": -0.14199714362621307, "rewards/margins": 0.062374889850616455, "rewards/rejected": -0.20437204837799072, "step": 385 }, { "epoch": 1.063513979979289, "grad_norm": 0.21174004673957825, "learning_rate": 4.083658897929425e-06, "log_odds_chosen": 0.6503540277481079, "log_odds_ratio": -0.4217776358127594, "logits/chosen": -0.2460387498140335, "logits/rejected": -1.1329982280731201, "logps/chosen": -1.5417426824569702, "logps/rejected": -2.0824155807495117, "loss": 1.605, "nll_loss": 1.5628407001495361, "rewards/accuracies": 1.0, "rewards/chosen": -0.15417428314685822, "rewards/margins": 0.05406728759407997, "rewards/rejected": -0.2082415521144867, "step": 386 }, { "epoch": 1.0662754573696929, "grad_norm": 0.19454148411750793, "learning_rate": 4.077449551692268e-06, "log_odds_chosen": 0.5684500336647034, "log_odds_ratio": -0.45333224534988403, "logits/chosen": -0.13818205893039703, "logits/rejected": -1.2238332033157349, "logps/chosen": -1.5496622323989868, "logps/rejected": -2.0205225944519043, "loss": 1.6156, "nll_loss": 1.570224404335022, "rewards/accuracies": 1.0, "rewards/chosen": -0.15496623516082764, "rewards/margins": 0.047086022794246674, "rewards/rejected": -0.2020522505044937, "step": 387 }, { "epoch": 1.0690369347600965, "grad_norm": 0.19282881915569305, "learning_rate": 4.071223994720309e-06, "log_odds_chosen": 0.7608233690261841, "log_odds_ratio": -0.3879122734069824, "logits/chosen": -0.26540690660476685, "logits/rejected": -1.2793084383010864, "logps/chosen": -1.4896953105926514, "logps/rejected": -2.125817060470581, "loss": 1.557, "nll_loss": 1.518183946609497, "rewards/accuracies": 1.0, "rewards/chosen": -0.14896953105926514, "rewards/margins": 0.06361216306686401, "rewards/rejected": -0.21258167922496796, "step": 388 }, { "epoch": 1.0717984121505004, "grad_norm": 0.18759317696094513, "learning_rate": 4.064982290990777e-06, "log_odds_chosen": 0.6020365357398987, "log_odds_ratio": -0.44241786003112793, "logits/chosen": -0.2355424463748932, "logits/rejected": -1.0852099657058716, "logps/chosen": -1.4403340816497803, "logps/rejected": -1.9294434785842896, "loss": 1.5147, "nll_loss": 1.4704298973083496, "rewards/accuracies": 1.0, "rewards/chosen": -0.14403343200683594, "rewards/margins": 0.04891093447804451, "rewards/rejected": -0.19294434785842896, "step": 389 }, { "epoch": 1.0745598895409043, "grad_norm": 0.19443197548389435, "learning_rate": 4.058724504646834e-06, "log_odds_chosen": 0.725110650062561, "log_odds_ratio": -0.4022667109966278, "logits/chosen": -0.25374311208724976, "logits/rejected": -1.207698106765747, "logps/chosen": -1.4946649074554443, "logps/rejected": -2.0988166332244873, "loss": 1.5592, "nll_loss": 1.518977403640747, "rewards/accuracies": 1.0, "rewards/chosen": -0.14946648478507996, "rewards/margins": 0.060415178537368774, "rewards/rejected": -0.20988167822360992, "step": 390 }, { "epoch": 1.0773213669313082, "grad_norm": 0.19158363342285156, "learning_rate": 4.0524506999969185e-06, "log_odds_chosen": 0.5608286261558533, "log_odds_ratio": -0.4582407772541046, "logits/chosen": -0.21252039074897766, "logits/rejected": -1.3294453620910645, "logps/chosen": -1.54707670211792, "logps/rejected": -2.0072052478790283, "loss": 1.606, "nll_loss": 1.560204029083252, "rewards/accuracies": 1.0, "rewards/chosen": -0.15470768511295319, "rewards/margins": 0.04601283743977547, "rewards/rejected": -0.20072051882743835, "step": 391 }, { "epoch": 1.080082844321712, "grad_norm": 0.18796156346797943, "learning_rate": 4.046160941514079e-06, "log_odds_chosen": 0.7382424473762512, "log_odds_ratio": -0.3966708481311798, "logits/chosen": -0.1265019178390503, "logits/rejected": -0.962821900844574, "logps/chosen": -1.4349395036697388, "logps/rejected": -2.038311719894409, "loss": 1.4915, "nll_loss": 1.4517992734909058, "rewards/accuracies": 1.0, "rewards/chosen": -0.14349395036697388, "rewards/margins": 0.06033723056316376, "rewards/rejected": -0.20383116602897644, "step": 392 }, { "epoch": 1.082844321712116, "grad_norm": 0.18811720609664917, "learning_rate": 4.039855293835316e-06, "log_odds_chosen": 0.6325368881225586, "log_odds_ratio": -0.4284062683582306, "logits/chosen": -0.2596244513988495, "logits/rejected": -1.2793852090835571, "logps/chosen": -1.4744428396224976, "logps/rejected": -1.99100923538208, "loss": 1.5311, "nll_loss": 1.4882373809814453, "rewards/accuracies": 1.0, "rewards/chosen": -0.14744427800178528, "rewards/margins": 0.05165664479136467, "rewards/rejected": -0.19910094141960144, "step": 393 }, { "epoch": 1.0856057991025199, "grad_norm": 0.17588721215724945, "learning_rate": 4.033533821760917e-06, "log_odds_chosen": 0.8095629215240479, "log_odds_ratio": -0.3785027265548706, "logits/chosen": -0.2389419972896576, "logits/rejected": -1.3366788625717163, "logps/chosen": -1.4380571842193604, "logps/rejected": -2.1090192794799805, "loss": 1.5041, "nll_loss": 1.4662492275238037, "rewards/accuracies": 1.0, "rewards/chosen": -0.14380571246147156, "rewards/margins": 0.06709621846675873, "rewards/rejected": -0.21090193092823029, "step": 394 }, { "epoch": 1.0883672764929238, "grad_norm": 0.18705010414123535, "learning_rate": 4.027196590253786e-06, "log_odds_chosen": 0.5284130573272705, "log_odds_ratio": -0.469849556684494, "logits/chosen": -0.17493271827697754, "logits/rejected": -1.0048437118530273, "logps/chosen": -1.4886696338653564, "logps/rejected": -1.9172314405441284, "loss": 1.5668, "nll_loss": 1.5198521614074707, "rewards/accuracies": 1.0, "rewards/chosen": -0.1488669514656067, "rewards/margins": 0.04285619407892227, "rewards/rejected": -0.19172316789627075, "step": 395 }, { "epoch": 1.0911287538833276, "grad_norm": 0.1907300055027008, "learning_rate": 4.020843664438783e-06, "log_odds_chosen": 0.6281604766845703, "log_odds_ratio": -0.43829861283302307, "logits/chosen": -0.19279785454273224, "logits/rejected": -1.2376521825790405, "logps/chosen": -1.4428083896636963, "logps/rejected": -1.9556207656860352, "loss": 1.5237, "nll_loss": 1.4798742532730103, "rewards/accuracies": 1.0, "rewards/chosen": -0.1442808359861374, "rewards/margins": 0.05128122866153717, "rewards/rejected": -0.19556206464767456, "step": 396 }, { "epoch": 1.0938902312737315, "grad_norm": 0.18668295443058014, "learning_rate": 4.01447510960205e-06, "log_odds_chosen": 0.7544000148773193, "log_odds_ratio": -0.39146313071250916, "logits/chosen": -0.2915095388889313, "logits/rejected": -1.2768161296844482, "logps/chosen": -1.5208818912506104, "logps/rejected": -2.149923801422119, "loss": 1.5759, "nll_loss": 1.5367605686187744, "rewards/accuracies": 1.0, "rewards/chosen": -0.1520881950855255, "rewards/margins": 0.06290420144796371, "rewards/rejected": -0.21499237418174744, "step": 397 }, { "epoch": 1.0966517086641354, "grad_norm": 0.1973486691713333, "learning_rate": 4.008090991190341e-06, "log_odds_chosen": 0.5326504111289978, "log_odds_ratio": -0.46492505073547363, "logits/chosen": -0.09128017723560333, "logits/rejected": -1.2047038078308105, "logps/chosen": -1.5613563060760498, "logps/rejected": -2.0032505989074707, "loss": 1.6159, "nll_loss": 1.569425344467163, "rewards/accuracies": 1.0, "rewards/chosen": -0.15613561868667603, "rewards/margins": 0.0441894605755806, "rewards/rejected": -0.20032507181167603, "step": 398 }, { "epoch": 1.099413186054539, "grad_norm": 0.17714130878448486, "learning_rate": 4.001691374810352e-06, "log_odds_chosen": 0.815579354763031, "log_odds_ratio": -0.3770541846752167, "logits/chosen": -0.1730899065732956, "logits/rejected": -1.437078833580017, "logps/chosen": -1.513384222984314, "logps/rejected": -2.199054718017578, "loss": 1.5737, "nll_loss": 1.5360060930252075, "rewards/accuracies": 1.0, "rewards/chosen": -0.15133842825889587, "rewards/margins": 0.06856706738471985, "rewards/rejected": -0.21990549564361572, "step": 399 }, { "epoch": 1.102174663444943, "grad_norm": 0.16967174410820007, "learning_rate": 3.99527632622804e-06, "log_odds_chosen": 0.694899320602417, "log_odds_ratio": -0.4119797945022583, "logits/chosen": -0.21181365847587585, "logits/rejected": -1.1213148832321167, "logps/chosen": -1.4379510879516602, "logps/rejected": -2.007990837097168, "loss": 1.5198, "nll_loss": 1.478610634803772, "rewards/accuracies": 1.0, "rewards/chosen": -0.14379511773586273, "rewards/margins": 0.05700398236513138, "rewards/rejected": -0.2007990926504135, "step": 400 }, { "epoch": 1.1049361408353469, "grad_norm": 0.1991468220949173, "learning_rate": 3.988845911367957e-06, "log_odds_chosen": 0.665392279624939, "log_odds_ratio": -0.423291951417923, "logits/chosen": -0.19501212239265442, "logits/rejected": -1.1544811725616455, "logps/chosen": -1.5978668928146362, "logps/rejected": -2.159170389175415, "loss": 1.6409, "nll_loss": 1.598615050315857, "rewards/accuracies": 1.0, "rewards/chosen": -0.15978670120239258, "rewards/margins": 0.05613034963607788, "rewards/rejected": -0.21591705083847046, "step": 401 }, { "epoch": 1.1076976182257507, "grad_norm": 0.17201201617717743, "learning_rate": 3.982400196312565e-06, "log_odds_chosen": 0.7169694900512695, "log_odds_ratio": -0.39898163080215454, "logits/chosen": -0.18280558288097382, "logits/rejected": -1.0723071098327637, "logps/chosen": -1.4075336456298828, "logps/rejected": -1.985740303993225, "loss": 1.4851, "nll_loss": 1.4452052116394043, "rewards/accuracies": 1.0, "rewards/chosen": -0.140753373503685, "rewards/margins": 0.05782065540552139, "rewards/rejected": -0.198574036359787, "step": 402 }, { "epoch": 1.1104590956161546, "grad_norm": 0.17941723763942719, "learning_rate": 3.975939247301558e-06, "log_odds_chosen": 0.6592923998832703, "log_odds_ratio": -0.420391321182251, "logits/chosen": -0.22413836419582367, "logits/rejected": -1.327505111694336, "logps/chosen": -1.541399598121643, "logps/rejected": -2.0909154415130615, "loss": 1.6127, "nll_loss": 1.570648193359375, "rewards/accuracies": 1.0, "rewards/chosen": -0.1541399508714676, "rewards/margins": 0.05495157092809677, "rewards/rejected": -0.20909152925014496, "step": 403 }, { "epoch": 1.1132205730065585, "grad_norm": 0.17807722091674805, "learning_rate": 3.969463130731183e-06, "log_odds_chosen": 0.6306886076927185, "log_odds_ratio": -0.43381085991859436, "logits/chosen": -0.14202114939689636, "logits/rejected": -1.2107291221618652, "logps/chosen": -1.4741935729980469, "logps/rejected": -1.9913864135742188, "loss": 1.5431, "nll_loss": 1.4997388124465942, "rewards/accuracies": 1.0, "rewards/chosen": -0.14741936326026917, "rewards/margins": 0.051719292998313904, "rewards/rejected": -0.19913865625858307, "step": 404 }, { "epoch": 1.1159820503969624, "grad_norm": 0.1876784712076187, "learning_rate": 3.9629719131535595e-06, "log_odds_chosen": 0.5967572331428528, "log_odds_ratio": -0.4506050944328308, "logits/chosen": -0.24889226257801056, "logits/rejected": -1.0915982723236084, "logps/chosen": -1.4914859533309937, "logps/rejected": -1.9845982789993286, "loss": 1.5603, "nll_loss": 1.5152877569198608, "rewards/accuracies": 1.0, "rewards/chosen": -0.1491485983133316, "rewards/margins": 0.049311213195323944, "rewards/rejected": -0.19845981895923615, "step": 405 }, { "epoch": 1.1187435277873663, "grad_norm": 0.16931727528572083, "learning_rate": 3.9564656612759904e-06, "log_odds_chosen": 0.6932112574577332, "log_odds_ratio": -0.4113208055496216, "logits/chosen": -0.1822027862071991, "logits/rejected": -1.3579068183898926, "logps/chosen": -1.496368646621704, "logps/rejected": -2.0719945430755615, "loss": 1.563, "nll_loss": 1.521822214126587, "rewards/accuracies": 1.0, "rewards/chosen": -0.14963684976100922, "rewards/margins": 0.05756259709596634, "rewards/rejected": -0.20719945430755615, "step": 406 }, { "epoch": 1.1215050051777702, "grad_norm": 0.18602602183818817, "learning_rate": 3.94994444196028e-06, "log_odds_chosen": 0.7077181935310364, "log_odds_ratio": -0.4127015471458435, "logits/chosen": -0.15379171073436737, "logits/rejected": -1.1528825759887695, "logps/chosen": -1.5301098823547363, "logps/rejected": -2.1211650371551514, "loss": 1.5721, "nll_loss": 1.5308395624160767, "rewards/accuracies": 1.0, "rewards/chosen": -0.15301097929477692, "rewards/margins": 0.0591055229306221, "rewards/rejected": -0.21211649477481842, "step": 407 }, { "epoch": 1.124266482568174, "grad_norm": 0.17830830812454224, "learning_rate": 3.943408322222049e-06, "log_odds_chosen": 0.7005403637886047, "log_odds_ratio": -0.41374269127845764, "logits/chosen": -0.1289825439453125, "logits/rejected": -0.9252943396568298, "logps/chosen": -1.4544087648391724, "logps/rejected": -2.0350112915039062, "loss": 1.5331, "nll_loss": 1.4917351007461548, "rewards/accuracies": 1.0, "rewards/chosen": -0.14544087648391724, "rewards/margins": 0.05806024372577667, "rewards/rejected": -0.2035011202096939, "step": 408 }, { "epoch": 1.127027959958578, "grad_norm": 0.2026163637638092, "learning_rate": 3.936857369230037e-06, "log_odds_chosen": 0.5416731834411621, "log_odds_ratio": -0.4673037528991699, "logits/chosen": -0.12443944066762924, "logits/rejected": -1.1075230836868286, "logps/chosen": -1.5275638103485107, "logps/rejected": -1.9750971794128418, "loss": 1.5827, "nll_loss": 1.5359312295913696, "rewards/accuracies": 1.0, "rewards/chosen": -0.15275639295578003, "rewards/margins": 0.04475332424044609, "rewards/rejected": -0.19750970602035522, "step": 409 }, { "epoch": 1.1297894373489816, "grad_norm": 0.19184917211532593, "learning_rate": 3.930291650305424e-06, "log_odds_chosen": 0.741117000579834, "log_odds_ratio": -0.39208146929740906, "logits/chosen": -0.14100222289562225, "logits/rejected": -0.9723483920097351, "logps/chosen": -1.4884588718414307, "logps/rejected": -2.1040899753570557, "loss": 1.5337, "nll_loss": 1.494455099105835, "rewards/accuracies": 1.0, "rewards/chosen": -0.14884589612483978, "rewards/margins": 0.061563119292259216, "rewards/rejected": -0.2104090005159378, "step": 410 }, { "epoch": 1.1325509147393855, "grad_norm": 0.18398518860340118, "learning_rate": 3.92371123292113e-06, "log_odds_chosen": 0.665544867515564, "log_odds_ratio": -0.41778889298439026, "logits/chosen": -0.13632512092590332, "logits/rejected": -1.2031248807907104, "logps/chosen": -1.5206879377365112, "logps/rejected": -2.072800636291504, "loss": 1.5771, "nll_loss": 1.5352935791015625, "rewards/accuracies": 1.0, "rewards/chosen": -0.15206880867481232, "rewards/margins": 0.05521126464009285, "rewards/rejected": -0.20728005468845367, "step": 411 }, { "epoch": 1.1353123921297894, "grad_norm": 0.18211683630943298, "learning_rate": 3.917116184701125e-06, "log_odds_chosen": 0.7499443292617798, "log_odds_ratio": -0.39630863070487976, "logits/chosen": -0.17687299847602844, "logits/rejected": -1.3003441095352173, "logps/chosen": -1.4918804168701172, "logps/rejected": -2.1197023391723633, "loss": 1.5502, "nll_loss": 1.5105260610580444, "rewards/accuracies": 1.0, "rewards/chosen": -0.14918804168701172, "rewards/margins": 0.06278219819068909, "rewards/rejected": -0.2119702398777008, "step": 412 }, { "epoch": 1.1380738695201933, "grad_norm": 0.20253390073776245, "learning_rate": 3.910506573419734e-06, "log_odds_chosen": 0.9193601012229919, "log_odds_ratio": -0.34398138523101807, "logits/chosen": -0.14900241792201996, "logits/rejected": -1.0627330541610718, "logps/chosen": -1.4866719245910645, "logps/rejected": -2.259812593460083, "loss": 1.5385, "nll_loss": 1.5040783882141113, "rewards/accuracies": 1.0, "rewards/chosen": -0.14866718649864197, "rewards/margins": 0.07731407880783081, "rewards/rejected": -0.22598126530647278, "step": 413 }, { "epoch": 1.1408353469105972, "grad_norm": 0.18668848276138306, "learning_rate": 3.903882467000938e-06, "log_odds_chosen": 0.8500868678092957, "log_odds_ratio": -0.3602939248085022, "logits/chosen": -0.13838572800159454, "logits/rejected": -1.1434731483459473, "logps/chosen": -1.5281840562820435, "logps/rejected": -2.244469404220581, "loss": 1.5785, "nll_loss": 1.542461633682251, "rewards/accuracies": 1.0, "rewards/chosen": -0.15281839668750763, "rewards/margins": 0.07162855565547943, "rewards/rejected": -0.22444695234298706, "step": 414 }, { "epoch": 1.143596824301001, "grad_norm": 0.18311648070812225, "learning_rate": 3.897243933517679e-06, "log_odds_chosen": 0.7283087968826294, "log_odds_ratio": -0.40107017755508423, "logits/chosen": -0.2569331228733063, "logits/rejected": -1.2123123407363892, "logps/chosen": -1.4449797868728638, "logps/rejected": -2.044313669204712, "loss": 1.5036, "nll_loss": 1.463518738746643, "rewards/accuracies": 1.0, "rewards/chosen": -0.14449797570705414, "rewards/margins": 0.05993340164422989, "rewards/rejected": -0.20443139970302582, "step": 415 }, { "epoch": 1.146358301691405, "grad_norm": 0.18268708884716034, "learning_rate": 3.890591041191162e-06, "log_odds_chosen": 0.8010820150375366, "log_odds_ratio": -0.38091176748275757, "logits/chosen": -0.15429654717445374, "logits/rejected": -1.0080270767211914, "logps/chosen": -1.4056251049041748, "logps/rejected": -2.0608270168304443, "loss": 1.4656, "nll_loss": 1.4274916648864746, "rewards/accuracies": 1.0, "rewards/chosen": -0.1405625194311142, "rewards/margins": 0.06552018225193024, "rewards/rejected": -0.20608270168304443, "step": 416 }, { "epoch": 1.1491197790818088, "grad_norm": 0.198701411485672, "learning_rate": 3.883923858390149e-06, "log_odds_chosen": 0.818614661693573, "log_odds_ratio": -0.36802011728286743, "logits/chosen": -0.189392551779747, "logits/rejected": -1.2750192880630493, "logps/chosen": -1.5129953622817993, "logps/rejected": -2.201220750808716, "loss": 1.5794, "nll_loss": 1.5426466464996338, "rewards/accuracies": 1.0, "rewards/chosen": -0.15129955112934113, "rewards/margins": 0.06882252544164658, "rewards/rejected": -0.2201220840215683, "step": 417 }, { "epoch": 1.1518812564722127, "grad_norm": 0.17471423745155334, "learning_rate": 3.8772424536302565e-06, "log_odds_chosen": 0.6130560040473938, "log_odds_ratio": -0.4377739131450653, "logits/chosen": -0.19274210929870605, "logits/rejected": -1.2090108394622803, "logps/chosen": -1.4925827980041504, "logps/rejected": -1.9969313144683838, "loss": 1.5448, "nll_loss": 1.50102698802948, "rewards/accuracies": 1.0, "rewards/chosen": -0.14925827085971832, "rewards/margins": 0.05043485015630722, "rewards/rejected": -0.19969312846660614, "step": 418 }, { "epoch": 1.1546427338626164, "grad_norm": 0.19965821504592896, "learning_rate": 3.870546895573258e-06, "log_odds_chosen": 0.7444138526916504, "log_odds_ratio": -0.39596325159072876, "logits/chosen": -0.24373914301395416, "logits/rejected": -0.921118438243866, "logps/chosen": -1.4715092182159424, "logps/rejected": -2.086350202560425, "loss": 1.5252, "nll_loss": 1.4855574369430542, "rewards/accuracies": 1.0, "rewards/chosen": -0.147150918841362, "rewards/margins": 0.06148412078619003, "rewards/rejected": -0.20863503217697144, "step": 419 }, { "epoch": 1.1574042112530203, "grad_norm": 0.18526706099510193, "learning_rate": 3.863837253026372e-06, "log_odds_chosen": 0.8123873472213745, "log_odds_ratio": -0.37982794642448425, "logits/chosen": -0.23291738331317902, "logits/rejected": -1.1363976001739502, "logps/chosen": -1.4304343461990356, "logps/rejected": -2.1056971549987793, "loss": 1.4989, "nll_loss": 1.460911512374878, "rewards/accuracies": 1.0, "rewards/chosen": -0.1430434137582779, "rewards/margins": 0.06752629578113556, "rewards/rejected": -0.21056970953941345, "step": 420 }, { "epoch": 1.1601656886434242, "grad_norm": 0.18150660395622253, "learning_rate": 3.857113594941556e-06, "log_odds_chosen": 0.6511182188987732, "log_odds_ratio": -0.4259107708930969, "logits/chosen": -0.19661128520965576, "logits/rejected": -1.3280483484268188, "logps/chosen": -1.527599811553955, "logps/rejected": -2.070669174194336, "loss": 1.5857, "nll_loss": 1.5431334972381592, "rewards/accuracies": 1.0, "rewards/chosen": -0.15275999903678894, "rewards/margins": 0.054306939244270325, "rewards/rejected": -0.20706692337989807, "step": 421 }, { "epoch": 1.162927166033828, "grad_norm": 0.18328295648097992, "learning_rate": 3.8503759904148005e-06, "log_odds_chosen": 0.8013736605644226, "log_odds_ratio": -0.3758004903793335, "logits/chosen": -0.16402505338191986, "logits/rejected": -1.2019381523132324, "logps/chosen": -1.4596202373504639, "logps/rejected": -2.1256799697875977, "loss": 1.5176, "nll_loss": 1.479976773262024, "rewards/accuracies": 1.0, "rewards/chosen": -0.14596202969551086, "rewards/margins": 0.06660597026348114, "rewards/rejected": -0.2125680148601532, "step": 422 }, { "epoch": 1.165688643424232, "grad_norm": 0.18774062395095825, "learning_rate": 3.843624508685416e-06, "log_odds_chosen": 0.6741689443588257, "log_odds_ratio": -0.41605666279792786, "logits/chosen": -0.20964159071445465, "logits/rejected": -1.2953227758407593, "logps/chosen": -1.5373965501785278, "logps/rejected": -2.098203659057617, "loss": 1.6025, "nll_loss": 1.5609112977981567, "rewards/accuracies": 1.0, "rewards/chosen": -0.15373964607715607, "rewards/margins": 0.05608072504401207, "rewards/rejected": -0.20982035994529724, "step": 423 }, { "epoch": 1.1684501208146358, "grad_norm": 0.1771015226840973, "learning_rate": 3.8368592191353246e-06, "log_odds_chosen": 0.7563647031784058, "log_odds_ratio": -0.3966205418109894, "logits/chosen": -0.19912122189998627, "logits/rejected": -1.2726441621780396, "logps/chosen": -1.4663722515106201, "logps/rejected": -2.0917129516601562, "loss": 1.5165, "nll_loss": 1.476802945137024, "rewards/accuracies": 1.0, "rewards/chosen": -0.1466372162103653, "rewards/margins": 0.06253407895565033, "rewards/rejected": -0.20917129516601562, "step": 424 }, { "epoch": 1.1712115982050397, "grad_norm": 0.17853626608848572, "learning_rate": 3.830080191288342e-06, "log_odds_chosen": 0.672751784324646, "log_odds_ratio": -0.41323938965797424, "logits/chosen": -0.1510731726884842, "logits/rejected": -1.1135554313659668, "logps/chosen": -1.4936147928237915, "logps/rejected": -2.0494277477264404, "loss": 1.5598, "nll_loss": 1.518498420715332, "rewards/accuracies": 1.0, "rewards/chosen": -0.1493614763021469, "rewards/margins": 0.05558129400014877, "rewards/rejected": -0.20494277775287628, "step": 425 }, { "epoch": 1.1739730755954436, "grad_norm": 0.19094951450824738, "learning_rate": 3.823287494809469e-06, "log_odds_chosen": 0.7997311949729919, "log_odds_ratio": -0.3781062364578247, "logits/chosen": -0.25045046210289, "logits/rejected": -1.166719913482666, "logps/chosen": -1.472108244895935, "logps/rejected": -2.1340489387512207, "loss": 1.5392, "nll_loss": 1.501369595527649, "rewards/accuracies": 1.0, "rewards/chosen": -0.14721082150936127, "rewards/margins": 0.0661940723657608, "rewards/rejected": -0.21340489387512207, "step": 426 }, { "epoch": 1.1767345529858475, "grad_norm": 0.1843455731868744, "learning_rate": 3.816481199504171e-06, "log_odds_chosen": 0.6943097114562988, "log_odds_ratio": -0.4069713354110718, "logits/chosen": -0.2397070825099945, "logits/rejected": -1.1366225481033325, "logps/chosen": -1.5034641027450562, "logps/rejected": -2.079597234725952, "loss": 1.5747, "nll_loss": 1.5340169668197632, "rewards/accuracies": 1.0, "rewards/chosen": -0.15034641325473785, "rewards/margins": 0.057613298296928406, "rewards/rejected": -0.20795971155166626, "step": 427 }, { "epoch": 1.1794960303762512, "grad_norm": 0.17388851940631866, "learning_rate": 3.8096613753176635e-06, "log_odds_chosen": 0.6214280128479004, "log_odds_ratio": -0.436628133058548, "logits/chosen": -0.1547197550535202, "logits/rejected": -1.0357825756072998, "logps/chosen": -1.3858633041381836, "logps/rejected": -1.8819453716278076, "loss": 1.4633, "nll_loss": 1.4196813106536865, "rewards/accuracies": 1.0, "rewards/chosen": -0.13858634233474731, "rewards/margins": 0.04960820823907852, "rewards/rejected": -0.18819454312324524, "step": 428 }, { "epoch": 1.1822575077666553, "grad_norm": 0.1828448623418808, "learning_rate": 3.8028280923341927e-06, "log_odds_chosen": 0.7184998393058777, "log_odds_ratio": -0.40824440121650696, "logits/chosen": -0.2581023573875427, "logits/rejected": -1.0807780027389526, "logps/chosen": -1.4268940687179565, "logps/rejected": -2.0194613933563232, "loss": 1.4935, "nll_loss": 1.4526987075805664, "rewards/accuracies": 1.0, "rewards/chosen": -0.14268942177295685, "rewards/margins": 0.05925672873854637, "rewards/rejected": -0.20194613933563232, "step": 429 }, { "epoch": 1.185018985157059, "grad_norm": 0.2609389126300812, "learning_rate": 3.7959814207763134e-06, "log_odds_chosen": 0.8415360450744629, "log_odds_ratio": -0.366547167301178, "logits/chosen": -0.2016637921333313, "logits/rejected": -1.2747056484222412, "logps/chosen": -1.539664387702942, "logps/rejected": -2.2510623931884766, "loss": 1.5835, "nll_loss": 1.5468266010284424, "rewards/accuracies": 1.0, "rewards/chosen": -0.15396642684936523, "rewards/margins": 0.07113979011774063, "rewards/rejected": -0.22510623931884766, "step": 430 }, { "epoch": 1.1877804625474628, "grad_norm": 0.18396784365177155, "learning_rate": 3.789121431004168e-06, "log_odds_chosen": 0.6930549144744873, "log_odds_ratio": -0.4097321629524231, "logits/chosen": -0.24285678565502167, "logits/rejected": -1.232874870300293, "logps/chosen": -1.5071865320205688, "logps/rejected": -2.07889986038208, "loss": 1.5539, "nll_loss": 1.5129239559173584, "rewards/accuracies": 1.0, "rewards/chosen": -0.15071865916252136, "rewards/margins": 0.05717131495475769, "rewards/rejected": -0.20788997411727905, "step": 431 }, { "epoch": 1.1905419399378667, "grad_norm": 0.1961180865764618, "learning_rate": 3.782248193514766e-06, "log_odds_chosen": 0.6803969144821167, "log_odds_ratio": -0.41614609956741333, "logits/chosen": -0.1777564138174057, "logits/rejected": -1.077903151512146, "logps/chosen": -1.4115687608718872, "logps/rejected": -1.9654287099838257, "loss": 1.4906, "nll_loss": 1.4489516019821167, "rewards/accuracies": 1.0, "rewards/chosen": -0.14115691184997559, "rewards/margins": 0.05538597330451012, "rewards/rejected": -0.1965428739786148, "step": 432 }, { "epoch": 1.1933034173282706, "grad_norm": 0.18955354392528534, "learning_rate": 3.775361778941257e-06, "log_odds_chosen": 0.7743215560913086, "log_odds_ratio": -0.3833809196949005, "logits/chosen": -0.19383692741394043, "logits/rejected": -1.1276659965515137, "logps/chosen": -1.4961435794830322, "logps/rejected": -2.144127607345581, "loss": 1.5554, "nll_loss": 1.5170167684555054, "rewards/accuracies": 1.0, "rewards/chosen": -0.1496143639087677, "rewards/margins": 0.06479839235544205, "rewards/rejected": -0.21441277861595154, "step": 433 }, { "epoch": 1.1960648947186745, "grad_norm": 0.19281843304634094, "learning_rate": 3.7684622580522057e-06, "log_odds_chosen": 0.45453017950057983, "log_odds_ratio": -0.49613097310066223, "logits/chosen": -0.1650485396385193, "logits/rejected": -1.2051373720169067, "logps/chosen": -1.447697401046753, "logps/rejected": -1.8144056797027588, "loss": 1.5077, "nll_loss": 1.4580379724502563, "rewards/accuracies": 1.0, "rewards/chosen": -0.14476974308490753, "rewards/margins": 0.036670833826065063, "rewards/rejected": -0.1814405769109726, "step": 434 }, { "epoch": 1.1988263721090784, "grad_norm": 0.17417070269584656, "learning_rate": 3.761549701750865e-06, "log_odds_chosen": 0.802248477935791, "log_odds_ratio": -0.38353192806243896, "logits/chosen": -0.18037042021751404, "logits/rejected": -1.3770548105239868, "logps/chosen": -1.4576908349990845, "logps/rejected": -2.126838445663452, "loss": 1.5085, "nll_loss": 1.4701937437057495, "rewards/accuracies": 1.0, "rewards/chosen": -0.14576907455921173, "rewards/margins": 0.06691478192806244, "rewards/rejected": -0.21268387138843536, "step": 435 }, { "epoch": 1.2015878494994823, "grad_norm": 0.17217062413692474, "learning_rate": 3.7546241810744444e-06, "log_odds_chosen": 0.6275637149810791, "log_odds_ratio": -0.4340207278728485, "logits/chosen": -0.18775464594364166, "logits/rejected": -0.9219359159469604, "logps/chosen": -1.4593117237091064, "logps/rejected": -1.9751626253128052, "loss": 1.5097, "nll_loss": 1.4663246870040894, "rewards/accuracies": 1.0, "rewards/chosen": -0.1459311693906784, "rewards/margins": 0.05158507823944092, "rewards/rejected": -0.19751626253128052, "step": 436 }, { "epoch": 1.2043493268898862, "grad_norm": 0.19680339097976685, "learning_rate": 3.747685767193385e-06, "log_odds_chosen": 0.6149947047233582, "log_odds_ratio": -0.4397943317890167, "logits/chosen": -0.24492347240447998, "logits/rejected": -1.0724200010299683, "logps/chosen": -1.5251544713974, "logps/rejected": -2.0360567569732666, "loss": 1.5969, "nll_loss": 1.5528908967971802, "rewards/accuracies": 1.0, "rewards/chosen": -0.1525154560804367, "rewards/margins": 0.05109023675322533, "rewards/rejected": -0.20360569655895233, "step": 437 }, { "epoch": 1.20711080428029, "grad_norm": 0.2228655368089676, "learning_rate": 3.740734531410626e-06, "log_odds_chosen": 0.7606922388076782, "log_odds_ratio": -0.39337587356567383, "logits/chosen": -0.2465159147977829, "logits/rejected": -1.2846262454986572, "logps/chosen": -1.448561191558838, "logps/rejected": -2.069669008255005, "loss": 1.5204, "nll_loss": 1.4810588359832764, "rewards/accuracies": 1.0, "rewards/chosen": -0.14485612511634827, "rewards/margins": 0.062110789120197296, "rewards/rejected": -0.20696690678596497, "step": 438 }, { "epoch": 1.2098722816706937, "grad_norm": 0.18277090787887573, "learning_rate": 3.7337705451608676e-06, "log_odds_chosen": 0.9290235638618469, "log_odds_ratio": -0.3463304042816162, "logits/chosen": -0.3210224509239197, "logits/rejected": -1.3052295446395874, "logps/chosen": -1.3797541856765747, "logps/rejected": -2.1371095180511475, "loss": 1.4436, "nll_loss": 1.4089810848236084, "rewards/accuracies": 1.0, "rewards/chosen": -0.13797542452812195, "rewards/margins": 0.07573550194501877, "rewards/rejected": -0.2137109339237213, "step": 439 }, { "epoch": 1.2126337590610976, "grad_norm": 0.19237011671066284, "learning_rate": 3.7267938800098454e-06, "log_odds_chosen": 0.9403778910636902, "log_odds_ratio": -0.3412759602069855, "logits/chosen": -0.4038398861885071, "logits/rejected": -1.273897409439087, "logps/chosen": -1.3479474782943726, "logps/rejected": -2.1216483116149902, "loss": 1.4158, "nll_loss": 1.3816239833831787, "rewards/accuracies": 1.0, "rewards/chosen": -0.13479475677013397, "rewards/margins": 0.0773700550198555, "rewards/rejected": -0.21216480433940887, "step": 440 }, { "epoch": 1.2153952364515015, "grad_norm": 0.2002696543931961, "learning_rate": 3.7198046076535865e-06, "log_odds_chosen": 0.7807128429412842, "log_odds_ratio": -0.3827586770057678, "logits/chosen": -0.2198580801486969, "logits/rejected": -1.0754787921905518, "logps/chosen": -1.4956470727920532, "logps/rejected": -2.1461679935455322, "loss": 1.5459, "nll_loss": 1.507658839225769, "rewards/accuracies": 1.0, "rewards/chosen": -0.1495647132396698, "rewards/margins": 0.0650520920753479, "rewards/rejected": -0.2146168202161789, "step": 441 }, { "epoch": 1.2181567138419054, "grad_norm": 0.18718883395195007, "learning_rate": 3.71280279991768e-06, "log_odds_chosen": 0.8355327248573303, "log_odds_ratio": -0.36628708243370056, "logits/chosen": -0.27813613414764404, "logits/rejected": -1.077792763710022, "logps/chosen": -1.4404205083847046, "logps/rejected": -2.134765625, "loss": 1.5121, "nll_loss": 1.4754693508148193, "rewards/accuracies": 1.0, "rewards/chosen": -0.1440420299768448, "rewards/margins": 0.0694345235824585, "rewards/rejected": -0.21347656846046448, "step": 442 }, { "epoch": 1.2209181912323093, "grad_norm": 0.17261095345020294, "learning_rate": 3.705788528756533e-06, "log_odds_chosen": 0.8777885437011719, "log_odds_ratio": -0.3522123098373413, "logits/chosen": -0.30880072712898254, "logits/rejected": -1.328255534172058, "logps/chosen": -1.4321343898773193, "logps/rejected": -2.1607189178466797, "loss": 1.4899, "nll_loss": 1.4546407461166382, "rewards/accuracies": 1.0, "rewards/chosen": -0.1432134509086609, "rewards/margins": 0.07285845279693604, "rewards/rejected": -0.21607188880443573, "step": 443 }, { "epoch": 1.2236796686227132, "grad_norm": 0.2517964541912079, "learning_rate": 3.698761866252635e-06, "log_odds_chosen": 0.6931225061416626, "log_odds_ratio": -0.42155617475509644, "logits/chosen": -0.262115478515625, "logits/rejected": -1.257841944694519, "logps/chosen": -1.5295952558517456, "logps/rejected": -2.1139965057373047, "loss": 1.5875, "nll_loss": 1.5453757047653198, "rewards/accuracies": 1.0, "rewards/chosen": -0.15295952558517456, "rewards/margins": 0.05844012275338173, "rewards/rejected": -0.211399644613266, "step": 444 }, { "epoch": 1.226441146013117, "grad_norm": 0.19526347517967224, "learning_rate": 3.691722884615814e-06, "log_odds_chosen": 0.6913182735443115, "log_odds_ratio": -0.41308411955833435, "logits/chosen": -0.17629948258399963, "logits/rejected": -0.7438918352127075, "logps/chosen": -1.4464210271835327, "logps/rejected": -2.0083394050598145, "loss": 1.5191, "nll_loss": 1.4777559041976929, "rewards/accuracies": 1.0, "rewards/chosen": -0.14464209973812103, "rewards/margins": 0.05619185417890549, "rewards/rejected": -0.20083396136760712, "step": 445 }, { "epoch": 1.229202623403521, "grad_norm": 0.1902356892824173, "learning_rate": 3.684671656182497e-06, "log_odds_chosen": 0.8285303115844727, "log_odds_ratio": -0.36952173709869385, "logits/chosen": -0.2879321277141571, "logits/rejected": -1.2174959182739258, "logps/chosen": -1.4798123836517334, "logps/rejected": -2.1687145233154297, "loss": 1.5442, "nll_loss": 1.507254719734192, "rewards/accuracies": 1.0, "rewards/chosen": -0.14798125624656677, "rewards/margins": 0.06889019906520844, "rewards/rejected": -0.216871440410614, "step": 446 }, { "epoch": 1.2319641007939248, "grad_norm": 0.19624045491218567, "learning_rate": 3.6776082534149664e-06, "log_odds_chosen": 0.7684917449951172, "log_odds_ratio": -0.3862813413143158, "logits/chosen": -0.272920161485672, "logits/rejected": -1.2850197553634644, "logps/chosen": -1.5005940198898315, "logps/rejected": -2.1409354209899902, "loss": 1.5377, "nll_loss": 1.4990581274032593, "rewards/accuracies": 1.0, "rewards/chosen": -0.15005940198898315, "rewards/margins": 0.06403413414955139, "rewards/rejected": -0.21409353613853455, "step": 447 }, { "epoch": 1.2347255781843287, "grad_norm": 0.1784961074590683, "learning_rate": 3.670532748900615e-06, "log_odds_chosen": 0.8203001618385315, "log_odds_ratio": -0.37937578558921814, "logits/chosen": -0.2516147494316101, "logits/rejected": -1.1510305404663086, "logps/chosen": -1.4173564910888672, "logps/rejected": -2.08891224861145, "loss": 1.4755, "nll_loss": 1.4375797510147095, "rewards/accuracies": 1.0, "rewards/chosen": -0.14173565804958344, "rewards/margins": 0.06715556979179382, "rewards/rejected": -0.20889122784137726, "step": 448 }, { "epoch": 1.2374870555747326, "grad_norm": 0.1783873736858368, "learning_rate": 3.663445215351198e-06, "log_odds_chosen": 0.9026142954826355, "log_odds_ratio": -0.35365813970565796, "logits/chosen": -0.3238006830215454, "logits/rejected": -1.0488125085830688, "logps/chosen": -1.4057360887527466, "logps/rejected": -2.1597814559936523, "loss": 1.4666, "nll_loss": 1.4311884641647339, "rewards/accuracies": 1.0, "rewards/chosen": -0.14057360589504242, "rewards/margins": 0.07540452480316162, "rewards/rejected": -0.21597814559936523, "step": 449 }, { "epoch": 1.2402485329651363, "grad_norm": 0.1859315186738968, "learning_rate": 3.656345725602089e-06, "log_odds_chosen": 0.9581824541091919, "log_odds_ratio": -0.3327292799949646, "logits/chosen": -0.22859053313732147, "logits/rejected": -1.3163652420043945, "logps/chosen": -1.5037806034088135, "logps/rejected": -2.316061019897461, "loss": 1.5517, "nll_loss": 1.5183771848678589, "rewards/accuracies": 1.0, "rewards/chosen": -0.1503780633211136, "rewards/margins": 0.08122803270816803, "rewards/rejected": -0.23160609602928162, "step": 450 }, { "epoch": 1.2430100103555402, "grad_norm": 0.19541142880916595, "learning_rate": 3.6492343526115292e-06, "log_odds_chosen": 0.7478545308113098, "log_odds_ratio": -0.3891213536262512, "logits/chosen": -0.1598832607269287, "logits/rejected": -1.259619116783142, "logps/chosen": -1.5255135297775269, "logps/rejected": -2.1506118774414062, "loss": 1.5591, "nll_loss": 1.5201823711395264, "rewards/accuracies": 1.0, "rewards/chosen": -0.15255135297775269, "rewards/margins": 0.06250984221696854, "rewards/rejected": -0.21506118774414062, "step": 451 }, { "epoch": 1.245771487745944, "grad_norm": 0.18374916911125183, "learning_rate": 3.642111169459879e-06, "log_odds_chosen": 0.5907194018363953, "log_odds_ratio": -0.4446646273136139, "logits/chosen": -0.2301802933216095, "logits/rejected": -1.1353992223739624, "logps/chosen": -1.4880855083465576, "logps/rejected": -1.973372459411621, "loss": 1.5312, "nll_loss": 1.4867122173309326, "rewards/accuracies": 1.0, "rewards/chosen": -0.148808553814888, "rewards/margins": 0.04852868244051933, "rewards/rejected": -0.19733723998069763, "step": 452 }, { "epoch": 1.248532965136348, "grad_norm": 0.17803093791007996, "learning_rate": 3.634976249348867e-06, "log_odds_chosen": 0.7315319776535034, "log_odds_ratio": -0.4015371799468994, "logits/chosen": -0.2719256281852722, "logits/rejected": -1.3226613998413086, "logps/chosen": -1.4791927337646484, "logps/rejected": -2.084644317626953, "loss": 1.54, "nll_loss": 1.4997994899749756, "rewards/accuracies": 1.0, "rewards/chosen": -0.14791928231716156, "rewards/margins": 0.06054516136646271, "rewards/rejected": -0.20846444368362427, "step": 453 }, { "epoch": 1.2512944425267518, "grad_norm": 0.2094297856092453, "learning_rate": 3.6278296656008366e-06, "log_odds_chosen": 0.9680303931236267, "log_odds_ratio": -0.3453172743320465, "logits/chosen": -0.2988622188568115, "logits/rejected": -1.1323243379592896, "logps/chosen": -1.350219964981079, "logps/rejected": -2.144009590148926, "loss": 1.4208, "nll_loss": 1.3862643241882324, "rewards/accuracies": 1.0, "rewards/chosen": -0.13502199947834015, "rewards/margins": 0.07937898486852646, "rewards/rejected": -0.21440096199512482, "step": 454 }, { "epoch": 1.2540559199171557, "grad_norm": 0.16929712891578674, "learning_rate": 3.6206714916579925e-06, "log_odds_chosen": 0.8621344566345215, "log_odds_ratio": -0.36102256178855896, "logits/chosen": -0.259186714887619, "logits/rejected": -1.2395236492156982, "logps/chosen": -1.3943541049957275, "logps/rejected": -2.1026394367218018, "loss": 1.4532, "nll_loss": 1.4170664548873901, "rewards/accuracies": 1.0, "rewards/chosen": -0.13943539559841156, "rewards/margins": 0.07082855701446533, "rewards/rejected": -0.2102639526128769, "step": 455 }, { "epoch": 1.2568173973075596, "grad_norm": 0.18482358753681183, "learning_rate": 3.613501801081648e-06, "log_odds_chosen": 0.6896051168441772, "log_odds_ratio": -0.4120646119117737, "logits/chosen": -0.24805930256843567, "logits/rejected": -1.0325236320495605, "logps/chosen": -1.4436662197113037, "logps/rejected": -2.008178472518921, "loss": 1.5044, "nll_loss": 1.463235855102539, "rewards/accuracies": 1.0, "rewards/chosen": -0.14436662197113037, "rewards/margins": 0.056451231241226196, "rewards/rejected": -0.20081785321235657, "step": 456 }, { "epoch": 1.2595788746979635, "grad_norm": 0.19444166123867035, "learning_rate": 3.606320667551466e-06, "log_odds_chosen": 0.815831184387207, "log_odds_ratio": -0.3756023645401001, "logits/chosen": -0.2379026561975479, "logits/rejected": -1.2224661111831665, "logps/chosen": -1.4957644939422607, "logps/rejected": -2.179534673690796, "loss": 1.5552, "nll_loss": 1.5175931453704834, "rewards/accuracies": 1.0, "rewards/chosen": -0.14957645535469055, "rewards/margins": 0.06837702542543411, "rewards/rejected": -0.21795348823070526, "step": 457 }, { "epoch": 1.2623403520883674, "grad_norm": 0.18903037905693054, "learning_rate": 3.599128164864706e-06, "log_odds_chosen": 0.8830035924911499, "log_odds_ratio": -0.35656800866127014, "logits/chosen": -0.3331121802330017, "logits/rejected": -1.2537503242492676, "logps/chosen": -1.4486404657363892, "logps/rejected": -2.1861586570739746, "loss": 1.4942, "nll_loss": 1.458591341972351, "rewards/accuracies": 1.0, "rewards/chosen": -0.1448640525341034, "rewards/margins": 0.07375183701515198, "rewards/rejected": -0.21861587464809418, "step": 458 }, { "epoch": 1.265101829478771, "grad_norm": 0.19099265336990356, "learning_rate": 3.5919243669354585e-06, "log_odds_chosen": 0.653681218624115, "log_odds_ratio": -0.4278097450733185, "logits/chosen": -0.3065020442008972, "logits/rejected": -0.8541854023933411, "logps/chosen": -1.5121026039123535, "logps/rejected": -2.0558552742004395, "loss": 1.5649, "nll_loss": 1.5221350193023682, "rewards/accuracies": 1.0, "rewards/chosen": -0.1512102633714676, "rewards/margins": 0.054375264793634415, "rewards/rejected": -0.2055855393409729, "step": 459 }, { "epoch": 1.2678633068691751, "grad_norm": 0.2050788551568985, "learning_rate": 3.5847093477938955e-06, "log_odds_chosen": 0.719269871711731, "log_odds_ratio": -0.403872549533844, "logits/chosen": -0.3556126356124878, "logits/rejected": -1.1764421463012695, "logps/chosen": -1.5112565755844116, "logps/rejected": -2.108696460723877, "loss": 1.5556, "nll_loss": 1.5151852369308472, "rewards/accuracies": 1.0, "rewards/chosen": -0.15112565457820892, "rewards/margins": 0.059744007885456085, "rewards/rejected": -0.2108696550130844, "step": 460 }, { "epoch": 1.2706247842595788, "grad_norm": 0.18415868282318115, "learning_rate": 3.5774831815855017e-06, "log_odds_chosen": 0.9078155755996704, "log_odds_ratio": -0.34306374192237854, "logits/chosen": -0.34916579723358154, "logits/rejected": -1.1399445533752441, "logps/chosen": -1.4003783464431763, "logps/rejected": -2.1475114822387695, "loss": 1.4682, "nll_loss": 1.4339377880096436, "rewards/accuracies": 1.0, "rewards/chosen": -0.14003783464431763, "rewards/margins": 0.0747133195400238, "rewards/rejected": -0.21475116908550262, "step": 461 }, { "epoch": 1.2733862616499827, "grad_norm": 0.1681346744298935, "learning_rate": 3.5702459425703146e-06, "log_odds_chosen": 0.9646233320236206, "log_odds_ratio": -0.3330087959766388, "logits/chosen": -0.3065449595451355, "logits/rejected": -1.3354847431182861, "logps/chosen": -1.4999949932098389, "logps/rejected": -2.319182872772217, "loss": 1.5538, "nll_loss": 1.5204527378082275, "rewards/accuracies": 1.0, "rewards/chosen": -0.14999951422214508, "rewards/margins": 0.08191878348588943, "rewards/rejected": -0.23191829025745392, "step": 462 }, { "epoch": 1.2761477390403866, "grad_norm": 0.18663005530834198, "learning_rate": 3.562997705122162e-06, "log_odds_chosen": 0.8870431184768677, "log_odds_ratio": -0.34840965270996094, "logits/chosen": -0.2869050204753876, "logits/rejected": -1.1682400703430176, "logps/chosen": -1.4178146123886108, "logps/rejected": -2.1509153842926025, "loss": 1.4721, "nll_loss": 1.4372905492782593, "rewards/accuracies": 1.0, "rewards/chosen": -0.14178146421909332, "rewards/margins": 0.07331006228923798, "rewards/rejected": -0.2150915116071701, "step": 463 }, { "epoch": 1.2789092164307905, "grad_norm": 0.17033839225769043, "learning_rate": 3.5557385437279e-06, "log_odds_chosen": 0.923761785030365, "log_odds_ratio": -0.3432496190071106, "logits/chosen": -0.2502524256706238, "logits/rejected": -0.997616708278656, "logps/chosen": -1.4954802989959717, "logps/rejected": -2.2727675437927246, "loss": 1.5311, "nll_loss": 1.4967255592346191, "rewards/accuracies": 1.0, "rewards/chosen": -0.1495480239391327, "rewards/margins": 0.07772872596979141, "rewards/rejected": -0.2272767573595047, "step": 464 }, { "epoch": 1.2816706938211944, "grad_norm": 0.195389986038208, "learning_rate": 3.5484685329866424e-06, "log_odds_chosen": 0.6268041133880615, "log_odds_ratio": -0.4326058626174927, "logits/chosen": -0.2863166928291321, "logits/rejected": -1.0226547718048096, "logps/chosen": -1.5096933841705322, "logps/rejected": -2.028193950653076, "loss": 1.5479, "nll_loss": 1.5046144723892212, "rewards/accuracies": 1.0, "rewards/chosen": -0.15096935629844666, "rewards/margins": 0.05185003951191902, "rewards/rejected": -0.20281939208507538, "step": 465 }, { "epoch": 1.2844321712115983, "grad_norm": 0.17330817878246307, "learning_rate": 3.541187747608998e-06, "log_odds_chosen": 1.0745373964309692, "log_odds_ratio": -0.31021982431411743, "logits/chosen": -0.37704816460609436, "logits/rejected": -1.22407865524292, "logps/chosen": -1.5697916746139526, "logps/rejected": -2.4969704151153564, "loss": 1.5937, "nll_loss": 1.562636375427246, "rewards/accuracies": 1.0, "rewards/chosen": -0.15697917342185974, "rewards/margins": 0.09271789342164993, "rewards/rejected": -0.24969705939292908, "step": 466 }, { "epoch": 1.2871936486020021, "grad_norm": 0.20620717108249664, "learning_rate": 3.533896262416302e-06, "log_odds_chosen": 0.8361949920654297, "log_odds_ratio": -0.36747199296951294, "logits/chosen": -0.31524112820625305, "logits/rejected": -1.093992829322815, "logps/chosen": -1.4830838441848755, "logps/rejected": -2.1828489303588867, "loss": 1.5323, "nll_loss": 1.4955836534500122, "rewards/accuracies": 1.0, "rewards/chosen": -0.1483083963394165, "rewards/margins": 0.06997650861740112, "rewards/rejected": -0.21828490495681763, "step": 467 }, { "epoch": 1.2899551259924058, "grad_norm": 0.20733030140399933, "learning_rate": 3.5265941523398455e-06, "log_odds_chosen": 0.9227453470230103, "log_odds_ratio": -0.3383994400501251, "logits/chosen": -0.3382907211780548, "logits/rejected": -1.0753273963928223, "logps/chosen": -1.5061396360397339, "logps/rejected": -2.2841649055480957, "loss": 1.5394, "nll_loss": 1.5055965185165405, "rewards/accuracies": 1.0, "rewards/chosen": -0.1506139636039734, "rewards/margins": 0.07780253887176514, "rewards/rejected": -0.22841650247573853, "step": 468 }, { "epoch": 1.29271660338281, "grad_norm": 0.1836915910243988, "learning_rate": 3.519281492420108e-06, "log_odds_chosen": 0.5979352593421936, "log_odds_ratio": -0.446799635887146, "logits/chosen": -0.29712000489234924, "logits/rejected": -1.041902780532837, "logps/chosen": -1.5359019041061401, "logps/rejected": -2.0312960147857666, "loss": 1.5753, "nll_loss": 1.5306230783462524, "rewards/accuracies": 1.0, "rewards/chosen": -0.15359018743038177, "rewards/margins": 0.04953942447900772, "rewards/rejected": -0.2031296193599701, "step": 469 }, { "epoch": 1.2954780807732136, "grad_norm": 0.19433459639549255, "learning_rate": 3.5119583578059845e-06, "log_odds_chosen": 0.7800890803337097, "log_odds_ratio": -0.3985646963119507, "logits/chosen": -0.2822642922401428, "logits/rejected": -1.1507655382156372, "logps/chosen": -1.6121432781219482, "logps/rejected": -2.281339406967163, "loss": 1.6286, "nll_loss": 1.588757038116455, "rewards/accuracies": 1.0, "rewards/chosen": -0.16121432185173035, "rewards/margins": 0.06691960990428925, "rewards/rejected": -0.2281339317560196, "step": 470 }, { "epoch": 1.2982395581636175, "grad_norm": 0.1745569258928299, "learning_rate": 3.504624823754014e-06, "log_odds_chosen": 0.9689381122589111, "log_odds_ratio": -0.32553234696388245, "logits/chosen": -0.3070724904537201, "logits/rejected": -1.4188494682312012, "logps/chosen": -1.4123187065124512, "logps/rejected": -2.2118630409240723, "loss": 1.4628, "nll_loss": 1.4302698373794556, "rewards/accuracies": 1.0, "rewards/chosen": -0.14123189449310303, "rewards/margins": 0.07995443046092987, "rewards/rejected": -0.2211862951517105, "step": 471 }, { "epoch": 1.3010010355540214, "grad_norm": 0.18898555636405945, "learning_rate": 3.4972809656276047e-06, "log_odds_chosen": 0.8960251212120056, "log_odds_ratio": -0.3498467206954956, "logits/chosen": -0.273061066865921, "logits/rejected": -1.3158522844314575, "logps/chosen": -1.4974440336227417, "logps/rejected": -2.2527313232421875, "loss": 1.5449, "nll_loss": 1.5098881721496582, "rewards/accuracies": 1.0, "rewards/chosen": -0.1497444063425064, "rewards/margins": 0.07552873343229294, "rewards/rejected": -0.22527314722537994, "step": 472 }, { "epoch": 1.3037625129444252, "grad_norm": 0.1694575846195221, "learning_rate": 3.4899268588962613e-06, "log_odds_chosen": 0.9621706604957581, "log_odds_ratio": -0.32910794019699097, "logits/chosen": -0.3364591598510742, "logits/rejected": -1.1894400119781494, "logps/chosen": -1.363714575767517, "logps/rejected": -2.1495935916900635, "loss": 1.4326, "nll_loss": 1.3997074365615845, "rewards/accuracies": 1.0, "rewards/chosen": -0.1363714635372162, "rewards/margins": 0.07858789712190628, "rewards/rejected": -0.21495933830738068, "step": 473 }, { "epoch": 1.3065239903348291, "grad_norm": 0.18671829998493195, "learning_rate": 3.4825625791348093e-06, "log_odds_chosen": 0.9318122863769531, "log_odds_ratio": -0.3445737957954407, "logits/chosen": -0.29330548644065857, "logits/rejected": -1.1094152927398682, "logps/chosen": -1.4132261276245117, "logps/rejected": -2.188481569290161, "loss": 1.4629, "nll_loss": 1.4284807443618774, "rewards/accuracies": 1.0, "rewards/chosen": -0.14132261276245117, "rewards/margins": 0.0775255486369133, "rewards/rejected": -0.21884815394878387, "step": 474 }, { "epoch": 1.309285467725233, "grad_norm": 0.18166309595108032, "learning_rate": 3.4751882020226174e-06, "log_odds_chosen": 0.8953260779380798, "log_odds_ratio": -0.35324275493621826, "logits/chosen": -0.30813199281692505, "logits/rejected": -1.1583383083343506, "logps/chosen": -1.5182853937149048, "logps/rejected": -2.2794241905212402, "loss": 1.5593, "nll_loss": 1.5240164995193481, "rewards/accuracies": 1.0, "rewards/chosen": -0.15182854235172272, "rewards/margins": 0.07611385732889175, "rewards/rejected": -0.22794242203235626, "step": 475 }, { "epoch": 1.312046945115637, "grad_norm": 0.1913100630044937, "learning_rate": 3.467803803342821e-06, "log_odds_chosen": 0.8503288626670837, "log_odds_ratio": -0.36000919342041016, "logits/chosen": -0.2365928590297699, "logits/rejected": -1.175534725189209, "logps/chosen": -1.4916030168533325, "logps/rejected": -2.2044079303741455, "loss": 1.5322, "nll_loss": 1.4961673021316528, "rewards/accuracies": 1.0, "rewards/chosen": -0.14916031062602997, "rewards/margins": 0.07128050923347473, "rewards/rejected": -0.2204408198595047, "step": 476 }, { "epoch": 1.3148084225060408, "grad_norm": 0.1990288347005844, "learning_rate": 3.4604094589815402e-06, "log_odds_chosen": 0.7814106345176697, "log_odds_ratio": -0.3809904456138611, "logits/chosen": -0.33347588777542114, "logits/rejected": -1.1712278127670288, "logps/chosen": -1.4147124290466309, "logps/rejected": -2.0545654296875, "loss": 1.4683, "nll_loss": 1.4301787614822388, "rewards/accuracies": 1.0, "rewards/chosen": -0.1414712369441986, "rewards/margins": 0.06398531049489975, "rewards/rejected": -0.20545653998851776, "step": 477 }, { "epoch": 1.3175698998964447, "grad_norm": 0.18192388117313385, "learning_rate": 3.4530052449271044e-06, "log_odds_chosen": 0.9670149087905884, "log_odds_ratio": -0.33280444145202637, "logits/chosen": -0.372698038816452, "logits/rejected": -1.1448473930358887, "logps/chosen": -1.4152264595031738, "logps/rejected": -2.2173709869384766, "loss": 1.4648, "nll_loss": 1.4315613508224487, "rewards/accuracies": 1.0, "rewards/chosen": -0.14152264595031738, "rewards/margins": 0.08021444082260132, "rewards/rejected": -0.2217371016740799, "step": 478 }, { "epoch": 1.3203313772868484, "grad_norm": 0.19329139590263367, "learning_rate": 3.4455912372692696e-06, "log_odds_chosen": 0.6430727243423462, "log_odds_ratio": -0.430493026971817, "logits/chosen": -0.28841686248779297, "logits/rejected": -1.1759401559829712, "logps/chosen": -1.4796922206878662, "logps/rejected": -2.0091733932495117, "loss": 1.5189, "nll_loss": 1.475854516029358, "rewards/accuracies": 1.0, "rewards/chosen": -0.14796923100948334, "rewards/margins": 0.05294811725616455, "rewards/rejected": -0.2009173333644867, "step": 479 }, { "epoch": 1.3230928546772525, "grad_norm": 0.18555399775505066, "learning_rate": 3.438167512198436e-06, "log_odds_chosen": 0.6809258460998535, "log_odds_ratio": -0.4141601622104645, "logits/chosen": -0.31838759779930115, "logits/rejected": -1.10935640335083, "logps/chosen": -1.522485613822937, "logps/rejected": -2.0886290073394775, "loss": 1.5706, "nll_loss": 1.529231309890747, "rewards/accuracies": 1.0, "rewards/chosen": -0.1522485464811325, "rewards/margins": 0.05661435052752495, "rewards/rejected": -0.20886291563510895, "step": 480 }, { "epoch": 1.3258543320676561, "grad_norm": 0.185867041349411, "learning_rate": 3.4307341460048633e-06, "log_odds_chosen": 0.8023340106010437, "log_odds_ratio": -0.37456732988357544, "logits/chosen": -0.29839807748794556, "logits/rejected": -1.1352858543395996, "logps/chosen": -1.5517854690551758, "logps/rejected": -2.226045846939087, "loss": 1.5881, "nll_loss": 1.5506727695465088, "rewards/accuracies": 1.0, "rewards/chosen": -0.15517854690551758, "rewards/margins": 0.06742605566978455, "rewards/rejected": -0.22260460257530212, "step": 481 }, { "epoch": 1.32861580945806, "grad_norm": 0.19534382224082947, "learning_rate": 3.4232912150778914e-06, "log_odds_chosen": 0.8740422129631042, "log_odds_ratio": -0.35599130392074585, "logits/chosen": -0.3661443293094635, "logits/rejected": -1.2379000186920166, "logps/chosen": -1.4489246606826782, "logps/rejected": -2.17354416847229, "loss": 1.5169, "nll_loss": 1.4813332557678223, "rewards/accuracies": 1.0, "rewards/chosen": -0.14489248394966125, "rewards/margins": 0.07246193289756775, "rewards/rejected": -0.2173544317483902, "step": 482 }, { "epoch": 1.331377286848464, "grad_norm": 0.18097054958343506, "learning_rate": 3.415838795905151e-06, "log_odds_chosen": 1.0503449440002441, "log_odds_ratio": -0.3068162798881531, "logits/chosen": -0.4407689571380615, "logits/rejected": -1.4052069187164307, "logps/chosen": -1.4251679182052612, "logps/rejected": -2.2970073223114014, "loss": 1.4703, "nll_loss": 1.4396311044692993, "rewards/accuracies": 1.0, "rewards/chosen": -0.14251679182052612, "rewards/margins": 0.08718395978212357, "rewards/rejected": -0.2297007441520691, "step": 483 }, { "epoch": 1.3341387642388678, "grad_norm": 0.19162198901176453, "learning_rate": 3.408376965071779e-06, "log_odds_chosen": 0.7587519884109497, "log_odds_ratio": -0.3879585862159729, "logits/chosen": -0.28846538066864014, "logits/rejected": -1.1753207445144653, "logps/chosen": -1.5185155868530273, "logps/rejected": -2.149603843688965, "loss": 1.5523, "nll_loss": 1.5134819746017456, "rewards/accuracies": 1.0, "rewards/chosen": -0.1518515795469284, "rewards/margins": 0.06310880929231644, "rewards/rejected": -0.21496038138866425, "step": 484 }, { "epoch": 1.3369002416292717, "grad_norm": 0.1803843080997467, "learning_rate": 3.400905799259634e-06, "log_odds_chosen": 0.7836498022079468, "log_odds_ratio": -0.3856756389141083, "logits/chosen": -0.3168273866176605, "logits/rejected": -1.1095941066741943, "logps/chosen": -1.4380414485931396, "logps/rejected": -2.0824549198150635, "loss": 1.4834, "nll_loss": 1.444858431816101, "rewards/accuracies": 1.0, "rewards/chosen": -0.143804132938385, "rewards/margins": 0.06444136798381805, "rewards/rejected": -0.20824551582336426, "step": 485 }, { "epoch": 1.3396617190196756, "grad_norm": 0.18349698185920715, "learning_rate": 3.393425375246503e-06, "log_odds_chosen": 0.7499913573265076, "log_odds_ratio": -0.3889506757259369, "logits/chosen": -0.34318801760673523, "logits/rejected": -1.1768746376037598, "logps/chosen": -1.4634371995925903, "logps/rejected": -2.080674648284912, "loss": 1.5059, "nll_loss": 1.4669642448425293, "rewards/accuracies": 1.0, "rewards/chosen": -0.14634370803833008, "rewards/margins": 0.0617237389087677, "rewards/rejected": -0.20806746184825897, "step": 486 }, { "epoch": 1.3424231964100795, "grad_norm": 0.19035322964191437, "learning_rate": 3.3859357699053165e-06, "log_odds_chosen": 0.8554298877716064, "log_odds_ratio": -0.36611807346343994, "logits/chosen": -0.31382206082344055, "logits/rejected": -1.058935523033142, "logps/chosen": -1.3999180793762207, "logps/rejected": -2.1072771549224854, "loss": 1.4434, "nll_loss": 1.4067771434783936, "rewards/accuracies": 1.0, "rewards/chosen": -0.13999181985855103, "rewards/margins": 0.0707358866930008, "rewards/rejected": -0.210727721452713, "step": 487 }, { "epoch": 1.3451846738004831, "grad_norm": 0.1878138780593872, "learning_rate": 3.3784370602033572e-06, "log_odds_chosen": 0.992766797542572, "log_odds_ratio": -0.3239111304283142, "logits/chosen": -0.40365132689476013, "logits/rejected": -1.291216254234314, "logps/chosen": -1.4312440156936646, "logps/rejected": -2.263004779815674, "loss": 1.4795, "nll_loss": 1.4471262693405151, "rewards/accuracies": 1.0, "rewards/chosen": -0.14312440156936646, "rewards/margins": 0.08317607641220093, "rewards/rejected": -0.22630049288272858, "step": 488 }, { "epoch": 1.3479461511908872, "grad_norm": 0.19307951629161835, "learning_rate": 3.3709293232014705e-06, "log_odds_chosen": 0.8466112613677979, "log_odds_ratio": -0.36574095487594604, "logits/chosen": -0.36816614866256714, "logits/rejected": -1.4561880826950073, "logps/chosen": -1.4958292245864868, "logps/rejected": -2.210787534713745, "loss": 1.5377, "nll_loss": 1.501145839691162, "rewards/accuracies": 1.0, "rewards/chosen": -0.14958293735980988, "rewards/margins": 0.07149583101272583, "rewards/rejected": -0.2210787534713745, "step": 489 }, { "epoch": 1.350707628581291, "grad_norm": 0.1835506707429886, "learning_rate": 3.3634126360532694e-06, "log_odds_chosen": 0.9759219288825989, "log_odds_ratio": -0.3324146568775177, "logits/chosen": -0.24411487579345703, "logits/rejected": -1.3185425996780396, "logps/chosen": -1.5735536813735962, "logps/rejected": -2.4141926765441895, "loss": 1.6083, "nll_loss": 1.5750340223312378, "rewards/accuracies": 1.0, "rewards/chosen": -0.15735535323619843, "rewards/margins": 0.08406390994787216, "rewards/rejected": -0.24141928553581238, "step": 490 }, { "epoch": 1.3534691059716948, "grad_norm": 0.20382438600063324, "learning_rate": 3.355887076004345e-06, "log_odds_chosen": 0.8967198133468628, "log_odds_ratio": -0.3447577953338623, "logits/chosen": -0.45378029346466064, "logits/rejected": -1.0346810817718506, "logps/chosen": -1.4815733432769775, "logps/rejected": -2.233039140701294, "loss": 1.5336, "nll_loss": 1.4991440773010254, "rewards/accuracies": 1.0, "rewards/chosen": -0.14815731346607208, "rewards/margins": 0.07514660060405731, "rewards/rejected": -0.2233039289712906, "step": 491 }, { "epoch": 1.3562305833620987, "grad_norm": 0.1867835968732834, "learning_rate": 3.3483527203914694e-06, "log_odds_chosen": 0.9929035902023315, "log_odds_ratio": -0.3237488567829132, "logits/chosen": -0.3180537819862366, "logits/rejected": -1.2090908288955688, "logps/chosen": -1.517293095588684, "logps/rejected": -2.360647678375244, "loss": 1.5624, "nll_loss": 1.5299937725067139, "rewards/accuracies": 1.0, "rewards/chosen": -0.15172931551933289, "rewards/margins": 0.08433545380830765, "rewards/rejected": -0.23606477677822113, "step": 492 }, { "epoch": 1.3589920607525026, "grad_norm": 0.18780100345611572, "learning_rate": 3.340809646641805e-06, "log_odds_chosen": 0.8353948593139648, "log_odds_ratio": -0.36322087049484253, "logits/chosen": -0.23974277079105377, "logits/rejected": -1.23904550075531, "logps/chosen": -1.5875341892242432, "logps/rejected": -2.3006584644317627, "loss": 1.6096, "nll_loss": 1.5732414722442627, "rewards/accuracies": 1.0, "rewards/chosen": -0.1587534248828888, "rewards/margins": 0.07131239771842957, "rewards/rejected": -0.23006582260131836, "step": 493 }, { "epoch": 1.3617535381429065, "grad_norm": 0.18200430274009705, "learning_rate": 3.333257932272105e-06, "log_odds_chosen": 0.9004358053207397, "log_odds_ratio": -0.3532771170139313, "logits/chosen": -0.2815527021884918, "logits/rejected": -1.1550835371017456, "logps/chosen": -1.4390370845794678, "logps/rejected": -2.194286346435547, "loss": 1.4934, "nll_loss": 1.4581207036972046, "rewards/accuracies": 1.0, "rewards/chosen": -0.1439037024974823, "rewards/margins": 0.07552491873502731, "rewards/rejected": -0.21942861378192902, "step": 494 }, { "epoch": 1.3645150155333103, "grad_norm": 0.19926373660564423, "learning_rate": 3.3256976548879183e-06, "log_odds_chosen": 0.8690962195396423, "log_odds_ratio": -0.3561793863773346, "logits/chosen": -0.3800399601459503, "logits/rejected": -1.047258973121643, "logps/chosen": -1.4909493923187256, "logps/rejected": -2.219834327697754, "loss": 1.5367, "nll_loss": 1.5010950565338135, "rewards/accuracies": 1.0, "rewards/chosen": -0.14909493923187256, "rewards/margins": 0.07288849353790283, "rewards/rejected": -0.2219834327697754, "step": 495 }, { "epoch": 1.3672764929237142, "grad_norm": 0.18317997455596924, "learning_rate": 3.3181288921827925e-06, "log_odds_chosen": 0.9415011405944824, "log_odds_ratio": -0.3410661220550537, "logits/chosen": -0.27754640579223633, "logits/rejected": -1.0399134159088135, "logps/chosen": -1.4013381004333496, "logps/rejected": -2.179905414581299, "loss": 1.465, "nll_loss": 1.4309097528457642, "rewards/accuracies": 1.0, "rewards/chosen": -0.1401338279247284, "rewards/margins": 0.07785671204328537, "rewards/rejected": -0.21799054741859436, "step": 496 }, { "epoch": 1.3700379703141181, "grad_norm": 0.1992860585451126, "learning_rate": 3.310551721937475e-06, "log_odds_chosen": 0.9155603647232056, "log_odds_ratio": -0.340352326631546, "logits/chosen": -0.30399274826049805, "logits/rejected": -1.2022323608398438, "logps/chosen": -1.4278628826141357, "logps/rejected": -2.1877853870391846, "loss": 1.4555, "nll_loss": 1.4214258193969727, "rewards/accuracies": 1.0, "rewards/chosen": -0.14278629422187805, "rewards/margins": 0.07599223405122757, "rewards/rejected": -0.21877853572368622, "step": 497 }, { "epoch": 1.372799447704522, "grad_norm": 0.2043248564004898, "learning_rate": 3.3029662220191146e-06, "log_odds_chosen": 0.9598441123962402, "log_odds_ratio": -0.3286186754703522, "logits/chosen": -0.38024866580963135, "logits/rejected": -1.2354223728179932, "logps/chosen": -1.33193838596344, "logps/rejected": -2.1157238483428955, "loss": 1.388, "nll_loss": 1.3551721572875977, "rewards/accuracies": 1.0, "rewards/chosen": -0.13319383561611176, "rewards/margins": 0.07837854325771332, "rewards/rejected": -0.21157239377498627, "step": 498 }, { "epoch": 1.3755609250949257, "grad_norm": 0.17761792242527008, "learning_rate": 3.2953724703804572e-06, "log_odds_chosen": 1.0203739404678345, "log_odds_ratio": -0.3161318898200989, "logits/chosen": -0.31488168239593506, "logits/rejected": -1.2814425230026245, "logps/chosen": -1.4082773923873901, "logps/rejected": -2.2629306316375732, "loss": 1.4532, "nll_loss": 1.4215905666351318, "rewards/accuracies": 1.0, "rewards/chosen": -0.1408277451992035, "rewards/margins": 0.08546529710292816, "rewards/rejected": -0.22629307210445404, "step": 499 }, { "epoch": 1.3783224024853298, "grad_norm": 0.19437268376350403, "learning_rate": 3.2877705450590525e-06, "log_odds_chosen": 0.8638174533843994, "log_odds_ratio": -0.35613882541656494, "logits/chosen": -0.3166804015636444, "logits/rejected": -1.1927976608276367, "logps/chosen": -1.4706673622131348, "logps/rejected": -2.1903886795043945, "loss": 1.5439, "nll_loss": 1.5082558393478394, "rewards/accuracies": 1.0, "rewards/chosen": -0.14706675708293915, "rewards/margins": 0.07197214663028717, "rewards/rejected": -0.21903888881206512, "step": 500 } ], "logging_steps": 1, "max_steps": 1089, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }