{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.575256107171001, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0015760441292356187, "grad_norm": 0.8302866220474243, "learning_rate": 0.0, "log_odds_chosen": 0.46107953786849976, "log_odds_ratio": -0.5080549120903015, "logits/chosen": -1.1763941049575806, "logits/rejected": -0.5066124200820923, "logps/chosen": -2.0480191707611084, "logps/rejected": -2.4605765342712402, "loss": 2.3016, "nll_loss": 2.2507846355438232, "rewards/accuracies": 1.0, "rewards/chosen": -0.20480191707611084, "rewards/margins": 0.041255734860897064, "rewards/rejected": -0.2460576593875885, "step": 1 }, { "epoch": 0.0031520882584712374, "grad_norm": 0.7972496747970581, "learning_rate": 3.1496062992125985e-08, "log_odds_chosen": 0.44702666997909546, "log_odds_ratio": -0.5035712718963623, "logits/chosen": -1.1409798860549927, "logits/rejected": -0.378469854593277, "logps/chosen": -1.9499034881591797, "logps/rejected": -2.3424463272094727, "loss": 2.1951, "nll_loss": 2.1447677612304688, "rewards/accuracies": 1.0, "rewards/chosen": -0.1949903815984726, "rewards/margins": 0.039254240691661835, "rewards/rejected": -0.23424461483955383, "step": 2 }, { "epoch": 0.004728132387706856, "grad_norm": 0.8205430507659912, "learning_rate": 6.299212598425197e-08, "log_odds_chosen": 0.643511176109314, "log_odds_ratio": -0.4351435601711273, "logits/chosen": -1.1954814195632935, "logits/rejected": -0.46929946541786194, "logps/chosen": -2.0658984184265137, "logps/rejected": -2.6481122970581055, "loss": 2.3057, "nll_loss": 2.2622337341308594, "rewards/accuracies": 1.0, "rewards/chosen": -0.20658984780311584, "rewards/margins": 0.05822139233350754, "rewards/rejected": -0.264811247587204, "step": 3 }, { "epoch": 0.006304176516942475, "grad_norm": 0.7553535103797913, "learning_rate": 9.448818897637795e-08, "log_odds_chosen": 0.6655425429344177, "log_odds_ratio": -0.4300715923309326, "logits/chosen": -1.0765116214752197, "logits/rejected": -0.35491788387298584, "logps/chosen": -2.062553882598877, "logps/rejected": -2.668004274368286, "loss": 2.3004, "nll_loss": 2.2574236392974854, "rewards/accuracies": 0.875, "rewards/chosen": -0.20625539124011993, "rewards/margins": 0.06054502725601196, "rewards/rejected": -0.2668004333972931, "step": 4 }, { "epoch": 0.007880220646178092, "grad_norm": 0.8500381708145142, "learning_rate": 1.2598425196850394e-07, "log_odds_chosen": 0.8081256151199341, "log_odds_ratio": -0.3941689729690552, "logits/chosen": -1.2188713550567627, "logits/rejected": -0.5489533543586731, "logps/chosen": -1.9932475090026855, "logps/rejected": -2.7177062034606934, "loss": 2.2247, "nll_loss": 2.1853041648864746, "rewards/accuracies": 1.0, "rewards/chosen": -0.19932474195957184, "rewards/margins": 0.07244586944580078, "rewards/rejected": -0.2717706263065338, "step": 5 }, { "epoch": 0.009456264775413711, "grad_norm": 0.7359945178031921, "learning_rate": 1.5748031496062992e-07, "log_odds_chosen": 0.4542209506034851, "log_odds_ratio": -0.49476009607315063, "logits/chosen": -1.026477336883545, "logits/rejected": -0.3336237370967865, "logps/chosen": -1.9348886013031006, "logps/rejected": -2.3354790210723877, "loss": 2.2114, "nll_loss": 2.16192626953125, "rewards/accuracies": 1.0, "rewards/chosen": -0.19348883628845215, "rewards/margins": 0.04005904495716095, "rewards/rejected": -0.2335478812456131, "step": 6 }, { "epoch": 0.01103230890464933, "grad_norm": 0.7563782334327698, "learning_rate": 1.889763779527559e-07, "log_odds_chosen": 0.6660692095756531, "log_odds_ratio": -0.42836689949035645, "logits/chosen": -1.041650652885437, "logits/rejected": -0.4485802948474884, "logps/chosen": -2.0038270950317383, "logps/rejected": -2.6053221225738525, "loss": 2.2398, "nll_loss": 2.1969573497772217, "rewards/accuracies": 1.0, "rewards/chosen": -0.20038272440433502, "rewards/margins": 0.06014948710799217, "rewards/rejected": -0.2605322301387787, "step": 7 }, { "epoch": 0.01260835303388495, "grad_norm": 1.0274051427841187, "learning_rate": 2.2047244094488187e-07, "log_odds_chosen": 0.9828638434410095, "log_odds_ratio": -0.32935816049575806, "logits/chosen": -1.1843122243881226, "logits/rejected": -0.48524755239486694, "logps/chosen": -2.126309871673584, "logps/rejected": -3.025899648666382, "loss": 2.3618, "nll_loss": 2.32889461517334, "rewards/accuracies": 1.0, "rewards/chosen": -0.2126309871673584, "rewards/margins": 0.08995895087718964, "rewards/rejected": -0.30258995294570923, "step": 8 }, { "epoch": 0.014184397163120567, "grad_norm": 0.8085570931434631, "learning_rate": 2.519685039370079e-07, "log_odds_chosen": 0.5246677398681641, "log_odds_ratio": -0.47295087575912476, "logits/chosen": -1.1274431943893433, "logits/rejected": -0.4057602882385254, "logps/chosen": -2.0213370323181152, "logps/rejected": -2.4909844398498535, "loss": 2.2419, "nll_loss": 2.19463849067688, "rewards/accuracies": 1.0, "rewards/chosen": -0.20213371515274048, "rewards/margins": 0.04696475714445114, "rewards/rejected": -0.24909847974777222, "step": 9 }, { "epoch": 0.015760441292356184, "grad_norm": 0.8120251893997192, "learning_rate": 2.8346456692913386e-07, "log_odds_chosen": 0.4664788842201233, "log_odds_ratio": -0.4924715459346771, "logits/chosen": -1.1194875240325928, "logits/rejected": -0.5692275166511536, "logps/chosen": -2.0412657260894775, "logps/rejected": -2.459512710571289, "loss": 2.2764, "nll_loss": 2.227172613143921, "rewards/accuracies": 1.0, "rewards/chosen": -0.20412659645080566, "rewards/margins": 0.04182467237114906, "rewards/rejected": -0.24595126509666443, "step": 10 }, { "epoch": 0.017336485421591805, "grad_norm": 0.8484521508216858, "learning_rate": 3.1496062992125984e-07, "log_odds_chosen": 0.5531177520751953, "log_odds_ratio": -0.4599895477294922, "logits/chosen": -1.0869638919830322, "logits/rejected": -0.5173229575157166, "logps/chosen": -2.1112473011016846, "logps/rejected": -2.6104073524475098, "loss": 2.345, "nll_loss": 2.299004077911377, "rewards/accuracies": 1.0, "rewards/chosen": -0.2111247330904007, "rewards/margins": 0.04991600662469864, "rewards/rejected": -0.26104071736335754, "step": 11 }, { "epoch": 0.018912529550827423, "grad_norm": 1.0205734968185425, "learning_rate": 3.464566929133858e-07, "log_odds_chosen": 0.6464576125144958, "log_odds_ratio": -0.4250268340110779, "logits/chosen": -1.194590449333191, "logits/rejected": -0.35468789935112, "logps/chosen": -2.164844274520874, "logps/rejected": -2.754187822341919, "loss": 2.4044, "nll_loss": 2.3619258403778076, "rewards/accuracies": 1.0, "rewards/chosen": -0.2164844274520874, "rewards/margins": 0.05893436074256897, "rewards/rejected": -0.27541878819465637, "step": 12 }, { "epoch": 0.02048857368006304, "grad_norm": 0.9676651954650879, "learning_rate": 3.779527559055118e-07, "log_odds_chosen": 0.6292009949684143, "log_odds_ratio": -0.43755629658699036, "logits/chosen": -1.2696969509124756, "logits/rejected": -0.32901716232299805, "logps/chosen": -2.032942295074463, "logps/rejected": -2.598473072052002, "loss": 2.2742, "nll_loss": 2.2304177284240723, "rewards/accuracies": 1.0, "rewards/chosen": -0.20329423248767853, "rewards/margins": 0.05655309930443764, "rewards/rejected": -0.25984734296798706, "step": 13 }, { "epoch": 0.02206461780929866, "grad_norm": 0.8347820043563843, "learning_rate": 4.0944881889763777e-07, "log_odds_chosen": 0.5126060247421265, "log_odds_ratio": -0.4873553514480591, "logits/chosen": -1.1185718774795532, "logits/rejected": -0.6450862884521484, "logps/chosen": -2.0508875846862793, "logps/rejected": -2.513838291168213, "loss": 2.2854, "nll_loss": 2.2367072105407715, "rewards/accuracies": 1.0, "rewards/chosen": -0.2050887644290924, "rewards/margins": 0.04629506170749664, "rewards/rejected": -0.25138384103775024, "step": 14 }, { "epoch": 0.02364066193853428, "grad_norm": 0.9428237080574036, "learning_rate": 4.4094488188976375e-07, "log_odds_chosen": 0.45611634850502014, "log_odds_ratio": -0.49452677369117737, "logits/chosen": -1.2242058515548706, "logits/rejected": -0.40273138880729675, "logps/chosen": -2.0463950634002686, "logps/rejected": -2.456277847290039, "loss": 2.2971, "nll_loss": 2.2476325035095215, "rewards/accuracies": 1.0, "rewards/chosen": -0.2046394795179367, "rewards/margins": 0.040988270193338394, "rewards/rejected": -0.245627760887146, "step": 15 }, { "epoch": 0.0252167060677699, "grad_norm": 0.8700807690620422, "learning_rate": 4.7244094488188973e-07, "log_odds_chosen": 0.48002687096595764, "log_odds_ratio": -0.4858929514884949, "logits/chosen": -1.1735236644744873, "logits/rejected": -0.44880083203315735, "logps/chosen": -2.0610547065734863, "logps/rejected": -2.49220871925354, "loss": 2.3018, "nll_loss": 2.253174304962158, "rewards/accuracies": 1.0, "rewards/chosen": -0.20610548555850983, "rewards/margins": 0.04311539977788925, "rewards/rejected": -0.24922087788581848, "step": 16 }, { "epoch": 0.026792750197005517, "grad_norm": 0.7779992818832397, "learning_rate": 5.039370078740158e-07, "log_odds_chosen": 0.7011566162109375, "log_odds_ratio": -0.4202171266078949, "logits/chosen": -1.0814855098724365, "logits/rejected": -0.5505629777908325, "logps/chosen": -1.9978816509246826, "logps/rejected": -2.631817102432251, "loss": 2.2434, "nll_loss": 2.201402425765991, "rewards/accuracies": 1.0, "rewards/chosen": -0.1997881531715393, "rewards/margins": 0.06339354813098907, "rewards/rejected": -0.2631816864013672, "step": 17 }, { "epoch": 0.028368794326241134, "grad_norm": 0.8743848204612732, "learning_rate": 5.354330708661418e-07, "log_odds_chosen": 0.46841195225715637, "log_odds_ratio": -0.49842947721481323, "logits/chosen": -1.1495157480239868, "logits/rejected": -0.2747257649898529, "logps/chosen": -2.0266854763031006, "logps/rejected": -2.4442427158355713, "loss": 2.2795, "nll_loss": 2.22969651222229, "rewards/accuracies": 0.875, "rewards/chosen": -0.20266854763031006, "rewards/margins": 0.04175572097301483, "rewards/rejected": -0.2444242537021637, "step": 18 }, { "epoch": 0.029944838455476755, "grad_norm": 0.804558277130127, "learning_rate": 5.669291338582677e-07, "log_odds_chosen": 0.7196086645126343, "log_odds_ratio": -0.4083862602710724, "logits/chosen": -1.2688336372375488, "logits/rejected": -0.5452137589454651, "logps/chosen": -1.9217634201049805, "logps/rejected": -2.5629072189331055, "loss": 2.1817, "nll_loss": 2.140861988067627, "rewards/accuracies": 1.0, "rewards/chosen": -0.19217635691165924, "rewards/margins": 0.06411437690258026, "rewards/rejected": -0.2562907338142395, "step": 19 }, { "epoch": 0.03152088258471237, "grad_norm": 0.8739065527915955, "learning_rate": 5.984251968503937e-07, "log_odds_chosen": 0.6575019359588623, "log_odds_ratio": -0.43753376603126526, "logits/chosen": -1.1189440488815308, "logits/rejected": -0.4846518933773041, "logps/chosen": -2.0866808891296387, "logps/rejected": -2.6833090782165527, "loss": 2.3356, "nll_loss": 2.291797637939453, "rewards/accuracies": 0.875, "rewards/chosen": -0.2086680680513382, "rewards/margins": 0.05966280773282051, "rewards/rejected": -0.2683309018611908, "step": 20 }, { "epoch": 0.03309692671394799, "grad_norm": 0.8919215798377991, "learning_rate": 6.299212598425197e-07, "log_odds_chosen": 0.4334430694580078, "log_odds_ratio": -0.505458652973175, "logits/chosen": -1.1694822311401367, "logits/rejected": -0.40514320135116577, "logps/chosen": -2.128809928894043, "logps/rejected": -2.52128529548645, "loss": 2.3712, "nll_loss": 2.3206286430358887, "rewards/accuracies": 1.0, "rewards/chosen": -0.21288101375102997, "rewards/margins": 0.039247527718544006, "rewards/rejected": -0.252128541469574, "step": 21 }, { "epoch": 0.03467297084318361, "grad_norm": 0.7651381492614746, "learning_rate": 6.614173228346457e-07, "log_odds_chosen": 0.47628453373908997, "log_odds_ratio": -0.4947575330734253, "logits/chosen": -1.1822243928909302, "logits/rejected": -0.3983638286590576, "logps/chosen": -2.058316469192505, "logps/rejected": -2.486650228500366, "loss": 2.309, "nll_loss": 2.2594895362854004, "rewards/accuracies": 1.0, "rewards/chosen": -0.2058316469192505, "rewards/margins": 0.042833391577005386, "rewards/rejected": -0.24866501986980438, "step": 22 }, { "epoch": 0.036249014972419225, "grad_norm": 0.8524187803268433, "learning_rate": 6.929133858267716e-07, "log_odds_chosen": 0.495402067899704, "log_odds_ratio": -0.4792757034301758, "logits/chosen": -1.201756238937378, "logits/rejected": -0.469428151845932, "logps/chosen": -2.114231824874878, "logps/rejected": -2.5619707107543945, "loss": 2.3369, "nll_loss": 2.2889456748962402, "rewards/accuracies": 1.0, "rewards/chosen": -0.21142315864562988, "rewards/margins": 0.0447738915681839, "rewards/rejected": -0.25619709491729736, "step": 23 }, { "epoch": 0.037825059101654845, "grad_norm": 0.882310152053833, "learning_rate": 7.244094488188977e-07, "log_odds_chosen": 0.5385924577713013, "log_odds_ratio": -0.4700610339641571, "logits/chosen": -0.9846169948577881, "logits/rejected": -0.40137702226638794, "logps/chosen": -2.0582144260406494, "logps/rejected": -2.543081521987915, "loss": 2.3, "nll_loss": 2.2530057430267334, "rewards/accuracies": 1.0, "rewards/chosen": -0.2058214396238327, "rewards/margins": 0.048486702144145966, "rewards/rejected": -0.25430813431739807, "step": 24 }, { "epoch": 0.039401103230890466, "grad_norm": 0.8820914030075073, "learning_rate": 7.559055118110236e-07, "log_odds_chosen": 0.4686305522918701, "log_odds_ratio": -0.496835321187973, "logits/chosen": -1.1956602334976196, "logits/rejected": -0.4693169891834259, "logps/chosen": -2.0715174674987793, "logps/rejected": -2.4980099201202393, "loss": 2.3124, "nll_loss": 2.262730598449707, "rewards/accuracies": 1.0, "rewards/chosen": -0.20715174078941345, "rewards/margins": 0.04264923930168152, "rewards/rejected": -0.24980099499225616, "step": 25 }, { "epoch": 0.04097714736012608, "grad_norm": 0.8616840243339539, "learning_rate": 7.874015748031496e-07, "log_odds_chosen": 0.5564500093460083, "log_odds_ratio": -0.4641777276992798, "logits/chosen": -1.3434436321258545, "logits/rejected": -0.5958712100982666, "logps/chosen": -2.038738965988159, "logps/rejected": -2.541964292526245, "loss": 2.283, "nll_loss": 2.2365853786468506, "rewards/accuracies": 1.0, "rewards/chosen": -0.2038739174604416, "rewards/margins": 0.050322525203228, "rewards/rejected": -0.254196435213089, "step": 26 }, { "epoch": 0.0425531914893617, "grad_norm": 0.8561832308769226, "learning_rate": 8.188976377952755e-07, "log_odds_chosen": 0.857874870300293, "log_odds_ratio": -0.37291765213012695, "logits/chosen": -1.1830471754074097, "logits/rejected": -0.4488833248615265, "logps/chosen": -2.08211612701416, "logps/rejected": -2.865851640701294, "loss": 2.3253, "nll_loss": 2.2880330085754395, "rewards/accuracies": 1.0, "rewards/chosen": -0.20821160078048706, "rewards/margins": 0.07837357372045517, "rewards/rejected": -0.28658515214920044, "step": 27 }, { "epoch": 0.04412923561859732, "grad_norm": 0.8932236433029175, "learning_rate": 8.503937007874016e-07, "log_odds_chosen": 0.48930874466896057, "log_odds_ratio": -0.488573282957077, "logits/chosen": -1.2496106624603271, "logits/rejected": -0.4534827470779419, "logps/chosen": -2.0396242141723633, "logps/rejected": -2.4796881675720215, "loss": 2.2979, "nll_loss": 2.249030590057373, "rewards/accuracies": 0.875, "rewards/chosen": -0.20396243035793304, "rewards/margins": 0.044006384909152985, "rewards/rejected": -0.24796880781650543, "step": 28 }, { "epoch": 0.045705279747832936, "grad_norm": 0.7671561241149902, "learning_rate": 8.818897637795275e-07, "log_odds_chosen": 0.45720863342285156, "log_odds_ratio": -0.4968447685241699, "logits/chosen": -0.9729629755020142, "logits/rejected": -0.4253592789173126, "logps/chosen": -2.1160941123962402, "logps/rejected": -2.5309629440307617, "loss": 2.3587, "nll_loss": 2.3090202808380127, "rewards/accuracies": 1.0, "rewards/chosen": -0.21160940825939178, "rewards/margins": 0.04148688167333603, "rewards/rejected": -0.2530962824821472, "step": 29 }, { "epoch": 0.04728132387706856, "grad_norm": 0.9391067624092102, "learning_rate": 9.133858267716535e-07, "log_odds_chosen": 0.6915267109870911, "log_odds_ratio": -0.4149508774280548, "logits/chosen": -1.2198799848556519, "logits/rejected": -0.5714092254638672, "logps/chosen": -2.0917234420776367, "logps/rejected": -2.7195608615875244, "loss": 2.3653, "nll_loss": 2.3238537311553955, "rewards/accuracies": 1.0, "rewards/chosen": -0.2091723531484604, "rewards/margins": 0.06278377026319504, "rewards/rejected": -0.27195608615875244, "step": 30 }, { "epoch": 0.04885736800630418, "grad_norm": 0.9174475073814392, "learning_rate": 9.448818897637795e-07, "log_odds_chosen": 0.7068430185317993, "log_odds_ratio": -0.4144725501537323, "logits/chosen": -1.1542574167251587, "logits/rejected": -0.2714862823486328, "logps/chosen": -2.1507794857025146, "logps/rejected": -2.7977755069732666, "loss": 2.3781, "nll_loss": 2.336669921875, "rewards/accuracies": 1.0, "rewards/chosen": -0.2150779515504837, "rewards/margins": 0.06469962000846863, "rewards/rejected": -0.27977755665779114, "step": 31 }, { "epoch": 0.0504334121355398, "grad_norm": 0.7859240174293518, "learning_rate": 9.763779527559055e-07, "log_odds_chosen": 0.7807643413543701, "log_odds_ratio": -0.3978583812713623, "logits/chosen": -1.1065948009490967, "logits/rejected": -0.49257779121398926, "logps/chosen": -2.019965410232544, "logps/rejected": -2.7294366359710693, "loss": 2.2535, "nll_loss": 2.2136945724487305, "rewards/accuracies": 1.0, "rewards/chosen": -0.20199653506278992, "rewards/margins": 0.07094711065292358, "rewards/rejected": -0.2729436457157135, "step": 32 }, { "epoch": 0.05200945626477541, "grad_norm": 0.7924520969390869, "learning_rate": 1.0078740157480315e-06, "log_odds_chosen": 0.6617920994758606, "log_odds_ratio": -0.42144879698753357, "logits/chosen": -1.0062627792358398, "logits/rejected": -0.4094982147216797, "logps/chosen": -2.043147563934326, "logps/rejected": -2.640801191329956, "loss": 2.2707, "nll_loss": 2.2286031246185303, "rewards/accuracies": 1.0, "rewards/chosen": -0.20431476831436157, "rewards/margins": 0.059765346348285675, "rewards/rejected": -0.26408010721206665, "step": 33 }, { "epoch": 0.05358550039401103, "grad_norm": 0.8600611090660095, "learning_rate": 1.0393700787401573e-06, "log_odds_chosen": 0.3994464874267578, "log_odds_ratio": -0.5219913721084595, "logits/chosen": -1.075833797454834, "logits/rejected": -0.47573941946029663, "logps/chosen": -1.9926190376281738, "logps/rejected": -2.3437416553497314, "loss": 2.2453, "nll_loss": 2.1931371688842773, "rewards/accuracies": 1.0, "rewards/chosen": -0.19926190376281738, "rewards/margins": 0.035112280398607254, "rewards/rejected": -0.23437418043613434, "step": 34 }, { "epoch": 0.055161544523246654, "grad_norm": 1.0218589305877686, "learning_rate": 1.0708661417322836e-06, "log_odds_chosen": 0.5247014760971069, "log_odds_ratio": -0.4662986695766449, "logits/chosen": -1.2422465085983276, "logits/rejected": -0.2719176113605499, "logps/chosen": -2.167281150817871, "logps/rejected": -2.6426916122436523, "loss": 2.4104, "nll_loss": 2.3638148307800293, "rewards/accuracies": 1.0, "rewards/chosen": -0.2167281061410904, "rewards/margins": 0.047541067004203796, "rewards/rejected": -0.2642691731452942, "step": 35 }, { "epoch": 0.05673758865248227, "grad_norm": 0.8558648824691772, "learning_rate": 1.1023622047244094e-06, "log_odds_chosen": 0.643022358417511, "log_odds_ratio": -0.4252229630947113, "logits/chosen": -1.2594010829925537, "logits/rejected": -0.3225390613079071, "logps/chosen": -1.9943788051605225, "logps/rejected": -2.570138931274414, "loss": 2.2371, "nll_loss": 2.1945650577545166, "rewards/accuracies": 1.0, "rewards/chosen": -0.19943787157535553, "rewards/margins": 0.0575760118663311, "rewards/rejected": -0.25701388716697693, "step": 36 }, { "epoch": 0.05831363278171789, "grad_norm": 0.7476872205734253, "learning_rate": 1.1338582677165354e-06, "log_odds_chosen": 0.4776890277862549, "log_odds_ratio": -0.4936005175113678, "logits/chosen": -1.096699595451355, "logits/rejected": -0.3185568153858185, "logps/chosen": -1.997284173965454, "logps/rejected": -2.4194047451019287, "loss": 2.2546, "nll_loss": 2.205209732055664, "rewards/accuracies": 1.0, "rewards/chosen": -0.19972839951515198, "rewards/margins": 0.04221208393573761, "rewards/rejected": -0.24194049835205078, "step": 37 }, { "epoch": 0.05988967691095351, "grad_norm": 0.8627468347549438, "learning_rate": 1.1653543307086612e-06, "log_odds_chosen": 0.464214563369751, "log_odds_ratio": -0.4933066964149475, "logits/chosen": -1.277733325958252, "logits/rejected": -0.5121052861213684, "logps/chosen": -1.991890788078308, "logps/rejected": -2.401747226715088, "loss": 2.2537, "nll_loss": 2.20432186126709, "rewards/accuracies": 1.0, "rewards/chosen": -0.19918911159038544, "rewards/margins": 0.040985628962516785, "rewards/rejected": -0.24017474055290222, "step": 38 }, { "epoch": 0.061465721040189124, "grad_norm": 0.9292140007019043, "learning_rate": 1.1968503937007875e-06, "log_odds_chosen": 0.30634605884552, "log_odds_ratio": -0.5660471320152283, "logits/chosen": -1.211570382118225, "logits/rejected": -0.37499088048934937, "logps/chosen": -2.146724224090576, "logps/rejected": -2.425945997238159, "loss": 2.4043, "nll_loss": 2.3476977348327637, "rewards/accuracies": 0.75, "rewards/chosen": -0.21467243134975433, "rewards/margins": 0.027922188863158226, "rewards/rejected": -0.2425946146249771, "step": 39 }, { "epoch": 0.06304176516942474, "grad_norm": 0.7580855488777161, "learning_rate": 1.2283464566929133e-06, "log_odds_chosen": 0.4294065237045288, "log_odds_ratio": -0.5072289705276489, "logits/chosen": -1.155901312828064, "logits/rejected": -0.45106494426727295, "logps/chosen": -1.9809616804122925, "logps/rejected": -2.3610317707061768, "loss": 2.2388, "nll_loss": 2.188109874725342, "rewards/accuracies": 1.0, "rewards/chosen": -0.1980961561203003, "rewards/margins": 0.03800702840089798, "rewards/rejected": -0.23610319197177887, "step": 40 }, { "epoch": 0.06461780929866036, "grad_norm": 0.7090384364128113, "learning_rate": 1.2598425196850393e-06, "log_odds_chosen": 0.32460513710975647, "log_odds_ratio": -0.5508446097373962, "logits/chosen": -1.0451905727386475, "logits/rejected": -0.36920034885406494, "logps/chosen": -2.0002119541168213, "logps/rejected": -2.289102792739868, "loss": 2.2423, "nll_loss": 2.187190294265747, "rewards/accuracies": 0.875, "rewards/chosen": -0.2000211924314499, "rewards/margins": 0.02888909913599491, "rewards/rejected": -0.22891029715538025, "step": 41 }, { "epoch": 0.06619385342789598, "grad_norm": 0.7185447812080383, "learning_rate": 1.2913385826771652e-06, "log_odds_chosen": 0.38940221071243286, "log_odds_ratio": -0.5234697461128235, "logits/chosen": -1.081929326057434, "logits/rejected": -0.42588090896606445, "logps/chosen": -2.010281562805176, "logps/rejected": -2.356483221054077, "loss": 2.2614, "nll_loss": 2.209080457687378, "rewards/accuracies": 1.0, "rewards/chosen": -0.20102813839912415, "rewards/margins": 0.03462015092372894, "rewards/rejected": -0.23564831912517548, "step": 42 }, { "epoch": 0.0677698975571316, "grad_norm": 0.784934937953949, "learning_rate": 1.3228346456692914e-06, "log_odds_chosen": 0.23201489448547363, "log_odds_ratio": -0.592602550983429, "logits/chosen": -1.0133355855941772, "logits/rejected": -0.35346853733062744, "logps/chosen": -2.0479795932769775, "logps/rejected": -2.253965377807617, "loss": 2.2972, "nll_loss": 2.237945318222046, "rewards/accuracies": 0.75, "rewards/chosen": -0.20479796826839447, "rewards/margins": 0.02059856988489628, "rewards/rejected": -0.2253965437412262, "step": 43 }, { "epoch": 0.06934594168636722, "grad_norm": 0.8619470000267029, "learning_rate": 1.3543307086614172e-06, "log_odds_chosen": 0.5962916612625122, "log_odds_ratio": -0.44894152879714966, "logits/chosen": -1.2180397510528564, "logits/rejected": -0.26432231068611145, "logps/chosen": -2.0331180095672607, "logps/rejected": -2.5704658031463623, "loss": 2.2728, "nll_loss": 2.2278692722320557, "rewards/accuracies": 1.0, "rewards/chosen": -0.20331181585788727, "rewards/margins": 0.053734779357910156, "rewards/rejected": -0.25704658031463623, "step": 44 }, { "epoch": 0.07092198581560284, "grad_norm": 0.7422481179237366, "learning_rate": 1.3858267716535433e-06, "log_odds_chosen": 0.6413304805755615, "log_odds_ratio": -0.43954819440841675, "logits/chosen": -1.012795329093933, "logits/rejected": -0.33944788575172424, "logps/chosen": -1.9753161668777466, "logps/rejected": -2.550520420074463, "loss": 2.2443, "nll_loss": 2.2003304958343506, "rewards/accuracies": 0.875, "rewards/chosen": -0.19753161072731018, "rewards/margins": 0.057520411908626556, "rewards/rejected": -0.25505203008651733, "step": 45 }, { "epoch": 0.07249802994483845, "grad_norm": 0.9379796981811523, "learning_rate": 1.417322834645669e-06, "log_odds_chosen": 0.6898038983345032, "log_odds_ratio": -0.4229612946510315, "logits/chosen": -1.2984787225723267, "logits/rejected": -0.6316989064216614, "logps/chosen": -2.0245230197906494, "logps/rejected": -2.6471643447875977, "loss": 2.2772, "nll_loss": 2.2349252700805664, "rewards/accuracies": 1.0, "rewards/chosen": -0.20245231688022614, "rewards/margins": 0.06226412206888199, "rewards/rejected": -0.2647164463996887, "step": 46 }, { "epoch": 0.07407407407407407, "grad_norm": 0.7300897240638733, "learning_rate": 1.4488188976377953e-06, "log_odds_chosen": 0.34559765458106995, "log_odds_ratio": -0.5484777688980103, "logits/chosen": -1.1782827377319336, "logits/rejected": -0.4611784517765045, "logps/chosen": -2.0315334796905518, "logps/rejected": -2.342702627182007, "loss": 2.2798, "nll_loss": 2.2249908447265625, "rewards/accuracies": 0.875, "rewards/chosen": -0.2031533569097519, "rewards/margins": 0.031116927042603493, "rewards/rejected": -0.23427024483680725, "step": 47 }, { "epoch": 0.07565011820330969, "grad_norm": 0.9040903449058533, "learning_rate": 1.4803149606299211e-06, "log_odds_chosen": 0.48583200573921204, "log_odds_ratio": -0.4895709156990051, "logits/chosen": -1.0726096630096436, "logits/rejected": -0.4579828083515167, "logps/chosen": -2.1052684783935547, "logps/rejected": -2.546491861343384, "loss": 2.3626, "nll_loss": 2.313596487045288, "rewards/accuracies": 1.0, "rewards/chosen": -0.21052685379981995, "rewards/margins": 0.04412233084440231, "rewards/rejected": -0.25464919209480286, "step": 48 }, { "epoch": 0.07722616233254531, "grad_norm": 0.7999246716499329, "learning_rate": 1.5118110236220472e-06, "log_odds_chosen": 0.586567759513855, "log_odds_ratio": -0.4494791030883789, "logits/chosen": -1.1561152935028076, "logits/rejected": -0.3217093050479889, "logps/chosen": -1.985177993774414, "logps/rejected": -2.5069286823272705, "loss": 2.2018, "nll_loss": 2.1568918228149414, "rewards/accuracies": 1.0, "rewards/chosen": -0.1985177993774414, "rewards/margins": 0.05217508226633072, "rewards/rejected": -0.25069287419319153, "step": 49 }, { "epoch": 0.07880220646178093, "grad_norm": 0.8259245753288269, "learning_rate": 1.543307086614173e-06, "log_odds_chosen": 0.36837247014045715, "log_odds_ratio": -0.5344362258911133, "logits/chosen": -1.125555157661438, "logits/rejected": -0.47867536544799805, "logps/chosen": -2.066678285598755, "logps/rejected": -2.398474931716919, "loss": 2.3174, "nll_loss": 2.26397967338562, "rewards/accuracies": 1.0, "rewards/chosen": -0.20666784048080444, "rewards/margins": 0.0331796295940876, "rewards/rejected": -0.23984746634960175, "step": 50 }, { "epoch": 0.08037825059101655, "grad_norm": 0.781063973903656, "learning_rate": 1.5748031496062992e-06, "log_odds_chosen": 0.6516568660736084, "log_odds_ratio": -0.42411038279533386, "logits/chosen": -1.0272550582885742, "logits/rejected": -0.3421846330165863, "logps/chosen": -1.9430203437805176, "logps/rejected": -2.520514488220215, "loss": 2.1832, "nll_loss": 2.1408329010009766, "rewards/accuracies": 1.0, "rewards/chosen": -0.1943020224571228, "rewards/margins": 0.057749420404434204, "rewards/rejected": -0.252051442861557, "step": 51 }, { "epoch": 0.08195429472025216, "grad_norm": 0.7726197242736816, "learning_rate": 1.6062992125984253e-06, "log_odds_chosen": 0.6141126751899719, "log_odds_ratio": -0.441342830657959, "logits/chosen": -1.0188478231430054, "logits/rejected": -0.29862481355667114, "logps/chosen": -2.097641706466675, "logps/rejected": -2.6549832820892334, "loss": 2.3284, "nll_loss": 2.2842369079589844, "rewards/accuracies": 1.0, "rewards/chosen": -0.20976416766643524, "rewards/margins": 0.055734165012836456, "rewards/rejected": -0.2654983401298523, "step": 52 }, { "epoch": 0.08353033884948778, "grad_norm": 0.8193818926811218, "learning_rate": 1.637795275590551e-06, "log_odds_chosen": 0.3866994082927704, "log_odds_ratio": -0.5209422707557678, "logits/chosen": -1.1183687448501587, "logits/rejected": -0.4406161308288574, "logps/chosen": -2.0367820262908936, "logps/rejected": -2.3801722526550293, "loss": 2.2824, "nll_loss": 2.2303147315979004, "rewards/accuracies": 1.0, "rewards/chosen": -0.2036782205104828, "rewards/margins": 0.03433902934193611, "rewards/rejected": -0.2380172461271286, "step": 53 }, { "epoch": 0.0851063829787234, "grad_norm": 0.7303316593170166, "learning_rate": 1.6692913385826771e-06, "log_odds_chosen": 0.3992087244987488, "log_odds_ratio": -0.5159623622894287, "logits/chosen": -1.1815376281738281, "logits/rejected": -0.397248238325119, "logps/chosen": -1.9723021984100342, "logps/rejected": -2.3256380558013916, "loss": 2.2264, "nll_loss": 2.1747708320617676, "rewards/accuracies": 1.0, "rewards/chosen": -0.19723021984100342, "rewards/margins": 0.035333603620529175, "rewards/rejected": -0.2325638383626938, "step": 54 }, { "epoch": 0.08668242710795902, "grad_norm": 0.8129874467849731, "learning_rate": 1.7007874015748031e-06, "log_odds_chosen": 0.47329652309417725, "log_odds_ratio": -0.4876212477684021, "logits/chosen": -1.096902847290039, "logits/rejected": -0.4683460593223572, "logps/chosen": -2.063098907470703, "logps/rejected": -2.4882240295410156, "loss": 2.3196, "nll_loss": 2.2708041667938232, "rewards/accuracies": 1.0, "rewards/chosen": -0.20630988478660583, "rewards/margins": 0.04251253604888916, "rewards/rejected": -0.248822420835495, "step": 55 }, { "epoch": 0.08825847123719464, "grad_norm": 0.911749541759491, "learning_rate": 1.7322834645669292e-06, "log_odds_chosen": 0.4285861551761627, "log_odds_ratio": -0.514061450958252, "logits/chosen": -1.0361069440841675, "logits/rejected": -0.3017573356628418, "logps/chosen": -2.170567512512207, "logps/rejected": -2.559295654296875, "loss": 2.4113, "nll_loss": 2.359891176223755, "rewards/accuracies": 0.875, "rewards/chosen": -0.2170567363500595, "rewards/margins": 0.03887280821800232, "rewards/rejected": -0.255929559469223, "step": 56 }, { "epoch": 0.08983451536643026, "grad_norm": 0.7290955185890198, "learning_rate": 1.763779527559055e-06, "log_odds_chosen": 0.44968611001968384, "log_odds_ratio": -0.4962458908557892, "logits/chosen": -1.018329381942749, "logits/rejected": -0.4052811563014984, "logps/chosen": -2.0387189388275146, "logps/rejected": -2.4409403800964355, "loss": 2.2741, "nll_loss": 2.2244579792022705, "rewards/accuracies": 1.0, "rewards/chosen": -0.20387189090251923, "rewards/margins": 0.040222153067588806, "rewards/rejected": -0.24409404397010803, "step": 57 }, { "epoch": 0.09141055949566587, "grad_norm": 0.769049346446991, "learning_rate": 1.795275590551181e-06, "log_odds_chosen": 0.4315187931060791, "log_odds_ratio": -0.5120344161987305, "logits/chosen": -1.0596637725830078, "logits/rejected": -0.5503028631210327, "logps/chosen": -1.9930483102798462, "logps/rejected": -2.3822011947631836, "loss": 2.2429, "nll_loss": 2.1917452812194824, "rewards/accuracies": 1.0, "rewards/chosen": -0.19930484890937805, "rewards/margins": 0.038915280252695084, "rewards/rejected": -0.23822011053562164, "step": 58 }, { "epoch": 0.09298660362490149, "grad_norm": 0.8504804968833923, "learning_rate": 1.826771653543307e-06, "log_odds_chosen": 0.7453770637512207, "log_odds_ratio": -0.39689797163009644, "logits/chosen": -0.9750216603279114, "logits/rejected": -0.22442440688610077, "logps/chosen": -2.101952075958252, "logps/rejected": -2.782055139541626, "loss": 2.3218, "nll_loss": 2.2820687294006348, "rewards/accuracies": 1.0, "rewards/chosen": -0.21019521355628967, "rewards/margins": 0.06801028549671173, "rewards/rejected": -0.2782055139541626, "step": 59 }, { "epoch": 0.09456264775413711, "grad_norm": 0.861810028553009, "learning_rate": 1.858267716535433e-06, "log_odds_chosen": 0.5790901184082031, "log_odds_ratio": -0.45646488666534424, "logits/chosen": -1.1360684633255005, "logits/rejected": -0.53347247838974, "logps/chosen": -2.0499930381774902, "logps/rejected": -2.5749099254608154, "loss": 2.2932, "nll_loss": 2.2475974559783936, "rewards/accuracies": 1.0, "rewards/chosen": -0.20499931275844574, "rewards/margins": 0.0524916872382164, "rewards/rejected": -0.25749099254608154, "step": 60 }, { "epoch": 0.09613869188337273, "grad_norm": 0.84038907289505, "learning_rate": 1.889763779527559e-06, "log_odds_chosen": 0.3383483290672302, "log_odds_ratio": -0.5470318794250488, "logits/chosen": -0.9991433620452881, "logits/rejected": -0.4732619822025299, "logps/chosen": -2.1595866680145264, "logps/rejected": -2.4666683673858643, "loss": 2.3946, "nll_loss": 2.3399457931518555, "rewards/accuracies": 0.75, "rewards/chosen": -0.21595866978168488, "rewards/margins": 0.03070816770195961, "rewards/rejected": -0.24666684865951538, "step": 61 }, { "epoch": 0.09771473601260836, "grad_norm": 1.0108712911605835, "learning_rate": 1.9212598425196847e-06, "log_odds_chosen": 0.37255755066871643, "log_odds_ratio": -0.5352697968482971, "logits/chosen": -1.1140410900115967, "logits/rejected": -0.4322108030319214, "logps/chosen": -2.160331964492798, "logps/rejected": -2.4993157386779785, "loss": 2.4019, "nll_loss": 2.3483951091766357, "rewards/accuracies": 0.75, "rewards/chosen": -0.2160331904888153, "rewards/margins": 0.03389836102724075, "rewards/rejected": -0.24993157386779785, "step": 62 }, { "epoch": 0.09929078014184398, "grad_norm": 0.7643742561340332, "learning_rate": 1.952755905511811e-06, "log_odds_chosen": 0.6516906023025513, "log_odds_ratio": -0.42522722482681274, "logits/chosen": -1.1658883094787598, "logits/rejected": -0.45858240127563477, "logps/chosen": -1.878890037536621, "logps/rejected": -2.449535608291626, "loss": 2.138, "nll_loss": 2.0955119132995605, "rewards/accuracies": 1.0, "rewards/chosen": -0.1878889948129654, "rewards/margins": 0.057064566761255264, "rewards/rejected": -0.24495357275009155, "step": 63 }, { "epoch": 0.1008668242710796, "grad_norm": 0.7522913813591003, "learning_rate": 1.9842519685039368e-06, "log_odds_chosen": 0.766040563583374, "log_odds_ratio": -0.397356241941452, "logits/chosen": -1.1226662397384644, "logits/rejected": -0.3395186960697174, "logps/chosen": -1.8949854373931885, "logps/rejected": -2.5772881507873535, "loss": 2.1507, "nll_loss": 2.110943555831909, "rewards/accuracies": 1.0, "rewards/chosen": -0.18949854373931885, "rewards/margins": 0.06823026388883591, "rewards/rejected": -0.25772881507873535, "step": 64 }, { "epoch": 0.1024428684003152, "grad_norm": 0.7364575266838074, "learning_rate": 2.015748031496063e-06, "log_odds_chosen": 0.6714511513710022, "log_odds_ratio": -0.4194810390472412, "logits/chosen": -1.0760542154312134, "logits/rejected": -0.5785982012748718, "logps/chosen": -1.9915119409561157, "logps/rejected": -2.5946145057678223, "loss": 2.2392, "nll_loss": 2.1972577571868896, "rewards/accuracies": 1.0, "rewards/chosen": -0.1991512030363083, "rewards/margins": 0.060310248285532, "rewards/rejected": -0.2594614326953888, "step": 65 }, { "epoch": 0.10401891252955082, "grad_norm": 0.7563380599021912, "learning_rate": 2.047244094488189e-06, "log_odds_chosen": 0.5152915716171265, "log_odds_ratio": -0.4739663004875183, "logits/chosen": -1.0727609395980835, "logits/rejected": -0.3997894525527954, "logps/chosen": -2.100337028503418, "logps/rejected": -2.5641911029815674, "loss": 2.353, "nll_loss": 2.305567741394043, "rewards/accuracies": 1.0, "rewards/chosen": -0.21003369987010956, "rewards/margins": 0.046385399997234344, "rewards/rejected": -0.2564191222190857, "step": 66 }, { "epoch": 0.10559495665878645, "grad_norm": 0.8397193551063538, "learning_rate": 2.0787401574803147e-06, "log_odds_chosen": 0.6061793565750122, "log_odds_ratio": -0.4402039349079132, "logits/chosen": -1.0465192794799805, "logits/rejected": -0.266944944858551, "logps/chosen": -2.039686441421509, "logps/rejected": -2.583190441131592, "loss": 2.2643, "nll_loss": 2.220245122909546, "rewards/accuracies": 1.0, "rewards/chosen": -0.20396864414215088, "rewards/margins": 0.054350391030311584, "rewards/rejected": -0.25831905007362366, "step": 67 }, { "epoch": 0.10717100078802207, "grad_norm": 0.8782281875610352, "learning_rate": 2.1102362204724405e-06, "log_odds_chosen": 0.4924987852573395, "log_odds_ratio": -0.4848763942718506, "logits/chosen": -1.1445033550262451, "logits/rejected": -0.40652817487716675, "logps/chosen": -2.0268895626068115, "logps/rejected": -2.46834397315979, "loss": 2.3111, "nll_loss": 2.2625656127929688, "rewards/accuracies": 1.0, "rewards/chosen": -0.20268897712230682, "rewards/margins": 0.04414542764425278, "rewards/rejected": -0.246834397315979, "step": 68 }, { "epoch": 0.10874704491725769, "grad_norm": 0.8901786208152771, "learning_rate": 2.141732283464567e-06, "log_odds_chosen": 0.28019726276397705, "log_odds_ratio": -0.572468638420105, "logits/chosen": -0.9071764945983887, "logits/rejected": -0.36978206038475037, "logps/chosen": -2.1059303283691406, "logps/rejected": -2.357771396636963, "loss": 2.3697, "nll_loss": 2.3124704360961914, "rewards/accuracies": 0.875, "rewards/chosen": -0.21059304475784302, "rewards/margins": 0.025184106081724167, "rewards/rejected": -0.2357771247625351, "step": 69 }, { "epoch": 0.11032308904649331, "grad_norm": 0.7347759008407593, "learning_rate": 2.173228346456693e-06, "log_odds_chosen": 0.5119097232818604, "log_odds_ratio": -0.47549861669540405, "logits/chosen": -0.9662876129150391, "logits/rejected": -0.3414289951324463, "logps/chosen": -2.018106460571289, "logps/rejected": -2.4745588302612305, "loss": 2.2778, "nll_loss": 2.230213165283203, "rewards/accuracies": 1.0, "rewards/chosen": -0.20181065797805786, "rewards/margins": 0.04564525932073593, "rewards/rejected": -0.247455894947052, "step": 70 }, { "epoch": 0.11189913317572892, "grad_norm": 0.8959264159202576, "learning_rate": 2.204724409448819e-06, "log_odds_chosen": 0.5562199354171753, "log_odds_ratio": -0.46300366520881653, "logits/chosen": -1.2209943532943726, "logits/rejected": -0.5177184343338013, "logps/chosen": -1.9901155233383179, "logps/rejected": -2.489285707473755, "loss": 2.2383, "nll_loss": 2.1919736862182617, "rewards/accuracies": 1.0, "rewards/chosen": -0.19901156425476074, "rewards/margins": 0.04991701990365982, "rewards/rejected": -0.24892857670783997, "step": 71 }, { "epoch": 0.11347517730496454, "grad_norm": 0.8339924216270447, "learning_rate": 2.2362204724409446e-06, "log_odds_chosen": 0.37862321734428406, "log_odds_ratio": -0.5375041961669922, "logits/chosen": -1.1112946271896362, "logits/rejected": -0.3363880217075348, "logps/chosen": -2.0221199989318848, "logps/rejected": -2.3621301651000977, "loss": 2.2684, "nll_loss": 2.2146549224853516, "rewards/accuracies": 0.875, "rewards/chosen": -0.20221197605133057, "rewards/margins": 0.03400101885199547, "rewards/rejected": -0.23621301352977753, "step": 72 }, { "epoch": 0.11505122143420016, "grad_norm": 0.7132512927055359, "learning_rate": 2.267716535433071e-06, "log_odds_chosen": 0.3846838176250458, "log_odds_ratio": -0.5223243236541748, "logits/chosen": -0.9467731714248657, "logits/rejected": -0.4224381446838379, "logps/chosen": -2.0288405418395996, "logps/rejected": -2.371960163116455, "loss": 2.2859, "nll_loss": 2.233703136444092, "rewards/accuracies": 1.0, "rewards/chosen": -0.20288404822349548, "rewards/margins": 0.03431195765733719, "rewards/rejected": -0.23719602823257446, "step": 73 }, { "epoch": 0.11662726556343578, "grad_norm": 0.8109890222549438, "learning_rate": 2.2992125984251967e-06, "log_odds_chosen": 0.49797457456588745, "log_odds_ratio": -0.4787079095840454, "logits/chosen": -1.117811918258667, "logits/rejected": -0.44548889994621277, "logps/chosen": -1.9113142490386963, "logps/rejected": -2.3477611541748047, "loss": 2.1852, "nll_loss": 2.1373038291931152, "rewards/accuracies": 1.0, "rewards/chosen": -0.19113142788410187, "rewards/margins": 0.04364468902349472, "rewards/rejected": -0.23477612435817719, "step": 74 }, { "epoch": 0.1182033096926714, "grad_norm": 0.7836765646934509, "learning_rate": 2.3307086614173225e-06, "log_odds_chosen": 0.5877864956855774, "log_odds_ratio": -0.4508923292160034, "logits/chosen": -1.0647002458572388, "logits/rejected": -0.5113080143928528, "logps/chosen": -1.9603990316390991, "logps/rejected": -2.4868850708007812, "loss": 2.207, "nll_loss": 2.1619343757629395, "rewards/accuracies": 1.0, "rewards/chosen": -0.19603991508483887, "rewards/margins": 0.052648596465587616, "rewards/rejected": -0.2486885040998459, "step": 75 }, { "epoch": 0.11977935382190702, "grad_norm": 0.7919722199440002, "learning_rate": 2.3622047244094483e-06, "log_odds_chosen": 0.6009241342544556, "log_odds_ratio": -0.4470832049846649, "logits/chosen": -0.9855466485023499, "logits/rejected": -0.2876882553100586, "logps/chosen": -2.008293390274048, "logps/rejected": -2.5480880737304688, "loss": 2.2372, "nll_loss": 2.192521333694458, "rewards/accuracies": 1.0, "rewards/chosen": -0.20082935690879822, "rewards/margins": 0.053979478776454926, "rewards/rejected": -0.25480881333351135, "step": 76 }, { "epoch": 0.12135539795114263, "grad_norm": 0.8516606688499451, "learning_rate": 2.393700787401575e-06, "log_odds_chosen": 0.5337265729904175, "log_odds_ratio": -0.47431042790412903, "logits/chosen": -1.0908458232879639, "logits/rejected": -0.4919825494289398, "logps/chosen": -2.0387442111968994, "logps/rejected": -2.518265724182129, "loss": 2.2933, "nll_loss": 2.245901346206665, "rewards/accuracies": 1.0, "rewards/chosen": -0.20387442409992218, "rewards/margins": 0.04795214533805847, "rewards/rejected": -0.25182655453681946, "step": 77 }, { "epoch": 0.12293144208037825, "grad_norm": 0.7652468085289001, "learning_rate": 2.425196850393701e-06, "log_odds_chosen": 0.4612125754356384, "log_odds_ratio": -0.49879589676856995, "logits/chosen": -0.9081002473831177, "logits/rejected": -0.6156851053237915, "logps/chosen": -2.115405559539795, "logps/rejected": -2.532501697540283, "loss": 2.3363, "nll_loss": 2.28646183013916, "rewards/accuracies": 0.875, "rewards/chosen": -0.2115405648946762, "rewards/margins": 0.04170961678028107, "rewards/rejected": -0.2532501816749573, "step": 78 }, { "epoch": 0.12450748620961387, "grad_norm": 0.7574028372764587, "learning_rate": 2.4566929133858266e-06, "log_odds_chosen": 0.5914417505264282, "log_odds_ratio": -0.45016127824783325, "logits/chosen": -0.937514066696167, "logits/rejected": -0.5195568799972534, "logps/chosen": -2.0114662647247314, "logps/rejected": -2.5420799255371094, "loss": 2.2491, "nll_loss": 2.2040576934814453, "rewards/accuracies": 1.0, "rewards/chosen": -0.20114661753177643, "rewards/margins": 0.05306137725710869, "rewards/rejected": -0.2542080283164978, "step": 79 }, { "epoch": 0.12608353033884948, "grad_norm": 0.7048937678337097, "learning_rate": 2.488188976377953e-06, "log_odds_chosen": 0.5971450209617615, "log_odds_ratio": -0.443267822265625, "logits/chosen": -1.0094927549362183, "logits/rejected": -0.4586561918258667, "logps/chosen": -1.9250359535217285, "logps/rejected": -2.4535298347473145, "loss": 2.1666, "nll_loss": 2.122244358062744, "rewards/accuracies": 1.0, "rewards/chosen": -0.19250361621379852, "rewards/margins": 0.05284937843680382, "rewards/rejected": -0.24535296857357025, "step": 80 }, { "epoch": 0.1276595744680851, "grad_norm": 0.66707843542099, "learning_rate": 2.5196850393700787e-06, "log_odds_chosen": 0.43576544523239136, "log_odds_ratio": -0.5106025338172913, "logits/chosen": -0.8745508193969727, "logits/rejected": -0.5163943767547607, "logps/chosen": -2.004204273223877, "logps/rejected": -2.396484851837158, "loss": 2.2337, "nll_loss": 2.182626724243164, "rewards/accuracies": 1.0, "rewards/chosen": -0.20042043924331665, "rewards/margins": 0.039228055626153946, "rewards/rejected": -0.2396484911441803, "step": 81 }, { "epoch": 0.12923561859732072, "grad_norm": 0.6626860499382019, "learning_rate": 2.5511811023622045e-06, "log_odds_chosen": 0.6387014985084534, "log_odds_ratio": -0.4447665214538574, "logits/chosen": -0.8805458545684814, "logits/rejected": -0.41358864307403564, "logps/chosen": -1.9391623735427856, "logps/rejected": -2.517059803009033, "loss": 2.1816, "nll_loss": 2.137143611907959, "rewards/accuracies": 1.0, "rewards/chosen": -0.19391624629497528, "rewards/margins": 0.05778975784778595, "rewards/rejected": -0.25170600414276123, "step": 82 }, { "epoch": 0.13081166272655634, "grad_norm": 0.7182599306106567, "learning_rate": 2.5826771653543303e-06, "log_odds_chosen": 0.43168193101882935, "log_odds_ratio": -0.5112774968147278, "logits/chosen": -0.8949125409126282, "logits/rejected": -0.4265552759170532, "logps/chosen": -1.9767383337020874, "logps/rejected": -2.3620519638061523, "loss": 2.2121, "nll_loss": 2.160935401916504, "rewards/accuracies": 1.0, "rewards/chosen": -0.19767382740974426, "rewards/margins": 0.038531385362148285, "rewards/rejected": -0.23620523512363434, "step": 83 }, { "epoch": 0.13238770685579196, "grad_norm": 0.7394052743911743, "learning_rate": 2.6141732283464566e-06, "log_odds_chosen": 0.6156221628189087, "log_odds_ratio": -0.449825644493103, "logits/chosen": -0.9797751903533936, "logits/rejected": -0.4194489121437073, "logps/chosen": -1.9231019020080566, "logps/rejected": -2.4722418785095215, "loss": 2.1964, "nll_loss": 2.1514475345611572, "rewards/accuracies": 1.0, "rewards/chosen": -0.19231019914150238, "rewards/margins": 0.054913993924856186, "rewards/rejected": -0.24722418189048767, "step": 84 }, { "epoch": 0.13396375098502758, "grad_norm": 0.6888207197189331, "learning_rate": 2.645669291338583e-06, "log_odds_chosen": 0.5399656295776367, "log_odds_ratio": -0.4604187309741974, "logits/chosen": -0.813186526298523, "logits/rejected": -0.4043978750705719, "logps/chosen": -1.9997724294662476, "logps/rejected": -2.4816389083862305, "loss": 2.2291, "nll_loss": 2.1830945014953613, "rewards/accuracies": 1.0, "rewards/chosen": -0.19997724890708923, "rewards/margins": 0.04818664863705635, "rewards/rejected": -0.24816389381885529, "step": 85 }, { "epoch": 0.1355397951142632, "grad_norm": 0.7315590381622314, "learning_rate": 2.6771653543307086e-06, "log_odds_chosen": 0.5595324039459229, "log_odds_ratio": -0.45697805285453796, "logits/chosen": -1.0269588232040405, "logits/rejected": -0.3898090124130249, "logps/chosen": -1.973376989364624, "logps/rejected": -2.4706506729125977, "loss": 2.216, "nll_loss": 2.1702566146850586, "rewards/accuracies": 1.0, "rewards/chosen": -0.19733770191669464, "rewards/margins": 0.04972738027572632, "rewards/rejected": -0.24706508219242096, "step": 86 }, { "epoch": 0.13711583924349882, "grad_norm": 0.672721803188324, "learning_rate": 2.7086614173228344e-06, "log_odds_chosen": 0.38126063346862793, "log_odds_ratio": -0.5237147212028503, "logits/chosen": -0.8164312243461609, "logits/rejected": -0.5732689499855042, "logps/chosen": -1.9951059818267822, "logps/rejected": -2.3318116664886475, "loss": 2.2279, "nll_loss": 2.175508737564087, "rewards/accuracies": 1.0, "rewards/chosen": -0.1995105892419815, "rewards/margins": 0.033670589327812195, "rewards/rejected": -0.2331811785697937, "step": 87 }, { "epoch": 0.13869188337273444, "grad_norm": 0.7415196895599365, "learning_rate": 2.7401574803149607e-06, "log_odds_chosen": 0.6299477815628052, "log_odds_ratio": -0.44362398982048035, "logits/chosen": -1.0215085744857788, "logits/rejected": -0.34737205505371094, "logps/chosen": -1.8929115533828735, "logps/rejected": -2.453705310821533, "loss": 2.1671, "nll_loss": 2.1227200031280518, "rewards/accuracies": 0.875, "rewards/chosen": -0.18929119408130646, "rewards/margins": 0.056079376488924026, "rewards/rejected": -0.2453705370426178, "step": 88 }, { "epoch": 0.14026792750197006, "grad_norm": 0.7291280627250671, "learning_rate": 2.7716535433070865e-06, "log_odds_chosen": 0.5796247124671936, "log_odds_ratio": -0.4462001621723175, "logits/chosen": -0.8788402676582336, "logits/rejected": -0.38318932056427, "logps/chosen": -1.9982614517211914, "logps/rejected": -2.5162978172302246, "loss": 2.2492, "nll_loss": 2.2046027183532715, "rewards/accuracies": 1.0, "rewards/chosen": -0.19982615113258362, "rewards/margins": 0.05180366337299347, "rewards/rejected": -0.2516297996044159, "step": 89 }, { "epoch": 0.14184397163120568, "grad_norm": 0.7039697170257568, "learning_rate": 2.8031496062992123e-06, "log_odds_chosen": 0.4399077296257019, "log_odds_ratio": -0.5028520822525024, "logits/chosen": -0.80506831407547, "logits/rejected": -0.3771530091762543, "logps/chosen": -2.065699577331543, "logps/rejected": -2.4619250297546387, "loss": 2.2826, "nll_loss": 2.232358932495117, "rewards/accuracies": 1.0, "rewards/chosen": -0.20656995475292206, "rewards/margins": 0.03962254151701927, "rewards/rejected": -0.24619249999523163, "step": 90 }, { "epoch": 0.1434200157604413, "grad_norm": 0.6275699138641357, "learning_rate": 2.834645669291338e-06, "log_odds_chosen": 0.49364161491394043, "log_odds_ratio": -0.48472219705581665, "logits/chosen": -0.8276112675666809, "logits/rejected": -0.5154532790184021, "logps/chosen": -1.857814908027649, "logps/rejected": -2.2916698455810547, "loss": 2.0939, "nll_loss": 2.0454044342041016, "rewards/accuracies": 1.0, "rewards/chosen": -0.18578149378299713, "rewards/margins": 0.04338550567626953, "rewards/rejected": -0.22916698455810547, "step": 91 }, { "epoch": 0.1449960598896769, "grad_norm": 0.6308448314666748, "learning_rate": 2.8661417322834644e-06, "log_odds_chosen": 0.5374231934547424, "log_odds_ratio": -0.4650093913078308, "logits/chosen": -0.8458312153816223, "logits/rejected": -0.35253289341926575, "logps/chosen": -1.8836688995361328, "logps/rejected": -2.357243776321411, "loss": 2.136, "nll_loss": 2.0894808769226074, "rewards/accuracies": 1.0, "rewards/chosen": -0.18836688995361328, "rewards/margins": 0.04735749959945679, "rewards/rejected": -0.23572437465190887, "step": 92 }, { "epoch": 0.14657210401891252, "grad_norm": 0.7316494584083557, "learning_rate": 2.8976377952755906e-06, "log_odds_chosen": 0.5097925662994385, "log_odds_ratio": -0.49444442987442017, "logits/chosen": -0.8357096910476685, "logits/rejected": -0.41637447476387024, "logps/chosen": -1.9938026666641235, "logps/rejected": -2.4533045291900635, "loss": 2.2321, "nll_loss": 2.1826608180999756, "rewards/accuracies": 0.75, "rewards/chosen": -0.1993802785873413, "rewards/margins": 0.045950211584568024, "rewards/rejected": -0.24533049762248993, "step": 93 }, { "epoch": 0.14814814814814814, "grad_norm": 0.639308750629425, "learning_rate": 2.9291338582677165e-06, "log_odds_chosen": 0.49024736881256104, "log_odds_ratio": -0.4883335828781128, "logits/chosen": -0.8006777763366699, "logits/rejected": -0.49406468868255615, "logps/chosen": -1.9806185960769653, "logps/rejected": -2.414760112762451, "loss": 2.2086, "nll_loss": 2.1598000526428223, "rewards/accuracies": 0.875, "rewards/chosen": -0.19806188344955444, "rewards/margins": 0.043414145708084106, "rewards/rejected": -0.24147599935531616, "step": 94 }, { "epoch": 0.14972419227738376, "grad_norm": 0.7330454587936401, "learning_rate": 2.9606299212598423e-06, "log_odds_chosen": 0.6175916790962219, "log_odds_ratio": -0.44003960490226746, "logits/chosen": -0.8804186582565308, "logits/rejected": -0.3671495020389557, "logps/chosen": -1.9839096069335938, "logps/rejected": -2.5373644828796387, "loss": 2.225, "nll_loss": 2.1809871196746826, "rewards/accuracies": 1.0, "rewards/chosen": -0.19839096069335938, "rewards/margins": 0.05534551292657852, "rewards/rejected": -0.2537364363670349, "step": 95 }, { "epoch": 0.15130023640661938, "grad_norm": 0.6999992728233337, "learning_rate": 2.9921259842519685e-06, "log_odds_chosen": 0.5814043283462524, "log_odds_ratio": -0.44918328523635864, "logits/chosen": -0.8479225635528564, "logits/rejected": -0.4675160050392151, "logps/chosen": -1.991041660308838, "logps/rejected": -2.510003089904785, "loss": 2.2023, "nll_loss": 2.1574201583862305, "rewards/accuracies": 1.0, "rewards/chosen": -0.1991041898727417, "rewards/margins": 0.051896125078201294, "rewards/rejected": -0.2510002851486206, "step": 96 }, { "epoch": 0.152876280535855, "grad_norm": 0.6133814454078674, "learning_rate": 3.0236220472440943e-06, "log_odds_chosen": 0.4028412103652954, "log_odds_ratio": -0.5129313468933105, "logits/chosen": -0.8440722227096558, "logits/rejected": -0.4072326421737671, "logps/chosen": -1.9427242279052734, "logps/rejected": -2.2954676151275635, "loss": 2.1638, "nll_loss": 2.112459897994995, "rewards/accuracies": 1.0, "rewards/chosen": -0.19427242875099182, "rewards/margins": 0.03527434170246124, "rewards/rejected": -0.22954675555229187, "step": 97 }, { "epoch": 0.15445232466509062, "grad_norm": 0.651321291923523, "learning_rate": 3.05511811023622e-06, "log_odds_chosen": 0.42689526081085205, "log_odds_ratio": -0.5087873935699463, "logits/chosen": -0.7033334374427795, "logits/rejected": -0.442612886428833, "logps/chosen": -2.056950092315674, "logps/rejected": -2.4395601749420166, "loss": 2.2954, "nll_loss": 2.2444937229156494, "rewards/accuracies": 1.0, "rewards/chosen": -0.2056950181722641, "rewards/margins": 0.038261011242866516, "rewards/rejected": -0.24395602941513062, "step": 98 }, { "epoch": 0.15602836879432624, "grad_norm": 0.7059087157249451, "learning_rate": 3.086614173228346e-06, "log_odds_chosen": 0.29518887400627136, "log_odds_ratio": -0.5591641068458557, "logits/chosen": -0.9677470922470093, "logits/rejected": -0.5977285504341125, "logps/chosen": -1.9788846969604492, "logps/rejected": -2.2380669116973877, "loss": 2.2282, "nll_loss": 2.1722562313079834, "rewards/accuracies": 1.0, "rewards/chosen": -0.19788846373558044, "rewards/margins": 0.025918252766132355, "rewards/rejected": -0.2238067388534546, "step": 99 }, { "epoch": 0.15760441292356187, "grad_norm": 0.6437969207763672, "learning_rate": 3.1181102362204722e-06, "log_odds_chosen": 0.41778114438056946, "log_odds_ratio": -0.5134991407394409, "logits/chosen": -0.8367648124694824, "logits/rejected": -0.4996865689754486, "logps/chosen": -1.8884031772613525, "logps/rejected": -2.258099317550659, "loss": 2.1384, "nll_loss": 2.0870912075042725, "rewards/accuracies": 1.0, "rewards/chosen": -0.18884031474590302, "rewards/margins": 0.036969613283872604, "rewards/rejected": -0.2258099466562271, "step": 100 }, { "epoch": 0.15918045705279749, "grad_norm": 0.5721230506896973, "learning_rate": 3.1496062992125985e-06, "log_odds_chosen": 0.5020022392272949, "log_odds_ratio": -0.4791727066040039, "logits/chosen": -0.7251911163330078, "logits/rejected": -0.40484821796417236, "logps/chosen": -1.8599562644958496, "logps/rejected": -2.3015549182891846, "loss": 2.1179, "nll_loss": 2.069948673248291, "rewards/accuracies": 1.0, "rewards/chosen": -0.18599562346935272, "rewards/margins": 0.044159889221191406, "rewards/rejected": -0.23015552759170532, "step": 101 }, { "epoch": 0.1607565011820331, "grad_norm": 0.5419020652770996, "learning_rate": 3.1811023622047243e-06, "log_odds_chosen": 0.4814803898334503, "log_odds_ratio": -0.49633079767227173, "logits/chosen": -0.7592243552207947, "logits/rejected": -0.513361930847168, "logps/chosen": -1.8242498636245728, "logps/rejected": -2.2473723888397217, "loss": 2.0742, "nll_loss": 2.024540662765503, "rewards/accuracies": 1.0, "rewards/chosen": -0.18242499232292175, "rewards/margins": 0.04231225699186325, "rewards/rejected": -0.2247372418642044, "step": 102 }, { "epoch": 0.16233254531126873, "grad_norm": 0.53934645652771, "learning_rate": 3.2125984251968505e-06, "log_odds_chosen": 0.5330245494842529, "log_odds_ratio": -0.4746904671192169, "logits/chosen": -0.6480450630187988, "logits/rejected": -0.27741724252700806, "logps/chosen": -1.8202755451202393, "logps/rejected": -2.283982992172241, "loss": 2.0747, "nll_loss": 2.0272209644317627, "rewards/accuracies": 0.875, "rewards/chosen": -0.18202754855155945, "rewards/margins": 0.046370729804039, "rewards/rejected": -0.22839829325675964, "step": 103 }, { "epoch": 0.16390858944050432, "grad_norm": 0.6298537254333496, "learning_rate": 3.2440944881889763e-06, "log_odds_chosen": 0.5380266904830933, "log_odds_ratio": -0.4674755930900574, "logits/chosen": -0.6194661855697632, "logits/rejected": -0.28310033679008484, "logps/chosen": -1.9528425931930542, "logps/rejected": -2.4316210746765137, "loss": 2.1984, "nll_loss": 2.1516404151916504, "rewards/accuracies": 1.0, "rewards/chosen": -0.19528424739837646, "rewards/margins": 0.047877877950668335, "rewards/rejected": -0.2431621104478836, "step": 104 }, { "epoch": 0.16548463356973994, "grad_norm": 0.5288484692573547, "learning_rate": 3.275590551181102e-06, "log_odds_chosen": 0.5468251705169678, "log_odds_ratio": -0.4628355801105499, "logits/chosen": -0.7265529036521912, "logits/rejected": -0.3702164590358734, "logps/chosen": -1.8432681560516357, "logps/rejected": -2.3218398094177246, "loss": 2.1017, "nll_loss": 2.0554370880126953, "rewards/accuracies": 1.0, "rewards/chosen": -0.18432679772377014, "rewards/margins": 0.04785720631480217, "rewards/rejected": -0.2321840077638626, "step": 105 }, { "epoch": 0.16706067769897556, "grad_norm": 0.5358848571777344, "learning_rate": 3.307086614173228e-06, "log_odds_chosen": 0.4864036440849304, "log_odds_ratio": -0.4927322268486023, "logits/chosen": -0.6903339624404907, "logits/rejected": -0.27819010615348816, "logps/chosen": -1.8541500568389893, "logps/rejected": -2.2708048820495605, "loss": 2.0971, "nll_loss": 2.0478692054748535, "rewards/accuracies": 0.875, "rewards/chosen": -0.18541499972343445, "rewards/margins": 0.04166547954082489, "rewards/rejected": -0.22708047926425934, "step": 106 }, { "epoch": 0.16863672182821118, "grad_norm": 0.5739277601242065, "learning_rate": 3.3385826771653542e-06, "log_odds_chosen": 0.48060721158981323, "log_odds_ratio": -0.4924314022064209, "logits/chosen": -0.6585574746131897, "logits/rejected": -0.4523526728153229, "logps/chosen": -1.9171538352966309, "logps/rejected": -2.3419744968414307, "loss": 2.1566, "nll_loss": 2.1073999404907227, "rewards/accuracies": 0.875, "rewards/chosen": -0.19171538949012756, "rewards/margins": 0.04248207062482834, "rewards/rejected": -0.2341974675655365, "step": 107 }, { "epoch": 0.1702127659574468, "grad_norm": 0.511557936668396, "learning_rate": 3.37007874015748e-06, "log_odds_chosen": 0.365371972322464, "log_odds_ratio": -0.5382389426231384, "logits/chosen": -0.605251669883728, "logits/rejected": -0.4923926591873169, "logps/chosen": -1.8904098272323608, "logps/rejected": -2.2083511352539062, "loss": 2.132, "nll_loss": 2.0781614780426025, "rewards/accuracies": 0.875, "rewards/chosen": -0.18904098868370056, "rewards/margins": 0.03179413452744484, "rewards/rejected": -0.2208351194858551, "step": 108 }, { "epoch": 0.17178881008668243, "grad_norm": 0.5645051002502441, "learning_rate": 3.4015748031496063e-06, "log_odds_chosen": 0.5187886953353882, "log_odds_ratio": -0.47012859582901, "logits/chosen": -0.5434896945953369, "logits/rejected": -0.47223836183547974, "logps/chosen": -1.965927004814148, "logps/rejected": -2.424842596054077, "loss": 2.1994, "nll_loss": 2.152377128601074, "rewards/accuracies": 1.0, "rewards/chosen": -0.19659270346164703, "rewards/margins": 0.04589153826236725, "rewards/rejected": -0.24248425662517548, "step": 109 }, { "epoch": 0.17336485421591805, "grad_norm": 0.5361037254333496, "learning_rate": 3.433070866141732e-06, "log_odds_chosen": 0.5615408420562744, "log_odds_ratio": -0.4578676223754883, "logits/chosen": -0.6213514804840088, "logits/rejected": -0.4045155346393585, "logps/chosen": -1.8413625955581665, "logps/rejected": -2.3273088932037354, "loss": 2.0554, "nll_loss": 2.009617805480957, "rewards/accuracies": 1.0, "rewards/chosen": -0.1841362714767456, "rewards/margins": 0.04859462380409241, "rewards/rejected": -0.23273088037967682, "step": 110 }, { "epoch": 0.17494089834515367, "grad_norm": 0.591759443283081, "learning_rate": 3.4645669291338583e-06, "log_odds_chosen": 0.5804085731506348, "log_odds_ratio": -0.4556342363357544, "logits/chosen": -0.6123775839805603, "logits/rejected": -0.4591195583343506, "logps/chosen": -1.9501187801361084, "logps/rejected": -2.4626777172088623, "loss": 2.1912, "nll_loss": 2.145684242248535, "rewards/accuracies": 1.0, "rewards/chosen": -0.19501186907291412, "rewards/margins": 0.051255881786346436, "rewards/rejected": -0.24626776576042175, "step": 111 }, { "epoch": 0.1765169424743893, "grad_norm": 0.5324497222900391, "learning_rate": 3.496062992125984e-06, "log_odds_chosen": 0.4026699662208557, "log_odds_ratio": -0.5162889957427979, "logits/chosen": -0.5372949838638306, "logits/rejected": -0.2981308400630951, "logps/chosen": -1.9884939193725586, "logps/rejected": -2.3435468673706055, "loss": 2.2198, "nll_loss": 2.16818904876709, "rewards/accuracies": 1.0, "rewards/chosen": -0.1988493949174881, "rewards/margins": 0.03550529479980469, "rewards/rejected": -0.23435468971729279, "step": 112 }, { "epoch": 0.1780929866036249, "grad_norm": 0.553754448890686, "learning_rate": 3.52755905511811e-06, "log_odds_chosen": 0.40890055894851685, "log_odds_ratio": -0.5179369449615479, "logits/chosen": -0.7111020088195801, "logits/rejected": -0.3697856664657593, "logps/chosen": -1.8428016901016235, "logps/rejected": -2.200305700302124, "loss": 2.0924, "nll_loss": 2.040616512298584, "rewards/accuracies": 1.0, "rewards/chosen": -0.1842801570892334, "rewards/margins": 0.03575042262673378, "rewards/rejected": -0.22003059089183807, "step": 113 }, { "epoch": 0.17966903073286053, "grad_norm": 0.5356653332710266, "learning_rate": 3.559055118110236e-06, "log_odds_chosen": 0.3291313946247101, "log_odds_ratio": -0.5585582852363586, "logits/chosen": -0.4512363076210022, "logits/rejected": -0.43510857224464417, "logps/chosen": -1.9846851825714111, "logps/rejected": -2.2790331840515137, "loss": 2.2259, "nll_loss": 2.1699955463409424, "rewards/accuracies": 0.875, "rewards/chosen": -0.19846852123737335, "rewards/margins": 0.02943480759859085, "rewards/rejected": -0.2279033213853836, "step": 114 }, { "epoch": 0.18124507486209615, "grad_norm": 0.5100815296173096, "learning_rate": 3.590551181102362e-06, "log_odds_chosen": 0.49927371740341187, "log_odds_ratio": -0.48482194542884827, "logits/chosen": -0.5574390888214111, "logits/rejected": -0.5628021955490112, "logps/chosen": -1.8782962560653687, "logps/rejected": -2.317220687866211, "loss": 2.11, "nll_loss": 2.0615200996398926, "rewards/accuracies": 0.875, "rewards/chosen": -0.1878296285867691, "rewards/margins": 0.04389244690537453, "rewards/rejected": -0.23172207176685333, "step": 115 }, { "epoch": 0.18282111899133174, "grad_norm": 0.5005074739456177, "learning_rate": 3.622047244094488e-06, "log_odds_chosen": 0.39984914660453796, "log_odds_ratio": -0.5320521593093872, "logits/chosen": -0.5708422660827637, "logits/rejected": -0.3728347420692444, "logps/chosen": -1.935167670249939, "logps/rejected": -2.2881405353546143, "loss": 2.1566, "nll_loss": 2.103365898132324, "rewards/accuracies": 0.75, "rewards/chosen": -0.19351676106452942, "rewards/margins": 0.035297293215990067, "rewards/rejected": -0.2288140505552292, "step": 116 }, { "epoch": 0.18439716312056736, "grad_norm": 0.5071951746940613, "learning_rate": 3.653543307086614e-06, "log_odds_chosen": 0.5789820551872253, "log_odds_ratio": -0.46050727367401123, "logits/chosen": -0.55865877866745, "logits/rejected": -0.624116837978363, "logps/chosen": -1.8576948642730713, "logps/rejected": -2.3663711547851562, "loss": 2.1024, "nll_loss": 2.0563366413116455, "rewards/accuracies": 0.875, "rewards/chosen": -0.18576949834823608, "rewards/margins": 0.05086761713027954, "rewards/rejected": -0.23663711547851562, "step": 117 }, { "epoch": 0.18597320724980299, "grad_norm": 0.5110164880752563, "learning_rate": 3.68503937007874e-06, "log_odds_chosen": 0.3938359022140503, "log_odds_ratio": -0.5259670615196228, "logits/chosen": -0.4433292746543884, "logits/rejected": -0.4191721975803375, "logps/chosen": -1.8846337795257568, "logps/rejected": -2.2300126552581787, "loss": 2.1262, "nll_loss": 2.0735833644866943, "rewards/accuracies": 0.875, "rewards/chosen": -0.18846337497234344, "rewards/margins": 0.03453788906335831, "rewards/rejected": -0.22300127148628235, "step": 118 }, { "epoch": 0.1875492513790386, "grad_norm": 0.5024784803390503, "learning_rate": 3.716535433070866e-06, "log_odds_chosen": 0.33231812715530396, "log_odds_ratio": -0.5509342551231384, "logits/chosen": -0.4613322913646698, "logits/rejected": -0.3955138325691223, "logps/chosen": -1.8655431270599365, "logps/rejected": -2.155794858932495, "loss": 2.1247, "nll_loss": 2.0695743560791016, "rewards/accuracies": 0.875, "rewards/chosen": -0.18655429780483246, "rewards/margins": 0.029025178402662277, "rewards/rejected": -0.21557949483394623, "step": 119 }, { "epoch": 0.18912529550827423, "grad_norm": 0.47591114044189453, "learning_rate": 3.748031496062992e-06, "log_odds_chosen": 0.6754725575447083, "log_odds_ratio": -0.42014336585998535, "logits/chosen": -0.5019458532333374, "logits/rejected": -0.39064449071884155, "logps/chosen": -1.7608760595321655, "logps/rejected": -2.3479392528533936, "loss": 1.9898, "nll_loss": 1.9478257894515991, "rewards/accuracies": 1.0, "rewards/chosen": -0.1760876327753067, "rewards/margins": 0.05870632082223892, "rewards/rejected": -0.23479396104812622, "step": 120 }, { "epoch": 0.19070133963750985, "grad_norm": 0.4543094336986542, "learning_rate": 3.779527559055118e-06, "log_odds_chosen": 0.6511515378952026, "log_odds_ratio": -0.42243996262550354, "logits/chosen": -0.5351437926292419, "logits/rejected": -0.5634927153587341, "logps/chosen": -1.8719269037246704, "logps/rejected": -2.4463436603546143, "loss": 2.0912, "nll_loss": 2.0489981174468994, "rewards/accuracies": 1.0, "rewards/chosen": -0.18719267845153809, "rewards/margins": 0.057441674172878265, "rewards/rejected": -0.24463436007499695, "step": 121 }, { "epoch": 0.19227738376674547, "grad_norm": 0.4782586395740509, "learning_rate": 3.8110236220472436e-06, "log_odds_chosen": 0.5679782032966614, "log_odds_ratio": -0.46660980582237244, "logits/chosen": -0.582790195941925, "logits/rejected": -0.4242536127567291, "logps/chosen": -1.790369987487793, "logps/rejected": -2.2882192134857178, "loss": 2.0132, "nll_loss": 1.9665888547897339, "rewards/accuracies": 0.875, "rewards/chosen": -0.17903698980808258, "rewards/margins": 0.04978492856025696, "rewards/rejected": -0.22882193326950073, "step": 122 }, { "epoch": 0.1938534278959811, "grad_norm": 0.5209844708442688, "learning_rate": 3.8425196850393695e-06, "log_odds_chosen": 0.43676841259002686, "log_odds_ratio": -0.5001434087753296, "logits/chosen": -0.41858866810798645, "logits/rejected": -0.46535009145736694, "logps/chosen": -1.8954147100448608, "logps/rejected": -2.27744197845459, "loss": 2.1337, "nll_loss": 2.0836901664733887, "rewards/accuracies": 1.0, "rewards/chosen": -0.18954147398471832, "rewards/margins": 0.038202736526727676, "rewards/rejected": -0.2277442067861557, "step": 123 }, { "epoch": 0.1954294720252167, "grad_norm": 0.43751877546310425, "learning_rate": 3.874015748031496e-06, "log_odds_chosen": 0.5021814703941345, "log_odds_ratio": -0.48468679189682007, "logits/chosen": -0.5214758515357971, "logits/rejected": -0.42654621601104736, "logps/chosen": -1.7678717374801636, "logps/rejected": -2.193263292312622, "loss": 1.9906, "nll_loss": 1.9421806335449219, "rewards/accuracies": 1.0, "rewards/chosen": -0.17678718268871307, "rewards/margins": 0.04253914952278137, "rewards/rejected": -0.21932633221149445, "step": 124 }, { "epoch": 0.19700551615445233, "grad_norm": 0.46247461438179016, "learning_rate": 3.905511811023622e-06, "log_odds_chosen": 0.4802154302597046, "log_odds_ratio": -0.48964813351631165, "logits/chosen": -0.49467337131500244, "logits/rejected": -0.5816658735275269, "logps/chosen": -1.8051879405975342, "logps/rejected": -2.221101760864258, "loss": 2.0493, "nll_loss": 2.0002968311309814, "rewards/accuracies": 1.0, "rewards/chosen": -0.18051880598068237, "rewards/margins": 0.04159136861562729, "rewards/rejected": -0.22211016714572906, "step": 125 }, { "epoch": 0.19858156028368795, "grad_norm": 0.5007897615432739, "learning_rate": 3.937007874015748e-06, "log_odds_chosen": 0.6812261939048767, "log_odds_ratio": -0.4165576696395874, "logits/chosen": -0.5011131167411804, "logits/rejected": -0.3981563448905945, "logps/chosen": -1.824156403541565, "logps/rejected": -2.42551589012146, "loss": 2.0473, "nll_loss": 2.0055999755859375, "rewards/accuracies": 1.0, "rewards/chosen": -0.1824156492948532, "rewards/margins": 0.06013592332601547, "rewards/rejected": -0.24255158007144928, "step": 126 }, { "epoch": 0.20015760441292357, "grad_norm": 0.5308762192726135, "learning_rate": 3.9685039370078736e-06, "log_odds_chosen": 0.5868005752563477, "log_odds_ratio": -0.4556260108947754, "logits/chosen": -0.47778087854385376, "logits/rejected": -0.39855217933654785, "logps/chosen": -1.8529423475265503, "logps/rejected": -2.371786594390869, "loss": 2.1134, "nll_loss": 2.0678582191467285, "rewards/accuracies": 1.0, "rewards/chosen": -0.1852942407131195, "rewards/margins": 0.05188443511724472, "rewards/rejected": -0.23717869818210602, "step": 127 }, { "epoch": 0.2017336485421592, "grad_norm": 0.4748990535736084, "learning_rate": 4e-06, "log_odds_chosen": 0.36945709586143494, "log_odds_ratio": -0.529854953289032, "logits/chosen": -0.31703251600265503, "logits/rejected": -0.261596143245697, "logps/chosen": -1.9309293031692505, "logps/rejected": -2.2549469470977783, "loss": 2.1479, "nll_loss": 2.094945192337036, "rewards/accuracies": 1.0, "rewards/chosen": -0.1930929273366928, "rewards/margins": 0.032401762902736664, "rewards/rejected": -0.22549471259117126, "step": 128 }, { "epoch": 0.2033096926713948, "grad_norm": 0.46808141469955444, "learning_rate": 3.999992445477635e-06, "log_odds_chosen": 0.4924158751964569, "log_odds_ratio": -0.4864352345466614, "logits/chosen": -0.3923236131668091, "logits/rejected": -0.5614160299301147, "logps/chosen": -1.926079273223877, "logps/rejected": -2.361663579940796, "loss": 2.1291, "nll_loss": 2.080496311187744, "rewards/accuracies": 0.875, "rewards/chosen": -0.19260793924331665, "rewards/margins": 0.04355840012431145, "rewards/rejected": -0.2361663281917572, "step": 129 }, { "epoch": 0.2048857368006304, "grad_norm": 0.4758654534816742, "learning_rate": 3.999969781967615e-06, "log_odds_chosen": 0.3168904185295105, "log_odds_ratio": -0.5500271916389465, "logits/chosen": -0.3438476622104645, "logits/rejected": -0.36578765511512756, "logps/chosen": -2.0286383628845215, "logps/rejected": -2.31013560295105, "loss": 2.2287, "nll_loss": 2.1736607551574707, "rewards/accuracies": 1.0, "rewards/chosen": -0.20286381244659424, "rewards/margins": 0.028149710968136787, "rewards/rejected": -0.23101355135440826, "step": 130 }, { "epoch": 0.20646178092986603, "grad_norm": 0.4352067708969116, "learning_rate": 3.99993200964115e-06, "log_odds_chosen": 0.41043001413345337, "log_odds_ratio": -0.5113096237182617, "logits/chosen": -0.4144492745399475, "logits/rejected": -0.5521720051765442, "logps/chosen": -1.8470234870910645, "logps/rejected": -2.2023518085479736, "loss": 2.0658, "nll_loss": 2.014646053314209, "rewards/accuracies": 1.0, "rewards/chosen": -0.18470235168933868, "rewards/margins": 0.03553282842040062, "rewards/rejected": -0.2202351987361908, "step": 131 }, { "epoch": 0.20803782505910165, "grad_norm": 0.5278255939483643, "learning_rate": 3.99987912878359e-06, "log_odds_chosen": 0.21506069600582123, "log_odds_ratio": -0.5979167222976685, "logits/chosen": -0.43242937326431274, "logits/rejected": -0.22841165959835052, "logps/chosen": -1.8215473890304565, "logps/rejected": -2.0002083778381348, "loss": 2.0893, "nll_loss": 2.029538631439209, "rewards/accuracies": 0.875, "rewards/chosen": -0.18215471506118774, "rewards/margins": 0.017866121605038643, "rewards/rejected": -0.20002083480358124, "step": 132 }, { "epoch": 0.20961386918833727, "grad_norm": 0.44630110263824463, "learning_rate": 3.999811139794429e-06, "log_odds_chosen": 0.4566144049167633, "log_odds_ratio": -0.4949381947517395, "logits/chosen": -0.38445478677749634, "logits/rejected": -0.503831148147583, "logps/chosen": -1.8512368202209473, "logps/rejected": -2.2488574981689453, "loss": 2.0611, "nll_loss": 2.0115725994110107, "rewards/accuracies": 1.0, "rewards/chosen": -0.18512369692325592, "rewards/margins": 0.039762042462825775, "rewards/rejected": -0.2248857468366623, "step": 133 }, { "epoch": 0.2111899133175729, "grad_norm": 0.47832149267196655, "learning_rate": 3.999728043187288e-06, "log_odds_chosen": 0.4043869972229004, "log_odds_ratio": -0.5232419967651367, "logits/chosen": -0.3849250078201294, "logits/rejected": -0.4331829845905304, "logps/chosen": -1.916317105293274, "logps/rejected": -2.273916721343994, "loss": 2.1495, "nll_loss": 2.0971601009368896, "rewards/accuracies": 0.875, "rewards/chosen": -0.1916317194700241, "rewards/margins": 0.03575997054576874, "rewards/rejected": -0.22739167511463165, "step": 134 }, { "epoch": 0.2127659574468085, "grad_norm": 0.4719514548778534, "learning_rate": 3.999629839589922e-06, "log_odds_chosen": 0.2638218402862549, "log_odds_ratio": -0.5730746388435364, "logits/chosen": -0.2890802025794983, "logits/rejected": -0.6256790161132812, "logps/chosen": -1.9855725765228271, "logps/rejected": -2.2172610759735107, "loss": 2.2169, "nll_loss": 2.1596412658691406, "rewards/accuracies": 0.875, "rewards/chosen": -0.19855724275112152, "rewards/margins": 0.02316887117922306, "rewards/rejected": -0.22172610461711884, "step": 135 }, { "epoch": 0.21434200157604413, "grad_norm": 0.4406312108039856, "learning_rate": 3.999516529744215e-06, "log_odds_chosen": 0.38678330183029175, "log_odds_ratio": -0.5220622420310974, "logits/chosen": -0.22331157326698303, "logits/rejected": -0.5899175405502319, "logps/chosen": -1.8535821437835693, "logps/rejected": -2.1891560554504395, "loss": 2.0776, "nll_loss": 2.0253748893737793, "rewards/accuracies": 1.0, "rewards/chosen": -0.1853582262992859, "rewards/margins": 0.033557381480932236, "rewards/rejected": -0.21891558170318604, "step": 136 }, { "epoch": 0.21591804570527975, "grad_norm": 0.4380626380443573, "learning_rate": 3.999388114506166e-06, "log_odds_chosen": 0.4394814074039459, "log_odds_ratio": -0.5038915872573853, "logits/chosen": -0.3584860861301422, "logits/rejected": -0.6305355429649353, "logps/chosen": -1.76337730884552, "logps/rejected": -2.14279842376709, "loss": 1.9779, "nll_loss": 1.9275436401367188, "rewards/accuracies": 1.0, "rewards/chosen": -0.17633774876594543, "rewards/margins": 0.03794211149215698, "rewards/rejected": -0.21427986025810242, "step": 137 }, { "epoch": 0.21749408983451538, "grad_norm": 0.4562850594520569, "learning_rate": 3.999244594845892e-06, "log_odds_chosen": 0.3186016082763672, "log_odds_ratio": -0.5537752509117126, "logits/chosen": -0.3234477937221527, "logits/rejected": -0.5436648726463318, "logps/chosen": -1.8082385063171387, "logps/rejected": -2.0849738121032715, "loss": 2.0387, "nll_loss": 1.9833385944366455, "rewards/accuracies": 1.0, "rewards/chosen": -0.18082384765148163, "rewards/margins": 0.027673546224832535, "rewards/rejected": -0.20849739015102386, "step": 138 }, { "epoch": 0.219070133963751, "grad_norm": 0.4723531901836395, "learning_rate": 3.999085971847616e-06, "log_odds_chosen": 0.2793772220611572, "log_odds_ratio": -0.5707423090934753, "logits/chosen": -0.3115157186985016, "logits/rejected": -0.6855476498603821, "logps/chosen": -1.8707714080810547, "logps/rejected": -2.1145730018615723, "loss": 2.0908, "nll_loss": 2.033709764480591, "rewards/accuracies": 0.875, "rewards/chosen": -0.18707714974880219, "rewards/margins": 0.02438015677034855, "rewards/rejected": -0.211457297205925, "step": 139 }, { "epoch": 0.22064617809298662, "grad_norm": 0.47324880957603455, "learning_rate": 3.998912246709658e-06, "log_odds_chosen": 0.43111467361450195, "log_odds_ratio": -0.5151071548461914, "logits/chosen": -0.3707660734653473, "logits/rejected": -0.6392085552215576, "logps/chosen": -1.7731448411941528, "logps/rejected": -2.146456480026245, "loss": 1.9936, "nll_loss": 1.942044973373413, "rewards/accuracies": 0.875, "rewards/chosen": -0.17731450498104095, "rewards/margins": 0.03733116388320923, "rewards/rejected": -0.214645653963089, "step": 140 }, { "epoch": 0.2222222222222222, "grad_norm": 0.49398210644721985, "learning_rate": 3.9987234207444295e-06, "log_odds_chosen": 0.4180254638195038, "log_odds_ratio": -0.5135904550552368, "logits/chosen": -0.3750038146972656, "logits/rejected": -0.5034846067428589, "logps/chosen": -1.868428111076355, "logps/rejected": -2.2344772815704346, "loss": 2.092, "nll_loss": 2.0405919551849365, "rewards/accuracies": 1.0, "rewards/chosen": -0.18684281408786774, "rewards/margins": 0.03660491853952408, "rewards/rejected": -0.2234477400779724, "step": 141 }, { "epoch": 0.22379826635145783, "grad_norm": 0.4737972617149353, "learning_rate": 3.998519495378419e-06, "log_odds_chosen": 0.397806853055954, "log_odds_ratio": -0.5191542506217957, "logits/chosen": -0.2983032763004303, "logits/rejected": -0.4037090539932251, "logps/chosen": -1.9413141012191772, "logps/rejected": -2.2922468185424805, "loss": 2.1382, "nll_loss": 2.086291551589966, "rewards/accuracies": 1.0, "rewards/chosen": -0.19413141906261444, "rewards/margins": 0.0350932851433754, "rewards/rejected": -0.22922469675540924, "step": 142 }, { "epoch": 0.22537431048069345, "grad_norm": 0.42424681782722473, "learning_rate": 3.998300472152187e-06, "log_odds_chosen": 0.37584933638572693, "log_odds_ratio": -0.5324170589447021, "logits/chosen": -0.3758591413497925, "logits/rejected": -0.7293614745140076, "logps/chosen": -1.730176568031311, "logps/rejected": -2.052025079727173, "loss": 1.9556, "nll_loss": 1.9023665189743042, "rewards/accuracies": 0.875, "rewards/chosen": -0.17301765084266663, "rewards/margins": 0.03218483552336693, "rewards/rejected": -0.20520250499248505, "step": 143 }, { "epoch": 0.22695035460992907, "grad_norm": 0.46012207865715027, "learning_rate": 3.998066352720347e-06, "log_odds_chosen": 0.48952746391296387, "log_odds_ratio": -0.4826250374317169, "logits/chosen": -0.1988898515701294, "logits/rejected": -0.5319833755493164, "logps/chosen": -1.8468257188796997, "logps/rejected": -2.2748098373413086, "loss": 2.0578, "nll_loss": 2.009584665298462, "rewards/accuracies": 1.0, "rewards/chosen": -0.18468256294727325, "rewards/margins": 0.04279841482639313, "rewards/rejected": -0.22748097777366638, "step": 144 }, { "epoch": 0.2285263987391647, "grad_norm": 0.4577552080154419, "learning_rate": 3.997817138851562e-06, "log_odds_chosen": 0.3373531997203827, "log_odds_ratio": -0.5514544248580933, "logits/chosen": -0.2713150680065155, "logits/rejected": -0.46479517221450806, "logps/chosen": -1.9415301084518433, "logps/rejected": -2.2407379150390625, "loss": 2.1553, "nll_loss": 2.100133180618286, "rewards/accuracies": 0.875, "rewards/chosen": -0.1941530406475067, "rewards/margins": 0.029920781031250954, "rewards/rejected": -0.22407379746437073, "step": 145 }, { "epoch": 0.23010244286840031, "grad_norm": 0.4259653389453888, "learning_rate": 3.997552832428522e-06, "log_odds_chosen": 0.43442001938819885, "log_odds_ratio": -0.5027788281440735, "logits/chosen": -0.3454759120941162, "logits/rejected": -0.5418952703475952, "logps/chosen": -1.732759714126587, "logps/rejected": -2.1025242805480957, "loss": 1.974, "nll_loss": 1.9237622022628784, "rewards/accuracies": 1.0, "rewards/chosen": -0.17327596247196198, "rewards/margins": 0.03697647154331207, "rewards/rejected": -0.21025243401527405, "step": 146 }, { "epoch": 0.23167848699763594, "grad_norm": 0.4360640347003937, "learning_rate": 3.9972734354479366e-06, "log_odds_chosen": 0.39015060663223267, "log_odds_ratio": -0.5267472863197327, "logits/chosen": -0.2306804656982422, "logits/rejected": -0.7212989330291748, "logps/chosen": -1.725273847579956, "logps/rejected": -2.0561888217926025, "loss": 1.9515, "nll_loss": 1.8987786769866943, "rewards/accuracies": 1.0, "rewards/chosen": -0.17252738773822784, "rewards/margins": 0.03309149667620659, "rewards/rejected": -0.20561888813972473, "step": 147 }, { "epoch": 0.23325453112687156, "grad_norm": 0.4830004870891571, "learning_rate": 3.996978950020517e-06, "log_odds_chosen": 0.2517290413379669, "log_odds_ratio": -0.589171826839447, "logits/chosen": -0.220802903175354, "logits/rejected": -0.5540719032287598, "logps/chosen": -1.916334629058838, "logps/rejected": -2.1323041915893555, "loss": 2.1418, "nll_loss": 2.082899570465088, "rewards/accuracies": 0.75, "rewards/chosen": -0.1916334629058838, "rewards/margins": 0.021596934646368027, "rewards/rejected": -0.21323040127754211, "step": 148 }, { "epoch": 0.23483057525610718, "grad_norm": 0.4216887950897217, "learning_rate": 3.996669378370959e-06, "log_odds_chosen": 0.5976248979568481, "log_odds_ratio": -0.44520941376686096, "logits/chosen": -0.2759765088558197, "logits/rejected": -0.5601774454116821, "logps/chosen": -1.6960978507995605, "logps/rejected": -2.2107391357421875, "loss": 1.9157, "nll_loss": 1.8712081909179688, "rewards/accuracies": 1.0, "rewards/chosen": -0.16960978507995605, "rewards/margins": 0.05146413296461105, "rewards/rejected": -0.2210739254951477, "step": 149 }, { "epoch": 0.2364066193853428, "grad_norm": 0.4519270658493042, "learning_rate": 3.996344722837929e-06, "log_odds_chosen": 0.5262175798416138, "log_odds_ratio": -0.46979856491088867, "logits/chosen": -0.2899366617202759, "logits/rejected": -0.48541364073753357, "logps/chosen": -1.868219256401062, "logps/rejected": -2.3306198120117188, "loss": 2.0673, "nll_loss": 2.0203018188476562, "rewards/accuracies": 1.0, "rewards/chosen": -0.18682192265987396, "rewards/margins": 0.04624004662036896, "rewards/rejected": -0.23306196928024292, "step": 150 }, { "epoch": 0.23798266351457842, "grad_norm": 0.39388707280158997, "learning_rate": 3.996004985874043e-06, "log_odds_chosen": 0.2795698940753937, "log_odds_ratio": -0.5656891465187073, "logits/chosen": -0.16603723168373108, "logits/rejected": -0.464844673871994, "logps/chosen": -1.7436405420303345, "logps/rejected": -1.9796698093414307, "loss": 1.9733, "nll_loss": 1.9167754650115967, "rewards/accuracies": 1.0, "rewards/chosen": -0.17436404526233673, "rewards/margins": 0.023602934554219246, "rewards/rejected": -0.19796699285507202, "step": 151 }, { "epoch": 0.23955870764381404, "grad_norm": 0.4452219605445862, "learning_rate": 3.995650170045855e-06, "log_odds_chosen": 0.3562104403972626, "log_odds_ratio": -0.5355157256126404, "logits/chosen": -0.2569115161895752, "logits/rejected": -0.5924301743507385, "logps/chosen": -1.8175992965698242, "logps/rejected": -2.1236917972564697, "loss": 2.0328, "nll_loss": 1.9792625904083252, "rewards/accuracies": 1.0, "rewards/chosen": -0.18175993859767914, "rewards/margins": 0.030609263107180595, "rewards/rejected": -0.21236920356750488, "step": 152 }, { "epoch": 0.24113475177304963, "grad_norm": 0.43792277574539185, "learning_rate": 3.995280278033825e-06, "log_odds_chosen": 0.25159722566604614, "log_odds_ratio": -0.5846930146217346, "logits/chosen": -0.1688135266304016, "logits/rejected": -0.4975782036781311, "logps/chosen": -1.919213891029358, "logps/rejected": -2.1364455223083496, "loss": 2.1351, "nll_loss": 2.0765841007232666, "rewards/accuracies": 0.75, "rewards/chosen": -0.1919213980436325, "rewards/margins": 0.021723175421357155, "rewards/rejected": -0.2136445790529251, "step": 153 }, { "epoch": 0.24271079590228525, "grad_norm": 0.4284445345401764, "learning_rate": 3.994895312632314e-06, "log_odds_chosen": 0.583359956741333, "log_odds_ratio": -0.4485571086406708, "logits/chosen": -0.14007489383220673, "logits/rejected": -0.9004555940628052, "logps/chosen": -1.787564754486084, "logps/rejected": -2.293922185897827, "loss": 1.9995, "nll_loss": 1.9546149969100952, "rewards/accuracies": 1.0, "rewards/chosen": -0.1787564754486084, "rewards/margins": 0.05063574016094208, "rewards/rejected": -0.22939221560955048, "step": 154 }, { "epoch": 0.24428684003152087, "grad_norm": 0.4128466546535492, "learning_rate": 3.994495276749549e-06, "log_odds_chosen": 0.5421090722084045, "log_odds_ratio": -0.4647715091705322, "logits/chosen": -0.21934741735458374, "logits/rejected": -0.5551028251647949, "logps/chosen": -1.8062323331832886, "logps/rejected": -2.2790470123291016, "loss": 2.006, "nll_loss": 1.959566593170166, "rewards/accuracies": 1.0, "rewards/chosen": -0.18062324821949005, "rewards/margins": 0.047281473875045776, "rewards/rejected": -0.22790470719337463, "step": 155 }, { "epoch": 0.2458628841607565, "grad_norm": 0.40027129650115967, "learning_rate": 3.994080173407612e-06, "log_odds_chosen": 0.3059383034706116, "log_odds_ratio": -0.5557867884635925, "logits/chosen": -0.1562999039888382, "logits/rejected": -0.658608615398407, "logps/chosen": -1.844901442527771, "logps/rejected": -2.1075332164764404, "loss": 2.0498, "nll_loss": 1.9942554235458374, "rewards/accuracies": 1.0, "rewards/chosen": -0.1844901591539383, "rewards/margins": 0.02626318484544754, "rewards/rejected": -0.21075333654880524, "step": 156 }, { "epoch": 0.24743892828999212, "grad_norm": 0.42715558409690857, "learning_rate": 3.993650005742409e-06, "log_odds_chosen": 0.3714084029197693, "log_odds_ratio": -0.5295048952102661, "logits/chosen": -0.26001468300819397, "logits/rejected": -0.5350618958473206, "logps/chosen": -1.8118646144866943, "logps/rejected": -2.1319515705108643, "loss": 2.0239, "nll_loss": 1.9709175825119019, "rewards/accuracies": 1.0, "rewards/chosen": -0.18118645250797272, "rewards/margins": 0.03200869262218475, "rewards/rejected": -0.21319516003131866, "step": 157 }, { "epoch": 0.24901497241922774, "grad_norm": 0.3839566707611084, "learning_rate": 3.993204777003652e-06, "log_odds_chosen": 0.393911749124527, "log_odds_ratio": -0.5220350623130798, "logits/chosen": -0.1995089203119278, "logits/rejected": -0.6347463130950928, "logps/chosen": -1.7502682209014893, "logps/rejected": -2.0890250205993652, "loss": 1.9534, "nll_loss": 1.9011733531951904, "rewards/accuracies": 1.0, "rewards/chosen": -0.1750268191099167, "rewards/margins": 0.033875688910484314, "rewards/rejected": -0.208902508020401, "step": 158 }, { "epoch": 0.25059101654846333, "grad_norm": 0.4023450016975403, "learning_rate": 3.992744490554832e-06, "log_odds_chosen": 0.38417020440101624, "log_odds_ratio": -0.5247639417648315, "logits/chosen": -0.0957825779914856, "logits/rejected": -0.511121392250061, "logps/chosen": -1.786711573600769, "logps/rejected": -2.116995334625244, "loss": 2.0068, "nll_loss": 1.9543219804763794, "rewards/accuracies": 1.0, "rewards/chosen": -0.17867115139961243, "rewards/margins": 0.033028386533260345, "rewards/rejected": -0.21169954538345337, "step": 159 }, { "epoch": 0.25216706067769895, "grad_norm": 0.37720558047294617, "learning_rate": 3.992269149873192e-06, "log_odds_chosen": 0.37647151947021484, "log_odds_ratio": -0.5282561779022217, "logits/chosen": -0.18793515861034393, "logits/rejected": -0.6444622874259949, "logps/chosen": -1.759155511856079, "logps/rejected": -2.0770745277404785, "loss": 1.9655, "nll_loss": 1.9126847982406616, "rewards/accuracies": 1.0, "rewards/chosen": -0.17591555416584015, "rewards/margins": 0.03179190307855606, "rewards/rejected": -0.2077074497938156, "step": 160 }, { "epoch": 0.25374310480693457, "grad_norm": 0.3518170416355133, "learning_rate": 3.991778758549705e-06, "log_odds_chosen": 0.2357814759016037, "log_odds_ratio": -0.5892237424850464, "logits/chosen": -0.039820123463869095, "logits/rejected": -0.4496867060661316, "logps/chosen": -1.8794186115264893, "logps/rejected": -2.0856754779815674, "loss": 2.0673, "nll_loss": 2.008375644683838, "rewards/accuracies": 0.75, "rewards/chosen": -0.18794189393520355, "rewards/margins": 0.020625660195946693, "rewards/rejected": -0.2085675299167633, "step": 161 }, { "epoch": 0.2553191489361702, "grad_norm": 0.38300883769989014, "learning_rate": 3.9912733202890415e-06, "log_odds_chosen": 0.381821870803833, "log_odds_ratio": -0.5251815319061279, "logits/chosen": -0.13694928586483002, "logits/rejected": -0.618095338344574, "logps/chosen": -1.8078151941299438, "logps/rejected": -2.135481834411621, "loss": 2.0087, "nll_loss": 1.9562112092971802, "rewards/accuracies": 1.0, "rewards/chosen": -0.1807815134525299, "rewards/margins": 0.0327666774392128, "rewards/rejected": -0.2135481834411621, "step": 162 }, { "epoch": 0.2568951930654058, "grad_norm": 0.36690783500671387, "learning_rate": 3.990752838909548e-06, "log_odds_chosen": 0.24148699641227722, "log_odds_ratio": -0.5847919583320618, "logits/chosen": -0.05623656138777733, "logits/rejected": -0.5564668774604797, "logps/chosen": -1.9099113941192627, "logps/rejected": -2.1186718940734863, "loss": 2.101, "nll_loss": 2.0425591468811035, "rewards/accuracies": 0.875, "rewards/chosen": -0.1909911334514618, "rewards/margins": 0.020876042544841766, "rewards/rejected": -0.21186719834804535, "step": 163 }, { "epoch": 0.25847123719464143, "grad_norm": 0.38993462920188904, "learning_rate": 3.990217318343213e-06, "log_odds_chosen": 0.5827608108520508, "log_odds_ratio": -0.4458136260509491, "logits/chosen": -0.12328344583511353, "logits/rejected": -0.7965791821479797, "logps/chosen": -1.9730701446533203, "logps/rejected": -2.4925732612609863, "loss": 2.1531, "nll_loss": 2.108518123626709, "rewards/accuracies": 1.0, "rewards/chosen": -0.1973070204257965, "rewards/margins": 0.05195032060146332, "rewards/rejected": -0.24925734102725983, "step": 164 }, { "epoch": 0.26004728132387706, "grad_norm": 0.37487825751304626, "learning_rate": 3.989666762635637e-06, "log_odds_chosen": 0.3498647212982178, "log_odds_ratio": -0.5365273356437683, "logits/chosen": -0.12066411972045898, "logits/rejected": -0.5407615900039673, "logps/chosen": -1.8215625286102295, "logps/rejected": -2.1220715045928955, "loss": 2.0347, "nll_loss": 1.9810354709625244, "rewards/accuracies": 1.0, "rewards/chosen": -0.1821562498807907, "rewards/margins": 0.03005088120698929, "rewards/rejected": -0.2122071385383606, "step": 165 }, { "epoch": 0.2616233254531127, "grad_norm": 0.3764452040195465, "learning_rate": 3.9891011759460056e-06, "log_odds_chosen": 0.43623411655426025, "log_odds_ratio": -0.5043083429336548, "logits/chosen": -0.1440771371126175, "logits/rejected": -0.6117334365844727, "logps/chosen": -1.8048644065856934, "logps/rejected": -2.175879955291748, "loss": 2.0089, "nll_loss": 1.9585102796554565, "rewards/accuracies": 1.0, "rewards/chosen": -0.18048644065856934, "rewards/margins": 0.037101540714502335, "rewards/rejected": -0.21758797764778137, "step": 166 }, { "epoch": 0.2631993695823483, "grad_norm": 0.38358378410339355, "learning_rate": 3.988520562547057e-06, "log_odds_chosen": 0.4678844213485718, "log_odds_ratio": -0.4977923333644867, "logits/chosen": -0.09336389601230621, "logits/rejected": -0.6688787937164307, "logps/chosen": -1.7605222463607788, "logps/rejected": -2.160085678100586, "loss": 1.9465, "nll_loss": 1.8967318534851074, "rewards/accuracies": 1.0, "rewards/chosen": -0.17605219781398773, "rewards/margins": 0.03995633125305176, "rewards/rejected": -0.21600855886936188, "step": 167 }, { "epoch": 0.2647754137115839, "grad_norm": 0.34890928864479065, "learning_rate": 3.987924926825047e-06, "log_odds_chosen": 0.49037039279937744, "log_odds_ratio": -0.4920588731765747, "logits/chosen": -0.11033634841442108, "logits/rejected": -0.5727885961532593, "logps/chosen": -1.8396610021591187, "logps/rejected": -2.2677671909332275, "loss": 2.0476, "nll_loss": 1.9984408617019653, "rewards/accuracies": 0.875, "rewards/chosen": -0.18396610021591187, "rewards/margins": 0.04281061887741089, "rewards/rejected": -0.22677670419216156, "step": 168 }, { "epoch": 0.26635145784081954, "grad_norm": 0.3390553891658783, "learning_rate": 3.98731427327972e-06, "log_odds_chosen": 0.45071348547935486, "log_odds_ratio": -0.493795782327652, "logits/chosen": -0.1533864587545395, "logits/rejected": -0.8038709163665771, "logps/chosen": -1.7674872875213623, "logps/rejected": -2.15299916267395, "loss": 1.9535, "nll_loss": 1.9041638374328613, "rewards/accuracies": 1.0, "rewards/chosen": -0.17674873769283295, "rewards/margins": 0.03855118155479431, "rewards/rejected": -0.21529991924762726, "step": 169 }, { "epoch": 0.26792750197005516, "grad_norm": 0.33687132596969604, "learning_rate": 3.986688606524273e-06, "log_odds_chosen": 0.3874381482601166, "log_odds_ratio": -0.5277330279350281, "logits/chosen": -0.053588882088661194, "logits/rejected": -0.9633825421333313, "logps/chosen": -1.799920678138733, "logps/rejected": -2.136354684829712, "loss": 1.9903, "nll_loss": 1.9375666379928589, "rewards/accuracies": 0.875, "rewards/chosen": -0.17999204993247986, "rewards/margins": 0.033643417060375214, "rewards/rejected": -0.21363547444343567, "step": 170 }, { "epoch": 0.2695035460992908, "grad_norm": 0.3421062231063843, "learning_rate": 3.986047931285315e-06, "log_odds_chosen": 0.5469809174537659, "log_odds_ratio": -0.461223840713501, "logits/chosen": -0.13250774145126343, "logits/rejected": -0.8443288207054138, "logps/chosen": -1.714763879776001, "logps/rejected": -2.1810569763183594, "loss": 1.9014, "nll_loss": 1.8553175926208496, "rewards/accuracies": 1.0, "rewards/chosen": -0.17147637903690338, "rewards/margins": 0.046629298478364944, "rewards/rejected": -0.21810568869113922, "step": 171 }, { "epoch": 0.2710795902285264, "grad_norm": 0.3105310797691345, "learning_rate": 3.985392252402847e-06, "log_odds_chosen": 0.2694023847579956, "log_odds_ratio": -0.5704121589660645, "logits/chosen": -0.11545141041278839, "logits/rejected": -0.635871171951294, "logps/chosen": -1.700569748878479, "logps/rejected": -1.9258500337600708, "loss": 1.8986, "nll_loss": 1.8415968418121338, "rewards/accuracies": 1.0, "rewards/chosen": -0.17005696892738342, "rewards/margins": 0.022528033703565598, "rewards/rejected": -0.1925850212574005, "step": 172 }, { "epoch": 0.272655634357762, "grad_norm": 0.3302405774593353, "learning_rate": 3.984721574830206e-06, "log_odds_chosen": 0.34335118532180786, "log_odds_ratio": -0.5412152409553528, "logits/chosen": -0.08807546645402908, "logits/rejected": -0.8193111419677734, "logps/chosen": -1.8096662759780884, "logps/rejected": -2.1047701835632324, "loss": 1.9973, "nll_loss": 1.9431415796279907, "rewards/accuracies": 1.0, "rewards/chosen": -0.18096663057804108, "rewards/margins": 0.029510382562875748, "rewards/rejected": -0.21047702431678772, "step": 173 }, { "epoch": 0.27423167848699764, "grad_norm": 0.30101102590560913, "learning_rate": 3.984035903634041e-06, "log_odds_chosen": 0.3278449773788452, "log_odds_ratio": -0.549487292766571, "logits/chosen": -0.033350471407175064, "logits/rejected": -0.8235560655593872, "logps/chosen": -1.7809809446334839, "logps/rejected": -2.0601303577423096, "loss": 1.9562, "nll_loss": 1.9012670516967773, "rewards/accuracies": 0.875, "rewards/chosen": -0.17809809744358063, "rewards/margins": 0.02791496179997921, "rewards/rejected": -0.2060130536556244, "step": 174 }, { "epoch": 0.27580772261623326, "grad_norm": 0.313924640417099, "learning_rate": 3.983335243994273e-06, "log_odds_chosen": 0.44480204582214355, "log_odds_ratio": -0.5034283399581909, "logits/chosen": -0.015027942135930061, "logits/rejected": -0.7614739537239075, "logps/chosen": -1.772882342338562, "logps/rejected": -2.154425859451294, "loss": 1.96, "nll_loss": 1.9096373319625854, "rewards/accuracies": 1.0, "rewards/chosen": -0.1772882342338562, "rewards/margins": 0.03815435618162155, "rewards/rejected": -0.21544259786605835, "step": 175 }, { "epoch": 0.2773837667454689, "grad_norm": 0.36805829405784607, "learning_rate": 3.982619601204049e-06, "log_odds_chosen": 0.37449923157691956, "log_odds_ratio": -0.5290423631668091, "logits/chosen": -0.09903114289045334, "logits/rejected": -0.7904164791107178, "logps/chosen": -1.8346233367919922, "logps/rejected": -2.158191680908203, "loss": 2.0219, "nll_loss": 1.9690214395523071, "rewards/accuracies": 1.0, "rewards/chosen": -0.18346232175827026, "rewards/margins": 0.03235683590173721, "rewards/rejected": -0.21581916511058807, "step": 176 }, { "epoch": 0.2789598108747045, "grad_norm": 0.3392786383628845, "learning_rate": 3.9818889806697085e-06, "log_odds_chosen": 0.43580567836761475, "log_odds_ratio": -0.5084404349327087, "logits/chosen": 0.015692677348852158, "logits/rejected": -0.8096188306808472, "logps/chosen": -1.8684344291687012, "logps/rejected": -2.249403476715088, "loss": 2.0317, "nll_loss": 1.9808424711227417, "rewards/accuracies": 0.875, "rewards/chosen": -0.18684343993663788, "rewards/margins": 0.038096919655799866, "rewards/rejected": -0.22494037449359894, "step": 177 }, { "epoch": 0.2805358550039401, "grad_norm": 0.31521204113960266, "learning_rate": 3.98114338791074e-06, "log_odds_chosen": 0.515329897403717, "log_odds_ratio": -0.4845845699310303, "logits/chosen": -0.09881779551506042, "logits/rejected": -0.6956612467765808, "logps/chosen": -1.74465811252594, "logps/rejected": -2.1908679008483887, "loss": 1.9357, "nll_loss": 1.8872735500335693, "rewards/accuracies": 1.0, "rewards/chosen": -0.1744658201932907, "rewards/margins": 0.04462098330259323, "rewards/rejected": -0.21908681094646454, "step": 178 }, { "epoch": 0.28211189913317575, "grad_norm": 0.30939748883247375, "learning_rate": 3.980382828559742e-06, "log_odds_chosen": 0.42107006907463074, "log_odds_ratio": -0.510704755783081, "logits/chosen": -0.11055820435285568, "logits/rejected": -0.8583495616912842, "logps/chosen": -1.78211510181427, "logps/rejected": -2.143000841140747, "loss": 1.951, "nll_loss": 1.8998899459838867, "rewards/accuracies": 1.0, "rewards/chosen": -0.1782114952802658, "rewards/margins": 0.03608858957886696, "rewards/rejected": -0.21430009603500366, "step": 179 }, { "epoch": 0.28368794326241137, "grad_norm": 0.3161435127258301, "learning_rate": 3.9796073083623774e-06, "log_odds_chosen": 0.41550877690315247, "log_odds_ratio": -0.5219519734382629, "logits/chosen": -0.08611653745174408, "logits/rejected": -0.8567744493484497, "logps/chosen": -1.8524975776672363, "logps/rejected": -2.2126336097717285, "loss": 2.0187, "nll_loss": 1.9664819240570068, "rewards/accuracies": 0.875, "rewards/chosen": -0.18524976074695587, "rewards/margins": 0.036013588309288025, "rewards/rejected": -0.2212633341550827, "step": 180 }, { "epoch": 0.285263987391647, "grad_norm": 0.31543034315109253, "learning_rate": 3.978816833177329e-06, "log_odds_chosen": 0.3989214599132538, "log_odds_ratio": -0.519888162612915, "logits/chosen": -0.01459362730383873, "logits/rejected": -0.6901648044586182, "logps/chosen": -1.7767245769500732, "logps/rejected": -2.118866443634033, "loss": 1.9515, "nll_loss": 1.8994776010513306, "rewards/accuracies": 1.0, "rewards/chosen": -0.17767244577407837, "rewards/margins": 0.03421417623758316, "rewards/rejected": -0.21188661456108093, "step": 181 }, { "epoch": 0.2868400315208826, "grad_norm": 0.32000571489334106, "learning_rate": 3.978011408976261e-06, "log_odds_chosen": 0.2851777970790863, "log_odds_ratio": -0.5661397576332092, "logits/chosen": -0.02493867464363575, "logits/rejected": -0.7876978516578674, "logps/chosen": -1.7986279726028442, "logps/rejected": -2.0402181148529053, "loss": 1.9869, "nll_loss": 1.9302453994750977, "rewards/accuracies": 1.0, "rewards/chosen": -0.17986279726028442, "rewards/margins": 0.02415899559855461, "rewards/rejected": -0.20402181148529053, "step": 182 }, { "epoch": 0.28841607565011823, "grad_norm": 0.32417967915534973, "learning_rate": 3.9771910418437674e-06, "log_odds_chosen": 0.3460734486579895, "log_odds_ratio": -0.5362839102745056, "logits/chosen": -0.03848784416913986, "logits/rejected": -0.570169985294342, "logps/chosen": -1.8317880630493164, "logps/rejected": -2.131016731262207, "loss": 2.003, "nll_loss": 1.9494034051895142, "rewards/accuracies": 1.0, "rewards/chosen": -0.1831788271665573, "rewards/margins": 0.029922863468527794, "rewards/rejected": -0.21310168504714966, "step": 183 }, { "epoch": 0.2899921197793538, "grad_norm": 0.3104395866394043, "learning_rate": 3.976355737977332e-06, "log_odds_chosen": 0.43051034212112427, "log_odds_ratio": -0.5086094737052917, "logits/chosen": -0.004376018885523081, "logits/rejected": -0.7855688333511353, "logps/chosen": -1.7710585594177246, "logps/rejected": -2.1398942470550537, "loss": 1.9511, "nll_loss": 1.9002678394317627, "rewards/accuracies": 0.875, "rewards/chosen": -0.1771058589220047, "rewards/margins": 0.03688357025384903, "rewards/rejected": -0.21398943662643433, "step": 184 }, { "epoch": 0.2915681639085894, "grad_norm": 0.28238430619239807, "learning_rate": 3.975505503687274e-06, "log_odds_chosen": 0.32954704761505127, "log_odds_ratio": -0.5454957485198975, "logits/chosen": 0.03291358798742294, "logits/rejected": -0.48518458008766174, "logps/chosen": -1.764021635055542, "logps/rejected": -2.045203924179077, "loss": 1.9398, "nll_loss": 1.8852983713150024, "rewards/accuracies": 1.0, "rewards/chosen": -0.17640215158462524, "rewards/margins": 0.028118234127759933, "rewards/rejected": -0.20452038943767548, "step": 185 }, { "epoch": 0.29314420803782504, "grad_norm": 0.27702274918556213, "learning_rate": 3.974640345396708e-06, "log_odds_chosen": 0.5177258253097534, "log_odds_ratio": -0.4748190641403198, "logits/chosen": -0.08025577664375305, "logits/rejected": -1.0515739917755127, "logps/chosen": -1.7293744087219238, "logps/rejected": -2.171941041946411, "loss": 1.9022, "nll_loss": 1.8546966314315796, "rewards/accuracies": 1.0, "rewards/chosen": -0.17293745279312134, "rewards/margins": 0.044256679713726044, "rewards/rejected": -0.21719412505626678, "step": 186 }, { "epoch": 0.29472025216706066, "grad_norm": 0.2979573905467987, "learning_rate": 3.9737602696414925e-06, "log_odds_chosen": 0.2020474076271057, "log_odds_ratio": -0.6071421504020691, "logits/chosen": 0.029590757563710213, "logits/rejected": -0.8974511027336121, "logps/chosen": -1.8263177871704102, "logps/rejected": -1.9980144500732422, "loss": 1.9794, "nll_loss": 1.9187321662902832, "rewards/accuracies": 0.625, "rewards/chosen": -0.18263177573680878, "rewards/margins": 0.017169667407870293, "rewards/rejected": -0.19980144500732422, "step": 187 }, { "epoch": 0.2962962962962963, "grad_norm": 0.2755199074745178, "learning_rate": 3.972865283070179e-06, "log_odds_chosen": 0.4493619203567505, "log_odds_ratio": -0.49928387999534607, "logits/chosen": -0.015258762054145336, "logits/rejected": -0.9950686097145081, "logps/chosen": -1.721350908279419, "logps/rejected": -2.1068735122680664, "loss": 1.9017, "nll_loss": 1.8517918586730957, "rewards/accuracies": 1.0, "rewards/chosen": -0.1721350997686386, "rewards/margins": 0.03855225443840027, "rewards/rejected": -0.21068735420703888, "step": 188 }, { "epoch": 0.2978723404255319, "grad_norm": 0.2944152057170868, "learning_rate": 3.971955392443965e-06, "log_odds_chosen": 0.39672476053237915, "log_odds_ratio": -0.5201811790466309, "logits/chosen": -0.016328200697898865, "logits/rejected": -0.786607027053833, "logps/chosen": -1.7574641704559326, "logps/rejected": -2.0998551845550537, "loss": 1.9253, "nll_loss": 1.8732632398605347, "rewards/accuracies": 1.0, "rewards/chosen": -0.17574642598628998, "rewards/margins": 0.03423908352851868, "rewards/rejected": -0.20998550951480865, "step": 189 }, { "epoch": 0.2994483845547675, "grad_norm": 0.28858453035354614, "learning_rate": 3.971030604636637e-06, "log_odds_chosen": 0.27192696928977966, "log_odds_ratio": -0.570553183555603, "logits/chosen": 0.050630006939172745, "logits/rejected": -0.7770216464996338, "logps/chosen": -1.7724601030349731, "logps/rejected": -2.0022082328796387, "loss": 1.9388, "nll_loss": 1.8817503452301025, "rewards/accuracies": 0.875, "rewards/chosen": -0.17724601924419403, "rewards/margins": 0.02297479659318924, "rewards/rejected": -0.20022080838680267, "step": 190 }, { "epoch": 0.30102442868400314, "grad_norm": 0.2995285987854004, "learning_rate": 3.970090926634526e-06, "log_odds_chosen": 0.3720911741256714, "log_odds_ratio": -0.5282139778137207, "logits/chosen": 0.11160154640674591, "logits/rejected": -0.9845487475395203, "logps/chosen": -1.8277368545532227, "logps/rejected": -2.1482725143432617, "loss": 2.0007, "nll_loss": 1.9478685855865479, "rewards/accuracies": 1.0, "rewards/chosen": -0.18277369439601898, "rewards/margins": 0.032053571194410324, "rewards/rejected": -0.2148272544145584, "step": 191 }, { "epoch": 0.30260047281323876, "grad_norm": 0.3174213767051697, "learning_rate": 3.9691363655364526e-06, "log_odds_chosen": 0.4017809331417084, "log_odds_ratio": -0.5195269584655762, "logits/chosen": 0.0210281815379858, "logits/rejected": -0.7612247467041016, "logps/chosen": -1.8064415454864502, "logps/rejected": -2.152747392654419, "loss": 1.978, "nll_loss": 1.9260954856872559, "rewards/accuracies": 0.875, "rewards/chosen": -0.18064415454864502, "rewards/margins": 0.03463058918714523, "rewards/rejected": -0.21527475118637085, "step": 192 }, { "epoch": 0.3041765169424744, "grad_norm": 0.278577595949173, "learning_rate": 3.968166928553666e-06, "log_odds_chosen": 0.3343183696269989, "log_odds_ratio": -0.544468104839325, "logits/chosen": 0.06776704639196396, "logits/rejected": -0.9449382424354553, "logps/chosen": -1.7551405429840088, "logps/rejected": -2.0379421710968018, "loss": 1.9431, "nll_loss": 1.8886576890945435, "rewards/accuracies": 0.875, "rewards/chosen": -0.17551404237747192, "rewards/margins": 0.02828015573322773, "rewards/rejected": -0.2037942111492157, "step": 193 }, { "epoch": 0.30575256107171, "grad_norm": 0.2866746485233307, "learning_rate": 3.967182623009804e-06, "log_odds_chosen": 0.46259909868240356, "log_odds_ratio": -0.4940981864929199, "logits/chosen": 0.020696064457297325, "logits/rejected": -0.7941763401031494, "logps/chosen": -1.7806702852249146, "logps/rejected": -2.181614637374878, "loss": 1.9371, "nll_loss": 1.8876622915267944, "rewards/accuracies": 1.0, "rewards/chosen": -0.17806704342365265, "rewards/margins": 0.040094420313835144, "rewards/rejected": -0.218161478638649, "step": 194 }, { "epoch": 0.3073286052009456, "grad_norm": 0.30178844928741455, "learning_rate": 3.966183456340821e-06, "log_odds_chosen": 0.238200843334198, "log_odds_ratio": -0.5833666324615479, "logits/chosen": 0.06038721278309822, "logits/rejected": -0.818547785282135, "logps/chosen": -1.8105565309524536, "logps/rejected": -2.0120017528533936, "loss": 1.9804, "nll_loss": 1.922109842300415, "rewards/accuracies": 1.0, "rewards/chosen": -0.18105565011501312, "rewards/margins": 0.020144525915384293, "rewards/rejected": -0.20120017230510712, "step": 195 }, { "epoch": 0.30890464933018125, "grad_norm": 0.30308058857917786, "learning_rate": 3.965169436094947e-06, "log_odds_chosen": 0.43042975664138794, "log_odds_ratio": -0.5097436308860779, "logits/chosen": 0.0861726701259613, "logits/rejected": -0.8697912693023682, "logps/chosen": -1.7020423412322998, "logps/rejected": -2.069258451461792, "loss": 1.8798, "nll_loss": 1.828870415687561, "rewards/accuracies": 0.875, "rewards/chosen": -0.17020423710346222, "rewards/margins": 0.0367216058075428, "rewards/rejected": -0.20692585408687592, "step": 196 }, { "epoch": 0.31048069345941687, "grad_norm": 0.28854966163635254, "learning_rate": 3.964140569932618e-06, "log_odds_chosen": 0.3548341393470764, "log_odds_ratio": -0.549152135848999, "logits/chosen": 0.04638773947954178, "logits/rejected": -1.1066012382507324, "logps/chosen": -1.8225290775299072, "logps/rejected": -2.125335931777954, "loss": 1.9827, "nll_loss": 1.9277925491333008, "rewards/accuracies": 0.75, "rewards/chosen": -0.182252898812294, "rewards/margins": 0.030280686914920807, "rewards/rejected": -0.2125336080789566, "step": 197 }, { "epoch": 0.3120567375886525, "grad_norm": 0.3032393157482147, "learning_rate": 3.9630968656264285e-06, "log_odds_chosen": 0.4571669101715088, "log_odds_ratio": -0.4972551167011261, "logits/chosen": 0.05622638389468193, "logits/rejected": -0.7387775778770447, "logps/chosen": -1.7499175071716309, "logps/rejected": -2.142073154449463, "loss": 1.9064, "nll_loss": 1.8567097187042236, "rewards/accuracies": 1.0, "rewards/chosen": -0.17499174177646637, "rewards/margins": 0.039215561002492905, "rewards/rejected": -0.21420730650424957, "step": 198 }, { "epoch": 0.3136327817178881, "grad_norm": 0.300789475440979, "learning_rate": 3.962038331061065e-06, "log_odds_chosen": 0.20108665525913239, "log_odds_ratio": -0.6110847592353821, "logits/chosen": 0.13670037686824799, "logits/rejected": -0.8646966218948364, "logps/chosen": -1.8039584159851074, "logps/rejected": -1.9805593490600586, "loss": 1.9825, "nll_loss": 1.921362280845642, "rewards/accuracies": 0.625, "rewards/chosen": -0.18039584159851074, "rewards/margins": 0.017660098150372505, "rewards/rejected": -0.1980559378862381, "step": 199 }, { "epoch": 0.31520882584712373, "grad_norm": 0.28587841987609863, "learning_rate": 3.96096497423325e-06, "log_odds_chosen": 0.298963338136673, "log_odds_ratio": -0.5593155026435852, "logits/chosen": 0.16921967267990112, "logits/rejected": -0.5161201357841492, "logps/chosen": -1.6970977783203125, "logps/rejected": -1.9485868215560913, "loss": 1.8973, "nll_loss": 1.8413777351379395, "rewards/accuracies": 1.0, "rewards/chosen": -0.16970978677272797, "rewards/margins": 0.025148894637823105, "rewards/rejected": -0.19485867023468018, "step": 200 }, { "epoch": 0.31678486997635935, "grad_norm": 0.2650342285633087, "learning_rate": 3.959876803251684e-06, "log_odds_chosen": 0.42556777596473694, "log_odds_ratio": -0.5099666118621826, "logits/chosen": 0.04669623449444771, "logits/rejected": -0.791134238243103, "logps/chosen": -1.718097448348999, "logps/rejected": -2.0799450874328613, "loss": 1.8699, "nll_loss": 1.818863034248352, "rewards/accuracies": 1.0, "rewards/chosen": -0.17180974781513214, "rewards/margins": 0.03618478775024414, "rewards/rejected": -0.20799453556537628, "step": 201 }, { "epoch": 0.31836091410559497, "grad_norm": 0.2691303491592407, "learning_rate": 3.958773826336977e-06, "log_odds_chosen": 0.3119014501571655, "log_odds_ratio": -0.5508911609649658, "logits/chosen": 0.1294005662202835, "logits/rejected": -0.6723183989524841, "logps/chosen": -1.7649327516555786, "logps/rejected": -2.0296618938446045, "loss": 1.9357, "nll_loss": 1.8805636167526245, "rewards/accuracies": 1.0, "rewards/chosen": -0.17649328708648682, "rewards/margins": 0.026472903788089752, "rewards/rejected": -0.20296618342399597, "step": 202 }, { "epoch": 0.3199369582348306, "grad_norm": 0.27943408489227295, "learning_rate": 3.957656051821592e-06, "log_odds_chosen": 0.265775203704834, "log_odds_ratio": -0.5743451714515686, "logits/chosen": 0.14156483113765717, "logits/rejected": -1.0007867813110352, "logps/chosen": -1.794228434562683, "logps/rejected": -2.022888660430908, "loss": 1.9404, "nll_loss": 1.8829922676086426, "rewards/accuracies": 0.875, "rewards/chosen": -0.17942282557487488, "rewards/margins": 0.022866029292345047, "rewards/rejected": -0.20228886604309082, "step": 203 }, { "epoch": 0.3215130023640662, "grad_norm": 0.2855702042579651, "learning_rate": 3.956523488149783e-06, "log_odds_chosen": 0.3671448826789856, "log_odds_ratio": -0.5314764976501465, "logits/chosen": 0.1590101420879364, "logits/rejected": -0.7161625623703003, "logps/chosen": -1.8966209888458252, "logps/rejected": -2.216151237487793, "loss": 2.0521, "nll_loss": 1.9989579916000366, "rewards/accuracies": 1.0, "rewards/chosen": -0.18966209888458252, "rewards/margins": 0.03195303678512573, "rewards/rejected": -0.22161512076854706, "step": 204 }, { "epoch": 0.32308904649330183, "grad_norm": 0.2792893350124359, "learning_rate": 3.9553761438775285e-06, "log_odds_chosen": 0.5917212963104248, "log_odds_ratio": -0.4462578594684601, "logits/chosen": 0.04287697374820709, "logits/rejected": -0.7457516193389893, "logps/chosen": -1.629404067993164, "logps/rejected": -2.1322901248931885, "loss": 1.7862, "nll_loss": 1.741592526435852, "rewards/accuracies": 1.0, "rewards/chosen": -0.16294041275978088, "rewards/margins": 0.0502886101603508, "rewards/rejected": -0.2132290005683899, "step": 205 }, { "epoch": 0.32466509062253746, "grad_norm": 0.2581063210964203, "learning_rate": 3.954214027672465e-06, "log_odds_chosen": 0.553436815738678, "log_odds_ratio": -0.4622824788093567, "logits/chosen": 0.06957760453224182, "logits/rejected": -1.1285384893417358, "logps/chosen": -1.7171605825424194, "logps/rejected": -2.1953935623168945, "loss": 1.867, "nll_loss": 1.8207881450653076, "rewards/accuracies": 1.0, "rewards/chosen": -0.17171606421470642, "rewards/margins": 0.04782331734895706, "rewards/rejected": -0.21953937411308289, "step": 206 }, { "epoch": 0.3262411347517731, "grad_norm": 0.28155970573425293, "learning_rate": 3.953037148313825e-06, "log_odds_chosen": 0.3361928164958954, "log_odds_ratio": -0.5446957945823669, "logits/chosen": 0.1372230350971222, "logits/rejected": -0.8692892789840698, "logps/chosen": -1.7089570760726929, "logps/rejected": -1.995591640472412, "loss": 1.8908, "nll_loss": 1.8363476991653442, "rewards/accuracies": 0.875, "rewards/chosen": -0.17089569568634033, "rewards/margins": 0.028663454577326775, "rewards/rejected": -0.19955916702747345, "step": 207 }, { "epoch": 0.32781717888100864, "grad_norm": 0.2895718216896057, "learning_rate": 3.951845514692371e-06, "log_odds_chosen": 0.4320535957813263, "log_odds_ratio": -0.5092025399208069, "logits/chosen": 0.06572133302688599, "logits/rejected": -0.7182308435440063, "logps/chosen": -1.7720617055892944, "logps/rejected": -2.1444907188415527, "loss": 1.9423, "nll_loss": 1.8913987874984741, "rewards/accuracies": 1.0, "rewards/chosen": -0.1772061586380005, "rewards/margins": 0.037242915481328964, "rewards/rejected": -0.21444910764694214, "step": 208 }, { "epoch": 0.32939322301024426, "grad_norm": 0.2872414290904999, "learning_rate": 3.950639135810325e-06, "log_odds_chosen": 0.3550606369972229, "log_odds_ratio": -0.5360490083694458, "logits/chosen": 0.2004653960466385, "logits/rejected": -0.6822468042373657, "logps/chosen": -1.889462947845459, "logps/rejected": -2.1966936588287354, "loss": 2.0309, "nll_loss": 1.977307677268982, "rewards/accuracies": 0.875, "rewards/chosen": -0.18894629180431366, "rewards/margins": 0.030723070725798607, "rewards/rejected": -0.21966935694217682, "step": 209 }, { "epoch": 0.3309692671394799, "grad_norm": 0.27572956681251526, "learning_rate": 3.9494180207813044e-06, "log_odds_chosen": 0.2917708456516266, "log_odds_ratio": -0.5690155029296875, "logits/chosen": 0.18270650506019592, "logits/rejected": -0.9679818153381348, "logps/chosen": -1.759641408920288, "logps/rejected": -2.006713628768921, "loss": 1.9279, "nll_loss": 1.8709876537322998, "rewards/accuracies": 0.875, "rewards/chosen": -0.17596416175365448, "rewards/margins": 0.024707205593585968, "rewards/rejected": -0.20067137479782104, "step": 210 }, { "epoch": 0.3325453112687155, "grad_norm": 0.27592262625694275, "learning_rate": 3.948182178830249e-06, "log_odds_chosen": 0.3494272828102112, "log_odds_ratio": -0.5527153015136719, "logits/chosen": 0.17400677502155304, "logits/rejected": -1.0591527223587036, "logps/chosen": -1.7874795198440552, "logps/rejected": -2.091515302658081, "loss": 1.9384, "nll_loss": 1.883126974105835, "rewards/accuracies": 0.75, "rewards/chosen": -0.1787479668855667, "rewards/margins": 0.030403554439544678, "rewards/rejected": -0.2091515213251114, "step": 211 }, { "epoch": 0.3341213553979511, "grad_norm": 0.2815802991390228, "learning_rate": 3.9469316192933545e-06, "log_odds_chosen": 0.5855627059936523, "log_odds_ratio": -0.451069712638855, "logits/chosen": 0.05581940338015556, "logits/rejected": -0.6912811994552612, "logps/chosen": -1.643700361251831, "logps/rejected": -2.1421589851379395, "loss": 1.809, "nll_loss": 1.7639025449752808, "rewards/accuracies": 1.0, "rewards/chosen": -0.16437003016471863, "rewards/margins": 0.0498458594083786, "rewards/rejected": -0.21421588957309723, "step": 212 }, { "epoch": 0.33569739952718675, "grad_norm": 0.3026936948299408, "learning_rate": 3.945666351618001e-06, "log_odds_chosen": 0.323087215423584, "log_odds_ratio": -0.5478854775428772, "logits/chosen": 0.14214320480823517, "logits/rejected": -0.8071248531341553, "logps/chosen": -1.700455665588379, "logps/rejected": -1.9733654260635376, "loss": 1.8643, "nll_loss": 1.8095039129257202, "rewards/accuracies": 1.0, "rewards/chosen": -0.17004558444023132, "rewards/margins": 0.027290964499115944, "rewards/rejected": -0.19733653962612152, "step": 213 }, { "epoch": 0.33727344365642237, "grad_norm": 0.2719425857067108, "learning_rate": 3.9443863853626825e-06, "log_odds_chosen": 0.36333683133125305, "log_odds_ratio": -0.5374127626419067, "logits/chosen": 0.16779878735542297, "logits/rejected": -0.8444754481315613, "logps/chosen": -1.6595702171325684, "logps/rejected": -1.9579966068267822, "loss": 1.8462, "nll_loss": 1.7924606800079346, "rewards/accuracies": 0.875, "rewards/chosen": -0.1659570336341858, "rewards/margins": 0.029842618852853775, "rewards/rejected": -0.19579966366291046, "step": 214 }, { "epoch": 0.338849487785658, "grad_norm": 0.26499754190444946, "learning_rate": 3.943091730196931e-06, "log_odds_chosen": 0.32084012031555176, "log_odds_ratio": -0.5536172986030579, "logits/chosen": 0.2257343828678131, "logits/rejected": -0.6080964803695679, "logps/chosen": -1.6629269123077393, "logps/rejected": -1.9345366954803467, "loss": 1.8348, "nll_loss": 1.7794655561447144, "rewards/accuracies": 1.0, "rewards/chosen": -0.1662926971912384, "rewards/margins": 0.02716096118092537, "rewards/rejected": -0.19345368444919586, "step": 215 }, { "epoch": 0.3404255319148936, "grad_norm": 0.2940061092376709, "learning_rate": 3.941782395901249e-06, "log_odds_chosen": 0.3910900950431824, "log_odds_ratio": -0.522650420665741, "logits/chosen": 0.13361163437366486, "logits/rejected": -0.7325764298439026, "logps/chosen": -1.7365449666976929, "logps/rejected": -2.0691425800323486, "loss": 1.9089, "nll_loss": 1.8566128015518188, "rewards/accuracies": 0.875, "rewards/chosen": -0.17365451157093048, "rewards/margins": 0.033259760588407516, "rewards/rejected": -0.2069142609834671, "step": 216 }, { "epoch": 0.34200157604412923, "grad_norm": 0.2855311334133148, "learning_rate": 3.940458392367032e-06, "log_odds_chosen": 0.359374463558197, "log_odds_ratio": -0.533739447593689, "logits/chosen": 0.17437973618507385, "logits/rejected": -1.037083387374878, "logps/chosen": -1.79496431350708, "logps/rejected": -2.1024041175842285, "loss": 1.9325, "nll_loss": 1.8790829181671143, "rewards/accuracies": 1.0, "rewards/chosen": -0.17949643731117249, "rewards/margins": 0.030744003131985664, "rewards/rejected": -0.2102404236793518, "step": 217 }, { "epoch": 0.34357762017336485, "grad_norm": 0.27844125032424927, "learning_rate": 3.939119729596493e-06, "log_odds_chosen": 0.400441437959671, "log_odds_ratio": -0.5161818265914917, "logits/chosen": 0.2301069051027298, "logits/rejected": -0.8860409259796143, "logps/chosen": -1.7927645444869995, "logps/rejected": -2.137216567993164, "loss": 1.9434, "nll_loss": 1.89176344871521, "rewards/accuracies": 1.0, "rewards/chosen": -0.1792764663696289, "rewards/margins": 0.034445181488990784, "rewards/rejected": -0.2137216329574585, "step": 218 }, { "epoch": 0.34515366430260047, "grad_norm": 0.26965805888175964, "learning_rate": 3.93776641770259e-06, "log_odds_chosen": 0.328607439994812, "log_odds_ratio": -0.5486437082290649, "logits/chosen": 0.20194567739963531, "logits/rejected": -0.8480868935585022, "logps/chosen": -1.7666839361190796, "logps/rejected": -2.047614097595215, "loss": 1.9141, "nll_loss": 1.859285593032837, "rewards/accuracies": 1.0, "rewards/chosen": -0.17666839063167572, "rewards/margins": 0.028093010187149048, "rewards/rejected": -0.20476141571998596, "step": 219 }, { "epoch": 0.3467297084318361, "grad_norm": 0.26573604345321655, "learning_rate": 3.93639846690895e-06, "log_odds_chosen": 0.47803783416748047, "log_odds_ratio": -0.4869340658187866, "logits/chosen": 0.22956933081150055, "logits/rejected": -1.1470171213150024, "logps/chosen": -1.7806720733642578, "logps/rejected": -2.194432258605957, "loss": 1.9229, "nll_loss": 1.8741583824157715, "rewards/accuracies": 1.0, "rewards/chosen": -0.17806722223758698, "rewards/margins": 0.04137600213289261, "rewards/rejected": -0.21944323182106018, "step": 220 }, { "epoch": 0.3483057525610717, "grad_norm": 0.27221164107322693, "learning_rate": 3.9350158875497855e-06, "log_odds_chosen": 0.5355910062789917, "log_odds_ratio": -0.4647471606731415, "logits/chosen": 0.2502666711807251, "logits/rejected": -0.9178094863891602, "logps/chosen": -1.693802833557129, "logps/rejected": -2.1492807865142822, "loss": 1.851, "nll_loss": 1.8045127391815186, "rewards/accuracies": 1.0, "rewards/chosen": -0.1693802773952484, "rewards/margins": 0.04554780200123787, "rewards/rejected": -0.21492807567119598, "step": 221 }, { "epoch": 0.34988179669030733, "grad_norm": 0.2870781719684601, "learning_rate": 3.933618690069824e-06, "log_odds_chosen": 0.514340341091156, "log_odds_ratio": -0.4729236364364624, "logits/chosen": 0.1029556393623352, "logits/rejected": -0.9085869789123535, "logps/chosen": -1.63947331905365, "logps/rejected": -2.0694899559020996, "loss": 1.8078, "nll_loss": 1.760536789894104, "rewards/accuracies": 1.0, "rewards/chosen": -0.16394734382629395, "rewards/margins": 0.04300164431333542, "rewards/rejected": -0.20694898068904877, "step": 222 }, { "epoch": 0.35145784081954295, "grad_norm": 0.26160404086112976, "learning_rate": 3.932206885024226e-06, "log_odds_chosen": 0.2591567635536194, "log_odds_ratio": -0.5751827359199524, "logits/chosen": 0.14362215995788574, "logits/rejected": -0.8560265898704529, "logps/chosen": -1.7445533275604248, "logps/rejected": -1.963904857635498, "loss": 1.8865, "nll_loss": 1.82896888256073, "rewards/accuracies": 0.875, "rewards/chosen": -0.17445534467697144, "rewards/margins": 0.021935151889920235, "rewards/rejected": -0.19639047980308533, "step": 223 }, { "epoch": 0.3530338849487786, "grad_norm": 0.24491266906261444, "learning_rate": 3.930780483078502e-06, "log_odds_chosen": 0.39663180708885193, "log_odds_ratio": -0.5192946195602417, "logits/chosen": 0.13351722061634064, "logits/rejected": -0.8884856104850769, "logps/chosen": -1.6729098558425903, "logps/rejected": -2.0054969787597656, "loss": 1.8405, "nll_loss": 1.7885487079620361, "rewards/accuracies": 1.0, "rewards/chosen": -0.16729097068309784, "rewards/margins": 0.03325873613357544, "rewards/rejected": -0.20054970681667328, "step": 224 }, { "epoch": 0.3546099290780142, "grad_norm": 0.26580068469047546, "learning_rate": 3.92933949500844e-06, "log_odds_chosen": 0.4522182047367096, "log_odds_ratio": -0.5091683268547058, "logits/chosen": 0.20834729075431824, "logits/rejected": -0.8979159593582153, "logps/chosen": -1.6526035070419312, "logps/rejected": -2.027092933654785, "loss": 1.8166, "nll_loss": 1.765660285949707, "rewards/accuracies": 0.875, "rewards/chosen": -0.16526035964488983, "rewards/margins": 0.0374489426612854, "rewards/rejected": -0.20270927250385284, "step": 225 }, { "epoch": 0.3561859732072498, "grad_norm": 0.2466343641281128, "learning_rate": 3.9278839317000155e-06, "log_odds_chosen": 0.4413149952888489, "log_odds_ratio": -0.502187192440033, "logits/chosen": 0.22558066248893738, "logits/rejected": -0.8436870574951172, "logps/chosen": -1.676627516746521, "logps/rejected": -2.049926519393921, "loss": 1.8305, "nll_loss": 1.7802934646606445, "rewards/accuracies": 1.0, "rewards/chosen": -0.16766275465488434, "rewards/margins": 0.037329915910959244, "rewards/rejected": -0.20499266684055328, "step": 226 }, { "epoch": 0.35776201733648544, "grad_norm": 0.2658051550388336, "learning_rate": 3.926413804149314e-06, "log_odds_chosen": 0.179185152053833, "log_odds_ratio": -0.6169071793556213, "logits/chosen": 0.24110640585422516, "logits/rejected": -0.8408032059669495, "logps/chosen": -1.758495569229126, "logps/rejected": -1.9097925424575806, "loss": 1.9105, "nll_loss": 1.8488125801086426, "rewards/accuracies": 0.625, "rewards/chosen": -0.1758495569229126, "rewards/margins": 0.015129687264561653, "rewards/rejected": -0.1909792423248291, "step": 227 }, { "epoch": 0.35933806146572106, "grad_norm": 0.26918303966522217, "learning_rate": 3.924929123462447e-06, "log_odds_chosen": 0.33828845620155334, "log_odds_ratio": -0.5443422794342041, "logits/chosen": 0.2773420810699463, "logits/rejected": -0.8576438426971436, "logps/chosen": -1.7860910892486572, "logps/rejected": -2.0751004219055176, "loss": 1.9309, "nll_loss": 1.876507043838501, "rewards/accuracies": 0.875, "rewards/chosen": -0.17860911786556244, "rewards/margins": 0.028900934383273125, "rewards/rejected": -0.20751003921031952, "step": 228 }, { "epoch": 0.3609141055949567, "grad_norm": 0.27900397777557373, "learning_rate": 3.923429900855468e-06, "log_odds_chosen": 0.26649370789527893, "log_odds_ratio": -0.5749764442443848, "logits/chosen": 0.2404632568359375, "logits/rejected": -0.6364312767982483, "logps/chosen": -1.857352375984192, "logps/rejected": -2.0843093395233154, "loss": 2.0045, "nll_loss": 1.9470313787460327, "rewards/accuracies": 0.875, "rewards/chosen": -0.18573524057865143, "rewards/margins": 0.02269568480551243, "rewards/rejected": -0.2084309309720993, "step": 229 }, { "epoch": 0.3624901497241923, "grad_norm": 0.2622164189815521, "learning_rate": 3.921916147654287e-06, "log_odds_chosen": 0.5030225515365601, "log_odds_ratio": -0.4795306324958801, "logits/chosen": 0.19567719101905823, "logits/rejected": -0.8917854428291321, "logps/chosen": -1.693268895149231, "logps/rejected": -2.1187849044799805, "loss": 1.829, "nll_loss": 1.7810728549957275, "rewards/accuracies": 1.0, "rewards/chosen": -0.16932690143585205, "rewards/margins": 0.04255159944295883, "rewards/rejected": -0.21187849342823029, "step": 230 }, { "epoch": 0.3640661938534279, "grad_norm": 0.25981763005256653, "learning_rate": 3.920387875294588e-06, "log_odds_chosen": 0.34487566351890564, "log_odds_ratio": -0.5391297936439514, "logits/chosen": 0.22495216131210327, "logits/rejected": -0.7459501028060913, "logps/chosen": -1.6685864925384521, "logps/rejected": -1.9565632343292236, "loss": 1.8216, "nll_loss": 1.7676771879196167, "rewards/accuracies": 1.0, "rewards/chosen": -0.16685864329338074, "rewards/margins": 0.028797684237360954, "rewards/rejected": -0.19565632939338684, "step": 231 }, { "epoch": 0.3656422379826635, "grad_norm": 0.2565964460372925, "learning_rate": 3.918845095321737e-06, "log_odds_chosen": 0.32233738899230957, "log_odds_ratio": -0.5548774003982544, "logits/chosen": 0.19658507406711578, "logits/rejected": -0.9954935908317566, "logps/chosen": -1.6999584436416626, "logps/rejected": -1.9643486738204956, "loss": 1.8453, "nll_loss": 1.7898164987564087, "rewards/accuracies": 0.875, "rewards/chosen": -0.16999585926532745, "rewards/margins": 0.026439011096954346, "rewards/rejected": -0.1964348405599594, "step": 232 }, { "epoch": 0.3672182821118991, "grad_norm": 0.26710182428359985, "learning_rate": 3.9172878193907004e-06, "log_odds_chosen": 0.36338526010513306, "log_odds_ratio": -0.5375514030456543, "logits/chosen": 0.13183602690696716, "logits/rejected": -0.9046310782432556, "logps/chosen": -1.772655963897705, "logps/rejected": -2.0851950645446777, "loss": 1.9319, "nll_loss": 1.8781368732452393, "rewards/accuracies": 0.875, "rewards/chosen": -0.17726561427116394, "rewards/margins": 0.03125389292836189, "rewards/rejected": -0.20851948857307434, "step": 233 }, { "epoch": 0.36879432624113473, "grad_norm": 0.252542108297348, "learning_rate": 3.915716059265955e-06, "log_odds_chosen": 0.3764590620994568, "log_odds_ratio": -0.5301459431648254, "logits/chosen": 0.2072755992412567, "logits/rejected": -0.9679795503616333, "logps/chosen": -1.7244305610656738, "logps/rejected": -2.0450494289398193, "loss": 1.8711, "nll_loss": 1.8181140422821045, "rewards/accuracies": 0.875, "rewards/chosen": -0.17244306206703186, "rewards/margins": 0.032061897218227386, "rewards/rejected": -0.20450495183467865, "step": 234 }, { "epoch": 0.37037037037037035, "grad_norm": 0.23418840765953064, "learning_rate": 3.9141298268213966e-06, "log_odds_chosen": 0.47056907415390015, "log_odds_ratio": -0.48991110920906067, "logits/chosen": 0.2516101002693176, "logits/rejected": -0.8004311323165894, "logps/chosen": -1.643180251121521, "logps/rejected": -2.04105806350708, "loss": 1.7967, "nll_loss": 1.7476621866226196, "rewards/accuracies": 1.0, "rewards/chosen": -0.1643180400133133, "rewards/margins": 0.039787761867046356, "rewards/rejected": -0.20410577952861786, "step": 235 }, { "epoch": 0.37194641449960597, "grad_norm": 0.26616281270980835, "learning_rate": 3.912529134040255e-06, "log_odds_chosen": 0.3272354304790497, "log_odds_ratio": -0.5463756918907166, "logits/chosen": 0.23501628637313843, "logits/rejected": -0.7225480079650879, "logps/chosen": -1.7243411540985107, "logps/rejected": -2.002100706100464, "loss": 1.8846, "nll_loss": 1.8299776315689087, "rewards/accuracies": 1.0, "rewards/chosen": -0.17243412137031555, "rewards/margins": 0.027775948867201805, "rewards/rejected": -0.2002100646495819, "step": 236 }, { "epoch": 0.3735224586288416, "grad_norm": 0.2716814875602722, "learning_rate": 3.910913993014998e-06, "log_odds_chosen": 0.48933014273643494, "log_odds_ratio": -0.48118287324905396, "logits/chosen": 0.3107871413230896, "logits/rejected": -0.8831154704093933, "logps/chosen": -1.7238764762878418, "logps/rejected": -2.1423754692077637, "loss": 1.8692, "nll_loss": 1.8211053609848022, "rewards/accuracies": 1.0, "rewards/chosen": -0.17238764464855194, "rewards/margins": 0.041849926114082336, "rewards/rejected": -0.21423755586147308, "step": 237 }, { "epoch": 0.3750985027580772, "grad_norm": 0.2618771493434906, "learning_rate": 3.909284415947246e-06, "log_odds_chosen": 0.47179070115089417, "log_odds_ratio": -0.4907287359237671, "logits/chosen": 0.3078673183917999, "logits/rejected": -0.9788669347763062, "logps/chosen": -1.7702430486679077, "logps/rejected": -2.173584461212158, "loss": 1.9009, "nll_loss": 1.851858139038086, "rewards/accuracies": 1.0, "rewards/chosen": -0.17702430486679077, "rewards/margins": 0.04033412039279938, "rewards/rejected": -0.21735844016075134, "step": 238 }, { "epoch": 0.37667454688731283, "grad_norm": 0.24487651884555817, "learning_rate": 3.907640415147674e-06, "log_odds_chosen": 0.5142653584480286, "log_odds_ratio": -0.47312235832214355, "logits/chosen": 0.17608575522899628, "logits/rejected": -1.0495761632919312, "logps/chosen": -1.6641782522201538, "logps/rejected": -2.098402738571167, "loss": 1.8178, "nll_loss": 1.7705227136611938, "rewards/accuracies": 1.0, "rewards/chosen": -0.16641782224178314, "rewards/margins": 0.04342243820428848, "rewards/rejected": -0.20984025299549103, "step": 239 }, { "epoch": 0.37825059101654845, "grad_norm": 0.2650536000728607, "learning_rate": 3.905982003035924e-06, "log_odds_chosen": 0.4236319363117218, "log_odds_ratio": -0.5087918043136597, "logits/chosen": 0.20158736407756805, "logits/rejected": -0.9927594661712646, "logps/chosen": -1.7691205739974976, "logps/rejected": -2.132444143295288, "loss": 1.9045, "nll_loss": 1.853621006011963, "rewards/accuracies": 1.0, "rewards/chosen": -0.17691205441951752, "rewards/margins": 0.03633236512541771, "rewards/rejected": -0.21324442327022552, "step": 240 }, { "epoch": 0.3798266351457841, "grad_norm": 0.26103925704956055, "learning_rate": 3.904309192140506e-06, "log_odds_chosen": 0.2944376468658447, "log_odds_ratio": -0.564617931842804, "logits/chosen": 0.21214058995246887, "logits/rejected": -0.6803040504455566, "logps/chosen": -1.6910583972930908, "logps/rejected": -1.9392961263656616, "loss": 1.8418, "nll_loss": 1.7853529453277588, "rewards/accuracies": 0.875, "rewards/chosen": -0.16910582780838013, "rewards/margins": 0.024823768064379692, "rewards/rejected": -0.19392961263656616, "step": 241 }, { "epoch": 0.3814026792750197, "grad_norm": 0.2756460905075073, "learning_rate": 3.90262199509871e-06, "log_odds_chosen": 0.31087297201156616, "log_odds_ratio": -0.5524182319641113, "logits/chosen": 0.2239079773426056, "logits/rejected": -0.7127591371536255, "logps/chosen": -1.7215018272399902, "logps/rejected": -1.9822158813476562, "loss": 1.8763, "nll_loss": 1.8210104703903198, "rewards/accuracies": 1.0, "rewards/chosen": -0.17215019464492798, "rewards/margins": 0.026071399450302124, "rewards/rejected": -0.1982215791940689, "step": 242 }, { "epoch": 0.3829787234042553, "grad_norm": 0.25208762288093567, "learning_rate": 3.900920424656501e-06, "log_odds_chosen": 0.4542803168296814, "log_odds_ratio": -0.4980185031890869, "logits/chosen": 0.19997750222682953, "logits/rejected": -0.8956844806671143, "logps/chosen": -1.6321688890457153, "logps/rejected": -2.0129523277282715, "loss": 1.777, "nll_loss": 1.7271740436553955, "rewards/accuracies": 0.875, "rewards/chosen": -0.16321690380573273, "rewards/margins": 0.038078341633081436, "rewards/rejected": -0.20129524171352386, "step": 243 }, { "epoch": 0.38455476753349094, "grad_norm": 0.275621235370636, "learning_rate": 3.899204493668432e-06, "log_odds_chosen": 0.3811621069908142, "log_odds_ratio": -0.5312854647636414, "logits/chosen": 0.27042973041534424, "logits/rejected": -0.8135743141174316, "logps/chosen": -1.7932356595993042, "logps/rejected": -2.1242425441741943, "loss": 1.9275, "nll_loss": 1.8743923902511597, "rewards/accuracies": 0.875, "rewards/chosen": -0.17932356894016266, "rewards/margins": 0.0331006720662117, "rewards/rejected": -0.21242424845695496, "step": 244 }, { "epoch": 0.38613081166272656, "grad_norm": 0.269351065158844, "learning_rate": 3.897474215097542e-06, "log_odds_chosen": 0.37562695145606995, "log_odds_ratio": -0.5268362760543823, "logits/chosen": 0.30195605754852295, "logits/rejected": -1.0125755071640015, "logps/chosen": -1.7352933883666992, "logps/rejected": -2.0556907653808594, "loss": 1.891, "nll_loss": 1.8383123874664307, "rewards/accuracies": 1.0, "rewards/chosen": -0.17352935671806335, "rewards/margins": 0.03203972801566124, "rewards/rejected": -0.2055690735578537, "step": 245 }, { "epoch": 0.3877068557919622, "grad_norm": 0.24685640633106232, "learning_rate": 3.8957296020152596e-06, "log_odds_chosen": 0.4886430501937866, "log_odds_ratio": -0.48388606309890747, "logits/chosen": 0.29761719703674316, "logits/rejected": -0.8692208528518677, "logps/chosen": -1.6982948780059814, "logps/rejected": -2.1153006553649902, "loss": 1.8491, "nll_loss": 1.8006938695907593, "rewards/accuracies": 1.0, "rewards/chosen": -0.16982948780059814, "rewards/margins": 0.04170059412717819, "rewards/rejected": -0.21153007447719574, "step": 246 }, { "epoch": 0.3892828999211978, "grad_norm": 0.2664787173271179, "learning_rate": 3.893970667601303e-06, "log_odds_chosen": 0.3381783068180084, "log_odds_ratio": -0.5428158044815063, "logits/chosen": 0.19423866271972656, "logits/rejected": -1.1145470142364502, "logps/chosen": -1.783911943435669, "logps/rejected": -2.072521924972534, "loss": 1.9215, "nll_loss": 1.8672170639038086, "rewards/accuracies": 1.0, "rewards/chosen": -0.1783912032842636, "rewards/margins": 0.02886100485920906, "rewards/rejected": -0.20725220441818237, "step": 247 }, { "epoch": 0.3908589440504334, "grad_norm": 0.263099730014801, "learning_rate": 3.892197425143581e-06, "log_odds_chosen": 0.50048828125, "log_odds_ratio": -0.4782467186450958, "logits/chosen": 0.2478259652853012, "logits/rejected": -0.7359952926635742, "logps/chosen": -1.708917498588562, "logps/rejected": -2.134807825088501, "loss": 1.8431, "nll_loss": 1.7952405214309692, "rewards/accuracies": 1.0, "rewards/chosen": -0.17089174687862396, "rewards/margins": 0.042589038610458374, "rewards/rejected": -0.21348077058792114, "step": 248 }, { "epoch": 0.39243498817966904, "grad_norm": 0.2791616916656494, "learning_rate": 3.890409888038094e-06, "log_odds_chosen": 0.387483149766922, "log_odds_ratio": -0.5229809284210205, "logits/chosen": 0.2733336091041565, "logits/rejected": -0.7646632194519043, "logps/chosen": -1.7522411346435547, "logps/rejected": -2.0853376388549805, "loss": 1.8984, "nll_loss": 1.8461503982543945, "rewards/accuracies": 1.0, "rewards/chosen": -0.17522412538528442, "rewards/margins": 0.03330962732434273, "rewards/rejected": -0.20853373408317566, "step": 249 }, { "epoch": 0.39401103230890466, "grad_norm": 0.30953434109687805, "learning_rate": 3.888608069788831e-06, "log_odds_chosen": 0.35872530937194824, "log_odds_ratio": -0.5424368381500244, "logits/chosen": 0.2008129060268402, "logits/rejected": -0.6498639583587646, "logps/chosen": -1.7074558734893799, "logps/rejected": -2.015054702758789, "loss": 1.8569, "nll_loss": 1.8026765584945679, "rewards/accuracies": 0.875, "rewards/chosen": -0.1707455813884735, "rewards/margins": 0.030759908258914948, "rewards/rejected": -0.20150548219680786, "step": 250 }, { "epoch": 0.3955870764381403, "grad_norm": 0.24580441415309906, "learning_rate": 3.8867919840076685e-06, "log_odds_chosen": 0.44406554102897644, "log_odds_ratio": -0.49969637393951416, "logits/chosen": 0.23540851473808289, "logits/rejected": -0.936524510383606, "logps/chosen": -1.6349210739135742, "logps/rejected": -2.0072009563446045, "loss": 1.7722, "nll_loss": 1.7221910953521729, "rewards/accuracies": 1.0, "rewards/chosen": -0.1634921282529831, "rewards/margins": 0.03722797706723213, "rewards/rejected": -0.20072008669376373, "step": 251 }, { "epoch": 0.3971631205673759, "grad_norm": 0.2601582407951355, "learning_rate": 3.884961644414267e-06, "log_odds_chosen": 0.35995495319366455, "log_odds_ratio": -0.5312166213989258, "logits/chosen": 0.2544477581977844, "logits/rejected": -0.4497869908809662, "logps/chosen": -1.7571417093276978, "logps/rejected": -2.0635859966278076, "loss": 1.8765, "nll_loss": 1.8233567476272583, "rewards/accuracies": 1.0, "rewards/chosen": -0.17571419477462769, "rewards/margins": 0.030644403770565987, "rewards/rejected": -0.20635858178138733, "step": 252 }, { "epoch": 0.3987391646966115, "grad_norm": 0.25563278794288635, "learning_rate": 3.883117064835967e-06, "log_odds_chosen": 0.42896199226379395, "log_odds_ratio": -0.5130375027656555, "logits/chosen": 0.2095140814781189, "logits/rejected": -0.7717199325561523, "logps/chosen": -1.6954660415649414, "logps/rejected": -2.0621328353881836, "loss": 1.8451, "nll_loss": 1.7938411235809326, "rewards/accuracies": 0.875, "rewards/chosen": -0.16954661905765533, "rewards/margins": 0.03666667640209198, "rewards/rejected": -0.20621328055858612, "step": 253 }, { "epoch": 0.40031520882584715, "grad_norm": 0.25425294041633606, "learning_rate": 3.881258259207688e-06, "log_odds_chosen": 0.3588080406188965, "log_odds_ratio": -0.5354359149932861, "logits/chosen": 0.3078593313694, "logits/rejected": -0.8438656330108643, "logps/chosen": -1.6679356098175049, "logps/rejected": -1.9688389301300049, "loss": 1.7965, "nll_loss": 1.7429429292678833, "rewards/accuracies": 0.875, "rewards/chosen": -0.1667935699224472, "rewards/margins": 0.030090318992733955, "rewards/rejected": -0.196883887052536, "step": 254 }, { "epoch": 0.40189125295508277, "grad_norm": 0.2434053122997284, "learning_rate": 3.8793852415718165e-06, "log_odds_chosen": 0.4722547233104706, "log_odds_ratio": -0.4863814413547516, "logits/chosen": 0.23671665787696838, "logits/rejected": -1.1288728713989258, "logps/chosen": -1.5909440517425537, "logps/rejected": -1.9825053215026855, "loss": 1.7232, "nll_loss": 1.6745716333389282, "rewards/accuracies": 1.0, "rewards/chosen": -0.1590944230556488, "rewards/margins": 0.039156101644039154, "rewards/rejected": -0.19825051724910736, "step": 255 }, { "epoch": 0.4034672970843184, "grad_norm": 0.24645063281059265, "learning_rate": 3.877498026078107e-06, "log_odds_chosen": 0.28664523363113403, "log_odds_ratio": -0.5660127401351929, "logits/chosen": 0.24130457639694214, "logits/rejected": -0.9756399393081665, "logps/chosen": -1.7283720970153809, "logps/rejected": -1.97283935546875, "loss": 1.8639, "nll_loss": 1.8072566986083984, "rewards/accuracies": 0.875, "rewards/chosen": -0.17283721268177032, "rewards/margins": 0.02444673888385296, "rewards/rejected": -0.19728393852710724, "step": 256 }, { "epoch": 0.40504334121355395, "grad_norm": 0.24935108423233032, "learning_rate": 3.875596626983573e-06, "log_odds_chosen": 0.4969295561313629, "log_odds_ratio": -0.4879858195781708, "logits/chosen": 0.2646740972995758, "logits/rejected": -0.6229531764984131, "logps/chosen": -1.6504206657409668, "logps/rejected": -2.0679101943969727, "loss": 1.7856, "nll_loss": 1.7367937564849854, "rewards/accuracies": 1.0, "rewards/chosen": -0.16504207253456116, "rewards/margins": 0.041748929768800735, "rewards/rejected": -0.2067909985780716, "step": 257 }, { "epoch": 0.4066193853427896, "grad_norm": 0.25852248072624207, "learning_rate": 3.873681058652374e-06, "log_odds_chosen": 0.3059096038341522, "log_odds_ratio": -0.5598926544189453, "logits/chosen": 0.2584393620491028, "logits/rejected": -0.7289465665817261, "logps/chosen": -1.7720271348953247, "logps/rejected": -2.033935308456421, "loss": 1.905, "nll_loss": 1.8490136861801147, "rewards/accuracies": 1.0, "rewards/chosen": -0.1772027164697647, "rewards/margins": 0.026190834119915962, "rewards/rejected": -0.20339354872703552, "step": 258 }, { "epoch": 0.4081954294720252, "grad_norm": 0.2675853967666626, "learning_rate": 3.871751335555715e-06, "log_odds_chosen": 0.28866899013519287, "log_odds_ratio": -0.5614006519317627, "logits/chosen": 0.23755401372909546, "logits/rejected": -0.8612145781517029, "logps/chosen": -1.7061153650283813, "logps/rejected": -1.9488041400909424, "loss": 1.8347, "nll_loss": 1.7785265445709229, "rewards/accuracies": 1.0, "rewards/chosen": -0.17061154544353485, "rewards/margins": 0.024268897250294685, "rewards/rejected": -0.1948804259300232, "step": 259 }, { "epoch": 0.4097714736012608, "grad_norm": 0.23187151551246643, "learning_rate": 3.869807472271731e-06, "log_odds_chosen": 0.36366868019104004, "log_odds_ratio": -0.5309762358665466, "logits/chosen": 0.2599530816078186, "logits/rejected": -0.7904383540153503, "logps/chosen": -1.6728236675262451, "logps/rejected": -1.9797537326812744, "loss": 1.834, "nll_loss": 1.7808889150619507, "rewards/accuracies": 1.0, "rewards/chosen": -0.167282372713089, "rewards/margins": 0.03069300577044487, "rewards/rejected": -0.19797536730766296, "step": 260 }, { "epoch": 0.41134751773049644, "grad_norm": 0.23739495873451233, "learning_rate": 3.8678494834853826e-06, "log_odds_chosen": 0.3631543219089508, "log_odds_ratio": -0.534823477268219, "logits/chosen": 0.23579055070877075, "logits/rejected": -0.7249607443809509, "logps/chosen": -1.6771705150604248, "logps/rejected": -1.9804139137268066, "loss": 1.8111, "nll_loss": 1.7576665878295898, "rewards/accuracies": 1.0, "rewards/chosen": -0.1677170693874359, "rewards/margins": 0.03032434731721878, "rewards/rejected": -0.1980414092540741, "step": 261 }, { "epoch": 0.41292356185973206, "grad_norm": 0.2685432434082031, "learning_rate": 3.865877383988339e-06, "log_odds_chosen": 0.45002633333206177, "log_odds_ratio": -0.4997020959854126, "logits/chosen": 0.24803175032138824, "logits/rejected": -0.6998909711837769, "logps/chosen": -1.7094076871871948, "logps/rejected": -2.0885047912597656, "loss": 1.8626, "nll_loss": 1.8126096725463867, "rewards/accuracies": 1.0, "rewards/chosen": -0.17094077169895172, "rewards/margins": 0.03790968656539917, "rewards/rejected": -0.2088504433631897, "step": 262 }, { "epoch": 0.4144996059889677, "grad_norm": 0.25303518772125244, "learning_rate": 3.863891188678869e-06, "log_odds_chosen": 0.18005381524562836, "log_odds_ratio": -0.6181609630584717, "logits/chosen": 0.20075806975364685, "logits/rejected": -0.8267990946769714, "logps/chosen": -1.6197738647460938, "logps/rejected": -1.7719405889511108, "loss": 1.7674, "nll_loss": 1.7055647373199463, "rewards/accuracies": 0.75, "rewards/chosen": -0.16197741031646729, "rewards/margins": 0.015216664411127567, "rewards/rejected": -0.17719407379627228, "step": 263 }, { "epoch": 0.4160756501182033, "grad_norm": 0.241813063621521, "learning_rate": 3.8618909125617305e-06, "log_odds_chosen": 0.5345890522003174, "log_odds_ratio": -0.464138925075531, "logits/chosen": 0.2237018644809723, "logits/rejected": -1.0322611331939697, "logps/chosen": -1.6182307004928589, "logps/rejected": -2.0676519870758057, "loss": 1.7704, "nll_loss": 1.7239396572113037, "rewards/accuracies": 1.0, "rewards/chosen": -0.1618230640888214, "rewards/margins": 0.04494212567806244, "rewards/rejected": -0.20676518976688385, "step": 264 }, { "epoch": 0.4176516942474389, "grad_norm": 0.23744823038578033, "learning_rate": 3.859876570748054e-06, "log_odds_chosen": 0.36543500423431396, "log_odds_ratio": -0.5293257832527161, "logits/chosen": 0.2739250063896179, "logits/rejected": -0.3711114525794983, "logps/chosen": -1.666553020477295, "logps/rejected": -1.9728542566299438, "loss": 1.818, "nll_loss": 1.7650859355926514, "rewards/accuracies": 1.0, "rewards/chosen": -0.16665531694889069, "rewards/margins": 0.03063010796904564, "rewards/rejected": -0.19728542864322662, "step": 265 }, { "epoch": 0.41922773837667454, "grad_norm": 0.23207531869411469, "learning_rate": 3.857848178455231e-06, "log_odds_chosen": 0.14971187710762024, "log_odds_ratio": -0.6274988055229187, "logits/chosen": 0.2907387912273407, "logits/rejected": -1.0335592031478882, "logps/chosen": -1.684551477432251, "logps/rejected": -1.8142600059509277, "loss": 1.8511, "nll_loss": 1.788321614265442, "rewards/accuracies": 0.75, "rewards/chosen": -0.16845515370368958, "rewards/margins": 0.012970849871635437, "rewards/rejected": -0.1814260184764862, "step": 266 }, { "epoch": 0.42080378250591016, "grad_norm": 0.26370444893836975, "learning_rate": 3.855805751006794e-06, "log_odds_chosen": 0.5131804943084717, "log_odds_ratio": -0.4791393280029297, "logits/chosen": 0.27102935314178467, "logits/rejected": -0.9959600567817688, "logps/chosen": -1.7070389986038208, "logps/rejected": -2.1433634757995605, "loss": 1.8335, "nll_loss": 1.7856334447860718, "rewards/accuracies": 1.0, "rewards/chosen": -0.17070390284061432, "rewards/margins": 0.043632443994283676, "rewards/rejected": -0.2143363505601883, "step": 267 }, { "epoch": 0.4223798266351458, "grad_norm": 0.22576405107975006, "learning_rate": 3.853749303832308e-06, "log_odds_chosen": 0.3635735809803009, "log_odds_ratio": -0.5336201190948486, "logits/chosen": 0.23854336142539978, "logits/rejected": -0.5814566016197205, "logps/chosen": -1.6284022331237793, "logps/rejected": -1.9309828281402588, "loss": 1.7683, "nll_loss": 1.7149466276168823, "rewards/accuracies": 1.0, "rewards/chosen": -0.16284021735191345, "rewards/margins": 0.030258052051067352, "rewards/rejected": -0.1930982619524002, "step": 268 }, { "epoch": 0.4239558707643814, "grad_norm": 0.25254911184310913, "learning_rate": 3.8516788524672495e-06, "log_odds_chosen": 0.2377062886953354, "log_odds_ratio": -0.5871192216873169, "logits/chosen": 0.2799081802368164, "logits/rejected": -0.9253388047218323, "logps/chosen": -1.7134755849838257, "logps/rejected": -1.9147106409072876, "loss": 1.8458, "nll_loss": 1.7871187925338745, "rewards/accuracies": 0.875, "rewards/chosen": -0.17134755849838257, "rewards/margins": 0.020123496651649475, "rewards/rejected": -0.19147105515003204, "step": 269 }, { "epoch": 0.425531914893617, "grad_norm": 0.2403043806552887, "learning_rate": 3.849594412552889e-06, "log_odds_chosen": 0.3609575927257538, "log_odds_ratio": -0.53483647108078, "logits/chosen": 0.3030107021331787, "logits/rejected": -0.6186437606811523, "logps/chosen": -1.614844560623169, "logps/rejected": -1.9084908962249756, "loss": 1.7544, "nll_loss": 1.7009284496307373, "rewards/accuracies": 1.0, "rewards/chosen": -0.16148445010185242, "rewards/margins": 0.029364656656980515, "rewards/rejected": -0.19084911048412323, "step": 270 }, { "epoch": 0.42710795902285265, "grad_norm": 0.2465418577194214, "learning_rate": 3.847495999836175e-06, "log_odds_chosen": 0.36459216475486755, "log_odds_ratio": -0.5316565036773682, "logits/chosen": 0.32157933712005615, "logits/rejected": -0.9023212194442749, "logps/chosen": -1.6977217197418213, "logps/rejected": -2.005786895751953, "loss": 1.8233, "nll_loss": 1.77018404006958, "rewards/accuracies": 1.0, "rewards/chosen": -0.1697721779346466, "rewards/margins": 0.030806517228484154, "rewards/rejected": -0.2005787044763565, "step": 271 }, { "epoch": 0.42868400315208827, "grad_norm": 0.2286456674337387, "learning_rate": 3.845383630169613e-06, "log_odds_chosen": 0.33313167095184326, "log_odds_ratio": -0.5462765693664551, "logits/chosen": 0.19532737135887146, "logits/rejected": -0.9047931432723999, "logps/chosen": -1.6047083139419556, "logps/rejected": -1.8802075386047363, "loss": 1.7327, "nll_loss": 1.6780540943145752, "rewards/accuracies": 0.875, "rewards/chosen": -0.1604708433151245, "rewards/margins": 0.02754993550479412, "rewards/rejected": -0.1880207657814026, "step": 272 }, { "epoch": 0.4302600472813239, "grad_norm": 0.2520315647125244, "learning_rate": 3.843257319511147e-06, "log_odds_chosen": 0.5703607201576233, "log_odds_ratio": -0.4557495713233948, "logits/chosen": 0.24878323078155518, "logits/rejected": -0.9858651161193848, "logps/chosen": -1.6043668985366821, "logps/rejected": -2.0794129371643066, "loss": 1.7324, "nll_loss": 1.6867859363555908, "rewards/accuracies": 1.0, "rewards/chosen": -0.1604366898536682, "rewards/margins": 0.04750461503863335, "rewards/rejected": -0.20794130861759186, "step": 273 }, { "epoch": 0.4318360914105595, "grad_norm": 0.24630145728588104, "learning_rate": 3.841117083924039e-06, "log_odds_chosen": 0.3485129773616791, "log_odds_ratio": -0.5395157337188721, "logits/chosen": 0.23884254693984985, "logits/rejected": -0.9991633296012878, "logps/chosen": -1.644177794456482, "logps/rejected": -1.936130166053772, "loss": 1.7767, "nll_loss": 1.7227089405059814, "rewards/accuracies": 1.0, "rewards/chosen": -0.1644177883863449, "rewards/margins": 0.029195237904787064, "rewards/rejected": -0.19361303746700287, "step": 274 }, { "epoch": 0.43341213553979513, "grad_norm": 0.24250437319278717, "learning_rate": 3.838962939576746e-06, "log_odds_chosen": 0.4249970316886902, "log_odds_ratio": -0.5112988352775574, "logits/chosen": 0.20201551914215088, "logits/rejected": -0.8135443925857544, "logps/chosen": -1.5921757221221924, "logps/rejected": -1.9463852643966675, "loss": 1.7457, "nll_loss": 1.6946040391921997, "rewards/accuracies": 1.0, "rewards/chosen": -0.15921758115291595, "rewards/margins": 0.035420943051576614, "rewards/rejected": -0.19463855028152466, "step": 275 }, { "epoch": 0.43498817966903075, "grad_norm": 0.24594025313854218, "learning_rate": 3.8367949027427985e-06, "log_odds_chosen": 0.3127194046974182, "log_odds_ratio": -0.5534557104110718, "logits/chosen": 0.27988913655281067, "logits/rejected": -0.676584005355835, "logps/chosen": -1.7097309827804565, "logps/rejected": -1.9752788543701172, "loss": 1.8678, "nll_loss": 1.812432050704956, "rewards/accuracies": 1.0, "rewards/chosen": -0.17097310721874237, "rewards/margins": 0.026554781943559647, "rewards/rejected": -0.19752787053585052, "step": 276 }, { "epoch": 0.43656422379826637, "grad_norm": 0.2602437138557434, "learning_rate": 3.834612989800681e-06, "log_odds_chosen": 0.5669816136360168, "log_odds_ratio": -0.45552071928977966, "logits/chosen": 0.2907956540584564, "logits/rejected": -1.2319718599319458, "logps/chosen": -1.6959583759307861, "logps/rejected": -2.179285764694214, "loss": 1.8138, "nll_loss": 1.7682132720947266, "rewards/accuracies": 1.0, "rewards/chosen": -0.1695958375930786, "rewards/margins": 0.04833271726965904, "rewards/rejected": -0.21792855858802795, "step": 277 }, { "epoch": 0.438140267927502, "grad_norm": 0.25470709800720215, "learning_rate": 3.832417217233703e-06, "log_odds_chosen": 0.3171738386154175, "log_odds_ratio": -0.5497609376907349, "logits/chosen": 0.24686628580093384, "logits/rejected": -0.7249422669410706, "logps/chosen": -1.6841633319854736, "logps/rejected": -1.9515080451965332, "loss": 1.8215, "nll_loss": 1.7665718793869019, "rewards/accuracies": 1.0, "rewards/chosen": -0.1684163361787796, "rewards/margins": 0.02673446759581566, "rewards/rejected": -0.19515080749988556, "step": 278 }, { "epoch": 0.4397163120567376, "grad_norm": 0.24597591161727905, "learning_rate": 3.8302076016298775e-06, "log_odds_chosen": 0.45453959703445435, "log_odds_ratio": -0.4950627088546753, "logits/chosen": 0.2077048420906067, "logits/rejected": -0.9508089423179626, "logps/chosen": -1.6135221719741821, "logps/rejected": -1.992347240447998, "loss": 1.7562, "nll_loss": 1.7066751718521118, "rewards/accuracies": 1.0, "rewards/chosen": -0.1613522171974182, "rewards/margins": 0.03788250684738159, "rewards/rejected": -0.1992347240447998, "step": 279 }, { "epoch": 0.44129235618597323, "grad_norm": 0.25559622049331665, "learning_rate": 3.827984159681796e-06, "log_odds_chosen": 0.3496161699295044, "log_odds_ratio": -0.5389132499694824, "logits/chosen": 0.19033017754554749, "logits/rejected": -0.7456286549568176, "logps/chosen": -1.668264389038086, "logps/rejected": -1.9605971574783325, "loss": 1.7978, "nll_loss": 1.743927240371704, "rewards/accuracies": 0.875, "rewards/chosen": -0.16682645678520203, "rewards/margins": 0.02923327311873436, "rewards/rejected": -0.1960597187280655, "step": 280 }, { "epoch": 0.4428684003152088, "grad_norm": 0.2457672506570816, "learning_rate": 3.825746908186498e-06, "log_odds_chosen": 0.40318137407302856, "log_odds_ratio": -0.5202803015708923, "logits/chosen": 0.32850149273872375, "logits/rejected": -1.0219409465789795, "logps/chosen": -1.7146260738372803, "logps/rejected": -2.056997299194336, "loss": 1.8254, "nll_loss": 1.7733912467956543, "rewards/accuracies": 1.0, "rewards/chosen": -0.17146262526512146, "rewards/margins": 0.03423710912466049, "rewards/rejected": -0.20569972693920135, "step": 281 }, { "epoch": 0.4444444444444444, "grad_norm": 0.26707932353019714, "learning_rate": 3.823495864045352e-06, "log_odds_chosen": 0.4439901113510132, "log_odds_ratio": -0.49818819761276245, "logits/chosen": 0.3058054447174072, "logits/rejected": -0.41550758481025696, "logps/chosen": -1.666752576828003, "logps/rejected": -2.037712812423706, "loss": 1.7985, "nll_loss": 1.748701810836792, "rewards/accuracies": 1.0, "rewards/chosen": -0.16667526960372925, "rewards/margins": 0.037096016108989716, "rewards/rejected": -0.20377127826213837, "step": 282 }, { "epoch": 0.44602048857368004, "grad_norm": 0.2250865399837494, "learning_rate": 3.8212310442639205e-06, "log_odds_chosen": 0.34101033210754395, "log_odds_ratio": -0.5409380793571472, "logits/chosen": 0.3130001425743103, "logits/rejected": -0.6116319894790649, "logps/chosen": -1.6088491678237915, "logps/rejected": -1.8918440341949463, "loss": 1.7535, "nll_loss": 1.6994398832321167, "rewards/accuracies": 1.0, "rewards/chosen": -0.16088491678237915, "rewards/margins": 0.028299514204263687, "rewards/rejected": -0.18918441236019135, "step": 283 }, { "epoch": 0.44759653270291566, "grad_norm": 0.23142075538635254, "learning_rate": 3.8189524659518355e-06, "log_odds_chosen": 0.40798282623291016, "log_odds_ratio": -0.5132877826690674, "logits/chosen": 0.3497146964073181, "logits/rejected": -0.7581396102905273, "logps/chosen": -1.6762844324111938, "logps/rejected": -2.017564296722412, "loss": 1.8035, "nll_loss": 1.752134084701538, "rewards/accuracies": 1.0, "rewards/chosen": -0.1676284223794937, "rewards/margins": 0.034127987921237946, "rewards/rejected": -0.20175644755363464, "step": 284 }, { "epoch": 0.4491725768321513, "grad_norm": 0.2335939258337021, "learning_rate": 3.816660146322667e-06, "log_odds_chosen": 0.38592225313186646, "log_odds_ratio": -0.522415816783905, "logits/chosen": 0.28406161069869995, "logits/rejected": -0.5710610151290894, "logps/chosen": -1.569653868675232, "logps/rejected": -1.8868498802185059, "loss": 1.7028, "nll_loss": 1.6505882740020752, "rewards/accuracies": 1.0, "rewards/chosen": -0.15696537494659424, "rewards/margins": 0.03171960636973381, "rewards/rejected": -0.18868498504161835, "step": 285 }, { "epoch": 0.4507486209613869, "grad_norm": 0.23405610024929047, "learning_rate": 3.814354102693797e-06, "log_odds_chosen": 0.40164172649383545, "log_odds_ratio": -0.514717698097229, "logits/chosen": 0.3134067952632904, "logits/rejected": -0.824557900428772, "logps/chosen": -1.643754005432129, "logps/rejected": -1.9766446352005005, "loss": 1.7743, "nll_loss": 1.7228494882583618, "rewards/accuracies": 1.0, "rewards/chosen": -0.1643753945827484, "rewards/margins": 0.033289071172475815, "rewards/rejected": -0.19766446948051453, "step": 286 }, { "epoch": 0.4523246650906225, "grad_norm": 0.237734854221344, "learning_rate": 3.8120343524862814e-06, "log_odds_chosen": 0.34567391872406006, "log_odds_ratio": -0.5369825959205627, "logits/chosen": 0.28361567854881287, "logits/rejected": -0.7294724583625793, "logps/chosen": -1.6829057931900024, "logps/rejected": -1.970367193222046, "loss": 1.796, "nll_loss": 1.742274522781372, "rewards/accuracies": 1.0, "rewards/chosen": -0.16829057037830353, "rewards/margins": 0.02874615229666233, "rewards/rejected": -0.1970367133617401, "step": 287 }, { "epoch": 0.45390070921985815, "grad_norm": 0.22408141195774078, "learning_rate": 3.809700913224726e-06, "log_odds_chosen": 0.3844645619392395, "log_odds_ratio": -0.5258805751800537, "logits/chosen": 0.24493148922920227, "logits/rejected": -1.1642123460769653, "logps/chosen": -1.6014727354049683, "logps/rejected": -1.9189801216125488, "loss": 1.7282, "nll_loss": 1.6755775213241577, "rewards/accuracies": 1.0, "rewards/chosen": -0.1601472645998001, "rewards/margins": 0.03175073489546776, "rewards/rejected": -0.19189801812171936, "step": 288 }, { "epoch": 0.45547675334909377, "grad_norm": 0.22789910435676575, "learning_rate": 3.8073538025371494e-06, "log_odds_chosen": 0.48448646068573, "log_odds_ratio": -0.48281943798065186, "logits/chosen": 0.36718764901161194, "logits/rejected": -0.9315266609191895, "logps/chosen": -1.579671025276184, "logps/rejected": -1.9810744524002075, "loss": 1.7167, "nll_loss": 1.6684261560440063, "rewards/accuracies": 1.0, "rewards/chosen": -0.15796710550785065, "rewards/margins": 0.04014034569263458, "rewards/rejected": -0.19810745120048523, "step": 289 }, { "epoch": 0.4570527974783294, "grad_norm": 0.24670805037021637, "learning_rate": 3.804993038154852e-06, "log_odds_chosen": 0.36597153544425964, "log_odds_ratio": -0.5341177582740784, "logits/chosen": 0.3026730716228485, "logits/rejected": -0.8013948202133179, "logps/chosen": -1.6119648218154907, "logps/rejected": -1.9177809953689575, "loss": 1.7591, "nll_loss": 1.7057002782821655, "rewards/accuracies": 0.875, "rewards/chosen": -0.1611964851617813, "rewards/margins": 0.03058161959052086, "rewards/rejected": -0.19177812337875366, "step": 290 }, { "epoch": 0.458628841607565, "grad_norm": 0.2390255182981491, "learning_rate": 3.8026186379122816e-06, "log_odds_chosen": 0.2855335474014282, "log_odds_ratio": -0.5630379915237427, "logits/chosen": 0.25951528549194336, "logits/rejected": -0.8107087016105652, "logps/chosen": -1.688565731048584, "logps/rejected": -1.9271503686904907, "loss": 1.8113, "nll_loss": 1.754982590675354, "rewards/accuracies": 1.0, "rewards/chosen": -0.16885659098625183, "rewards/margins": 0.023858455941081047, "rewards/rejected": -0.19271504878997803, "step": 291 }, { "epoch": 0.46020488573680063, "grad_norm": 0.23229312896728516, "learning_rate": 3.8002306197468983e-06, "log_odds_chosen": 0.17798058688640594, "log_odds_ratio": -0.6102758049964905, "logits/chosen": 0.275474488735199, "logits/rejected": -0.8618179559707642, "logps/chosen": -1.6651320457458496, "logps/rejected": -1.8112871646881104, "loss": 1.806, "nll_loss": 1.7449393272399902, "rewards/accuracies": 0.875, "rewards/chosen": -0.16651323437690735, "rewards/margins": 0.014615494757890701, "rewards/rejected": -0.18112872540950775, "step": 292 }, { "epoch": 0.46178092986603625, "grad_norm": 0.2197176069021225, "learning_rate": 3.7978290016990367e-06, "log_odds_chosen": 0.49173322319984436, "log_odds_ratio": -0.4813576638698578, "logits/chosen": 0.24893192946910858, "logits/rejected": -0.7819869518280029, "logps/chosen": -1.6695976257324219, "logps/rejected": -2.0870187282562256, "loss": 1.7923, "nll_loss": 1.744187831878662, "rewards/accuracies": 1.0, "rewards/chosen": -0.1669597625732422, "rewards/margins": 0.04174211993813515, "rewards/rejected": -0.20870187878608704, "step": 293 }, { "epoch": 0.46335697399527187, "grad_norm": 0.24719281494617462, "learning_rate": 3.795413801911776e-06, "log_odds_chosen": 0.5599408745765686, "log_odds_ratio": -0.4586997926235199, "logits/chosen": 0.24123334884643555, "logits/rejected": -0.8821290135383606, "logps/chosen": -1.597358226776123, "logps/rejected": -2.068033456802368, "loss": 1.741, "nll_loss": 1.69508957862854, "rewards/accuracies": 1.0, "rewards/chosen": -0.1597358137369156, "rewards/margins": 0.0470675528049469, "rewards/rejected": -0.2068033665418625, "step": 294 }, { "epoch": 0.4649330181245075, "grad_norm": 0.25565704703330994, "learning_rate": 3.7929850386307965e-06, "log_odds_chosen": 0.327176958322525, "log_odds_ratio": -0.548469066619873, "logits/chosen": 0.34679511189460754, "logits/rejected": -0.684309720993042, "logps/chosen": -1.806984782218933, "logps/rejected": -2.0878827571868896, "loss": 1.9209, "nll_loss": 1.8660597801208496, "rewards/accuracies": 0.875, "rewards/chosen": -0.1806984692811966, "rewards/margins": 0.028089802712202072, "rewards/rejected": -0.20878829061985016, "step": 295 }, { "epoch": 0.4665090622537431, "grad_norm": 0.24570108950138092, "learning_rate": 3.790542730204245e-06, "log_odds_chosen": 0.3950555920600891, "log_odds_ratio": -0.5176329612731934, "logits/chosen": 0.29106709361076355, "logits/rejected": -0.8974196910858154, "logps/chosen": -1.7347073554992676, "logps/rejected": -2.0715324878692627, "loss": 1.843, "nll_loss": 1.7912408113479614, "rewards/accuracies": 1.0, "rewards/chosen": -0.17347073554992676, "rewards/margins": 0.033682532608509064, "rewards/rejected": -0.20715326070785522, "step": 296 }, { "epoch": 0.46808510638297873, "grad_norm": 0.23746472597122192, "learning_rate": 3.7880868950825935e-06, "log_odds_chosen": 0.4195549488067627, "log_odds_ratio": -0.5058358907699585, "logits/chosen": 0.26286160945892334, "logits/rejected": -0.9936229586601257, "logps/chosen": -1.690929889678955, "logps/rejected": -2.044651508331299, "loss": 1.7908, "nll_loss": 1.740168571472168, "rewards/accuracies": 1.0, "rewards/chosen": -0.16909299790859222, "rewards/margins": 0.03537215292453766, "rewards/rejected": -0.20446515083312988, "step": 297 }, { "epoch": 0.46966115051221435, "grad_norm": 0.25234460830688477, "learning_rate": 3.7856175518185058e-06, "log_odds_chosen": 0.420246422290802, "log_odds_ratio": -0.510624349117279, "logits/chosen": 0.1483045071363449, "logits/rejected": -1.0136771202087402, "logps/chosen": -1.6671572923660278, "logps/rejected": -2.019413709640503, "loss": 1.795, "nll_loss": 1.7439560890197754, "rewards/accuracies": 1.0, "rewards/chosen": -0.16671572625637054, "rewards/margins": 0.03522564470767975, "rewards/rejected": -0.2019413709640503, "step": 298 }, { "epoch": 0.47123719464145, "grad_norm": 0.24835272133350372, "learning_rate": 3.7831347190666883e-06, "log_odds_chosen": 0.4697123169898987, "log_odds_ratio": -0.49107876420021057, "logits/chosen": 0.20852668583393097, "logits/rejected": -0.9135799407958984, "logps/chosen": -1.662784457206726, "logps/rejected": -2.059032917022705, "loss": 1.7932, "nll_loss": 1.7441176176071167, "rewards/accuracies": 0.875, "rewards/chosen": -0.16627845168113708, "rewards/margins": 0.039624836295843124, "rewards/rejected": -0.20590327680110931, "step": 299 }, { "epoch": 0.4728132387706856, "grad_norm": 0.23449158668518066, "learning_rate": 3.780638415583759e-06, "log_odds_chosen": 0.2897394299507141, "log_odds_ratio": -0.568331241607666, "logits/chosen": 0.32292455434799194, "logits/rejected": -0.7160006761550903, "logps/chosen": -1.6466126441955566, "logps/rejected": -1.8909659385681152, "loss": 1.7859, "nll_loss": 1.7290998697280884, "rewards/accuracies": 0.75, "rewards/chosen": -0.16466127336025238, "rewards/margins": 0.024435309693217278, "rewards/rejected": -0.1890965849161148, "step": 300 }, { "epoch": 0.4743892828999212, "grad_norm": 0.23816445469856262, "learning_rate": 3.7781286602280967e-06, "log_odds_chosen": 0.168609157204628, "log_odds_ratio": -0.6210045218467712, "logits/chosen": 0.28317171335220337, "logits/rejected": -0.8950541019439697, "logps/chosen": -1.6320608854293823, "logps/rejected": -1.7655320167541504, "loss": 1.7803, "nll_loss": 1.7181925773620605, "rewards/accuracies": 0.75, "rewards/chosen": -0.163206085562706, "rewards/margins": 0.013347111642360687, "rewards/rejected": -0.17655321955680847, "step": 301 }, { "epoch": 0.47596532702915684, "grad_norm": 0.2272704839706421, "learning_rate": 3.7756054719597044e-06, "log_odds_chosen": 0.28053295612335205, "log_odds_ratio": -0.5875728726387024, "logits/chosen": 0.16442207992076874, "logits/rejected": -0.9546319246292114, "logps/chosen": -1.6610065698623657, "logps/rejected": -1.903661847114563, "loss": 1.7791, "nll_loss": 1.7202967405319214, "rewards/accuracies": 0.75, "rewards/chosen": -0.16610068082809448, "rewards/margins": 0.024265505373477936, "rewards/rejected": -0.19036617875099182, "step": 302 }, { "epoch": 0.47754137115839246, "grad_norm": 0.21994216740131378, "learning_rate": 3.773068869840066e-06, "log_odds_chosen": 0.21547146141529083, "log_odds_ratio": -0.6008081436157227, "logits/chosen": 0.37992700934410095, "logits/rejected": -0.9604979157447815, "logps/chosen": -1.6873159408569336, "logps/rejected": -1.8643107414245605, "loss": 1.8079, "nll_loss": 1.7478294372558594, "rewards/accuracies": 0.75, "rewards/chosen": -0.16873158514499664, "rewards/margins": 0.017699476331472397, "rewards/rejected": -0.18643106520175934, "step": 303 }, { "epoch": 0.4791174152876281, "grad_norm": 0.22366014122962952, "learning_rate": 3.770518873031997e-06, "log_odds_chosen": 0.4537414312362671, "log_odds_ratio": -0.4942602813243866, "logits/chosen": 0.15717966854572296, "logits/rejected": -0.6233416199684143, "logps/chosen": -1.5575112104415894, "logps/rejected": -1.9306727647781372, "loss": 1.7018, "nll_loss": 1.6523317098617554, "rewards/accuracies": 1.0, "rewards/chosen": -0.15575110912322998, "rewards/margins": 0.037316154688596725, "rewards/rejected": -0.193067267537117, "step": 304 }, { "epoch": 0.4806934594168637, "grad_norm": 0.24693521857261658, "learning_rate": 3.7679555007995065e-06, "log_odds_chosen": 0.4173615872859955, "log_odds_ratio": -0.5234081745147705, "logits/chosen": 0.291871577501297, "logits/rejected": -0.8078293204307556, "logps/chosen": -1.6800901889801025, "logps/rejected": -2.0314533710479736, "loss": 1.8116, "nll_loss": 1.7592403888702393, "rewards/accuracies": 0.875, "rewards/chosen": -0.16800902783870697, "rewards/margins": 0.03513630852103233, "rewards/rejected": -0.2031453251838684, "step": 305 }, { "epoch": 0.48226950354609927, "grad_norm": 0.2487160563468933, "learning_rate": 3.7653787725076464e-06, "log_odds_chosen": 0.26628607511520386, "log_odds_ratio": -0.5737897157669067, "logits/chosen": 0.2887229025363922, "logits/rejected": -0.7761309146881104, "logps/chosen": -1.644072413444519, "logps/rejected": -1.8666337728500366, "loss": 1.7694, "nll_loss": 1.7120240926742554, "rewards/accuracies": 0.875, "rewards/chosen": -0.16440723836421967, "rewards/margins": 0.02225613035261631, "rewards/rejected": -0.18666337430477142, "step": 306 }, { "epoch": 0.4838455476753349, "grad_norm": 0.2717806398868561, "learning_rate": 3.7627887076223685e-06, "log_odds_chosen": 0.3717851936817169, "log_odds_ratio": -0.5291674733161926, "logits/chosen": 0.3130492866039276, "logits/rejected": -0.5744433999061584, "logps/chosen": -1.7094042301177979, "logps/rejected": -2.023301124572754, "loss": 1.8175, "nll_loss": 1.7645916938781738, "rewards/accuracies": 1.0, "rewards/chosen": -0.17094042897224426, "rewards/margins": 0.03138966113328934, "rewards/rejected": -0.2023300975561142, "step": 307 }, { "epoch": 0.4854215918045705, "grad_norm": 0.3177509605884552, "learning_rate": 3.7601853257103765e-06, "log_odds_chosen": 0.23472489416599274, "log_odds_ratio": -0.5888123512268066, "logits/chosen": 0.21074482798576355, "logits/rejected": -0.8820545077323914, "logps/chosen": -1.6076140403747559, "logps/rejected": -1.8040858507156372, "loss": 1.7182, "nll_loss": 1.6593618392944336, "rewards/accuracies": 0.875, "rewards/chosen": -0.16076140105724335, "rewards/margins": 0.019647175446152687, "rewards/rejected": -0.18040858209133148, "step": 308 }, { "epoch": 0.48699763593380613, "grad_norm": 0.22378169000148773, "learning_rate": 3.7575686464389767e-06, "log_odds_chosen": 0.3512086570262909, "log_odds_ratio": -0.5429125428199768, "logits/chosen": 0.23526576161384583, "logits/rejected": -0.8768814206123352, "logps/chosen": -1.589032769203186, "logps/rejected": -1.880218267440796, "loss": 1.7234, "nll_loss": 1.6690884828567505, "rewards/accuracies": 0.875, "rewards/chosen": -0.1589033007621765, "rewards/margins": 0.029118528589606285, "rewards/rejected": -0.18802182376384735, "step": 309 }, { "epoch": 0.48857368006304175, "grad_norm": 0.21849550306797028, "learning_rate": 3.7549386895759315e-06, "log_odds_chosen": 0.3859555721282959, "log_odds_ratio": -0.5214402079582214, "logits/chosen": 0.2112809419631958, "logits/rejected": -0.8602145910263062, "logps/chosen": -1.5440895557403564, "logps/rejected": -1.8589766025543213, "loss": 1.6701, "nll_loss": 1.61797034740448, "rewards/accuracies": 1.0, "rewards/chosen": -0.15440896153450012, "rewards/margins": 0.03148870915174484, "rewards/rejected": -0.18589766323566437, "step": 310 }, { "epoch": 0.49014972419227737, "grad_norm": 0.21237994730472565, "learning_rate": 3.7522954749893086e-06, "log_odds_chosen": 0.5937802195549011, "log_odds_ratio": -0.4507063925266266, "logits/chosen": 0.14280448853969574, "logits/rejected": -0.8862166404724121, "logps/chosen": -1.571141242980957, "logps/rejected": -2.0660626888275146, "loss": 1.7067, "nll_loss": 1.6616342067718506, "rewards/accuracies": 1.0, "rewards/chosen": -0.15711411833763123, "rewards/margins": 0.04949216917157173, "rewards/rejected": -0.20660629868507385, "step": 311 }, { "epoch": 0.491725768321513, "grad_norm": 0.23776353895664215, "learning_rate": 3.749639022647332e-06, "log_odds_chosen": 0.3704267144203186, "log_odds_ratio": -0.5295000076293945, "logits/chosen": 0.21290373802185059, "logits/rejected": -0.8199090957641602, "logps/chosen": -1.6614030599594116, "logps/rejected": -1.9728885889053345, "loss": 1.7854, "nll_loss": 1.7324740886688232, "rewards/accuracies": 1.0, "rewards/chosen": -0.1661403328180313, "rewards/margins": 0.031148536130785942, "rewards/rejected": -0.1972888708114624, "step": 312 }, { "epoch": 0.4933018124507486, "grad_norm": 0.21524956822395325, "learning_rate": 3.7469693526182304e-06, "log_odds_chosen": 0.5271570682525635, "log_odds_ratio": -0.4816766381263733, "logits/chosen": 0.17095641791820526, "logits/rejected": -1.1865314245224, "logps/chosen": -1.605276346206665, "logps/rejected": -2.045314073562622, "loss": 1.7166, "nll_loss": 1.668409824371338, "rewards/accuracies": 0.875, "rewards/chosen": -0.16052761673927307, "rewards/margins": 0.04400378465652466, "rewards/rejected": -0.20453140139579773, "step": 313 }, { "epoch": 0.49487785657998423, "grad_norm": 0.22467942535877228, "learning_rate": 3.744286485070085e-06, "log_odds_chosen": 0.5159242749214172, "log_odds_ratio": -0.4703598618507385, "logits/chosen": 0.19940146803855896, "logits/rejected": -0.9005659818649292, "logps/chosen": -1.5551189184188843, "logps/rejected": -1.9823968410491943, "loss": 1.6713, "nll_loss": 1.6242244243621826, "rewards/accuracies": 1.0, "rewards/chosen": -0.15551190078258514, "rewards/margins": 0.04272779822349548, "rewards/rejected": -0.19823968410491943, "step": 314 }, { "epoch": 0.49645390070921985, "grad_norm": 0.24698135256767273, "learning_rate": 3.7415904402706795e-06, "log_odds_chosen": 0.43156135082244873, "log_odds_ratio": -0.508359432220459, "logits/chosen": 0.22156882286071777, "logits/rejected": -0.8898169994354248, "logps/chosen": -1.7517122030258179, "logps/rejected": -2.120948076248169, "loss": 1.8642, "nll_loss": 1.813354253768921, "rewards/accuracies": 1.0, "rewards/chosen": -0.17517122626304626, "rewards/margins": 0.036923572421073914, "rewards/rejected": -0.21209479868412018, "step": 315 }, { "epoch": 0.4980299448384555, "grad_norm": 0.21681058406829834, "learning_rate": 3.7388812385873435e-06, "log_odds_chosen": 0.36917757987976074, "log_odds_ratio": -0.527721643447876, "logits/chosen": 0.1808536797761917, "logits/rejected": -0.9747561812400818, "logps/chosen": -1.5873167514801025, "logps/rejected": -1.8916015625, "loss": 1.7039, "nll_loss": 1.6511293649673462, "rewards/accuracies": 1.0, "rewards/chosen": -0.15873166918754578, "rewards/margins": 0.030428504571318626, "rewards/rejected": -0.18916018307209015, "step": 316 }, { "epoch": 0.4996059889676911, "grad_norm": 0.2535804212093353, "learning_rate": 3.7361589004868033e-06, "log_odds_chosen": 0.5016902685165405, "log_odds_ratio": -0.4793998599052429, "logits/chosen": 0.3036730885505676, "logits/rejected": -0.9837846159934998, "logps/chosen": -1.6742297410964966, "logps/rejected": -2.1013123989105225, "loss": 1.7751, "nll_loss": 1.727131724357605, "rewards/accuracies": 1.0, "rewards/chosen": -0.16742298007011414, "rewards/margins": 0.04270825535058975, "rewards/rejected": -0.2101312130689621, "step": 317 }, { "epoch": 0.5011820330969267, "grad_norm": 0.22353678941726685, "learning_rate": 3.733423446535022e-06, "log_odds_chosen": 0.5217798352241516, "log_odds_ratio": -0.47520411014556885, "logits/chosen": 0.2759425938129425, "logits/rejected": -1.0702801942825317, "logps/chosen": -1.6638113260269165, "logps/rejected": -2.1103484630584717, "loss": 1.7661, "nll_loss": 1.7186000347137451, "rewards/accuracies": 1.0, "rewards/chosen": -0.1663811355829239, "rewards/margins": 0.04465372860431671, "rewards/rejected": -0.2110348641872406, "step": 318 }, { "epoch": 0.5027580772261623, "grad_norm": 0.2162550985813141, "learning_rate": 3.7306748973970476e-06, "log_odds_chosen": 0.39420896768569946, "log_odds_ratio": -0.5247294902801514, "logits/chosen": 0.19344046711921692, "logits/rejected": -1.2027227878570557, "logps/chosen": -1.531542420387268, "logps/rejected": -1.8519684076309204, "loss": 1.6788, "nll_loss": 1.626314640045166, "rewards/accuracies": 0.875, "rewards/chosen": -0.15315423905849457, "rewards/margins": 0.03204261139035225, "rewards/rejected": -0.18519684672355652, "step": 319 }, { "epoch": 0.5043341213553979, "grad_norm": 0.2854284942150116, "learning_rate": 3.7279132738368564e-06, "log_odds_chosen": 0.19533756375312805, "log_odds_ratio": -0.6046186089515686, "logits/chosen": 0.25027167797088623, "logits/rejected": -0.9639004468917847, "logps/chosen": -1.727016568183899, "logps/rejected": -1.8893370628356934, "loss": 1.8393, "nll_loss": 1.778852939605713, "rewards/accuracies": 0.75, "rewards/chosen": -0.1727016568183899, "rewards/margins": 0.016232045367360115, "rewards/rejected": -0.18893368542194366, "step": 320 }, { "epoch": 0.5059101654846335, "grad_norm": 0.22148051857948303, "learning_rate": 3.725138596717195e-06, "log_odds_chosen": 0.4484974443912506, "log_odds_ratio": -0.4975241720676422, "logits/chosen": 0.17572706937789917, "logits/rejected": -0.9954120516777039, "logps/chosen": -1.577749252319336, "logps/rejected": -1.949328899383545, "loss": 1.6904, "nll_loss": 1.640618085861206, "rewards/accuracies": 1.0, "rewards/chosen": -0.1577749252319336, "rewards/margins": 0.03715795278549194, "rewards/rejected": -0.19493287801742554, "step": 321 }, { "epoch": 0.5074862096138691, "grad_norm": 0.3961973786354065, "learning_rate": 3.7223508869994244e-06, "log_odds_chosen": 0.4676430821418762, "log_odds_ratio": -0.4938008785247803, "logits/chosen": 0.21728691458702087, "logits/rejected": -1.0851279497146606, "logps/chosen": -1.6289960145950317, "logps/rejected": -2.0200228691101074, "loss": 1.7432, "nll_loss": 1.6938555240631104, "rewards/accuracies": 1.0, "rewards/chosen": -0.16289958357810974, "rewards/margins": 0.03910268098115921, "rewards/rejected": -0.20200228691101074, "step": 322 }, { "epoch": 0.5090622537431048, "grad_norm": 0.2561986744403839, "learning_rate": 3.7195501657433594e-06, "log_odds_chosen": 0.3967494070529938, "log_odds_ratio": -0.518577516078949, "logits/chosen": 0.16213250160217285, "logits/rejected": -1.014851450920105, "logps/chosen": -1.6295907497406006, "logps/rejected": -1.9586544036865234, "loss": 1.7525, "nll_loss": 1.700607180595398, "rewards/accuracies": 1.0, "rewards/chosen": -0.16295908391475677, "rewards/margins": 0.03290637210011482, "rewards/rejected": -0.1958654522895813, "step": 323 }, { "epoch": 0.5106382978723404, "grad_norm": 0.2168796956539154, "learning_rate": 3.716736454107111e-06, "log_odds_chosen": 0.5052562355995178, "log_odds_ratio": -0.47744137048721313, "logits/chosen": 0.16427958011627197, "logits/rejected": -0.8152539134025574, "logps/chosen": -1.5257859230041504, "logps/rejected": -1.938908576965332, "loss": 1.6411, "nll_loss": 1.5933436155319214, "rewards/accuracies": 1.0, "rewards/chosen": -0.15257860720157623, "rewards/margins": 0.041312284767627716, "rewards/rejected": -0.19389088451862335, "step": 324 }, { "epoch": 0.512214342001576, "grad_norm": 0.2312452644109726, "learning_rate": 3.7139097733469277e-06, "log_odds_chosen": 0.5160778760910034, "log_odds_ratio": -0.47250521183013916, "logits/chosen": 0.18574649095535278, "logits/rejected": -0.9940904974937439, "logps/chosen": -1.5720903873443604, "logps/rejected": -2.0003862380981445, "loss": 1.6905, "nll_loss": 1.6432417631149292, "rewards/accuracies": 1.0, "rewards/chosen": -0.15720903873443604, "rewards/margins": 0.04282958805561066, "rewards/rejected": -0.20003864169120789, "step": 325 }, { "epoch": 0.5137903861308116, "grad_norm": 0.2351461946964264, "learning_rate": 3.711070144817032e-06, "log_odds_chosen": 0.4174703359603882, "log_odds_ratio": -0.5126141905784607, "logits/chosen": 0.16775144636631012, "logits/rejected": -0.7969292998313904, "logps/chosen": -1.6593265533447266, "logps/rejected": -2.0132079124450684, "loss": 1.7821, "nll_loss": 1.730817437171936, "rewards/accuracies": 1.0, "rewards/chosen": -0.16593264043331146, "rewards/margins": 0.03538812696933746, "rewards/rejected": -0.20132076740264893, "step": 326 }, { "epoch": 0.5153664302600472, "grad_norm": 0.2421269714832306, "learning_rate": 3.708217589969461e-06, "log_odds_chosen": 0.5103427171707153, "log_odds_ratio": -0.4785231649875641, "logits/chosen": 0.1360926777124405, "logits/rejected": -1.0719431638717651, "logps/chosen": -1.636336088180542, "logps/rejected": -2.0653886795043945, "loss": 1.7628, "nll_loss": 1.7149783372879028, "rewards/accuracies": 1.0, "rewards/chosen": -0.16363361477851868, "rewards/margins": 0.04290526732802391, "rewards/rejected": -0.20653888583183289, "step": 327 }, { "epoch": 0.5169424743892829, "grad_norm": 0.2216341197490692, "learning_rate": 3.705352130353904e-06, "log_odds_chosen": 0.4580025374889374, "log_odds_ratio": -0.5018777251243591, "logits/chosen": 0.1973736435174942, "logits/rejected": -1.0065253973007202, "logps/chosen": -1.5510623455047607, "logps/rejected": -1.9285662174224854, "loss": 1.6825, "nll_loss": 1.6323111057281494, "rewards/accuracies": 1.0, "rewards/chosen": -0.15510624647140503, "rewards/margins": 0.03775038942694664, "rewards/rejected": -0.19285663962364197, "step": 328 }, { "epoch": 0.5185185185185185, "grad_norm": 0.23714834451675415, "learning_rate": 3.7024737876175404e-06, "log_odds_chosen": 0.41599202156066895, "log_odds_ratio": -0.5087955594062805, "logits/chosen": 0.15757359564304352, "logits/rejected": -0.8292059302330017, "logps/chosen": -1.6072828769683838, "logps/rejected": -1.949842929840088, "loss": 1.7153, "nll_loss": 1.664394736289978, "rewards/accuracies": 1.0, "rewards/chosen": -0.16072829067707062, "rewards/margins": 0.03425602242350578, "rewards/rejected": -0.1949843019247055, "step": 329 }, { "epoch": 0.5200945626477541, "grad_norm": 0.23693764209747314, "learning_rate": 3.699582583504874e-06, "log_odds_chosen": 0.2892705500125885, "log_odds_ratio": -0.5659085512161255, "logits/chosen": 0.2140631228685379, "logits/rejected": -1.0034198760986328, "logps/chosen": -1.7431427240371704, "logps/rejected": -1.9875526428222656, "loss": 1.8448, "nll_loss": 1.788230061531067, "rewards/accuracies": 0.875, "rewards/chosen": -0.17431427538394928, "rewards/margins": 0.024440988898277283, "rewards/rejected": -0.19875526428222656, "step": 330 }, { "epoch": 0.5216706067769897, "grad_norm": 0.2290606051683426, "learning_rate": 3.696678539857571e-06, "log_odds_chosen": 0.39967021346092224, "log_odds_ratio": -0.5233641862869263, "logits/chosen": 0.1767245978116989, "logits/rejected": -0.9841853380203247, "logps/chosen": -1.723608374595642, "logps/rejected": -2.065523862838745, "loss": 1.8305, "nll_loss": 1.7781703472137451, "rewards/accuracies": 0.875, "rewards/chosen": -0.1723608523607254, "rewards/margins": 0.0341915525496006, "rewards/rejected": -0.2065524011850357, "step": 331 }, { "epoch": 0.5232466509062254, "grad_norm": 0.22988593578338623, "learning_rate": 3.6937616786142956e-06, "log_odds_chosen": 0.526243269443512, "log_odds_ratio": -0.4684247672557831, "logits/chosen": 0.18895483016967773, "logits/rejected": -0.7783670425415039, "logps/chosen": -1.5386149883270264, "logps/rejected": -1.9738643169403076, "loss": 1.6716, "nll_loss": 1.6247152090072632, "rewards/accuracies": 1.0, "rewards/chosen": -0.15386147797107697, "rewards/margins": 0.043524954468011856, "rewards/rejected": -0.19738642871379852, "step": 332 }, { "epoch": 0.524822695035461, "grad_norm": 0.2145272046327591, "learning_rate": 3.6908320218105393e-06, "log_odds_chosen": 0.3370119333267212, "log_odds_ratio": -0.5480267405509949, "logits/chosen": 0.16891886293888092, "logits/rejected": -0.9655537009239197, "logps/chosen": -1.5358006954193115, "logps/rejected": -1.8141486644744873, "loss": 1.6492, "nll_loss": 1.5944433212280273, "rewards/accuracies": 0.875, "rewards/chosen": -0.15358006954193115, "rewards/margins": 0.0278348159044981, "rewards/rejected": -0.1814148873090744, "step": 333 }, { "epoch": 0.5263987391646966, "grad_norm": 0.20287442207336426, "learning_rate": 3.6878895915784607e-06, "log_odds_chosen": 0.48463141918182373, "log_odds_ratio": -0.4846850335597992, "logits/chosen": 0.2023809552192688, "logits/rejected": -0.9322280287742615, "logps/chosen": -1.497266411781311, "logps/rejected": -1.8917622566223145, "loss": 1.6087, "nll_loss": 1.560238242149353, "rewards/accuracies": 1.0, "rewards/chosen": -0.14972662925720215, "rewards/margins": 0.039449580013751984, "rewards/rejected": -0.18917621672153473, "step": 334 }, { "epoch": 0.5279747832939322, "grad_norm": 0.21218512952327728, "learning_rate": 3.6849344101467147e-06, "log_odds_chosen": 0.3986428380012512, "log_odds_ratio": -0.520935595035553, "logits/chosen": 0.19497668743133545, "logits/rejected": -1.0536717176437378, "logps/chosen": -1.5831056833267212, "logps/rejected": -1.9150642156600952, "loss": 1.7035, "nll_loss": 1.65139639377594, "rewards/accuracies": 0.875, "rewards/chosen": -0.15831057727336884, "rewards/margins": 0.033195845782756805, "rewards/rejected": -0.19150641560554504, "step": 335 }, { "epoch": 0.5295508274231678, "grad_norm": 0.2202080488204956, "learning_rate": 3.6819664998402857e-06, "log_odds_chosen": 0.3745194673538208, "log_odds_ratio": -0.5271171927452087, "logits/chosen": 0.1852867603302002, "logits/rejected": -0.9111910462379456, "logps/chosen": -1.6216871738433838, "logps/rejected": -1.9346219301223755, "loss": 1.7383, "nll_loss": 1.6856300830841064, "rewards/accuracies": 1.0, "rewards/chosen": -0.1621687263250351, "rewards/margins": 0.03129347786307335, "rewards/rejected": -0.19346219301223755, "step": 336 }, { "epoch": 0.5311268715524035, "grad_norm": 0.2444775551557541, "learning_rate": 3.6789858830803186e-06, "log_odds_chosen": 0.40843021869659424, "log_odds_ratio": -0.5149621963500977, "logits/chosen": 0.1969577968120575, "logits/rejected": -0.9371582269668579, "logps/chosen": -1.6738759279251099, "logps/rejected": -2.0160107612609863, "loss": 1.7778, "nll_loss": 1.7263498306274414, "rewards/accuracies": 1.0, "rewards/chosen": -0.16738761961460114, "rewards/margins": 0.03421346843242645, "rewards/rejected": -0.2016010731458664, "step": 337 }, { "epoch": 0.5327029156816391, "grad_norm": 0.23307378590106964, "learning_rate": 3.6759925823839486e-06, "log_odds_chosen": 0.31894347071647644, "log_odds_ratio": -0.5511658787727356, "logits/chosen": 0.16060388088226318, "logits/rejected": -0.8142496943473816, "logps/chosen": -1.5587881803512573, "logps/rejected": -1.8193809986114502, "loss": 1.6951, "nll_loss": 1.6400277614593506, "rewards/accuracies": 1.0, "rewards/chosen": -0.15587882697582245, "rewards/margins": 0.026059266179800034, "rewards/rejected": -0.1819380819797516, "step": 338 }, { "epoch": 0.5342789598108747, "grad_norm": 0.21192984282970428, "learning_rate": 3.672986620364134e-06, "log_odds_chosen": 0.4383492171764374, "log_odds_ratio": -0.5006621479988098, "logits/chosen": 0.19966988265514374, "logits/rejected": -0.8241013884544373, "logps/chosen": -1.6338273286819458, "logps/rejected": -1.9998819828033447, "loss": 1.7404, "nll_loss": 1.6902958154678345, "rewards/accuracies": 1.0, "rewards/chosen": -0.16338275372982025, "rewards/margins": 0.03660544753074646, "rewards/rejected": -0.1999882012605667, "step": 339 }, { "epoch": 0.5358550039401103, "grad_norm": 0.20865905284881592, "learning_rate": 3.669968019729481e-06, "log_odds_chosen": 0.5389569997787476, "log_odds_ratio": -0.4687870442867279, "logits/chosen": 0.14506393671035767, "logits/rejected": -1.0461050271987915, "logps/chosen": -1.507906198501587, "logps/rejected": -1.9527502059936523, "loss": 1.6252, "nll_loss": 1.5782867670059204, "rewards/accuracies": 1.0, "rewards/chosen": -0.15079061686992645, "rewards/margins": 0.044484399259090424, "rewards/rejected": -0.19527502357959747, "step": 340 }, { "epoch": 0.5374310480693459, "grad_norm": 0.22792787849903107, "learning_rate": 3.666936803284076e-06, "log_odds_chosen": 0.4631209373474121, "log_odds_ratio": -0.4909701347351074, "logits/chosen": 0.18787729740142822, "logits/rejected": -0.8647685647010803, "logps/chosen": -1.6495139598846436, "logps/rejected": -2.0405564308166504, "loss": 1.7452, "nll_loss": 1.6961357593536377, "rewards/accuracies": 1.0, "rewards/chosen": -0.1649513989686966, "rewards/margins": 0.039104264229536057, "rewards/rejected": -0.20405566692352295, "step": 341 }, { "epoch": 0.5390070921985816, "grad_norm": 0.22127310931682587, "learning_rate": 3.663892993927312e-06, "log_odds_chosen": 0.5525428652763367, "log_odds_ratio": -0.45917612314224243, "logits/chosen": 0.11797277629375458, "logits/rejected": -1.0334317684173584, "logps/chosen": -1.596346139907837, "logps/rejected": -2.0591015815734863, "loss": 1.7139, "nll_loss": 1.6679694652557373, "rewards/accuracies": 1.0, "rewards/chosen": -0.15963461995124817, "rewards/margins": 0.04627552628517151, "rewards/rejected": -0.20591016113758087, "step": 342 }, { "epoch": 0.5405831363278172, "grad_norm": 0.21761682629585266, "learning_rate": 3.6608366146537136e-06, "log_odds_chosen": 0.5936369299888611, "log_odds_ratio": -0.4582027494907379, "logits/chosen": 0.09912507981061935, "logits/rejected": -0.8791636824607849, "logps/chosen": -1.5269497632980347, "logps/rejected": -2.0259313583374023, "loss": 1.6475, "nll_loss": 1.601715087890625, "rewards/accuracies": 1.0, "rewards/chosen": -0.15269500017166138, "rewards/margins": 0.04989815130829811, "rewards/rejected": -0.2025931477546692, "step": 343 }, { "epoch": 0.5421591804570528, "grad_norm": 0.2286389023065567, "learning_rate": 3.6577676885527674e-06, "log_odds_chosen": 0.46725213527679443, "log_odds_ratio": -0.49099448323249817, "logits/chosen": 0.12012003362178802, "logits/rejected": -1.079842448234558, "logps/chosen": -1.5879201889038086, "logps/rejected": -1.9761006832122803, "loss": 1.6901, "nll_loss": 1.6409858465194702, "rewards/accuracies": 1.0, "rewards/chosen": -0.15879201889038086, "rewards/margins": 0.03881806507706642, "rewards/rejected": -0.1976100653409958, "step": 344 }, { "epoch": 0.5437352245862884, "grad_norm": 0.2085123509168625, "learning_rate": 3.654686238808744e-06, "log_odds_chosen": 0.4555826783180237, "log_odds_ratio": -0.49895185232162476, "logits/chosen": 0.08309884369373322, "logits/rejected": -1.0625288486480713, "logps/chosen": -1.6775835752487183, "logps/rejected": -2.0619888305664062, "loss": 1.7808, "nll_loss": 1.7309454679489136, "rewards/accuracies": 1.0, "rewards/chosen": -0.16775836050510406, "rewards/margins": 0.038440532982349396, "rewards/rejected": -0.20619890093803406, "step": 345 }, { "epoch": 0.545311268715524, "grad_norm": 0.19838877022266388, "learning_rate": 3.6515922887005245e-06, "log_odds_chosen": 0.658145010471344, "log_odds_ratio": -0.4212106168270111, "logits/chosen": -0.022555839270353317, "logits/rejected": -1.0948203802108765, "logps/chosen": -1.516296148300171, "logps/rejected": -2.05875825881958, "loss": 1.6283, "nll_loss": 1.5861799716949463, "rewards/accuracies": 1.0, "rewards/chosen": -0.15162962675094604, "rewards/margins": 0.054246217012405396, "rewards/rejected": -0.20587582886219025, "step": 346 }, { "epoch": 0.5468873128447597, "grad_norm": 0.20286299288272858, "learning_rate": 3.6484858616014236e-06, "log_odds_chosen": 0.2818338871002197, "log_odds_ratio": -0.564333438873291, "logits/chosen": 0.1583118885755539, "logits/rejected": -1.0933054685592651, "logps/chosen": -1.6371684074401855, "logps/rejected": -1.8697998523712158, "loss": 1.7386, "nll_loss": 1.6821991205215454, "rewards/accuracies": 1.0, "rewards/chosen": -0.1637168526649475, "rewards/margins": 0.02326314151287079, "rewards/rejected": -0.1869799643754959, "step": 347 }, { "epoch": 0.5484633569739953, "grad_norm": 0.29366299510002136, "learning_rate": 3.6453669809790154e-06, "log_odds_chosen": 0.32832571864128113, "log_odds_ratio": -0.5527113676071167, "logits/chosen": 0.1874067187309265, "logits/rejected": -0.7553760409355164, "logps/chosen": -1.5719748735427856, "logps/rejected": -1.8433563709259033, "loss": 1.6811, "nll_loss": 1.6258182525634766, "rewards/accuracies": 0.75, "rewards/chosen": -0.1571974903345108, "rewards/margins": 0.027138162404298782, "rewards/rejected": -0.1843356490135193, "step": 348 }, { "epoch": 0.5500394011032309, "grad_norm": 0.21022526919841766, "learning_rate": 3.642235670394952e-06, "log_odds_chosen": 0.4171709418296814, "log_odds_ratio": -0.5099711418151855, "logits/chosen": 0.164518341422081, "logits/rejected": -0.7692568898200989, "logps/chosen": -1.5778452157974243, "logps/rejected": -1.9177711009979248, "loss": 1.6972, "nll_loss": 1.646168828010559, "rewards/accuracies": 1.0, "rewards/chosen": -0.15778450667858124, "rewards/margins": 0.03399258852005005, "rewards/rejected": -0.19177711009979248, "step": 349 }, { "epoch": 0.5516154452324665, "grad_norm": 0.199528306722641, "learning_rate": 3.63909195350479e-06, "log_odds_chosen": 0.39780351519584656, "log_odds_ratio": -0.5235289335250854, "logits/chosen": 0.21239252388477325, "logits/rejected": -0.9313897490501404, "logps/chosen": -1.6653385162353516, "logps/rejected": -1.996529459953308, "loss": 1.7686, "nll_loss": 1.7162431478500366, "rewards/accuracies": 0.875, "rewards/chosen": -0.16653385758399963, "rewards/margins": 0.033119089901447296, "rewards/rejected": -0.19965295493602753, "step": 350 }, { "epoch": 0.5531914893617021, "grad_norm": 0.21873773634433746, "learning_rate": 3.635935854057809e-06, "log_odds_chosen": 0.3728238642215729, "log_odds_ratio": -0.5321751236915588, "logits/chosen": 0.04262760281562805, "logits/rejected": -0.6860441565513611, "logps/chosen": -1.6196409463882446, "logps/rejected": -1.930510401725769, "loss": 1.731, "nll_loss": 1.6777571439743042, "rewards/accuracies": 0.875, "rewards/chosen": -0.16196408867835999, "rewards/margins": 0.031086942180991173, "rewards/rejected": -0.1930510252714157, "step": 351 }, { "epoch": 0.5547675334909378, "grad_norm": 0.20314662158489227, "learning_rate": 3.6327673958968327e-06, "log_odds_chosen": 0.49560269713401794, "log_odds_ratio": -0.4786853790283203, "logits/chosen": 0.07317562401294708, "logits/rejected": -0.9375707507133484, "logps/chosen": -1.498767375946045, "logps/rejected": -1.9013431072235107, "loss": 1.6284, "nll_loss": 1.5804861783981323, "rewards/accuracies": 1.0, "rewards/chosen": -0.14987674355506897, "rewards/margins": 0.04025757685303688, "rewards/rejected": -0.19013431668281555, "step": 352 }, { "epoch": 0.5563435776201734, "grad_norm": 0.2797704339027405, "learning_rate": 3.6295866029580483e-06, "log_odds_chosen": 0.4891226589679718, "log_odds_ratio": -0.481170117855072, "logits/chosen": 0.07735095918178558, "logits/rejected": -1.0302777290344238, "logps/chosen": -1.5447524785995483, "logps/rejected": -1.9455407857894897, "loss": 1.6431, "nll_loss": 1.5949952602386475, "rewards/accuracies": 1.0, "rewards/chosen": -0.15447524189949036, "rewards/margins": 0.040078844875097275, "rewards/rejected": -0.19455409049987793, "step": 353 }, { "epoch": 0.557919621749409, "grad_norm": 0.2015417069196701, "learning_rate": 3.626393499270829e-06, "log_odds_chosen": 0.4410100281238556, "log_odds_ratio": -0.5045400261878967, "logits/chosen": 0.08361411094665527, "logits/rejected": -0.9850642681121826, "logps/chosen": -1.4818400144577026, "logps/rejected": -1.8396446704864502, "loss": 1.6143, "nll_loss": 1.5638469457626343, "rewards/accuracies": 1.0, "rewards/chosen": -0.14818400144577026, "rewards/margins": 0.03578047454357147, "rewards/rejected": -0.18396447598934174, "step": 354 }, { "epoch": 0.5594956658786446, "grad_norm": 0.20118573307991028, "learning_rate": 3.6231881089575466e-06, "log_odds_chosen": 0.6061029434204102, "log_odds_ratio": -0.4385111927986145, "logits/chosen": 0.02086889185011387, "logits/rejected": -1.1207969188690186, "logps/chosen": -1.5292540788650513, "logps/rejected": -2.0326905250549316, "loss": 1.612, "nll_loss": 1.5681171417236328, "rewards/accuracies": 1.0, "rewards/chosen": -0.15292541682720184, "rewards/margins": 0.05034365504980087, "rewards/rejected": -0.20326906442642212, "step": 355 }, { "epoch": 0.5610717100078803, "grad_norm": 0.21071591973304749, "learning_rate": 3.6199704562333945e-06, "log_odds_chosen": 0.45574647188186646, "log_odds_ratio": -0.49790191650390625, "logits/chosen": 0.054458700120449066, "logits/rejected": -0.9820718169212341, "logps/chosen": -1.5057026147842407, "logps/rejected": -1.8732877969741821, "loss": 1.6173, "nll_loss": 1.5674865245819092, "rewards/accuracies": 1.0, "rewards/chosen": -0.15057027339935303, "rewards/margins": 0.03675851225852966, "rewards/rejected": -0.1873287856578827, "step": 356 }, { "epoch": 0.5626477541371159, "grad_norm": 0.20148809254169464, "learning_rate": 3.6167405654062024e-06, "log_odds_chosen": 0.4633074402809143, "log_odds_ratio": -0.5014227032661438, "logits/chosen": 0.11039365828037262, "logits/rejected": -0.9207887649536133, "logps/chosen": -1.556457757949829, "logps/rejected": -1.9362834692001343, "loss": 1.6571, "nll_loss": 1.606947660446167, "rewards/accuracies": 1.0, "rewards/chosen": -0.15564577281475067, "rewards/margins": 0.03798258304595947, "rewards/rejected": -0.19362835586071014, "step": 357 }, { "epoch": 0.5642237982663515, "grad_norm": 0.18568812310695648, "learning_rate": 3.6134984608762515e-06, "log_odds_chosen": 0.4947914183139801, "log_odds_ratio": -0.47895732522010803, "logits/chosen": 0.09608149528503418, "logits/rejected": -1.0143461227416992, "logps/chosen": -1.4812129735946655, "logps/rejected": -1.881905436515808, "loss": 1.6029, "nll_loss": 1.5549757480621338, "rewards/accuracies": 1.0, "rewards/chosen": -0.14812129735946655, "rewards/margins": 0.04006926715373993, "rewards/rejected": -0.18819056451320648, "step": 358 }, { "epoch": 0.5657998423955871, "grad_norm": 0.2126130759716034, "learning_rate": 3.6102441671360945e-06, "log_odds_chosen": 0.47413334250450134, "log_odds_ratio": -0.4936099946498871, "logits/chosen": 0.06261715292930603, "logits/rejected": -0.8793343305587769, "logps/chosen": -1.6251752376556396, "logps/rejected": -2.0224335193634033, "loss": 1.7191, "nll_loss": 1.6696956157684326, "rewards/accuracies": 1.0, "rewards/chosen": -0.1625175178050995, "rewards/margins": 0.03972584009170532, "rewards/rejected": -0.2022433578968048, "step": 359 }, { "epoch": 0.5673758865248227, "grad_norm": 0.20976465940475464, "learning_rate": 3.6069777087703654e-06, "log_odds_chosen": 0.4963104724884033, "log_odds_ratio": -0.48178282380104065, "logits/chosen": 0.0887591764330864, "logits/rejected": -0.8758858442306519, "logps/chosen": -1.5486314296722412, "logps/rejected": -1.9577083587646484, "loss": 1.6676, "nll_loss": 1.619391679763794, "rewards/accuracies": 1.0, "rewards/chosen": -0.1548631489276886, "rewards/margins": 0.04090770334005356, "rewards/rejected": -0.19577085971832275, "step": 360 }, { "epoch": 0.5689519306540584, "grad_norm": 0.20991873741149902, "learning_rate": 3.6036991104555973e-06, "log_odds_chosen": 0.4287150204181671, "log_odds_ratio": -0.5141339898109436, "logits/chosen": 0.09619182348251343, "logits/rejected": -0.939784824848175, "logps/chosen": -1.6915943622589111, "logps/rejected": -2.055938720703125, "loss": 1.7819, "nll_loss": 1.730458378791809, "rewards/accuracies": 0.875, "rewards/chosen": -0.16915945708751678, "rewards/margins": 0.03643442690372467, "rewards/rejected": -0.20559386909008026, "step": 361 }, { "epoch": 0.570527974783294, "grad_norm": 0.2047056257724762, "learning_rate": 3.600408396960034e-06, "log_odds_chosen": 0.48920494318008423, "log_odds_ratio": -0.4854881465435028, "logits/chosen": 0.11697036772966385, "logits/rejected": -0.6390340328216553, "logps/chosen": -1.4950973987579346, "logps/rejected": -1.8872171640396118, "loss": 1.5913, "nll_loss": 1.5427401065826416, "rewards/accuracies": 1.0, "rewards/chosen": -0.1495097428560257, "rewards/margins": 0.03921198099851608, "rewards/rejected": -0.18872171640396118, "step": 362 }, { "epoch": 0.5721040189125296, "grad_norm": 0.1965114176273346, "learning_rate": 3.5971055931434447e-06, "log_odds_chosen": 0.43638381361961365, "log_odds_ratio": -0.5067461729049683, "logits/chosen": 0.13429589569568634, "logits/rejected": -0.8910205364227295, "logps/chosen": -1.5401540994644165, "logps/rejected": -1.9012306928634644, "loss": 1.6487, "nll_loss": 1.5980687141418457, "rewards/accuracies": 0.875, "rewards/chosen": -0.1540154069662094, "rewards/margins": 0.03610766679048538, "rewards/rejected": -0.1901230663061142, "step": 363 }, { "epoch": 0.5736800630417652, "grad_norm": 0.19536329805850983, "learning_rate": 3.5937907239569343e-06, "log_odds_chosen": 0.45536214113235474, "log_odds_ratio": -0.5008538365364075, "logits/chosen": 0.04754452407360077, "logits/rejected": -0.9931653141975403, "logps/chosen": -1.592581868171692, "logps/rejected": -1.9675819873809814, "loss": 1.7023, "nll_loss": 1.65225350856781, "rewards/accuracies": 0.875, "rewards/chosen": -0.15925820171833038, "rewards/margins": 0.03749999776482582, "rewards/rejected": -0.1967581808567047, "step": 364 }, { "epoch": 0.5752561071710008, "grad_norm": 0.21669350564479828, "learning_rate": 3.5904638144427572e-06, "log_odds_chosen": 0.2608916759490967, "log_odds_ratio": -0.5785377621650696, "logits/chosen": 0.10151364654302597, "logits/rejected": -0.7654220461845398, "logps/chosen": -1.651747703552246, "logps/rejected": -1.8675367832183838, "loss": 1.7496, "nll_loss": 1.6917589902877808, "rewards/accuracies": 0.75, "rewards/chosen": -0.16517476737499237, "rewards/margins": 0.021578924730420113, "rewards/rejected": -0.18675370514392853, "step": 365 }, { "epoch": 0.5768321513002365, "grad_norm": 0.20522025227546692, "learning_rate": 3.5871248897341246e-06, "log_odds_chosen": 0.48859813809394836, "log_odds_ratio": -0.4839872121810913, "logits/chosen": 0.040113721042871475, "logits/rejected": -0.7136132121086121, "logps/chosen": -1.4968351125717163, "logps/rejected": -1.9003262519836426, "loss": 1.6213, "nll_loss": 1.572914958000183, "rewards/accuracies": 1.0, "rewards/chosen": -0.14968350529670715, "rewards/margins": 0.04034912586212158, "rewards/rejected": -0.19003261625766754, "step": 366 }, { "epoch": 0.578408195429472, "grad_norm": 0.20791637897491455, "learning_rate": 3.5837739750550182e-06, "log_odds_chosen": 0.47811540961265564, "log_odds_ratio": -0.4912639856338501, "logits/chosen": 0.032442331314086914, "logits/rejected": -1.2634479999542236, "logps/chosen": -1.5675827264785767, "logps/rejected": -1.966813325881958, "loss": 1.6687, "nll_loss": 1.619585394859314, "rewards/accuracies": 1.0, "rewards/chosen": -0.15675827860832214, "rewards/margins": 0.03992307186126709, "rewards/rejected": -0.19668133556842804, "step": 367 }, { "epoch": 0.5799842395587076, "grad_norm": 0.18128971755504608, "learning_rate": 3.5804110957199977e-06, "log_odds_chosen": 0.5172293782234192, "log_odds_ratio": -0.47360023856163025, "logits/chosen": 0.12969666719436646, "logits/rejected": -0.8731107711791992, "logps/chosen": -1.549896001815796, "logps/rejected": -1.9799461364746094, "loss": 1.6502, "nll_loss": 1.6028105020523071, "rewards/accuracies": 1.0, "rewards/chosen": -0.1549895852804184, "rewards/margins": 0.043005019426345825, "rewards/rejected": -0.19799461960792542, "step": 368 }, { "epoch": 0.5815602836879432, "grad_norm": 0.1900000423192978, "learning_rate": 3.577036277134011e-06, "log_odds_chosen": 0.5826700925827026, "log_odds_ratio": -0.4474554657936096, "logits/chosen": 0.06492473185062408, "logits/rejected": -1.0637844800949097, "logps/chosen": -1.5324748754501343, "logps/rejected": -2.014566659927368, "loss": 1.6468, "nll_loss": 1.60209059715271, "rewards/accuracies": 1.0, "rewards/chosen": -0.15324749052524567, "rewards/margins": 0.04820918291807175, "rewards/rejected": -0.20145665109157562, "step": 369 }, { "epoch": 0.5831363278171788, "grad_norm": 0.20140865445137024, "learning_rate": 3.5736495447922e-06, "log_odds_chosen": 0.36809372901916504, "log_odds_ratio": -0.5302509069442749, "logits/chosen": 0.06567880511283875, "logits/rejected": -1.0636937618255615, "logps/chosen": -1.659379243850708, "logps/rejected": -1.9658015966415405, "loss": 1.7622, "nll_loss": 1.709184169769287, "rewards/accuracies": 1.0, "rewards/chosen": -0.16593793034553528, "rewards/margins": 0.03064224123954773, "rewards/rejected": -0.196580171585083, "step": 370 }, { "epoch": 0.5847123719464145, "grad_norm": 0.1792723536491394, "learning_rate": 3.5702509242797096e-06, "log_odds_chosen": 0.6814740300178528, "log_odds_ratio": -0.4248891770839691, "logits/chosen": 0.039288341999053955, "logits/rejected": -1.0555591583251953, "logps/chosen": -1.4857667684555054, "logps/rejected": -2.0495104789733887, "loss": 1.5995, "nll_loss": 1.5570186376571655, "rewards/accuracies": 1.0, "rewards/chosen": -0.14857669174671173, "rewards/margins": 0.05637436360120773, "rewards/rejected": -0.20495106279850006, "step": 371 }, { "epoch": 0.5862884160756501, "grad_norm": 0.1895635426044464, "learning_rate": 3.566840441271495e-06, "log_odds_chosen": 0.6294078230857849, "log_odds_ratio": -0.4429580569267273, "logits/chosen": 0.07919707894325256, "logits/rejected": -1.0324056148529053, "logps/chosen": -1.5050973892211914, "logps/rejected": -2.0217623710632324, "loss": 1.5956, "nll_loss": 1.5513163805007935, "rewards/accuracies": 1.0, "rewards/chosen": -0.15050974488258362, "rewards/margins": 0.05166648328304291, "rewards/rejected": -0.20217622816562653, "step": 372 }, { "epoch": 0.5878644602048857, "grad_norm": 0.1982351839542389, "learning_rate": 3.5634181215321265e-06, "log_odds_chosen": 0.5948350429534912, "log_odds_ratio": -0.4424865245819092, "logits/chosen": 0.10533066838979721, "logits/rejected": -0.964317798614502, "logps/chosen": -1.5590019226074219, "logps/rejected": -2.0552213191986084, "loss": 1.6722, "nll_loss": 1.6279126405715942, "rewards/accuracies": 1.0, "rewards/chosen": -0.15590018033981323, "rewards/margins": 0.049621954560279846, "rewards/rejected": -0.20552213490009308, "step": 373 }, { "epoch": 0.5894405043341213, "grad_norm": 0.18995660543441772, "learning_rate": 3.5599839909155947e-06, "log_odds_chosen": 0.48201611638069153, "log_odds_ratio": -0.4901425540447235, "logits/chosen": 0.06920170038938522, "logits/rejected": -0.9864767789840698, "logps/chosen": -1.5232574939727783, "logps/rejected": -1.920086145401001, "loss": 1.6269, "nll_loss": 1.5779118537902832, "rewards/accuracies": 0.875, "rewards/chosen": -0.15232573449611664, "rewards/margins": 0.03968288004398346, "rewards/rejected": -0.1920086145401001, "step": 374 }, { "epoch": 0.5910165484633569, "grad_norm": 0.1883167177438736, "learning_rate": 3.556538075365116e-06, "log_odds_chosen": 0.5346459150314331, "log_odds_ratio": -0.4716908633708954, "logits/chosen": 0.10186448693275452, "logits/rejected": -0.9781060814857483, "logps/chosen": -1.5155980587005615, "logps/rejected": -1.952977180480957, "loss": 1.6202, "nll_loss": 1.5730595588684082, "rewards/accuracies": 1.0, "rewards/chosen": -0.15155981481075287, "rewards/margins": 0.043737903237342834, "rewards/rejected": -0.1952977180480957, "step": 375 }, { "epoch": 0.5925925925925926, "grad_norm": 0.1945546567440033, "learning_rate": 3.5530804009129367e-06, "log_odds_chosen": 0.4805316925048828, "log_odds_ratio": -0.4836026430130005, "logits/chosen": 0.059399835765361786, "logits/rejected": -0.7888279557228088, "logps/chosen": -1.5551681518554688, "logps/rejected": -1.9524855613708496, "loss": 1.6452, "nll_loss": 1.5968666076660156, "rewards/accuracies": 1.0, "rewards/chosen": -0.15551680326461792, "rewards/margins": 0.039731740951538086, "rewards/rejected": -0.195248544216156, "step": 376 }, { "epoch": 0.5941686367218282, "grad_norm": 0.1976991593837738, "learning_rate": 3.5496109936801368e-06, "log_odds_chosen": 0.4780104160308838, "log_odds_ratio": -0.4899000823497772, "logits/chosen": 0.014288029633462429, "logits/rejected": -1.0679166316986084, "logps/chosen": -1.5730230808258057, "logps/rejected": -1.9747998714447021, "loss": 1.6721, "nll_loss": 1.6231590509414673, "rewards/accuracies": 1.0, "rewards/chosen": -0.15730230510234833, "rewards/margins": 0.040177665650844574, "rewards/rejected": -0.1974799782037735, "step": 377 }, { "epoch": 0.5957446808510638, "grad_norm": 0.20086143910884857, "learning_rate": 3.546129879876429e-06, "log_odds_chosen": 0.345024049282074, "log_odds_ratio": -0.547674298286438, "logits/chosen": 0.11364105343818665, "logits/rejected": -0.8114718794822693, "logps/chosen": -1.6343905925750732, "logps/rejected": -1.9218847751617432, "loss": 1.73, "nll_loss": 1.6752517223358154, "rewards/accuracies": 0.875, "rewards/chosen": -0.16343903541564941, "rewards/margins": 0.028749439865350723, "rewards/rejected": -0.19218848645687103, "step": 378 }, { "epoch": 0.5973207249802994, "grad_norm": 0.21694695949554443, "learning_rate": 3.5426370857999662e-06, "log_odds_chosen": 0.3613109886646271, "log_odds_ratio": -0.5329811573028564, "logits/chosen": 0.05423086881637573, "logits/rejected": -0.89935302734375, "logps/chosen": -1.6174565553665161, "logps/rejected": -1.9179959297180176, "loss": 1.7236, "nll_loss": 1.6703437566757202, "rewards/accuracies": 1.0, "rewards/chosen": -0.16174568235874176, "rewards/margins": 0.030053915455937386, "rewards/rejected": -0.191799595952034, "step": 379 }, { "epoch": 0.598896769109535, "grad_norm": 0.20389492809772491, "learning_rate": 3.53913263783714e-06, "log_odds_chosen": 0.3913048505783081, "log_odds_ratio": -0.5298315286636353, "logits/chosen": 0.03343074396252632, "logits/rejected": -0.9829487800598145, "logps/chosen": -1.5685036182403564, "logps/rejected": -1.8965883255004883, "loss": 1.6643, "nll_loss": 1.6112816333770752, "rewards/accuracies": 0.875, "rewards/chosen": -0.15685035288333893, "rewards/margins": 0.032808490097522736, "rewards/rejected": -0.18965885043144226, "step": 380 }, { "epoch": 0.6004728132387707, "grad_norm": 0.18716998398303986, "learning_rate": 3.53561656246238e-06, "log_odds_chosen": 0.6300040483474731, "log_odds_ratio": -0.4305461347103119, "logits/chosen": -0.0723160058259964, "logits/rejected": -1.210856556892395, "logps/chosen": -1.5864709615707397, "logps/rejected": -2.1141700744628906, "loss": 1.6812, "nll_loss": 1.638156771659851, "rewards/accuracies": 1.0, "rewards/chosen": -0.1586471050977707, "rewards/margins": 0.052769921720027924, "rewards/rejected": -0.2114170342683792, "step": 381 }, { "epoch": 0.6020488573680063, "grad_norm": 0.18554647266864777, "learning_rate": 3.532088886237956e-06, "log_odds_chosen": 0.5184412002563477, "log_odds_ratio": -0.47040432691574097, "logits/chosen": 0.008511025458574295, "logits/rejected": -1.1571906805038452, "logps/chosen": -1.6124807596206665, "logps/rejected": -2.04634428024292, "loss": 1.6949, "nll_loss": 1.6479085683822632, "rewards/accuracies": 1.0, "rewards/chosen": -0.1612480878829956, "rewards/margins": 0.04338634014129639, "rewards/rejected": -0.2046344131231308, "step": 382 }, { "epoch": 0.6036249014972419, "grad_norm": 0.17985667288303375, "learning_rate": 3.528549635813778e-06, "log_odds_chosen": 0.5406981706619263, "log_odds_ratio": -0.46587079763412476, "logits/chosen": -0.046166982501745224, "logits/rejected": -0.9862831830978394, "logps/chosen": -1.4486740827560425, "logps/rejected": -1.8883919715881348, "loss": 1.5544, "nll_loss": 1.507815957069397, "rewards/accuracies": 1.0, "rewards/chosen": -0.144867405295372, "rewards/margins": 0.04397178441286087, "rewards/rejected": -0.18883919715881348, "step": 383 }, { "epoch": 0.6052009456264775, "grad_norm": 0.19946593046188354, "learning_rate": 3.524998837927192e-06, "log_odds_chosen": 0.5522040128707886, "log_odds_ratio": -0.45645731687545776, "logits/chosen": 0.0009910385124385357, "logits/rejected": -0.9322502017021179, "logps/chosen": -1.5654282569885254, "logps/rejected": -2.024646759033203, "loss": 1.6474, "nll_loss": 1.601776361465454, "rewards/accuracies": 1.0, "rewards/chosen": -0.1565428376197815, "rewards/margins": 0.04592186585068703, "rewards/rejected": -0.20246468484401703, "step": 384 }, { "epoch": 0.6067769897557131, "grad_norm": 0.18796052038669586, "learning_rate": 3.5214365194027797e-06, "log_odds_chosen": 0.565650463104248, "log_odds_ratio": -0.4533955752849579, "logits/chosen": -0.014517467468976974, "logits/rejected": -1.0332865715026855, "logps/chosen": -1.492905855178833, "logps/rejected": -1.9580696821212769, "loss": 1.5881, "nll_loss": 1.5427998304367065, "rewards/accuracies": 1.0, "rewards/chosen": -0.14929059147834778, "rewards/margins": 0.04651637375354767, "rewards/rejected": -0.19580696523189545, "step": 385 }, { "epoch": 0.6083530338849488, "grad_norm": 0.19329367578029633, "learning_rate": 3.517862707152157e-06, "log_odds_chosen": 0.4329968988895416, "log_odds_ratio": -0.5075357556343079, "logits/chosen": 0.07324811816215515, "logits/rejected": -0.848515510559082, "logps/chosen": -1.6198889017105103, "logps/rejected": -1.980916976928711, "loss": 1.7035, "nll_loss": 1.6527172327041626, "rewards/accuracies": 1.0, "rewards/chosen": -0.16198891401290894, "rewards/margins": 0.03610279783606529, "rewards/rejected": -0.19809171557426453, "step": 386 }, { "epoch": 0.6099290780141844, "grad_norm": 0.2003830224275589, "learning_rate": 3.5142774281737674e-06, "log_odds_chosen": 0.6210008859634399, "log_odds_ratio": -0.4322190284729004, "logits/chosen": -0.0007459288462996483, "logits/rejected": -0.9446620345115662, "logps/chosen": -1.6110001802444458, "logps/rejected": -2.1332759857177734, "loss": 1.6895, "nll_loss": 1.6462812423706055, "rewards/accuracies": 1.0, "rewards/chosen": -0.16110001504421234, "rewards/margins": 0.052227575331926346, "rewards/rejected": -0.2133275866508484, "step": 387 }, { "epoch": 0.61150512214342, "grad_norm": 0.17635990679264069, "learning_rate": 3.5106807095526817e-06, "log_odds_chosen": 0.5996285080909729, "log_odds_ratio": -0.44902303814888, "logits/chosen": -0.02553224191069603, "logits/rejected": -1.1442608833312988, "logps/chosen": -1.5787206888198853, "logps/rejected": -2.0769317150115967, "loss": 1.673, "nll_loss": 1.6281229257583618, "rewards/accuracies": 0.875, "rewards/chosen": -0.1578720659017563, "rewards/margins": 0.04982110857963562, "rewards/rejected": -0.2076931744813919, "step": 388 }, { "epoch": 0.6130811662726556, "grad_norm": 0.17955166101455688, "learning_rate": 3.5070725784603905e-06, "log_odds_chosen": 0.5273114442825317, "log_odds_ratio": -0.4709566831588745, "logits/chosen": -0.12750433385372162, "logits/rejected": -0.8868665099143982, "logps/chosen": -1.450034737586975, "logps/rejected": -1.878908634185791, "loss": 1.5397, "nll_loss": 1.4926133155822754, "rewards/accuracies": 1.0, "rewards/chosen": -0.14500348269939423, "rewards/margins": 0.0428873747587204, "rewards/rejected": -0.18789085745811462, "step": 389 }, { "epoch": 0.6146572104018913, "grad_norm": 0.206462100148201, "learning_rate": 3.503453062154602e-06, "log_odds_chosen": 0.4430813491344452, "log_odds_ratio": -0.5067101120948792, "logits/chosen": -0.01937907561659813, "logits/rejected": -0.8744063973426819, "logps/chosen": -1.6130276918411255, "logps/rejected": -1.989070177078247, "loss": 1.7124, "nll_loss": 1.6617623567581177, "rewards/accuracies": 0.875, "rewards/chosen": -0.16130277514457703, "rewards/margins": 0.037604257464408875, "rewards/rejected": -0.1989070326089859, "step": 390 }, { "epoch": 0.6162332545311269, "grad_norm": 0.1900879293680191, "learning_rate": 3.499822187979032e-06, "log_odds_chosen": 0.42407649755477905, "log_odds_ratio": -0.5121719837188721, "logits/chosen": 0.0466628335416317, "logits/rejected": -1.085059404373169, "logps/chosen": -1.5678186416625977, "logps/rejected": -1.919469952583313, "loss": 1.675, "nll_loss": 1.6237623691558838, "rewards/accuracies": 0.875, "rewards/chosen": -0.1567818522453308, "rewards/margins": 0.03516513854265213, "rewards/rejected": -0.19194699823856354, "step": 391 }, { "epoch": 0.6178092986603625, "grad_norm": 0.17979402840137482, "learning_rate": 3.496179983363202e-06, "log_odds_chosen": 0.39420366287231445, "log_odds_ratio": -0.5192743539810181, "logits/chosen": 0.009703043848276138, "logits/rejected": -0.9731006026268005, "logps/chosen": -1.5731898546218872, "logps/rejected": -1.8994686603546143, "loss": 1.66, "nll_loss": 1.6080539226531982, "rewards/accuracies": 1.0, "rewards/chosen": -0.15731897950172424, "rewards/margins": 0.0326278880238533, "rewards/rejected": -0.18994688987731934, "step": 392 }, { "epoch": 0.6193853427895981, "grad_norm": 0.20772549510002136, "learning_rate": 3.4925264758222268e-06, "log_odds_chosen": 0.6100749373435974, "log_odds_ratio": -0.4368566870689392, "logits/chosen": -0.04344996437430382, "logits/rejected": -0.844353199005127, "logps/chosen": -1.5049924850463867, "logps/rejected": -2.0098416805267334, "loss": 1.6044, "nll_loss": 1.5606787204742432, "rewards/accuracies": 1.0, "rewards/chosen": -0.15049925446510315, "rewards/margins": 0.05048491805791855, "rewards/rejected": -0.2009841501712799, "step": 393 }, { "epoch": 0.6209613869188337, "grad_norm": 0.1882377713918686, "learning_rate": 3.488861692956611e-06, "log_odds_chosen": 0.5299686789512634, "log_odds_ratio": -0.46948546171188354, "logits/chosen": -0.05664820969104767, "logits/rejected": -0.9505389928817749, "logps/chosen": -1.5510681867599487, "logps/rejected": -1.9924825429916382, "loss": 1.6432, "nll_loss": 1.5962103605270386, "rewards/accuracies": 1.0, "rewards/chosen": -0.1551068127155304, "rewards/margins": 0.04414144158363342, "rewards/rejected": -0.19924825429916382, "step": 394 }, { "epoch": 0.6225374310480694, "grad_norm": 0.18782253563404083, "learning_rate": 3.4851856624520394e-06, "log_odds_chosen": 0.5067974925041199, "log_odds_ratio": -0.4746589660644531, "logits/chosen": -0.008503247052431107, "logits/rejected": -1.0330562591552734, "logps/chosen": -1.5813007354736328, "logps/rejected": -2.0009989738464355, "loss": 1.668, "nll_loss": 1.6205734014511108, "rewards/accuracies": 1.0, "rewards/chosen": -0.15813006460666656, "rewards/margins": 0.04196983203291893, "rewards/rejected": -0.200099915266037, "step": 395 }, { "epoch": 0.624113475177305, "grad_norm": 0.19832897186279297, "learning_rate": 3.4814984120791664e-06, "log_odds_chosen": 0.49104994535446167, "log_odds_ratio": -0.48074740171432495, "logits/chosen": -0.014758365228772163, "logits/rejected": -0.8711364269256592, "logps/chosen": -1.5502197742462158, "logps/rejected": -1.954229712486267, "loss": 1.6486, "nll_loss": 1.6004936695098877, "rewards/accuracies": 1.0, "rewards/chosen": -0.15502198040485382, "rewards/margins": 0.040400996804237366, "rewards/rejected": -0.1954229772090912, "step": 396 }, { "epoch": 0.6256895193065406, "grad_norm": 0.17359870672225952, "learning_rate": 3.477799969693407e-06, "log_odds_chosen": 0.446225643157959, "log_odds_ratio": -0.5032547116279602, "logits/chosen": 0.005189519375562668, "logits/rejected": -1.031449317932129, "logps/chosen": -1.5504294633865356, "logps/rejected": -1.9127092361450195, "loss": 1.6418, "nll_loss": 1.5914617776870728, "rewards/accuracies": 1.0, "rewards/chosen": -0.15504296123981476, "rewards/margins": 0.036227963864803314, "rewards/rejected": -0.19127091765403748, "step": 397 }, { "epoch": 0.6272655634357762, "grad_norm": 0.19309338927268982, "learning_rate": 3.474090363234728e-06, "log_odds_chosen": 0.7335612177848816, "log_odds_ratio": -0.4062363803386688, "logits/chosen": -0.12859651446342468, "logits/rejected": -1.0809268951416016, "logps/chosen": -1.487412929534912, "logps/rejected": -2.097137928009033, "loss": 1.5731, "nll_loss": 1.5325067043304443, "rewards/accuracies": 1.0, "rewards/chosen": -0.14874127507209778, "rewards/margins": 0.060972537845373154, "rewards/rejected": -0.20971381664276123, "step": 398 }, { "epoch": 0.6288416075650118, "grad_norm": 0.18933425843715668, "learning_rate": 3.4703696207274325e-06, "log_odds_chosen": 0.5037076473236084, "log_odds_ratio": -0.4797070622444153, "logits/chosen": -0.0356442891061306, "logits/rejected": -0.9325816035270691, "logps/chosen": -1.566871166229248, "logps/rejected": -1.9823353290557861, "loss": 1.6422, "nll_loss": 1.5941991806030273, "rewards/accuracies": 1.0, "rewards/chosen": -0.15668712556362152, "rewards/margins": 0.04154641181230545, "rewards/rejected": -0.19823354482650757, "step": 399 }, { "epoch": 0.6304176516942475, "grad_norm": 0.1794310212135315, "learning_rate": 3.4666377702799545e-06, "log_odds_chosen": 0.507260799407959, "log_odds_ratio": -0.47620829939842224, "logits/chosen": -0.1437523066997528, "logits/rejected": -1.181861400604248, "logps/chosen": -1.4786607027053833, "logps/rejected": -1.8904798030853271, "loss": 1.5826, "nll_loss": 1.5349457263946533, "rewards/accuracies": 1.0, "rewards/chosen": -0.14786607027053833, "rewards/margins": 0.04118192940950394, "rewards/rejected": -0.18904799222946167, "step": 400 }, { "epoch": 0.6319936958234831, "grad_norm": 0.19182448089122772, "learning_rate": 3.4628948400846417e-06, "log_odds_chosen": 0.6221880316734314, "log_odds_ratio": -0.43973517417907715, "logits/chosen": -0.07098089158535004, "logits/rejected": -1.0959513187408447, "logps/chosen": -1.5599616765975952, "logps/rejected": -2.0811495780944824, "loss": 1.6506, "nll_loss": 1.6066694259643555, "rewards/accuracies": 1.0, "rewards/chosen": -0.1559961885213852, "rewards/margins": 0.052118781954050064, "rewards/rejected": -0.20811496675014496, "step": 401 }, { "epoch": 0.6335697399527187, "grad_norm": 0.19639141857624054, "learning_rate": 3.4591408584175426e-06, "log_odds_chosen": 0.5058379173278809, "log_odds_ratio": -0.48095616698265076, "logits/chosen": -0.0667698010802269, "logits/rejected": -1.0430485010147095, "logps/chosen": -1.5944137573242188, "logps/rejected": -2.0188231468200684, "loss": 1.6883, "nll_loss": 1.6401551961898804, "rewards/accuracies": 1.0, "rewards/chosen": -0.15944138169288635, "rewards/margins": 0.04244093596935272, "rewards/rejected": -0.20188231766223907, "step": 402 }, { "epoch": 0.6351457840819543, "grad_norm": 0.21676738560199738, "learning_rate": 3.4553758536381974e-06, "log_odds_chosen": 0.5158869028091431, "log_odds_ratio": -0.4791540205478668, "logits/chosen": -0.07078143954277039, "logits/rejected": -1.0287364721298218, "logps/chosen": -1.5488569736480713, "logps/rejected": -1.9769119024276733, "loss": 1.6391, "nll_loss": 1.591202974319458, "rewards/accuracies": 1.0, "rewards/chosen": -0.15488570928573608, "rewards/margins": 0.04280547425150871, "rewards/rejected": -0.1976911723613739, "step": 403 }, { "epoch": 0.6367218282111899, "grad_norm": 0.19120246171951294, "learning_rate": 3.451599854189418e-06, "log_odds_chosen": 0.5294864177703857, "log_odds_ratio": -0.4672943651676178, "logits/chosen": -0.02905140444636345, "logits/rejected": -0.7433152198791504, "logps/chosen": -1.5466922521591187, "logps/rejected": -1.9846346378326416, "loss": 1.6494, "nll_loss": 1.6026866436004639, "rewards/accuracies": 1.0, "rewards/chosen": -0.15466921031475067, "rewards/margins": 0.04379424825310707, "rewards/rejected": -0.19846348464488983, "step": 404 }, { "epoch": 0.6382978723404256, "grad_norm": 0.19179391860961914, "learning_rate": 3.4478128885970765e-06, "log_odds_chosen": 0.587192177772522, "log_odds_ratio": -0.4434540867805481, "logits/chosen": -0.07451428472995758, "logits/rejected": -1.0500125885009766, "logps/chosen": -1.6142433881759644, "logps/rejected": -2.1082777976989746, "loss": 1.702, "nll_loss": 1.657612919807434, "rewards/accuracies": 1.0, "rewards/chosen": -0.16142433881759644, "rewards/margins": 0.049403443932533264, "rewards/rejected": -0.2108277678489685, "step": 405 }, { "epoch": 0.6398739164696612, "grad_norm": 0.18501067161560059, "learning_rate": 3.44401498546989e-06, "log_odds_chosen": 0.40521109104156494, "log_odds_ratio": -0.5183945298194885, "logits/chosen": -0.03797990828752518, "logits/rejected": -1.0899728536605835, "logps/chosen": -1.5686931610107422, "logps/rejected": -1.90311861038208, "loss": 1.6415, "nll_loss": 1.5896741151809692, "rewards/accuracies": 1.0, "rewards/chosen": -0.1568693071603775, "rewards/margins": 0.03344256803393364, "rewards/rejected": -0.19031187891960144, "step": 406 }, { "epoch": 0.6414499605988968, "grad_norm": 0.1799306869506836, "learning_rate": 3.4402061734992005e-06, "log_odds_chosen": 0.4967467188835144, "log_odds_ratio": -0.48000407218933105, "logits/chosen": -0.05448009818792343, "logits/rejected": -1.0589975118637085, "logps/chosen": -1.5077265501022339, "logps/rejected": -1.9145967960357666, "loss": 1.6056, "nll_loss": 1.5576308965682983, "rewards/accuracies": 1.0, "rewards/chosen": -0.15077266097068787, "rewards/margins": 0.04068702459335327, "rewards/rejected": -0.19145967066287994, "step": 407 }, { "epoch": 0.6430260047281324, "grad_norm": 0.20216302573680878, "learning_rate": 3.4363864814587656e-06, "log_odds_chosen": 0.47775065898895264, "log_odds_ratio": -0.48624420166015625, "logits/chosen": -0.17656797170639038, "logits/rejected": -0.6637123823165894, "logps/chosen": -1.4837757349014282, "logps/rejected": -1.8745161294937134, "loss": 1.5846, "nll_loss": 1.5359528064727783, "rewards/accuracies": 1.0, "rewards/chosen": -0.14837758243083954, "rewards/margins": 0.039074040949344635, "rewards/rejected": -0.18745161592960358, "step": 408 }, { "epoch": 0.644602048857368, "grad_norm": 0.20102708041667938, "learning_rate": 3.4325559382045343e-06, "log_odds_chosen": 0.36611372232437134, "log_odds_ratio": -0.5313414335250854, "logits/chosen": -0.1553874909877777, "logits/rejected": -1.0996140241622925, "logps/chosen": -1.4935764074325562, "logps/rejected": -1.7868804931640625, "loss": 1.5879, "nll_loss": 1.5347291231155396, "rewards/accuracies": 1.0, "rewards/chosen": -0.1493576467037201, "rewards/margins": 0.029330415651202202, "rewards/rejected": -0.17868804931640625, "step": 409 }, { "epoch": 0.6461780929866037, "grad_norm": 0.19717082381248474, "learning_rate": 3.4287145726744295e-06, "log_odds_chosen": 0.4966202974319458, "log_odds_ratio": -0.4785189628601074, "logits/chosen": -0.17511916160583496, "logits/rejected": -1.0572038888931274, "logps/chosen": -1.4797816276550293, "logps/rejected": -1.8831043243408203, "loss": 1.5853, "nll_loss": 1.5374045372009277, "rewards/accuracies": 1.0, "rewards/chosen": -0.14797815680503845, "rewards/margins": 0.04033227264881134, "rewards/rejected": -0.1883104145526886, "step": 410 }, { "epoch": 0.6477541371158393, "grad_norm": 0.19743499159812927, "learning_rate": 3.4248624138881335e-06, "log_odds_chosen": 0.41479527950286865, "log_odds_ratio": -0.5294526815414429, "logits/chosen": -0.07476592063903809, "logits/rejected": -0.8440074920654297, "logps/chosen": -1.6108230352401733, "logps/rejected": -1.9684065580368042, "loss": 1.6982, "nll_loss": 1.6452078819274902, "rewards/accuracies": 0.875, "rewards/chosen": -0.16108231246471405, "rewards/margins": 0.035758357495069504, "rewards/rejected": -0.19684067368507385, "step": 411 }, { "epoch": 0.6493301812450749, "grad_norm": 0.22961333394050598, "learning_rate": 3.4209994909468672e-06, "log_odds_chosen": 0.6204558610916138, "log_odds_ratio": -0.4397663474082947, "logits/chosen": -0.19274577498435974, "logits/rejected": -0.7418732643127441, "logps/chosen": -1.5166682004928589, "logps/rejected": -2.030907392501831, "loss": 1.6063, "nll_loss": 1.5622737407684326, "rewards/accuracies": 1.0, "rewards/chosen": -0.15166683495044708, "rewards/margins": 0.051423899829387665, "rewards/rejected": -0.20309074223041534, "step": 412 }, { "epoch": 0.6509062253743105, "grad_norm": 0.18674862384796143, "learning_rate": 3.4171258330331667e-06, "log_odds_chosen": 0.4268144369125366, "log_odds_ratio": -0.5047621130943298, "logits/chosen": -0.06827862560749054, "logits/rejected": -0.8055958151817322, "logps/chosen": -1.5982739925384521, "logps/rejected": -1.9501354694366455, "loss": 1.6845, "nll_loss": 1.6340434551239014, "rewards/accuracies": 1.0, "rewards/chosen": -0.15982739627361298, "rewards/margins": 0.03518615663051605, "rewards/rejected": -0.19501356780529022, "step": 413 }, { "epoch": 0.6524822695035462, "grad_norm": 0.1905805468559265, "learning_rate": 3.4132414694106684e-06, "log_odds_chosen": 0.6537319421768188, "log_odds_ratio": -0.42581480741500854, "logits/chosen": -0.09001298248767853, "logits/rejected": -1.1602773666381836, "logps/chosen": -1.5066035985946655, "logps/rejected": -2.0486607551574707, "loss": 1.5912, "nll_loss": 1.5486054420471191, "rewards/accuracies": 1.0, "rewards/chosen": -0.15066036581993103, "rewards/margins": 0.05420570820569992, "rewards/rejected": -0.20486608147621155, "step": 414 }, { "epoch": 0.6540583136327817, "grad_norm": 0.19903519749641418, "learning_rate": 3.409346429423884e-06, "log_odds_chosen": 0.43221455812454224, "log_odds_ratio": -0.5025917887687683, "logits/chosen": 0.04223542660474777, "logits/rejected": -1.0005658864974976, "logps/chosen": -1.5795127153396606, "logps/rejected": -1.9357554912567139, "loss": 1.6597, "nll_loss": 1.6094313859939575, "rewards/accuracies": 1.0, "rewards/chosen": -0.15795128047466278, "rewards/margins": 0.03562428802251816, "rewards/rejected": -0.19357556104660034, "step": 415 }, { "epoch": 0.6556343577620173, "grad_norm": 0.1885460466146469, "learning_rate": 3.40544074249798e-06, "log_odds_chosen": 0.5970582365989685, "log_odds_ratio": -0.44192248582839966, "logits/chosen": -0.13446246087551117, "logits/rejected": -1.157158374786377, "logps/chosen": -1.5702476501464844, "logps/rejected": -2.0696332454681396, "loss": 1.6555, "nll_loss": 1.6113184690475464, "rewards/accuracies": 1.0, "rewards/chosen": -0.15702477097511292, "rewards/margins": 0.049938566982746124, "rewards/rejected": -0.20696333050727844, "step": 416 }, { "epoch": 0.6572104018912529, "grad_norm": 0.20453964173793793, "learning_rate": 3.401524438138556e-06, "log_odds_chosen": 0.447142630815506, "log_odds_ratio": -0.4999522864818573, "logits/chosen": -0.06529423594474792, "logits/rejected": -0.8071571588516235, "logps/chosen": -1.5707049369812012, "logps/rejected": -1.9413305521011353, "loss": 1.6382, "nll_loss": 1.5881924629211426, "rewards/accuracies": 1.0, "rewards/chosen": -0.15707048773765564, "rewards/margins": 0.037062570452690125, "rewards/rejected": -0.19413305819034576, "step": 417 }, { "epoch": 0.6587864460204885, "grad_norm": 0.1998985856771469, "learning_rate": 3.39759754593142e-06, "log_odds_chosen": 0.49144208431243896, "log_odds_ratio": -0.4852937161922455, "logits/chosen": -0.12646208703517914, "logits/rejected": -0.9786322116851807, "logps/chosen": -1.6269687414169312, "logps/rejected": -2.0418336391448975, "loss": 1.7024, "nll_loss": 1.6539154052734375, "rewards/accuracies": 1.0, "rewards/chosen": -0.16269688308238983, "rewards/margins": 0.041486479341983795, "rewards/rejected": -0.20418335497379303, "step": 418 }, { "epoch": 0.6603624901497241, "grad_norm": 0.19601181149482727, "learning_rate": 3.3936600955423683e-06, "log_odds_chosen": 0.6909330487251282, "log_odds_ratio": -0.42103973031044006, "logits/chosen": -0.05681590735912323, "logits/rejected": -0.7964982986450195, "logps/chosen": -1.524495005607605, "logps/rejected": -2.098771333694458, "loss": 1.614, "nll_loss": 1.5719231367111206, "rewards/accuracies": 1.0, "rewards/chosen": -0.15244948863983154, "rewards/margins": 0.05742764472961426, "rewards/rejected": -0.209877148270607, "step": 419 }, { "epoch": 0.6619385342789598, "grad_norm": 0.1980346292257309, "learning_rate": 3.3897121167169573e-06, "log_odds_chosen": 0.3761827051639557, "log_odds_ratio": -0.5326985120773315, "logits/chosen": -0.1706826090812683, "logits/rejected": -1.0160996913909912, "logps/chosen": -1.4991317987442017, "logps/rejected": -1.805385947227478, "loss": 1.6005, "nll_loss": 1.5472354888916016, "rewards/accuracies": 0.875, "rewards/chosen": -0.14991319179534912, "rewards/margins": 0.030625399202108383, "rewards/rejected": -0.1805385798215866, "step": 420 }, { "epoch": 0.6635145784081954, "grad_norm": 0.19532448053359985, "learning_rate": 3.38575363928028e-06, "log_odds_chosen": 0.5968228578567505, "log_odds_ratio": -0.4450194239616394, "logits/chosen": -0.09741874039173126, "logits/rejected": -0.8825951814651489, "logps/chosen": -1.5054579973220825, "logps/rejected": -1.9978173971176147, "loss": 1.5798, "nll_loss": 1.5352727174758911, "rewards/accuracies": 1.0, "rewards/chosen": -0.15054580569267273, "rewards/margins": 0.049235932528972626, "rewards/rejected": -0.19978173077106476, "step": 421 }, { "epoch": 0.665090622537431, "grad_norm": 0.19437773525714874, "learning_rate": 3.3817846931367452e-06, "log_odds_chosen": 0.3979894518852234, "log_odds_ratio": -0.5184385180473328, "logits/chosen": -0.19602851569652557, "logits/rejected": -0.9046583771705627, "logps/chosen": -1.492729902267456, "logps/rejected": -1.8125101327896118, "loss": 1.5839, "nll_loss": 1.5320786237716675, "rewards/accuracies": 1.0, "rewards/chosen": -0.14927297830581665, "rewards/margins": 0.031978022307157516, "rewards/rejected": -0.18125101923942566, "step": 422 }, { "epoch": 0.6666666666666666, "grad_norm": 0.18563054502010345, "learning_rate": 3.377805308269844e-06, "log_odds_chosen": 0.6691169738769531, "log_odds_ratio": -0.4283203184604645, "logits/chosen": -0.06355718523263931, "logits/rejected": -1.2140281200408936, "logps/chosen": -1.549073338508606, "logps/rejected": -2.1106374263763428, "loss": 1.6271, "nll_loss": 1.5843141078948975, "rewards/accuracies": 1.0, "rewards/chosen": -0.15490733087062836, "rewards/margins": 0.05615641176700592, "rewards/rejected": -0.21106374263763428, "step": 423 }, { "epoch": 0.6682427107959023, "grad_norm": 0.1859610229730606, "learning_rate": 3.3738155147419275e-06, "log_odds_chosen": 0.6023603677749634, "log_odds_ratio": -0.44674554467201233, "logits/chosen": -0.18490374088287354, "logits/rejected": -0.9434022307395935, "logps/chosen": -1.5427931547164917, "logps/rejected": -2.0453150272369385, "loss": 1.6059, "nll_loss": 1.5612272024154663, "rewards/accuracies": 1.0, "rewards/chosen": -0.15427932143211365, "rewards/margins": 0.05025219917297363, "rewards/rejected": -0.20453152060508728, "step": 424 }, { "epoch": 0.6698187549251379, "grad_norm": 0.18988454341888428, "learning_rate": 3.3698153426939824e-06, "log_odds_chosen": 0.6990432739257812, "log_odds_ratio": -0.41350850462913513, "logits/chosen": -0.17698457837104797, "logits/rejected": -0.7861905694007874, "logps/chosen": -1.4711315631866455, "logps/rejected": -2.045083999633789, "loss": 1.5487, "nll_loss": 1.5073806047439575, "rewards/accuracies": 1.0, "rewards/chosen": -0.1471131592988968, "rewards/margins": 0.05739524960517883, "rewards/rejected": -0.20450839400291443, "step": 425 }, { "epoch": 0.6713947990543735, "grad_norm": 0.19885441660881042, "learning_rate": 3.3658048223453954e-06, "log_odds_chosen": 0.6381431221961975, "log_odds_ratio": -0.4310988783836365, "logits/chosen": -0.14545650780200958, "logits/rejected": -1.0610605478286743, "logps/chosen": -1.6127598285675049, "logps/rejected": -2.1517128944396973, "loss": 1.661, "nll_loss": 1.6179263591766357, "rewards/accuracies": 1.0, "rewards/chosen": -0.1612759828567505, "rewards/margins": 0.05389528349041939, "rewards/rejected": -0.21517126262187958, "step": 426 }, { "epoch": 0.6729708431836091, "grad_norm": 0.19337302446365356, "learning_rate": 3.3617839839937337e-06, "log_odds_chosen": 0.6319109201431274, "log_odds_ratio": -0.4360054135322571, "logits/chosen": -0.08238127827644348, "logits/rejected": -1.1307555437088013, "logps/chosen": -1.5771963596343994, "logps/rejected": -2.1106276512145996, "loss": 1.6469, "nll_loss": 1.6032705307006836, "rewards/accuracies": 1.0, "rewards/chosen": -0.15771962702274323, "rewards/margins": 0.05334313213825226, "rewards/rejected": -0.21106277406215668, "step": 427 }, { "epoch": 0.6745468873128447, "grad_norm": 0.19824667274951935, "learning_rate": 3.3577528580145107e-06, "log_odds_chosen": 0.3213936984539032, "log_odds_ratio": -0.548486053943634, "logits/chosen": -0.09538719803094864, "logits/rejected": -1.006461501121521, "logps/chosen": -1.5969069004058838, "logps/rejected": -1.8635402917861938, "loss": 1.6813, "nll_loss": 1.6264036893844604, "rewards/accuracies": 0.875, "rewards/chosen": -0.15969069302082062, "rewards/margins": 0.026663340628147125, "rewards/rejected": -0.18635404109954834, "step": 428 }, { "epoch": 0.6761229314420804, "grad_norm": 0.1969161182641983, "learning_rate": 3.353711474860956e-06, "log_odds_chosen": 0.5900525450706482, "log_odds_ratio": -0.45637646317481995, "logits/chosen": -0.154756560921669, "logits/rejected": -0.9149386882781982, "logps/chosen": -1.5449559688568115, "logps/rejected": -2.042635202407837, "loss": 1.6083, "nll_loss": 1.5626716613769531, "rewards/accuracies": 1.0, "rewards/chosen": -0.15449561178684235, "rewards/margins": 0.049767911434173584, "rewards/rejected": -0.20426350831985474, "step": 429 }, { "epoch": 0.677698975571316, "grad_norm": 0.1858314871788025, "learning_rate": 3.3496598650637916e-06, "log_odds_chosen": 0.555091917514801, "log_odds_ratio": -0.4606228470802307, "logits/chosen": -0.17273341119289398, "logits/rejected": -0.8560744524002075, "logps/chosen": -1.4441226720809937, "logps/rejected": -1.892524242401123, "loss": 1.5294, "nll_loss": 1.4833111763000488, "rewards/accuracies": 1.0, "rewards/chosen": -0.14441227912902832, "rewards/margins": 0.04484015703201294, "rewards/rejected": -0.18925242125988007, "step": 430 }, { "epoch": 0.6792750197005516, "grad_norm": 0.18842041492462158, "learning_rate": 3.3455980592309923e-06, "log_odds_chosen": 0.6005351543426514, "log_odds_ratio": -0.44451576471328735, "logits/chosen": -0.2088775932788849, "logits/rejected": -0.9120292663574219, "logps/chosen": -1.4238063097000122, "logps/rejected": -1.9125840663909912, "loss": 1.5182, "nll_loss": 1.4737814664840698, "rewards/accuracies": 1.0, "rewards/chosen": -0.14238062500953674, "rewards/margins": 0.0488777831196785, "rewards/rejected": -0.19125841557979584, "step": 431 }, { "epoch": 0.6808510638297872, "grad_norm": 0.19271476566791534, "learning_rate": 3.341526088047562e-06, "log_odds_chosen": 0.5448278188705444, "log_odds_ratio": -0.46626630425453186, "logits/chosen": -0.15303638577461243, "logits/rejected": -1.0090641975402832, "logps/chosen": -1.6087162494659424, "logps/rejected": -2.0677130222320557, "loss": 1.6696, "nll_loss": 1.622945785522461, "rewards/accuracies": 1.0, "rewards/chosen": -0.16087162494659424, "rewards/margins": 0.04589966684579849, "rewards/rejected": -0.20677129924297333, "step": 432 }, { "epoch": 0.6824271079590228, "grad_norm": 0.21036098897457123, "learning_rate": 3.3374439822752972e-06, "log_odds_chosen": 0.419351726770401, "log_odds_ratio": -0.5088696479797363, "logits/chosen": -0.10887278616428375, "logits/rejected": -0.8606767654418945, "logps/chosen": -1.572546362876892, "logps/rejected": -1.9210548400878906, "loss": 1.6632, "nll_loss": 1.612297534942627, "rewards/accuracies": 1.0, "rewards/chosen": -0.1572546511888504, "rewards/margins": 0.03485086187720299, "rewards/rejected": -0.1921055018901825, "step": 433 }, { "epoch": 0.6840031520882585, "grad_norm": 0.20627471804618835, "learning_rate": 3.333351772752559e-06, "log_odds_chosen": 0.566561222076416, "log_odds_ratio": -0.46442872285842896, "logits/chosen": -0.21569103002548218, "logits/rejected": -0.9650353789329529, "logps/chosen": -1.639683723449707, "logps/rejected": -2.114182949066162, "loss": 1.7037, "nll_loss": 1.6572954654693604, "rewards/accuracies": 1.0, "rewards/chosen": -0.16396838426589966, "rewards/margins": 0.04744991660118103, "rewards/rejected": -0.2114183008670807, "step": 434 }, { "epoch": 0.6855791962174941, "grad_norm": 0.20955084264278412, "learning_rate": 3.3292494903940338e-06, "log_odds_chosen": 0.6968019604682922, "log_odds_ratio": -0.41255873441696167, "logits/chosen": -0.1339864879846573, "logits/rejected": -0.9979945421218872, "logps/chosen": -1.54659104347229, "logps/rejected": -2.1267032623291016, "loss": 1.6187, "nll_loss": 1.5774283409118652, "rewards/accuracies": 1.0, "rewards/chosen": -0.15465912222862244, "rewards/margins": 0.05801120400428772, "rewards/rejected": -0.21267032623291016, "step": 435 }, { "epoch": 0.6871552403467297, "grad_norm": 0.18498389422893524, "learning_rate": 3.3251371661905063e-06, "log_odds_chosen": 0.5884695053100586, "log_odds_ratio": -0.45282870531082153, "logits/chosen": -0.174495130777359, "logits/rejected": -0.8306106925010681, "logps/chosen": -1.3706011772155762, "logps/rejected": -1.8502486944198608, "loss": 1.4589, "nll_loss": 1.413588523864746, "rewards/accuracies": 1.0, "rewards/chosen": -0.13706013560295105, "rewards/margins": 0.047964759171009064, "rewards/rejected": -0.18502488732337952, "step": 436 }, { "epoch": 0.6887312844759653, "grad_norm": 0.1930173933506012, "learning_rate": 3.321014831208622e-06, "log_odds_chosen": 0.5815909504890442, "log_odds_ratio": -0.44832438230514526, "logits/chosen": -0.16663047671318054, "logits/rejected": -0.8158444762229919, "logps/chosen": -1.4361575841903687, "logps/rejected": -1.9045019149780273, "loss": 1.532, "nll_loss": 1.487160325050354, "rewards/accuracies": 1.0, "rewards/chosen": -0.14361576735973358, "rewards/margins": 0.046834442764520645, "rewards/rejected": -0.19045020639896393, "step": 437 }, { "epoch": 0.6903073286052009, "grad_norm": 0.20272772014141083, "learning_rate": 3.316882516590652e-06, "log_odds_chosen": 0.6009421944618225, "log_odds_ratio": -0.43921971321105957, "logits/chosen": -0.16785867512226105, "logits/rejected": -0.9978471398353577, "logps/chosen": -1.4756765365600586, "logps/rejected": -1.96806001663208, "loss": 1.5594, "nll_loss": 1.5154640674591064, "rewards/accuracies": 1.0, "rewards/chosen": -0.14756765961647034, "rewards/margins": 0.04923836141824722, "rewards/rejected": -0.19680601358413696, "step": 438 }, { "epoch": 0.6918833727344366, "grad_norm": 0.21401967108249664, "learning_rate": 3.31274025355426e-06, "log_odds_chosen": 0.5665644407272339, "log_odds_ratio": -0.45475664734840393, "logits/chosen": -0.20744185149669647, "logits/rejected": -0.8934562802314758, "logps/chosen": -1.5667637586593628, "logps/rejected": -2.0387802124023438, "loss": 1.6208, "nll_loss": 1.575301170349121, "rewards/accuracies": 1.0, "rewards/chosen": -0.15667636692523956, "rewards/margins": 0.04720166325569153, "rewards/rejected": -0.2038780301809311, "step": 439 }, { "epoch": 0.6934594168636722, "grad_norm": 0.20656292140483856, "learning_rate": 3.308588073392265e-06, "log_odds_chosen": 0.5346047878265381, "log_odds_ratio": -0.4697275161743164, "logits/chosen": -0.19090476632118225, "logits/rejected": -0.910874605178833, "logps/chosen": -1.603625774383545, "logps/rejected": -2.054203748703003, "loss": 1.6598, "nll_loss": 1.612839937210083, "rewards/accuracies": 1.0, "rewards/chosen": -0.16036257147789001, "rewards/margins": 0.04505779966711998, "rewards/rejected": -0.2054203748703003, "step": 440 }, { "epoch": 0.6950354609929078, "grad_norm": 0.18964466452598572, "learning_rate": 3.3044260074724035e-06, "log_odds_chosen": 0.6167492866516113, "log_odds_ratio": -0.4436300992965698, "logits/chosen": -0.19338998198509216, "logits/rejected": -1.0959054231643677, "logps/chosen": -1.5115933418273926, "logps/rejected": -2.0191221237182617, "loss": 1.5891, "nll_loss": 1.544729471206665, "rewards/accuracies": 1.0, "rewards/chosen": -0.1511593461036682, "rewards/margins": 0.050752852112054825, "rewards/rejected": -0.20191219449043274, "step": 441 }, { "epoch": 0.6966115051221434, "grad_norm": 0.19798265397548676, "learning_rate": 3.300254087237097e-06, "log_odds_chosen": 0.536584198474884, "log_odds_ratio": -0.4647059440612793, "logits/chosen": -0.16768081486225128, "logits/rejected": -0.9846087098121643, "logps/chosen": -1.4075177907943726, "logps/rejected": -1.8397033214569092, "loss": 1.5153, "nll_loss": 1.4688141345977783, "rewards/accuracies": 1.0, "rewards/chosen": -0.14075177907943726, "rewards/margins": 0.043218567967414856, "rewards/rejected": -0.1839703470468521, "step": 442 }, { "epoch": 0.698187549251379, "grad_norm": 0.19152890145778656, "learning_rate": 3.2960723442032105e-06, "log_odds_chosen": 0.6987941265106201, "log_odds_ratio": -0.4106602370738983, "logits/chosen": -0.2129349410533905, "logits/rejected": -1.2987092733383179, "logps/chosen": -1.5995932817459106, "logps/rejected": -2.192657947540283, "loss": 1.6557, "nll_loss": 1.614605188369751, "rewards/accuracies": 1.0, "rewards/chosen": -0.1599593311548233, "rewards/margins": 0.059306442737579346, "rewards/rejected": -0.21926578879356384, "step": 443 }, { "epoch": 0.6997635933806147, "grad_norm": 0.1959015130996704, "learning_rate": 3.291880809961814e-06, "log_odds_chosen": 0.6200303435325623, "log_odds_ratio": -0.44293415546417236, "logits/chosen": -0.13872218132019043, "logits/rejected": -0.9167954921722412, "logps/chosen": -1.5655865669250488, "logps/rejected": -2.0818004608154297, "loss": 1.6387, "nll_loss": 1.594357967376709, "rewards/accuracies": 1.0, "rewards/chosen": -0.15655863285064697, "rewards/margins": 0.05162140727043152, "rewards/rejected": -0.20818006992340088, "step": 444 }, { "epoch": 0.7013396375098503, "grad_norm": 0.20035208761692047, "learning_rate": 3.2876795161779473e-06, "log_odds_chosen": 0.7111386060714722, "log_odds_ratio": -0.40192127227783203, "logits/chosen": -0.16324040293693542, "logits/rejected": -1.2092691659927368, "logps/chosen": -1.576241374015808, "logps/rejected": -2.177393674850464, "loss": 1.627, "nll_loss": 1.586832880973816, "rewards/accuracies": 1.0, "rewards/chosen": -0.15762415528297424, "rewards/margins": 0.060115229338407516, "rewards/rejected": -0.21773935854434967, "step": 445 }, { "epoch": 0.7029156816390859, "grad_norm": 0.19885501265525818, "learning_rate": 3.2834684945903776e-06, "log_odds_chosen": 0.5507287979125977, "log_odds_ratio": -0.4596758782863617, "logits/chosen": -0.2190311998128891, "logits/rejected": -0.9251666069030762, "logps/chosen": -1.5019633769989014, "logps/rejected": -1.9505462646484375, "loss": 1.5836, "nll_loss": 1.5376709699630737, "rewards/accuracies": 1.0, "rewards/chosen": -0.15019632875919342, "rewards/margins": 0.044858310371637344, "rewards/rejected": -0.19505466520786285, "step": 446 }, { "epoch": 0.7044917257683215, "grad_norm": 0.2020716369152069, "learning_rate": 3.2792477770113624e-06, "log_odds_chosen": 0.4936216175556183, "log_odds_ratio": -0.4818211495876312, "logits/chosen": -0.3026110827922821, "logits/rejected": -1.0682443380355835, "logps/chosen": -1.5848886966705322, "logps/rejected": -1.9951268434524536, "loss": 1.6426, "nll_loss": 1.5943942070007324, "rewards/accuracies": 1.0, "rewards/chosen": -0.15848886966705322, "rewards/margins": 0.041023820638656616, "rewards/rejected": -0.19951270520687103, "step": 447 }, { "epoch": 0.7060677698975572, "grad_norm": 0.18542690575122833, "learning_rate": 3.275017395326407e-06, "log_odds_chosen": 0.48193085193634033, "log_odds_ratio": -0.4900546371936798, "logits/chosen": -0.1320614218711853, "logits/rejected": -0.8647894263267517, "logps/chosen": -1.4781405925750732, "logps/rejected": -1.8653026819229126, "loss": 1.5636, "nll_loss": 1.5146205425262451, "rewards/accuracies": 0.875, "rewards/chosen": -0.14781403541564941, "rewards/margins": 0.03871622309088707, "rewards/rejected": -0.18653027713298798, "step": 448 }, { "epoch": 0.7076438140267928, "grad_norm": 0.1981741189956665, "learning_rate": 3.2707773814940244e-06, "log_odds_chosen": 0.6399044990539551, "log_odds_ratio": -0.43002036213874817, "logits/chosen": -0.1578344702720642, "logits/rejected": -1.0271568298339844, "logps/chosen": -1.509232997894287, "logps/rejected": -2.0302257537841797, "loss": 1.5915, "nll_loss": 1.5485467910766602, "rewards/accuracies": 1.0, "rewards/chosen": -0.15092331171035767, "rewards/margins": 0.05209926515817642, "rewards/rejected": -0.2030225694179535, "step": 449 }, { "epoch": 0.7092198581560284, "grad_norm": 0.2161954790353775, "learning_rate": 3.2665277675454935e-06, "log_odds_chosen": 0.6585409641265869, "log_odds_ratio": -0.43546614050865173, "logits/chosen": -0.2146613448858261, "logits/rejected": -1.2824149131774902, "logps/chosen": -1.591871976852417, "logps/rejected": -2.1544575691223145, "loss": 1.6446, "nll_loss": 1.60104501247406, "rewards/accuracies": 1.0, "rewards/chosen": -0.1591871976852417, "rewards/margins": 0.05625856667757034, "rewards/rejected": -0.21544577181339264, "step": 450 }, { "epoch": 0.710795902285264, "grad_norm": 0.19009153544902802, "learning_rate": 3.262268585584619e-06, "log_odds_chosen": 0.5836161971092224, "log_odds_ratio": -0.44612884521484375, "logits/chosen": -0.1468001902103424, "logits/rejected": -0.9669194221496582, "logps/chosen": -1.5377750396728516, "logps/rejected": -2.0233418941497803, "loss": 1.607, "nll_loss": 1.562421441078186, "rewards/accuracies": 1.0, "rewards/chosen": -0.15377750992774963, "rewards/margins": 0.048556677997112274, "rewards/rejected": -0.2023341804742813, "step": 451 }, { "epoch": 0.7123719464144996, "grad_norm": 0.2047768086194992, "learning_rate": 3.2579998677874853e-06, "log_odds_chosen": 0.6704806685447693, "log_odds_ratio": -0.42241793870925903, "logits/chosen": -0.22139252722263336, "logits/rejected": -0.9740051627159119, "logps/chosen": -1.572437047958374, "logps/rejected": -2.138031244277954, "loss": 1.6272, "nll_loss": 1.5849249362945557, "rewards/accuracies": 1.0, "rewards/chosen": -0.15724369883537292, "rewards/margins": 0.05655941367149353, "rewards/rejected": -0.21380311250686646, "step": 452 }, { "epoch": 0.7139479905437353, "grad_norm": 0.1976122111082077, "learning_rate": 3.2537216464022155e-06, "log_odds_chosen": 0.6525107026100159, "log_odds_ratio": -0.42794734239578247, "logits/chosen": -0.21664118766784668, "logits/rejected": -0.944098174571991, "logps/chosen": -1.5721834897994995, "logps/rejected": -2.1166865825653076, "loss": 1.638, "nll_loss": 1.5951955318450928, "rewards/accuracies": 1.0, "rewards/chosen": -0.15721836686134338, "rewards/margins": 0.054450295865535736, "rewards/rejected": -0.21166865527629852, "step": 453 }, { "epoch": 0.7155240346729709, "grad_norm": 0.19175730645656586, "learning_rate": 3.2494339537487314e-06, "log_odds_chosen": 0.5195281505584717, "log_odds_ratio": -0.46826571226119995, "logits/chosen": -0.21530620753765106, "logits/rejected": -1.1608517169952393, "logps/chosen": -1.5761486291885376, "logps/rejected": -2.0063657760620117, "loss": 1.6517, "nll_loss": 1.6048378944396973, "rewards/accuracies": 1.0, "rewards/chosen": -0.15761485695838928, "rewards/margins": 0.043021731078624725, "rewards/rejected": -0.2006365805864334, "step": 454 }, { "epoch": 0.7171000788022065, "grad_norm": 0.1911652535200119, "learning_rate": 3.2451368222185006e-06, "log_odds_chosen": 0.46220406889915466, "log_odds_ratio": -0.4957023561000824, "logits/chosen": -0.22534283995628357, "logits/rejected": -0.9358788132667542, "logps/chosen": -1.5175461769104004, "logps/rejected": -1.8928101062774658, "loss": 1.6, "nll_loss": 1.550462007522583, "rewards/accuracies": 1.0, "rewards/chosen": -0.15175461769104004, "rewards/margins": 0.03752640262246132, "rewards/rejected": -0.18928101658821106, "step": 455 }, { "epoch": 0.7186761229314421, "grad_norm": 0.17351412773132324, "learning_rate": 3.2408302842743007e-06, "log_odds_chosen": 0.6546438932418823, "log_odds_ratio": -0.42687559127807617, "logits/chosen": -0.19083309173583984, "logits/rejected": -0.9766150116920471, "logps/chosen": -1.4680445194244385, "logps/rejected": -2.0059475898742676, "loss": 1.5574, "nll_loss": 1.5147082805633545, "rewards/accuracies": 1.0, "rewards/chosen": -0.14680443704128265, "rewards/margins": 0.05379030108451843, "rewards/rejected": -0.2005947381258011, "step": 456 }, { "epoch": 0.7202521670606777, "grad_norm": 0.19053436815738678, "learning_rate": 3.2365143724499684e-06, "log_odds_chosen": 0.5713084936141968, "log_odds_ratio": -0.4496612548828125, "logits/chosen": -0.19255973398685455, "logits/rejected": -1.0097147226333618, "logps/chosen": -1.5191394090652466, "logps/rejected": -1.9902849197387695, "loss": 1.6076, "nll_loss": 1.5626832246780396, "rewards/accuracies": 1.0, "rewards/chosen": -0.15191395580768585, "rewards/margins": 0.047114528715610504, "rewards/rejected": -0.19902849197387695, "step": 457 }, { "epoch": 0.7218282111899134, "grad_norm": 0.19614577293395996, "learning_rate": 3.2321891193501564e-06, "log_odds_chosen": 0.5764437913894653, "log_odds_ratio": -0.45762819051742554, "logits/chosen": -0.17019122838974, "logits/rejected": -0.6311085224151611, "logps/chosen": -1.525330901145935, "logps/rejected": -2.001309633255005, "loss": 1.5922, "nll_loss": 1.5463932752609253, "rewards/accuracies": 1.0, "rewards/chosen": -0.15253308415412903, "rewards/margins": 0.04759787768125534, "rewards/rejected": -0.20013096928596497, "step": 458 }, { "epoch": 0.723404255319149, "grad_norm": 0.18243008852005005, "learning_rate": 3.2278545576500858e-06, "log_odds_chosen": 0.8040944337844849, "log_odds_ratio": -0.3783116936683655, "logits/chosen": -0.10738835483789444, "logits/rejected": -0.7439028024673462, "logps/chosen": -1.3702032566070557, "logps/rejected": -2.027367115020752, "loss": 1.4567, "nll_loss": 1.4188926219940186, "rewards/accuracies": 1.0, "rewards/chosen": -0.1370203197002411, "rewards/margins": 0.06571639329195023, "rewards/rejected": -0.2027367204427719, "step": 459 }, { "epoch": 0.7249802994483846, "grad_norm": 0.19899946451187134, "learning_rate": 3.223510720095299e-06, "log_odds_chosen": 0.7631068825721741, "log_odds_ratio": -0.39056843519210815, "logits/chosen": -0.26606088876724243, "logits/rejected": -1.066392421722412, "logps/chosen": -1.5545616149902344, "logps/rejected": -2.1977691650390625, "loss": 1.6112, "nll_loss": 1.5720990896224976, "rewards/accuracies": 1.0, "rewards/chosen": -0.15545617043972015, "rewards/margins": 0.06432076543569565, "rewards/rejected": -0.2197769284248352, "step": 460 }, { "epoch": 0.7265563435776202, "grad_norm": 0.18922847509384155, "learning_rate": 3.2191576395014158e-06, "log_odds_chosen": 0.70744788646698, "log_odds_ratio": -0.4035731256008148, "logits/chosen": -0.21413788199424744, "logits/rejected": -0.9344199299812317, "logps/chosen": -1.4856576919555664, "logps/rejected": -2.070754051208496, "loss": 1.5558, "nll_loss": 1.5154281854629517, "rewards/accuracies": 1.0, "rewards/chosen": -0.14856578409671783, "rewards/margins": 0.058509618043899536, "rewards/rejected": -0.20707541704177856, "step": 461 }, { "epoch": 0.7281323877068558, "grad_norm": 0.1896039843559265, "learning_rate": 3.2147953487538794e-06, "log_odds_chosen": 0.6818787455558777, "log_odds_ratio": -0.4148927927017212, "logits/chosen": -0.15922857820987701, "logits/rejected": -1.004948377609253, "logps/chosen": -1.4554266929626465, "logps/rejected": -2.015336275100708, "loss": 1.5271, "nll_loss": 1.4856001138687134, "rewards/accuracies": 1.0, "rewards/chosen": -0.1455426663160324, "rewards/margins": 0.05599096417427063, "rewards/rejected": -0.20153363049030304, "step": 462 }, { "epoch": 0.7297084318360915, "grad_norm": 0.19634757936000824, "learning_rate": 3.2104238808077133e-06, "log_odds_chosen": 0.5675202012062073, "log_odds_ratio": -0.4579786956310272, "logits/chosen": -0.16884222626686096, "logits/rejected": -0.785213828086853, "logps/chosen": -1.490207314491272, "logps/rejected": -1.9569623470306396, "loss": 1.5656, "nll_loss": 1.5198465585708618, "rewards/accuracies": 1.0, "rewards/chosen": -0.1490207314491272, "rewards/margins": 0.04667549580335617, "rewards/rejected": -0.19569621980190277, "step": 463 }, { "epoch": 0.731284475965327, "grad_norm": 0.18298950791358948, "learning_rate": 3.2060432686872704e-06, "log_odds_chosen": 0.817018449306488, "log_odds_ratio": -0.37420207262039185, "logits/chosen": -0.25051018595695496, "logits/rejected": -0.8697549104690552, "logps/chosen": -1.399641513824463, "logps/rejected": -2.06581449508667, "loss": 1.4859, "nll_loss": 1.4484362602233887, "rewards/accuracies": 1.0, "rewards/chosen": -0.13996414840221405, "rewards/margins": 0.06661731749773026, "rewards/rejected": -0.2065814733505249, "step": 464 }, { "epoch": 0.7328605200945626, "grad_norm": 0.18866463005542755, "learning_rate": 3.201653545485982e-06, "log_odds_chosen": 0.6448830962181091, "log_odds_ratio": -0.42958295345306396, "logits/chosen": -0.1599559634923935, "logits/rejected": -0.9195252060890198, "logps/chosen": -1.5451632738113403, "logps/rejected": -2.085179090499878, "loss": 1.6017, "nll_loss": 1.5587797164916992, "rewards/accuracies": 1.0, "rewards/chosen": -0.1545163244009018, "rewards/margins": 0.0540015734732151, "rewards/rejected": -0.2085179090499878, "step": 465 }, { "epoch": 0.7344365642237982, "grad_norm": 0.20371563732624054, "learning_rate": 3.197254744366111e-06, "log_odds_chosen": 0.6497169733047485, "log_odds_ratio": -0.42687100172042847, "logits/chosen": -0.1431826502084732, "logits/rejected": -0.902421236038208, "logps/chosen": -1.4522693157196045, "logps/rejected": -1.9825525283813477, "loss": 1.5297, "nll_loss": 1.4870625734329224, "rewards/accuracies": 1.0, "rewards/chosen": -0.14522692561149597, "rewards/margins": 0.05302830785512924, "rewards/rejected": -0.1982552409172058, "step": 466 }, { "epoch": 0.7360126083530338, "grad_norm": 0.1974330097436905, "learning_rate": 3.192846898558498e-06, "log_odds_chosen": 0.5328829288482666, "log_odds_ratio": -0.46320897340774536, "logits/chosen": -0.1656794548034668, "logits/rejected": -0.9588291645050049, "logps/chosen": -1.5812673568725586, "logps/rejected": -2.025834321975708, "loss": 1.6544, "nll_loss": 1.608088731765747, "rewards/accuracies": 1.0, "rewards/chosen": -0.15812674164772034, "rewards/margins": 0.04445669800043106, "rewards/rejected": -0.2025834321975708, "step": 467 }, { "epoch": 0.7375886524822695, "grad_norm": 0.19795842468738556, "learning_rate": 3.188430041362313e-06, "log_odds_chosen": 0.47163689136505127, "log_odds_ratio": -0.49232786893844604, "logits/chosen": -0.12224957346916199, "logits/rejected": -0.909099280834198, "logps/chosen": -1.5807934999465942, "logps/rejected": -1.9740912914276123, "loss": 1.6398, "nll_loss": 1.5905786752700806, "rewards/accuracies": 1.0, "rewards/chosen": -0.1580793559551239, "rewards/margins": 0.03932976722717285, "rewards/rejected": -0.19740912318229675, "step": 468 }, { "epoch": 0.7391646966115051, "grad_norm": 0.19603583216667175, "learning_rate": 3.184004206144803e-06, "log_odds_chosen": 0.7065101861953735, "log_odds_ratio": -0.41205504536628723, "logits/chosen": -0.2486933022737503, "logits/rejected": -0.9299865961074829, "logps/chosen": -1.4757022857666016, "logps/rejected": -2.054920196533203, "loss": 1.5464, "nll_loss": 1.5052430629730225, "rewards/accuracies": 1.0, "rewards/chosen": -0.14757022261619568, "rewards/margins": 0.05792178213596344, "rewards/rejected": -0.20549200475215912, "step": 469 }, { "epoch": 0.7407407407407407, "grad_norm": 0.19463147222995758, "learning_rate": 3.1795694263410386e-06, "log_odds_chosen": 0.7694414258003235, "log_odds_ratio": -0.39305710792541504, "logits/chosen": -0.21214531362056732, "logits/rejected": -0.9676571488380432, "logps/chosen": -1.4680100679397583, "logps/rejected": -2.1116573810577393, "loss": 1.5324, "nll_loss": 1.4930670261383057, "rewards/accuracies": 1.0, "rewards/chosen": -0.14680100977420807, "rewards/margins": 0.0643647089600563, "rewards/rejected": -0.21116572618484497, "step": 470 }, { "epoch": 0.7423167848699763, "grad_norm": 0.18510647118091583, "learning_rate": 3.1751257354536634e-06, "log_odds_chosen": 0.5473852753639221, "log_odds_ratio": -0.469069242477417, "logits/chosen": -0.16158902645111084, "logits/rejected": -1.0543768405914307, "logps/chosen": -1.458742618560791, "logps/rejected": -1.9098637104034424, "loss": 1.5407, "nll_loss": 1.4938390254974365, "rewards/accuracies": 0.875, "rewards/chosen": -0.1458742618560791, "rewards/margins": 0.04511209577322006, "rewards/rejected": -0.19098637998104095, "step": 471 }, { "epoch": 0.7438928289992119, "grad_norm": 0.19235409796237946, "learning_rate": 3.1706731670526394e-06, "log_odds_chosen": 0.608568012714386, "log_odds_ratio": -0.43675366044044495, "logits/chosen": -0.26602888107299805, "logits/rejected": -0.9281780123710632, "logps/chosen": -1.5125672817230225, "logps/rejected": -2.013179063796997, "loss": 1.5683, "nll_loss": 1.5246038436889648, "rewards/accuracies": 1.0, "rewards/chosen": -0.15125672519207, "rewards/margins": 0.0500611811876297, "rewards/rejected": -0.2013178914785385, "step": 472 }, { "epoch": 0.7454688731284476, "grad_norm": 0.19334271550178528, "learning_rate": 3.166211754774994e-06, "log_odds_chosen": 0.6511934995651245, "log_odds_ratio": -0.42712464928627014, "logits/chosen": -0.20964574813842773, "logits/rejected": -1.049457311630249, "logps/chosen": -1.5654706954956055, "logps/rejected": -2.1108522415161133, "loss": 1.6535, "nll_loss": 1.6107853651046753, "rewards/accuracies": 1.0, "rewards/chosen": -0.15654708445072174, "rewards/margins": 0.05453815311193466, "rewards/rejected": -0.2110852152109146, "step": 473 }, { "epoch": 0.7470449172576832, "grad_norm": 0.21741212904453278, "learning_rate": 3.1617415323245665e-06, "log_odds_chosen": 0.6575350761413574, "log_odds_ratio": -0.4272666275501251, "logits/chosen": -0.24989920854568481, "logits/rejected": -1.0688942670822144, "logps/chosen": -1.4732602834701538, "logps/rejected": -2.006078004837036, "loss": 1.5346, "nll_loss": 1.491843581199646, "rewards/accuracies": 1.0, "rewards/chosen": -0.1473260223865509, "rewards/margins": 0.05328178033232689, "rewards/rejected": -0.2006077915430069, "step": 474 }, { "epoch": 0.7486209613869188, "grad_norm": 0.18145692348480225, "learning_rate": 3.157262533471752e-06, "log_odds_chosen": 0.6887553930282593, "log_odds_ratio": -0.4109429717063904, "logits/chosen": -0.1704792082309723, "logits/rejected": -1.0367528200149536, "logps/chosen": -1.5262348651885986, "logps/rejected": -2.1029748916625977, "loss": 1.5964, "nll_loss": 1.5552966594696045, "rewards/accuracies": 1.0, "rewards/chosen": -0.1526234894990921, "rewards/margins": 0.057673998177051544, "rewards/rejected": -0.21029751002788544, "step": 475 }, { "epoch": 0.7501970055161544, "grad_norm": 0.20390117168426514, "learning_rate": 3.1527747920532468e-06, "log_odds_chosen": 0.5650341510772705, "log_odds_ratio": -0.4515586495399475, "logits/chosen": -0.17346344888210297, "logits/rejected": -0.88625168800354, "logps/chosen": -1.4861788749694824, "logps/rejected": -1.9488956928253174, "loss": 1.5617, "nll_loss": 1.516565203666687, "rewards/accuracies": 1.0, "rewards/chosen": -0.14861789345741272, "rewards/margins": 0.0462716668844223, "rewards/rejected": -0.19488956034183502, "step": 476 }, { "epoch": 0.75177304964539, "grad_norm": 0.19172121584415436, "learning_rate": 3.148278341971795e-06, "log_odds_chosen": 0.6366340517997742, "log_odds_ratio": -0.4304071068763733, "logits/chosen": -0.2341357320547104, "logits/rejected": -0.8897933959960938, "logps/chosen": -1.5178066492080688, "logps/rejected": -2.045914888381958, "loss": 1.5838, "nll_loss": 1.5407583713531494, "rewards/accuracies": 1.0, "rewards/chosen": -0.15178067982196808, "rewards/margins": 0.052810803055763245, "rewards/rejected": -0.20459145307540894, "step": 477 }, { "epoch": 0.7533490937746257, "grad_norm": 0.20052027702331543, "learning_rate": 3.143773217195929e-06, "log_odds_chosen": 0.7204963564872742, "log_odds_ratio": -0.40443986654281616, "logits/chosen": -0.24737702310085297, "logits/rejected": -1.0543692111968994, "logps/chosen": -1.5792121887207031, "logps/rejected": -2.187511920928955, "loss": 1.6363, "nll_loss": 1.5958765745162964, "rewards/accuracies": 1.0, "rewards/chosen": -0.1579212099313736, "rewards/margins": 0.060829974710941315, "rewards/rejected": -0.2187511920928955, "step": 478 }, { "epoch": 0.7549251379038613, "grad_norm": 0.18838270008563995, "learning_rate": 3.139259451759714e-06, "log_odds_chosen": 0.5550141930580139, "log_odds_ratio": -0.4581400752067566, "logits/chosen": -0.2387973815202713, "logits/rejected": -0.9775084257125854, "logps/chosen": -1.5817269086837769, "logps/rejected": -2.044766426086426, "loss": 1.6553, "nll_loss": 1.6094508171081543, "rewards/accuracies": 1.0, "rewards/chosen": -0.15817268192768097, "rewards/margins": 0.046303942799568176, "rewards/rejected": -0.20447663962841034, "step": 479 }, { "epoch": 0.7565011820330969, "grad_norm": 0.1940333992242813, "learning_rate": 3.134737079762493e-06, "log_odds_chosen": 0.5709211230278015, "log_odds_ratio": -0.4525545537471771, "logits/chosen": -0.203268900513649, "logits/rejected": -0.9061958193778992, "logps/chosen": -1.5168800354003906, "logps/rejected": -1.9859740734100342, "loss": 1.5814, "nll_loss": 1.5361382961273193, "rewards/accuracies": 1.0, "rewards/chosen": -0.15168799459934235, "rewards/margins": 0.046909406781196594, "rewards/rejected": -0.19859741628170013, "step": 480 }, { "epoch": 0.7580772261623325, "grad_norm": 0.19957613945007324, "learning_rate": 3.130206135368626e-06, "log_odds_chosen": 0.5962250232696533, "log_odds_ratio": -0.450898677110672, "logits/chosen": -0.24314109981060028, "logits/rejected": -0.7290536165237427, "logps/chosen": -1.4346656799316406, "logps/rejected": -1.9179528951644897, "loss": 1.5249, "nll_loss": 1.4798487424850464, "rewards/accuracies": 1.0, "rewards/chosen": -0.14346656203269958, "rewards/margins": 0.048328716307878494, "rewards/rejected": -0.19179528951644897, "step": 481 }, { "epoch": 0.7596532702915682, "grad_norm": 0.267019122838974, "learning_rate": 3.1256666528072327e-06, "log_odds_chosen": 0.761562705039978, "log_odds_ratio": -0.38860708475112915, "logits/chosen": -0.23340564966201782, "logits/rejected": -0.7684799432754517, "logps/chosen": -1.4836543798446655, "logps/rejected": -2.114672899246216, "loss": 1.5491, "nll_loss": 1.5102040767669678, "rewards/accuracies": 1.0, "rewards/chosen": -0.14836543798446655, "rewards/margins": 0.06310184299945831, "rewards/rejected": -0.21146729588508606, "step": 482 }, { "epoch": 0.7612293144208038, "grad_norm": 0.1857529729604721, "learning_rate": 3.121118666371937e-06, "log_odds_chosen": 0.5648155808448792, "log_odds_ratio": -0.456367164850235, "logits/chosen": -0.1727515161037445, "logits/rejected": -1.1376999616622925, "logps/chosen": -1.545863389968872, "logps/rejected": -2.0154378414154053, "loss": 1.6028, "nll_loss": 1.557152271270752, "rewards/accuracies": 1.0, "rewards/chosen": -0.15458634495735168, "rewards/margins": 0.04695742577314377, "rewards/rejected": -0.20154377818107605, "step": 483 }, { "epoch": 0.7628053585500394, "grad_norm": 0.1998901218175888, "learning_rate": 3.1165622104206034e-06, "log_odds_chosen": 0.744139552116394, "log_odds_ratio": -0.3924621343612671, "logits/chosen": -0.3864690661430359, "logits/rejected": -0.8910558819770813, "logps/chosen": -1.4746683835983276, "logps/rejected": -2.0918891429901123, "loss": 1.5428, "nll_loss": 1.5035266876220703, "rewards/accuracies": 1.0, "rewards/chosen": -0.14746685326099396, "rewards/margins": 0.061722077429294586, "rewards/rejected": -0.20918893814086914, "step": 484 }, { "epoch": 0.764381402679275, "grad_norm": 0.1890469342470169, "learning_rate": 3.1119973193750816e-06, "log_odds_chosen": 0.6397299766540527, "log_odds_ratio": -0.4284901022911072, "logits/chosen": -0.2745180130004883, "logits/rejected": -0.9615079164505005, "logps/chosen": -1.4832903146743774, "logps/rejected": -2.0098438262939453, "loss": 1.5453, "nll_loss": 1.502469778060913, "rewards/accuracies": 1.0, "rewards/chosen": -0.14832903444766998, "rewards/margins": 0.052655380219221115, "rewards/rejected": -0.200984388589859, "step": 485 }, { "epoch": 0.7659574468085106, "grad_norm": 0.23916488885879517, "learning_rate": 3.1074240277209408e-06, "log_odds_chosen": 0.596796452999115, "log_odds_ratio": -0.4465484321117401, "logits/chosen": -0.27292871475219727, "logits/rejected": -0.8987700939178467, "logps/chosen": -1.5037661790847778, "logps/rejected": -1.9917497634887695, "loss": 1.5509, "nll_loss": 1.5062373876571655, "rewards/accuracies": 1.0, "rewards/chosen": -0.15037663280963898, "rewards/margins": 0.04879835247993469, "rewards/rejected": -0.19917498528957367, "step": 486 }, { "epoch": 0.7675334909377463, "grad_norm": 0.21090711653232574, "learning_rate": 3.102842370007217e-06, "log_odds_chosen": 0.6544410586357117, "log_odds_ratio": -0.42719337344169617, "logits/chosen": -0.16714385151863098, "logits/rejected": -0.8268420100212097, "logps/chosen": -1.5153406858444214, "logps/rejected": -2.051680088043213, "loss": 1.5812, "nll_loss": 1.538484811782837, "rewards/accuracies": 1.0, "rewards/chosen": -0.1515340805053711, "rewards/margins": 0.0536339245736599, "rewards/rejected": -0.2051679790019989, "step": 487 }, { "epoch": 0.7691095350669819, "grad_norm": 0.1895013153553009, "learning_rate": 3.0982523808461454e-06, "log_odds_chosen": 0.5242911577224731, "log_odds_ratio": -0.4711127281188965, "logits/chosen": -0.16803131997585297, "logits/rejected": -0.8569961786270142, "logps/chosen": -1.557231068611145, "logps/rejected": -1.9868173599243164, "loss": 1.6165, "nll_loss": 1.569408893585205, "rewards/accuracies": 1.0, "rewards/chosen": -0.15572310984134674, "rewards/margins": 0.04295860975980759, "rewards/rejected": -0.19868171215057373, "step": 488 }, { "epoch": 0.7706855791962175, "grad_norm": 0.1873975694179535, "learning_rate": 3.0936540949129006e-06, "log_odds_chosen": 0.6198223829269409, "log_odds_ratio": -0.44185134768486023, "logits/chosen": -0.21959857642650604, "logits/rejected": -0.8527919054031372, "logps/chosen": -1.5346941947937012, "logps/rejected": -2.0513901710510254, "loss": 1.601, "nll_loss": 1.5567827224731445, "rewards/accuracies": 1.0, "rewards/chosen": -0.15346942842006683, "rewards/margins": 0.05166961997747421, "rewards/rejected": -0.20513902604579926, "step": 489 }, { "epoch": 0.7722616233254531, "grad_norm": 0.1847822517156601, "learning_rate": 3.0890475469453378e-06, "log_odds_chosen": 0.7269415855407715, "log_odds_ratio": -0.4065399765968323, "logits/chosen": -0.2197483777999878, "logits/rejected": -0.9742456078529358, "logps/chosen": -1.4097678661346436, "logps/rejected": -2.001023054122925, "loss": 1.4795, "nll_loss": 1.4388233423233032, "rewards/accuracies": 1.0, "rewards/chosen": -0.14097680151462555, "rewards/margins": 0.05912550166249275, "rewards/rejected": -0.2001022845506668, "step": 490 }, { "epoch": 0.7738376674546887, "grad_norm": 0.18551437556743622, "learning_rate": 3.0844327717437263e-06, "log_odds_chosen": 0.6703569889068604, "log_odds_ratio": -0.42353707551956177, "logits/chosen": -0.23779956996440887, "logits/rejected": -0.9448487758636475, "logps/chosen": -1.4378920793533325, "logps/rejected": -1.9892666339874268, "loss": 1.5099, "nll_loss": 1.4675414562225342, "rewards/accuracies": 1.0, "rewards/chosen": -0.14378920197486877, "rewards/margins": 0.055137455463409424, "rewards/rejected": -0.198926642537117, "step": 491 }, { "epoch": 0.7754137115839244, "grad_norm": 0.17690856754779816, "learning_rate": 3.0798098041704892e-06, "log_odds_chosen": 0.4932956099510193, "log_odds_ratio": -0.48199495673179626, "logits/chosen": -0.18783992528915405, "logits/rejected": -0.9837126135826111, "logps/chosen": -1.36940336227417, "logps/rejected": -1.7628213167190552, "loss": 1.4616, "nll_loss": 1.4133892059326172, "rewards/accuracies": 1.0, "rewards/chosen": -0.13694033026695251, "rewards/margins": 0.039341796189546585, "rewards/rejected": -0.17628213763237, "step": 492 }, { "epoch": 0.77698975571316, "grad_norm": 0.19573038816452026, "learning_rate": 3.0751786791499368e-06, "log_odds_chosen": 0.6043769717216492, "log_odds_ratio": -0.44333675503730774, "logits/chosen": -0.18606124818325043, "logits/rejected": -0.9217195510864258, "logps/chosen": -1.558120608329773, "logps/rejected": -2.0642759799957275, "loss": 1.6268, "nll_loss": 1.5824553966522217, "rewards/accuracies": 1.0, "rewards/chosen": -0.15581203997135162, "rewards/margins": 0.050615549087524414, "rewards/rejected": -0.20642760396003723, "step": 493 }, { "epoch": 0.7785657998423956, "grad_norm": 0.18777020275592804, "learning_rate": 3.070539431668008e-06, "log_odds_chosen": 0.6096667647361755, "log_odds_ratio": -0.4396899938583374, "logits/chosen": -0.19759081304073334, "logits/rejected": -0.8988550305366516, "logps/chosen": -1.5582072734832764, "logps/rejected": -2.069912910461426, "loss": 1.6177, "nll_loss": 1.5736886262893677, "rewards/accuracies": 1.0, "rewards/chosen": -0.15582072734832764, "rewards/margins": 0.051170557737350464, "rewards/rejected": -0.2069912701845169, "step": 494 }, { "epoch": 0.7801418439716312, "grad_norm": 0.19385822117328644, "learning_rate": 3.0658920967720018e-06, "log_odds_chosen": 0.777790904045105, "log_odds_ratio": -0.3871975541114807, "logits/chosen": -0.3274940252304077, "logits/rejected": -0.924656331539154, "logps/chosen": -1.495604395866394, "logps/rejected": -2.147104263305664, "loss": 1.5648, "nll_loss": 1.5260502099990845, "rewards/accuracies": 1.0, "rewards/chosen": -0.14956045150756836, "rewards/margins": 0.06514997035264969, "rewards/rejected": -0.21471041440963745, "step": 495 }, { "epoch": 0.7817178881008668, "grad_norm": 0.19317972660064697, "learning_rate": 3.0612367095703116e-06, "log_odds_chosen": 0.7192878723144531, "log_odds_ratio": -0.4008732736110687, "logits/chosen": -0.1955651044845581, "logits/rejected": -1.0837275981903076, "logps/chosen": -1.5353162288665771, "logps/rejected": -2.139145851135254, "loss": 1.5897, "nll_loss": 1.5495661497116089, "rewards/accuracies": 1.0, "rewards/chosen": -0.15353162586688995, "rewards/margins": 0.06038297340273857, "rewards/rejected": -0.21391460299491882, "step": 496 }, { "epoch": 0.7832939322301025, "grad_norm": 0.1901102215051651, "learning_rate": 3.056573305232167e-06, "log_odds_chosen": 0.7629727721214294, "log_odds_ratio": -0.38942158222198486, "logits/chosen": -0.2240157276391983, "logits/rejected": -1.0085515975952148, "logps/chosen": -1.4874334335327148, "logps/rejected": -2.1163125038146973, "loss": 1.554, "nll_loss": 1.5150222778320312, "rewards/accuracies": 1.0, "rewards/chosen": -0.14874334633350372, "rewards/margins": 0.06288789212703705, "rewards/rejected": -0.21163123846054077, "step": 497 }, { "epoch": 0.7848699763593381, "grad_norm": 0.18616610765457153, "learning_rate": 3.051901918987359e-06, "log_odds_chosen": 0.7289243340492249, "log_odds_ratio": -0.3984372615814209, "logits/chosen": -0.3597657084465027, "logits/rejected": -1.0724691152572632, "logps/chosen": -1.43277108669281, "logps/rejected": -2.0301997661590576, "loss": 1.5008, "nll_loss": 1.4609586000442505, "rewards/accuracies": 1.0, "rewards/chosen": -0.143277108669281, "rewards/margins": 0.05974285304546356, "rewards/rejected": -0.20301997661590576, "step": 498 }, { "epoch": 0.7864460204885737, "grad_norm": 0.23125092685222626, "learning_rate": 3.047222586125979e-06, "log_odds_chosen": 0.7393041253089905, "log_odds_ratio": -0.39401498436927795, "logits/chosen": -0.16603292524814606, "logits/rejected": -0.6574579477310181, "logps/chosen": -1.4413788318634033, "logps/rejected": -2.049246072769165, "loss": 1.5197, "nll_loss": 1.4802521467208862, "rewards/accuracies": 1.0, "rewards/chosen": -0.1441378891468048, "rewards/margins": 0.06078672781586647, "rewards/rejected": -0.20492461323738098, "step": 499 }, { "epoch": 0.7880220646178093, "grad_norm": 0.18493805825710297, "learning_rate": 3.042535341998152e-06, "log_odds_chosen": 0.5539823174476624, "log_odds_ratio": -0.4595508873462677, "logits/chosen": -0.08695846050977707, "logits/rejected": -0.9634714722633362, "logps/chosen": -1.5871978998184204, "logps/rejected": -2.052105665206909, "loss": 1.6325, "nll_loss": 1.5865356922149658, "rewards/accuracies": 1.0, "rewards/chosen": -0.15871979296207428, "rewards/margins": 0.04649076238274574, "rewards/rejected": -0.20521055161952972, "step": 500 }, { "epoch": 0.789598108747045, "grad_norm": 0.19296394288539886, "learning_rate": 3.037840222013769e-06, "log_odds_chosen": 0.6352705955505371, "log_odds_ratio": -0.43386781215667725, "logits/chosen": -0.1699742078781128, "logits/rejected": -0.6674728393554688, "logps/chosen": -1.5233521461486816, "logps/rejected": -2.051621913909912, "loss": 1.5786, "nll_loss": 1.535168170928955, "rewards/accuracies": 1.0, "rewards/chosen": -0.15233521163463593, "rewards/margins": 0.05282699689269066, "rewards/rejected": -0.2051621973514557, "step": 501 }, { "epoch": 0.7911741528762806, "grad_norm": 0.20238351821899414, "learning_rate": 3.033137261642219e-06, "log_odds_chosen": 0.7758163809776306, "log_odds_ratio": -0.387259840965271, "logits/chosen": -0.20891791582107544, "logits/rejected": -0.8921989798545837, "logps/chosen": -1.4728256464004517, "logps/rejected": -2.116684913635254, "loss": 1.5287, "nll_loss": 1.4899262189865112, "rewards/accuracies": 1.0, "rewards/chosen": -0.14728258550167084, "rewards/margins": 0.06438593566417694, "rewards/rejected": -0.21166852116584778, "step": 502 }, { "epoch": 0.7927501970055162, "grad_norm": 0.17940200865268707, "learning_rate": 3.02842649641212e-06, "log_odds_chosen": 0.7171710729598999, "log_odds_ratio": -0.4066328704357147, "logits/chosen": -0.11533120274543762, "logits/rejected": -0.8427188992500305, "logps/chosen": -1.5167659521102905, "logps/rejected": -2.117778778076172, "loss": 1.5797, "nll_loss": 1.539076328277588, "rewards/accuracies": 1.0, "rewards/chosen": -0.15167661011219025, "rewards/margins": 0.06010129302740097, "rewards/rejected": -0.21177789568901062, "step": 503 }, { "epoch": 0.7943262411347518, "grad_norm": 0.18446724116802216, "learning_rate": 3.0237079619110554e-06, "log_odds_chosen": 0.7885653376579285, "log_odds_ratio": -0.38026753067970276, "logits/chosen": -0.22590716183185577, "logits/rejected": -1.1088080406188965, "logps/chosen": -1.4834203720092773, "logps/rejected": -2.1331162452697754, "loss": 1.5404, "nll_loss": 1.502375841140747, "rewards/accuracies": 1.0, "rewards/chosen": -0.14834202826023102, "rewards/margins": 0.06496960669755936, "rewards/rejected": -0.21331164240837097, "step": 504 }, { "epoch": 0.7959022852639874, "grad_norm": 0.18605737388134003, "learning_rate": 3.0189816937852976e-06, "log_odds_chosen": 0.7678451538085938, "log_odds_ratio": -0.3883986473083496, "logits/chosen": -0.31122487783432007, "logits/rejected": -1.0024007558822632, "logps/chosen": -1.4656697511672974, "logps/rejected": -2.099670648574829, "loss": 1.5128, "nll_loss": 1.4739203453063965, "rewards/accuracies": 1.0, "rewards/chosen": -0.1465669721364975, "rewards/margins": 0.06340008974075317, "rewards/rejected": -0.20996706187725067, "step": 505 }, { "epoch": 0.797478329393223, "grad_norm": 0.18825288116931915, "learning_rate": 3.014247727739546e-06, "log_odds_chosen": 0.8952450752258301, "log_odds_ratio": -0.3497043251991272, "logits/chosen": -0.2389240562915802, "logits/rejected": -0.9752056002616882, "logps/chosen": -1.4637818336486816, "logps/rejected": -2.2115120887756348, "loss": 1.5203, "nll_loss": 1.485325574874878, "rewards/accuracies": 1.0, "rewards/chosen": -0.14637817442417145, "rewards/margins": 0.07477304339408875, "rewards/rejected": -0.2211512327194214, "step": 506 }, { "epoch": 0.7990543735224587, "grad_norm": 0.21791616082191467, "learning_rate": 3.009506099536653e-06, "log_odds_chosen": 0.6312094926834106, "log_odds_ratio": -0.4294767677783966, "logits/chosen": -0.19417151808738708, "logits/rejected": -1.0658773183822632, "logps/chosen": -1.5206272602081299, "logps/rejected": -2.0441548824310303, "loss": 1.5612, "nll_loss": 1.5182427167892456, "rewards/accuracies": 1.0, "rewards/chosen": -0.15206271409988403, "rewards/margins": 0.052352771162986755, "rewards/rejected": -0.2044154852628708, "step": 507 }, { "epoch": 0.8006304176516943, "grad_norm": 0.17880584299564362, "learning_rate": 3.0047568449973544e-06, "log_odds_chosen": 0.845009446144104, "log_odds_ratio": -0.36556166410446167, "logits/chosen": -0.27598047256469727, "logits/rejected": -0.9906629323959351, "logps/chosen": -1.3740437030792236, "logps/rejected": -2.063204050064087, "loss": 1.4355, "nll_loss": 1.3989356756210327, "rewards/accuracies": 1.0, "rewards/chosen": -0.13740436732769012, "rewards/margins": 0.06891604512929916, "rewards/rejected": -0.2063204050064087, "step": 508 }, { "epoch": 0.8022064617809299, "grad_norm": 0.19696396589279175, "learning_rate": 3e-06, "log_odds_chosen": 0.5537830591201782, "log_odds_ratio": -0.46190646290779114, "logits/chosen": -0.24417981505393982, "logits/rejected": -0.8974269032478333, "logps/chosen": -1.4498118162155151, "logps/rejected": -1.8991758823394775, "loss": 1.5135, "nll_loss": 1.4673458337783813, "rewards/accuracies": 1.0, "rewards/chosen": -0.14498119056224823, "rewards/margins": 0.044936403632164, "rewards/rejected": -0.18991759419441223, "step": 509 }, { "epoch": 0.8037825059101655, "grad_norm": 0.18351569771766663, "learning_rate": 2.9952356004802813e-06, "log_odds_chosen": 0.4943296015262604, "log_odds_ratio": -0.47951164841651917, "logits/chosen": -0.2026272565126419, "logits/rejected": -0.9693791270256042, "logps/chosen": -1.5497561693191528, "logps/rejected": -1.9583263397216797, "loss": 1.6012, "nll_loss": 1.5532541275024414, "rewards/accuracies": 1.0, "rewards/chosen": -0.15497562289237976, "rewards/margins": 0.04085702449083328, "rewards/rejected": -0.19583263993263245, "step": 510 }, { "epoch": 0.8053585500394012, "grad_norm": 0.182563915848732, "learning_rate": 2.9904636824309625e-06, "log_odds_chosen": 0.5191973447799683, "log_odds_ratio": -0.4684891700744629, "logits/chosen": -0.22202052175998688, "logits/rejected": -0.7160850763320923, "logps/chosen": -1.4603489637374878, "logps/rejected": -1.8831582069396973, "loss": 1.5243, "nll_loss": 1.477461576461792, "rewards/accuracies": 1.0, "rewards/chosen": -0.14603488147258759, "rewards/margins": 0.04228094220161438, "rewards/rejected": -0.18831583857536316, "step": 511 }, { "epoch": 0.8069345941686368, "grad_norm": 0.20777921378612518, "learning_rate": 2.985684281901603e-06, "log_odds_chosen": 0.5473999977111816, "log_odds_ratio": -0.4622959494590759, "logits/chosen": -0.1920110136270523, "logits/rejected": -0.8571043610572815, "logps/chosen": -1.5817649364471436, "logps/rejected": -2.035865306854248, "loss": 1.6274, "nll_loss": 1.5811474323272705, "rewards/accuracies": 1.0, "rewards/chosen": -0.1581764966249466, "rewards/margins": 0.04541003704071045, "rewards/rejected": -0.20358653366565704, "step": 512 }, { "epoch": 0.8085106382978723, "grad_norm": 0.2024535834789276, "learning_rate": 2.980897434998293e-06, "log_odds_chosen": 0.8001173734664917, "log_odds_ratio": -0.37829720973968506, "logits/chosen": -0.2540290057659149, "logits/rejected": -0.8866338729858398, "logps/chosen": -1.4272159337997437, "logps/rejected": -2.082188129425049, "loss": 1.4861, "nll_loss": 1.4482626914978027, "rewards/accuracies": 1.0, "rewards/chosen": -0.14272159337997437, "rewards/margins": 0.06549722701311111, "rewards/rejected": -0.20821882784366608, "step": 513 }, { "epoch": 0.8100866824271079, "grad_norm": 0.21344104409217834, "learning_rate": 2.976103177883374e-06, "log_odds_chosen": 0.5494855642318726, "log_odds_ratio": -0.4689217805862427, "logits/chosen": -0.2017340213060379, "logits/rejected": -0.7928360104560852, "logps/chosen": -1.5780706405639648, "logps/rejected": -2.041253089904785, "loss": 1.6319, "nll_loss": 1.5850133895874023, "rewards/accuracies": 1.0, "rewards/chosen": -0.15780706703662872, "rewards/margins": 0.046318259090185165, "rewards/rejected": -0.204125314950943, "step": 514 }, { "epoch": 0.8116627265563435, "grad_norm": 0.1840059459209442, "learning_rate": 2.971301546775167e-06, "log_odds_chosen": 0.7217794060707092, "log_odds_ratio": -0.3982866406440735, "logits/chosen": -0.2633856236934662, "logits/rejected": -1.0886204242706299, "logps/chosen": -1.481081485748291, "logps/rejected": -2.0756947994232178, "loss": 1.5421, "nll_loss": 1.5022910833358765, "rewards/accuracies": 1.0, "rewards/chosen": -0.14810813963413239, "rewards/margins": 0.059461336582899094, "rewards/rejected": -0.20756947994232178, "step": 515 }, { "epoch": 0.8132387706855791, "grad_norm": 0.19448216259479523, "learning_rate": 2.966492577947704e-06, "log_odds_chosen": 0.648414134979248, "log_odds_ratio": -0.42430779337882996, "logits/chosen": -0.23204441368579865, "logits/rejected": -1.0738632678985596, "logps/chosen": -1.5934054851531982, "logps/rejected": -2.139219284057617, "loss": 1.658, "nll_loss": 1.6155749559402466, "rewards/accuracies": 1.0, "rewards/chosen": -0.15934054553508759, "rewards/margins": 0.05458138883113861, "rewards/rejected": -0.2139219343662262, "step": 516 }, { "epoch": 0.8148148148148148, "grad_norm": 0.1880371868610382, "learning_rate": 2.9616763077304457e-06, "log_odds_chosen": 0.7537488341331482, "log_odds_ratio": -0.39062389731407166, "logits/chosen": -0.18156388401985168, "logits/rejected": -0.9598879814147949, "logps/chosen": -1.5222996473312378, "logps/rejected": -2.1508047580718994, "loss": 1.5898, "nll_loss": 1.5507152080535889, "rewards/accuracies": 1.0, "rewards/chosen": -0.15222994983196259, "rewards/margins": 0.06285052001476288, "rewards/rejected": -0.21508046984672546, "step": 517 }, { "epoch": 0.8163908589440504, "grad_norm": 0.1826397031545639, "learning_rate": 2.956852772508014e-06, "log_odds_chosen": 0.5726032257080078, "log_odds_ratio": -0.45972949266433716, "logits/chosen": -0.19936296343803406, "logits/rejected": -0.7047120928764343, "logps/chosen": -1.4242123365402222, "logps/rejected": -1.8930351734161377, "loss": 1.4857, "nll_loss": 1.4396919012069702, "rewards/accuracies": 1.0, "rewards/chosen": -0.14242123067378998, "rewards/margins": 0.04688228294253349, "rewards/rejected": -0.18930353224277496, "step": 518 }, { "epoch": 0.817966903073286, "grad_norm": 0.21100375056266785, "learning_rate": 2.952022008719914e-06, "log_odds_chosen": 0.4501050114631653, "log_odds_ratio": -0.4966769218444824, "logits/chosen": -0.19454988837242126, "logits/rejected": -0.7527884840965271, "logps/chosen": -1.5709049701690674, "logps/rejected": -1.9432517290115356, "loss": 1.625, "nll_loss": 1.5753822326660156, "rewards/accuracies": 1.0, "rewards/chosen": -0.15709049999713898, "rewards/margins": 0.03723466396331787, "rewards/rejected": -0.19432517886161804, "step": 519 }, { "epoch": 0.8195429472025216, "grad_norm": 0.1806715875864029, "learning_rate": 2.9471840528602573e-06, "log_odds_chosen": 0.6187878847122192, "log_odds_ratio": -0.4403775632381439, "logits/chosen": -0.3357143700122833, "logits/rejected": -0.8219318985939026, "logps/chosen": -1.5059443712234497, "logps/rejected": -2.0166878700256348, "loss": 1.5589, "nll_loss": 1.5149109363555908, "rewards/accuracies": 1.0, "rewards/chosen": -0.15059442818164825, "rewards/margins": 0.05107436329126358, "rewards/rejected": -0.20166879892349243, "step": 520 }, { "epoch": 0.8211189913317573, "grad_norm": 0.19994302093982697, "learning_rate": 2.9423389414774914e-06, "log_odds_chosen": 0.6800110340118408, "log_odds_ratio": -0.4156605005264282, "logits/chosen": -0.3176228106021881, "logits/rejected": -0.9663759469985962, "logps/chosen": -1.4739947319030762, "logps/rejected": -2.0357561111450195, "loss": 1.5316, "nll_loss": 1.4900025129318237, "rewards/accuracies": 1.0, "rewards/chosen": -0.14739948511123657, "rewards/margins": 0.05617612600326538, "rewards/rejected": -0.20357561111450195, "step": 521 }, { "epoch": 0.8226950354609929, "grad_norm": 0.18830455839633942, "learning_rate": 2.9374867111741174e-06, "log_odds_chosen": 0.6718218326568604, "log_odds_ratio": -0.4173552393913269, "logits/chosen": -0.1854146420955658, "logits/rejected": -1.0388704538345337, "logps/chosen": -1.4987577199935913, "logps/rejected": -2.0536603927612305, "loss": 1.5485, "nll_loss": 1.5067765712738037, "rewards/accuracies": 1.0, "rewards/chosen": -0.14987577497959137, "rewards/margins": 0.055490292608737946, "rewards/rejected": -0.20536606013774872, "step": 522 }, { "epoch": 0.8242710795902285, "grad_norm": 0.24118775129318237, "learning_rate": 2.9326273986064177e-06, "log_odds_chosen": 0.7184907793998718, "log_odds_ratio": -0.40271103382110596, "logits/chosen": -0.2235778570175171, "logits/rejected": -0.9474300742149353, "logps/chosen": -1.5202974081039429, "logps/rejected": -2.1176466941833496, "loss": 1.5825, "nll_loss": 1.5421916246414185, "rewards/accuracies": 1.0, "rewards/chosen": -0.15202973783016205, "rewards/margins": 0.05973491817712784, "rewards/rejected": -0.21176467835903168, "step": 523 }, { "epoch": 0.8258471237194641, "grad_norm": 0.21040187776088715, "learning_rate": 2.9277610404841787e-06, "log_odds_chosen": 0.5722682476043701, "log_odds_ratio": -0.4491087794303894, "logits/chosen": -0.2672809660434723, "logits/rejected": -0.812029242515564, "logps/chosen": -1.4958889484405518, "logps/rejected": -1.9657450914382935, "loss": 1.5532, "nll_loss": 1.5083181858062744, "rewards/accuracies": 1.0, "rewards/chosen": -0.14958889782428741, "rewards/margins": 0.04698561877012253, "rewards/rejected": -0.19657449424266815, "step": 524 }, { "epoch": 0.8274231678486997, "grad_norm": 0.19844910502433777, "learning_rate": 2.9228876735704107e-06, "log_odds_chosen": 0.5255635976791382, "log_odds_ratio": -0.46768850088119507, "logits/chosen": -0.25896498560905457, "logits/rejected": -0.9287459850311279, "logps/chosen": -1.3910770416259766, "logps/rejected": -1.8074634075164795, "loss": 1.46, "nll_loss": 1.413203239440918, "rewards/accuracies": 1.0, "rewards/chosen": -0.13910770416259766, "rewards/margins": 0.041638631373643875, "rewards/rejected": -0.18074636161327362, "step": 525 }, { "epoch": 0.8289992119779354, "grad_norm": 0.208455428481102, "learning_rate": 2.9180073346810738e-06, "log_odds_chosen": 0.5423698425292969, "log_odds_ratio": -0.461501806974411, "logits/chosen": -0.2490745484828949, "logits/rejected": -0.9533830285072327, "logps/chosen": -1.657263159751892, "logps/rejected": -2.1163740158081055, "loss": 1.6921, "nll_loss": 1.6459681987762451, "rewards/accuracies": 1.0, "rewards/chosen": -0.16572631895542145, "rewards/margins": 0.045911066234111786, "rewards/rejected": -0.21163739264011383, "step": 526 }, { "epoch": 0.830575256107171, "grad_norm": 0.18426814675331116, "learning_rate": 2.9131200606847957e-06, "log_odds_chosen": 0.6018238067626953, "log_odds_ratio": -0.4452371597290039, "logits/chosen": -0.18675662577152252, "logits/rejected": -0.92929607629776, "logps/chosen": -1.525328516960144, "logps/rejected": -2.024035692214966, "loss": 1.5812, "nll_loss": 1.5366522073745728, "rewards/accuracies": 1.0, "rewards/chosen": -0.15253286063671112, "rewards/margins": 0.049870721995830536, "rewards/rejected": -0.20240359008312225, "step": 527 }, { "epoch": 0.8321513002364066, "grad_norm": 0.19409975409507751, "learning_rate": 2.9082258885025995e-06, "log_odds_chosen": 0.6873412132263184, "log_odds_ratio": -0.41440120339393616, "logits/chosen": -0.3409496545791626, "logits/rejected": -0.9762820601463318, "logps/chosen": -1.5744646787643433, "logps/rejected": -2.1509451866149902, "loss": 1.6173, "nll_loss": 1.5758929252624512, "rewards/accuracies": 1.0, "rewards/chosen": -0.1574464738368988, "rewards/margins": 0.05764804407954216, "rewards/rejected": -0.21509452164173126, "step": 528 }, { "epoch": 0.8337273443656422, "grad_norm": 0.19676561653614044, "learning_rate": 2.9033248551076167e-06, "log_odds_chosen": 0.5434961915016174, "log_odds_ratio": -0.4601861238479614, "logits/chosen": -0.18111221492290497, "logits/rejected": -0.7811101675033569, "logps/chosen": -1.6350023746490479, "logps/rejected": -2.0918056964874268, "loss": 1.6862, "nll_loss": 1.6402305364608765, "rewards/accuracies": 1.0, "rewards/chosen": -0.16350023448467255, "rewards/margins": 0.045680344104766846, "rewards/rejected": -0.2091805636882782, "step": 529 }, { "epoch": 0.8353033884948778, "grad_norm": 0.1952933520078659, "learning_rate": 2.8984169975248138e-06, "log_odds_chosen": 0.7148233652114868, "log_odds_ratio": -0.402948796749115, "logits/chosen": -0.24029017984867096, "logits/rejected": -0.8942462205886841, "logps/chosen": -1.4987061023712158, "logps/rejected": -2.0888586044311523, "loss": 1.5633, "nll_loss": 1.523031234741211, "rewards/accuracies": 1.0, "rewards/chosen": -0.1498706191778183, "rewards/margins": 0.059015266597270966, "rewards/rejected": -0.20888587832450867, "step": 530 }, { "epoch": 0.8368794326241135, "grad_norm": 0.19431234896183014, "learning_rate": 2.893502352830712e-06, "log_odds_chosen": 0.8639093041419983, "log_odds_ratio": -0.35711535811424255, "logits/chosen": -0.2862634062767029, "logits/rejected": -1.0381038188934326, "logps/chosen": -1.404466986656189, "logps/rejected": -2.11556339263916, "loss": 1.4509, "nll_loss": 1.4152060747146606, "rewards/accuracies": 1.0, "rewards/chosen": -0.1404467076063156, "rewards/margins": 0.07110964506864548, "rewards/rejected": -0.21155637502670288, "step": 531 }, { "epoch": 0.8384554767533491, "grad_norm": 0.18546882271766663, "learning_rate": 2.888580958153103e-06, "log_odds_chosen": 0.6866245269775391, "log_odds_ratio": -0.4116785526275635, "logits/chosen": -0.28869181871414185, "logits/rejected": -1.0059574842453003, "logps/chosen": -1.4527959823608398, "logps/rejected": -2.0164341926574707, "loss": 1.5201, "nll_loss": 1.4789022207260132, "rewards/accuracies": 1.0, "rewards/chosen": -0.14527958631515503, "rewards/margins": 0.05636381730437279, "rewards/rejected": -0.20164339244365692, "step": 532 }, { "epoch": 0.8400315208825847, "grad_norm": 0.19686128199100494, "learning_rate": 2.8836528506707733e-06, "log_odds_chosen": 0.7787541747093201, "log_odds_ratio": -0.3828950524330139, "logits/chosen": -0.27011820673942566, "logits/rejected": -0.9735762476921082, "logps/chosen": -1.5172505378723145, "logps/rejected": -2.170649528503418, "loss": 1.5655, "nll_loss": 1.5271790027618408, "rewards/accuracies": 1.0, "rewards/chosen": -0.15172503888607025, "rewards/margins": 0.06533990055322647, "rewards/rejected": -0.2170649617910385, "step": 533 }, { "epoch": 0.8416075650118203, "grad_norm": 0.18512630462646484, "learning_rate": 2.878718067613222e-06, "log_odds_chosen": 0.5752986669540405, "log_odds_ratio": -0.45054376125335693, "logits/chosen": -0.22947821021080017, "logits/rejected": -0.9168241620063782, "logps/chosen": -1.5729241371154785, "logps/rejected": -2.0529274940490723, "loss": 1.6249, "nll_loss": 1.5798618793487549, "rewards/accuracies": 1.0, "rewards/chosen": -0.1572924107313156, "rewards/margins": 0.04800035431981087, "rewards/rejected": -0.205292746424675, "step": 534 }, { "epoch": 0.843183609141056, "grad_norm": 0.19882473349571228, "learning_rate": 2.8737766462603763e-06, "log_odds_chosen": 0.6943291425704956, "log_odds_ratio": -0.4189774990081787, "logits/chosen": -0.22633996605873108, "logits/rejected": -0.7776287198066711, "logps/chosen": -1.5123060941696167, "logps/rejected": -2.0951101779937744, "loss": 1.5618, "nll_loss": 1.5198948383331299, "rewards/accuracies": 1.0, "rewards/chosen": -0.1512306183576584, "rewards/margins": 0.05828040838241577, "rewards/rejected": -0.20951102674007416, "step": 535 }, { "epoch": 0.8447596532702916, "grad_norm": 0.18347592651844025, "learning_rate": 2.8688286239423167e-06, "log_odds_chosen": 0.5979099869728088, "log_odds_ratio": -0.4487925171852112, "logits/chosen": -0.22389663755893707, "logits/rejected": -0.9340906739234924, "logps/chosen": -1.5493533611297607, "logps/rejected": -2.0505897998809814, "loss": 1.5918, "nll_loss": 1.5469051599502563, "rewards/accuracies": 1.0, "rewards/chosen": -0.1549353450536728, "rewards/margins": 0.050123654305934906, "rewards/rejected": -0.2050590068101883, "step": 536 }, { "epoch": 0.8463356973995272, "grad_norm": 0.18582738935947418, "learning_rate": 2.8638740380389862e-06, "log_odds_chosen": 0.6674666404724121, "log_odds_ratio": -0.4190034568309784, "logits/chosen": -0.2528231739997864, "logits/rejected": -1.03038489818573, "logps/chosen": -1.4592210054397583, "logps/rejected": -2.0057992935180664, "loss": 1.5218, "nll_loss": 1.479940414428711, "rewards/accuracies": 1.0, "rewards/chosen": -0.14592207968235016, "rewards/margins": 0.05465785413980484, "rewards/rejected": -0.2005799412727356, "step": 537 }, { "epoch": 0.8479117415287628, "grad_norm": 0.1874462068080902, "learning_rate": 2.8589129259799164e-06, "log_odds_chosen": 0.7556334733963013, "log_odds_ratio": -0.39815816283226013, "logits/chosen": -0.2480260729789734, "logits/rejected": -0.8977173566818237, "logps/chosen": -1.5349764823913574, "logps/rejected": -2.1701834201812744, "loss": 1.5802, "nll_loss": 1.5403693914413452, "rewards/accuracies": 1.0, "rewards/chosen": -0.15349766612052917, "rewards/margins": 0.06352068483829498, "rewards/rejected": -0.21701833605766296, "step": 538 }, { "epoch": 0.8494877856579984, "grad_norm": 0.18529048562049866, "learning_rate": 2.853945325243938e-06, "log_odds_chosen": 0.6124276518821716, "log_odds_ratio": -0.4346122741699219, "logits/chosen": -0.26516175270080566, "logits/rejected": -0.908424437046051, "logps/chosen": -1.5476510524749756, "logps/rejected": -2.0556130409240723, "loss": 1.5934, "nll_loss": 1.549971342086792, "rewards/accuracies": 1.0, "rewards/chosen": -0.15476511418819427, "rewards/margins": 0.05079619213938713, "rewards/rejected": -0.2055612951517105, "step": 539 }, { "epoch": 0.851063829787234, "grad_norm": 0.18703651428222656, "learning_rate": 2.848971273358903e-06, "log_odds_chosen": 0.7656717896461487, "log_odds_ratio": -0.3846908211708069, "logits/chosen": -0.3264932632446289, "logits/rejected": -0.9102271795272827, "logps/chosen": -1.4829479455947876, "logps/rejected": -2.118494749069214, "loss": 1.5145, "nll_loss": 1.4760135412216187, "rewards/accuracies": 1.0, "rewards/chosen": -0.14829479157924652, "rewards/margins": 0.06355466693639755, "rewards/rejected": -0.21184945106506348, "step": 540 }, { "epoch": 0.8526398739164697, "grad_norm": 0.2110549807548523, "learning_rate": 2.843990807901397e-06, "log_odds_chosen": 0.5705313682556152, "log_odds_ratio": -0.45376867055892944, "logits/chosen": -0.23610883951187134, "logits/rejected": -0.6957866549491882, "logps/chosen": -1.5706590414047241, "logps/rejected": -2.0457687377929688, "loss": 1.6237, "nll_loss": 1.5782999992370605, "rewards/accuracies": 1.0, "rewards/chosen": -0.15706589818000793, "rewards/margins": 0.047510966658592224, "rewards/rejected": -0.20457686483860016, "step": 541 }, { "epoch": 0.8542159180457053, "grad_norm": 0.245357483625412, "learning_rate": 2.839003966496458e-06, "log_odds_chosen": 0.7403507232666016, "log_odds_ratio": -0.4056608974933624, "logits/chosen": -0.15189459919929504, "logits/rejected": -0.9524875283241272, "logps/chosen": -1.530955195426941, "logps/rejected": -2.1495132446289062, "loss": 1.5886, "nll_loss": 1.5479965209960938, "rewards/accuracies": 1.0, "rewards/chosen": -0.15309551358222961, "rewards/margins": 0.06185580790042877, "rewards/rejected": -0.21495133638381958, "step": 542 }, { "epoch": 0.8557919621749409, "grad_norm": 0.18227285146713257, "learning_rate": 2.8340107868172905e-06, "log_odds_chosen": 0.7201700210571289, "log_odds_ratio": -0.40194499492645264, "logits/chosen": -0.25142914056777954, "logits/rejected": -0.9272531270980835, "logps/chosen": -1.3741106986999512, "logps/rejected": -1.9591838121414185, "loss": 1.4317, "nll_loss": 1.3915280103683472, "rewards/accuracies": 1.0, "rewards/chosen": -0.13741108775138855, "rewards/margins": 0.05850730091333389, "rewards/rejected": -0.19591839611530304, "step": 543 }, { "epoch": 0.8573680063041765, "grad_norm": 0.17307905852794647, "learning_rate": 2.8290113065849826e-06, "log_odds_chosen": 0.7753461599349976, "log_odds_ratio": -0.38507741689682007, "logits/chosen": -0.2292054295539856, "logits/rejected": -1.118272304534912, "logps/chosen": -1.4850552082061768, "logps/rejected": -2.1288723945617676, "loss": 1.5405, "nll_loss": 1.5019530057907104, "rewards/accuracies": 1.0, "rewards/chosen": -0.14850552380084991, "rewards/margins": 0.06438171863555908, "rewards/rejected": -0.212887242436409, "step": 544 }, { "epoch": 0.8589440504334122, "grad_norm": 0.19122162461280823, "learning_rate": 2.8240055635682193e-06, "log_odds_chosen": 0.8948450088500977, "log_odds_ratio": -0.3542863428592682, "logits/chosen": -0.26844245195388794, "logits/rejected": -1.008026123046875, "logps/chosen": -1.4540432691574097, "logps/rejected": -2.199685573577881, "loss": 1.4922, "nll_loss": 1.456789493560791, "rewards/accuracies": 1.0, "rewards/chosen": -0.14540432393550873, "rewards/margins": 0.07456424832344055, "rewards/rejected": -0.21996855735778809, "step": 545 }, { "epoch": 0.8605200945626478, "grad_norm": 0.19578073918819427, "learning_rate": 2.8189935955829973e-06, "log_odds_chosen": 0.8017619848251343, "log_odds_ratio": -0.37794774770736694, "logits/chosen": -0.23730486631393433, "logits/rejected": -0.9040564298629761, "logps/chosen": -1.513983964920044, "logps/rejected": -2.187089681625366, "loss": 1.5588, "nll_loss": 1.5209602117538452, "rewards/accuracies": 1.0, "rewards/chosen": -0.15139839053153992, "rewards/margins": 0.06731057912111282, "rewards/rejected": -0.21870897710323334, "step": 546 }, { "epoch": 0.8620961386918834, "grad_norm": 0.18498314917087555, "learning_rate": 2.813975440492342e-06, "log_odds_chosen": 0.7070278525352478, "log_odds_ratio": -0.4100656509399414, "logits/chosen": -0.27136483788490295, "logits/rejected": -0.9217202663421631, "logps/chosen": -1.5072296857833862, "logps/rejected": -2.0933709144592285, "loss": 1.5681, "nll_loss": 1.5270682573318481, "rewards/accuracies": 1.0, "rewards/chosen": -0.15072298049926758, "rewards/margins": 0.058614134788513184, "rewards/rejected": -0.20933710038661957, "step": 547 }, { "epoch": 0.863672182821119, "grad_norm": 0.20699243247509003, "learning_rate": 2.8089511362060182e-06, "log_odds_chosen": 0.5432732105255127, "log_odds_ratio": -0.46091192960739136, "logits/chosen": -0.27546390891075134, "logits/rejected": -0.8888724446296692, "logps/chosen": -1.6448028087615967, "logps/rejected": -2.1047120094299316, "loss": 1.6727, "nll_loss": 1.6266342401504517, "rewards/accuracies": 1.0, "rewards/chosen": -0.1644802838563919, "rewards/margins": 0.04599091410636902, "rewards/rejected": -0.21047121286392212, "step": 548 }, { "epoch": 0.8652482269503546, "grad_norm": 0.19648247957229614, "learning_rate": 2.803920720680244e-06, "log_odds_chosen": 0.6895655393600464, "log_odds_ratio": -0.4106389880180359, "logits/chosen": -0.2301250696182251, "logits/rejected": -0.9588841795921326, "logps/chosen": -1.5039875507354736, "logps/rejected": -2.0738420486450195, "loss": 1.5566, "nll_loss": 1.5155189037322998, "rewards/accuracies": 1.0, "rewards/chosen": -0.15039876103401184, "rewards/margins": 0.056985460221767426, "rewards/rejected": -0.20738422870635986, "step": 549 }, { "epoch": 0.8668242710795903, "grad_norm": 0.20175597071647644, "learning_rate": 2.7988842319174075e-06, "log_odds_chosen": 0.7359188199043274, "log_odds_ratio": -0.40029722452163696, "logits/chosen": -0.35446590185165405, "logits/rejected": -1.0895055532455444, "logps/chosen": -1.573097825050354, "logps/rejected": -2.1937241554260254, "loss": 1.6089, "nll_loss": 1.5688972473144531, "rewards/accuracies": 1.0, "rewards/chosen": -0.15730977058410645, "rewards/margins": 0.06206265091896057, "rewards/rejected": -0.21937242150306702, "step": 550 }, { "epoch": 0.8684003152088259, "grad_norm": 0.1791771799325943, "learning_rate": 2.7938417079657743e-06, "log_odds_chosen": 0.7481501698493958, "log_odds_ratio": -0.3928907811641693, "logits/chosen": -0.29553061723709106, "logits/rejected": -1.0559701919555664, "logps/chosen": -1.4160282611846924, "logps/rejected": -2.0292470455169678, "loss": 1.4711, "nll_loss": 1.4318538904190063, "rewards/accuracies": 1.0, "rewards/chosen": -0.14160282909870148, "rewards/margins": 0.06132189929485321, "rewards/rejected": -0.2029247134923935, "step": 551 }, { "epoch": 0.8699763593380615, "grad_norm": 0.174880251288414, "learning_rate": 2.7887931869192047e-06, "log_odds_chosen": 0.6612383127212524, "log_odds_ratio": -0.41974276304244995, "logits/chosen": -0.3128218948841095, "logits/rejected": -0.9789490699768066, "logps/chosen": -1.4977728128433228, "logps/rejected": -2.044727325439453, "loss": 1.5458, "nll_loss": 1.5038683414459229, "rewards/accuracies": 1.0, "rewards/chosen": -0.14977729320526123, "rewards/margins": 0.05469541996717453, "rewards/rejected": -0.20447272062301636, "step": 552 }, { "epoch": 0.8715524034672971, "grad_norm": 0.20123504102230072, "learning_rate": 2.783738706916865e-06, "log_odds_chosen": 0.7667443156242371, "log_odds_ratio": -0.3888227641582489, "logits/chosen": -0.25346291065216064, "logits/rejected": -1.1120731830596924, "logps/chosen": -1.5336663722991943, "logps/rejected": -2.1720023155212402, "loss": 1.5728, "nll_loss": 1.53391695022583, "rewards/accuracies": 1.0, "rewards/chosen": -0.15336662530899048, "rewards/margins": 0.06383360177278519, "rewards/rejected": -0.21720023453235626, "step": 553 }, { "epoch": 0.8731284475965327, "grad_norm": 0.18896009027957916, "learning_rate": 2.7786783061429356e-06, "log_odds_chosen": 0.6369379758834839, "log_odds_ratio": -0.4261271357536316, "logits/chosen": -0.3290690779685974, "logits/rejected": -0.8201688528060913, "logps/chosen": -1.470665693283081, "logps/rejected": -1.9926190376281738, "loss": 1.5066, "nll_loss": 1.464026689529419, "rewards/accuracies": 1.0, "rewards/chosen": -0.14706656336784363, "rewards/margins": 0.05219534412026405, "rewards/rejected": -0.19926190376281738, "step": 554 }, { "epoch": 0.8747044917257684, "grad_norm": 0.1996404379606247, "learning_rate": 2.7736120228263287e-06, "log_odds_chosen": 0.771382749080658, "log_odds_ratio": -0.3887188136577606, "logits/chosen": -0.2468704730272293, "logits/rejected": -0.9418681859970093, "logps/chosen": -1.4597346782684326, "logps/rejected": -2.0989632606506348, "loss": 1.5108, "nll_loss": 1.47196364402771, "rewards/accuracies": 1.0, "rewards/chosen": -0.14597345888614655, "rewards/margins": 0.06392288208007812, "rewards/rejected": -0.20989632606506348, "step": 555 }, { "epoch": 0.876280535855004, "grad_norm": 0.17402994632720947, "learning_rate": 2.768539895240394e-06, "log_odds_chosen": 0.7088902592658997, "log_odds_ratio": -0.4131091833114624, "logits/chosen": -0.27824634313583374, "logits/rejected": -1.0621111392974854, "logps/chosen": -1.4249601364135742, "logps/rejected": -2.0007967948913574, "loss": 1.4746, "nll_loss": 1.4332572221755981, "rewards/accuracies": 1.0, "rewards/chosen": -0.1424960196018219, "rewards/margins": 0.05758364126086235, "rewards/rejected": -0.20007966458797455, "step": 556 }, { "epoch": 0.8778565799842396, "grad_norm": 0.19509798288345337, "learning_rate": 2.763461961702633e-06, "log_odds_chosen": 0.7418805360794067, "log_odds_ratio": -0.3986319303512573, "logits/chosen": -0.28149279952049255, "logits/rejected": -1.0734108686447144, "logps/chosen": -1.5373849868774414, "logps/rejected": -2.159186363220215, "loss": 1.5602, "nll_loss": 1.520325779914856, "rewards/accuracies": 1.0, "rewards/chosen": -0.15373851358890533, "rewards/margins": 0.06218012422323227, "rewards/rejected": -0.2159186154603958, "step": 557 }, { "epoch": 0.8794326241134752, "grad_norm": 0.19465316832065582, "learning_rate": 2.758378260574409e-06, "log_odds_chosen": 0.6508104205131531, "log_odds_ratio": -0.4230552315711975, "logits/chosen": -0.26566624641418457, "logits/rejected": -0.9579252004623413, "logps/chosen": -1.5240145921707153, "logps/rejected": -2.0630476474761963, "loss": 1.5823, "nll_loss": 1.5399930477142334, "rewards/accuracies": 1.0, "rewards/chosen": -0.15240147709846497, "rewards/margins": 0.05390328913927078, "rewards/rejected": -0.20630475878715515, "step": 558 }, { "epoch": 0.8810086682427108, "grad_norm": 0.18553242087364197, "learning_rate": 2.753288830260655e-06, "log_odds_chosen": 0.9022888541221619, "log_odds_ratio": -0.35411563515663147, "logits/chosen": -0.2562481760978699, "logits/rejected": -1.1569766998291016, "logps/chosen": -1.519883155822754, "logps/rejected": -2.2826790809631348, "loss": 1.5574, "nll_loss": 1.5220351219177246, "rewards/accuracies": 1.0, "rewards/chosen": -0.15198831260204315, "rewards/margins": 0.07627959549427032, "rewards/rejected": -0.22826790809631348, "step": 559 }, { "epoch": 0.8825847123719465, "grad_norm": 0.1907990574836731, "learning_rate": 2.7481937092095866e-06, "log_odds_chosen": 0.46901825070381165, "log_odds_ratio": -0.49443766474723816, "logits/chosen": -0.2248474508523941, "logits/rejected": -0.9241709113121033, "logps/chosen": -1.5699660778045654, "logps/rejected": -1.9645986557006836, "loss": 1.6042, "nll_loss": 1.5547926425933838, "rewards/accuracies": 1.0, "rewards/chosen": -0.15699660778045654, "rewards/margins": 0.039463259279727936, "rewards/rejected": -0.19645987451076508, "step": 560 }, { "epoch": 0.8841607565011821, "grad_norm": 0.18569010496139526, "learning_rate": 2.7430929359124086e-06, "log_odds_chosen": 0.5979553461074829, "log_odds_ratio": -0.44180724024772644, "logits/chosen": -0.28195708990097046, "logits/rejected": -0.7758825421333313, "logps/chosen": -1.4177727699279785, "logps/rejected": -1.9020490646362305, "loss": 1.487, "nll_loss": 1.4427917003631592, "rewards/accuracies": 1.0, "rewards/chosen": -0.14177727699279785, "rewards/margins": 0.04842764884233475, "rewards/rejected": -0.190204918384552, "step": 561 }, { "epoch": 0.8857368006304176, "grad_norm": 0.1999945193529129, "learning_rate": 2.737986548903029e-06, "log_odds_chosen": 0.9901692271232605, "log_odds_ratio": -0.3190964460372925, "logits/chosen": -0.47356563806533813, "logits/rejected": -1.037635326385498, "logps/chosen": -1.394824504852295, "logps/rejected": -2.215360641479492, "loss": 1.4528, "nll_loss": 1.420884132385254, "rewards/accuracies": 1.0, "rewards/chosen": -0.13948245346546173, "rewards/margins": 0.08205362409353256, "rewards/rejected": -0.2215360552072525, "step": 562 }, { "epoch": 0.8873128447596532, "grad_norm": 0.1902477741241455, "learning_rate": 2.7328745867577604e-06, "log_odds_chosen": 0.5759560465812683, "log_odds_ratio": -0.4480469822883606, "logits/chosen": -0.2436152994632721, "logits/rejected": -1.003432273864746, "logps/chosen": -1.5550605058670044, "logps/rejected": -2.0334320068359375, "loss": 1.6093, "nll_loss": 1.5645390748977661, "rewards/accuracies": 1.0, "rewards/chosen": -0.15550604462623596, "rewards/margins": 0.047837138175964355, "rewards/rejected": -0.20334316790103912, "step": 563 }, { "epoch": 0.8888888888888888, "grad_norm": 0.19590599834918976, "learning_rate": 2.727757088095037e-06, "log_odds_chosen": 0.8063519597053528, "log_odds_ratio": -0.3720436990261078, "logits/chosen": -0.2839893698692322, "logits/rejected": -0.990349292755127, "logps/chosen": -1.5392509698867798, "logps/rejected": -2.2164721488952637, "loss": 1.579, "nll_loss": 1.5417624711990356, "rewards/accuracies": 1.0, "rewards/chosen": -0.1539250910282135, "rewards/margins": 0.0677221268415451, "rewards/rejected": -0.2216472029685974, "step": 564 }, { "epoch": 0.8904649330181245, "grad_norm": 0.18826933205127716, "learning_rate": 2.7226340915751156e-06, "log_odds_chosen": 0.6856199502944946, "log_odds_ratio": -0.41544997692108154, "logits/chosen": -0.2608083188533783, "logits/rejected": -0.875864565372467, "logps/chosen": -1.5443227291107178, "logps/rejected": -2.1181650161743164, "loss": 1.5985, "nll_loss": 1.5569982528686523, "rewards/accuracies": 1.0, "rewards/chosen": -0.1544322669506073, "rewards/margins": 0.05738425254821777, "rewards/rejected": -0.21181651949882507, "step": 565 }, { "epoch": 0.8920409771473601, "grad_norm": 0.18859736621379852, "learning_rate": 2.7175056358997887e-06, "log_odds_chosen": 0.5587659478187561, "log_odds_ratio": -0.4620354175567627, "logits/chosen": -0.18402978777885437, "logits/rejected": -1.086783766746521, "logps/chosen": -1.5183101892471313, "logps/rejected": -1.9805251359939575, "loss": 1.5636, "nll_loss": 1.5173804759979248, "rewards/accuracies": 1.0, "rewards/chosen": -0.1518310159444809, "rewards/margins": 0.046221502125263214, "rewards/rejected": -0.1980525106191635, "step": 566 }, { "epoch": 0.8936170212765957, "grad_norm": 0.18647578358650208, "learning_rate": 2.7123717598120892e-06, "log_odds_chosen": 0.8211861848831177, "log_odds_ratio": -0.3777605891227722, "logits/chosen": -0.37871992588043213, "logits/rejected": -1.0676195621490479, "logps/chosen": -1.4652924537658691, "logps/rejected": -2.153637647628784, "loss": 1.5109, "nll_loss": 1.4731453657150269, "rewards/accuracies": 1.0, "rewards/chosen": -0.14652925729751587, "rewards/margins": 0.06883449852466583, "rewards/rejected": -0.2153637558221817, "step": 567 }, { "epoch": 0.8951930654058313, "grad_norm": 0.18031582236289978, "learning_rate": 2.7072325020959985e-06, "log_odds_chosen": 0.7887471318244934, "log_odds_ratio": -0.3807092010974884, "logits/chosen": -0.2174908071756363, "logits/rejected": -0.855463445186615, "logps/chosen": -1.3680405616760254, "logps/rejected": -2.00773286819458, "loss": 1.4363, "nll_loss": 1.398259162902832, "rewards/accuracies": 1.0, "rewards/chosen": -0.13680405914783478, "rewards/margins": 0.06396923959255219, "rewards/rejected": -0.20077329874038696, "step": 568 }, { "epoch": 0.8967691095350669, "grad_norm": 0.19582530856132507, "learning_rate": 2.702087901576155e-06, "log_odds_chosen": 0.8434200882911682, "log_odds_ratio": -0.3614668846130371, "logits/chosen": -0.16909779608249664, "logits/rejected": -0.9553425908088684, "logps/chosen": -1.5029596090316772, "logps/rejected": -2.2103042602539062, "loss": 1.5319, "nll_loss": 1.495743989944458, "rewards/accuracies": 1.0, "rewards/chosen": -0.15029597282409668, "rewards/margins": 0.07073444128036499, "rewards/rejected": -0.22103042900562286, "step": 569 }, { "epoch": 0.8983451536643026, "grad_norm": 0.19486035406589508, "learning_rate": 2.6969379971175576e-06, "log_odds_chosen": 0.7621928453445435, "log_odds_ratio": -0.3882827162742615, "logits/chosen": -0.2586689293384552, "logits/rejected": -1.1366548538208008, "logps/chosen": -1.550236463546753, "logps/rejected": -2.191896438598633, "loss": 1.599, "nll_loss": 1.5601599216461182, "rewards/accuracies": 1.0, "rewards/chosen": -0.15502366423606873, "rewards/margins": 0.06416598707437515, "rewards/rejected": -0.21918964385986328, "step": 570 }, { "epoch": 0.8999211977935382, "grad_norm": 0.19886843860149384, "learning_rate": 2.6917828276252745e-06, "log_odds_chosen": 0.8460420966148376, "log_odds_ratio": -0.36211562156677246, "logits/chosen": -0.24897100031375885, "logits/rejected": -1.034935474395752, "logps/chosen": -1.4619159698486328, "logps/rejected": -2.162952423095703, "loss": 1.5063, "nll_loss": 1.470069169998169, "rewards/accuracies": 1.0, "rewards/chosen": -0.14619161188602448, "rewards/margins": 0.07010364532470703, "rewards/rejected": -0.2162952423095703, "step": 571 }, { "epoch": 0.9014972419227738, "grad_norm": 0.18841660022735596, "learning_rate": 2.686622432044149e-06, "log_odds_chosen": 0.6772502064704895, "log_odds_ratio": -0.4151155352592468, "logits/chosen": -0.25014546513557434, "logits/rejected": -1.0090041160583496, "logps/chosen": -1.4051274061203003, "logps/rejected": -1.9560467004776, "loss": 1.466, "nll_loss": 1.4244604110717773, "rewards/accuracies": 1.0, "rewards/chosen": -0.14051274955272675, "rewards/margins": 0.055091917514801025, "rewards/rejected": -0.19560466706752777, "step": 572 }, { "epoch": 0.9030732860520094, "grad_norm": 0.17933540046215057, "learning_rate": 2.681456849358505e-06, "log_odds_chosen": 0.7187561988830566, "log_odds_ratio": -0.41289833188056946, "logits/chosen": -0.35038578510284424, "logits/rejected": -1.133527159690857, "logps/chosen": -1.453390121459961, "logps/rejected": -2.0473554134368896, "loss": 1.5045, "nll_loss": 1.4631835222244263, "rewards/accuracies": 0.875, "rewards/chosen": -0.1453390270471573, "rewards/margins": 0.05939652770757675, "rewards/rejected": -0.20473553240299225, "step": 573 }, { "epoch": 0.904649330181245, "grad_norm": 0.17947132885456085, "learning_rate": 2.6762861185918528e-06, "log_odds_chosen": 1.0744562149047852, "log_odds_ratio": -0.32229819893836975, "logits/chosen": -0.3679516911506653, "logits/rejected": -1.0092076063156128, "logps/chosen": -1.3659673929214478, "logps/rejected": -2.264378070831299, "loss": 1.415, "nll_loss": 1.382771372795105, "rewards/accuracies": 1.0, "rewards/chosen": -0.13659675419330597, "rewards/margins": 0.08984105288982391, "rewards/rejected": -0.22643780708312988, "step": 574 }, { "epoch": 0.9062253743104807, "grad_norm": 0.1912039816379547, "learning_rate": 2.6711102788065934e-06, "log_odds_chosen": 0.8282746076583862, "log_odds_ratio": -0.3737855553627014, "logits/chosen": -0.27804040908813477, "logits/rejected": -1.0117435455322266, "logps/chosen": -1.5437945127487183, "logps/rejected": -2.2480297088623047, "loss": 1.5923, "nll_loss": 1.55488121509552, "rewards/accuracies": 1.0, "rewards/chosen": -0.1543794423341751, "rewards/margins": 0.07042355090379715, "rewards/rejected": -0.22480300068855286, "step": 575 }, { "epoch": 0.9078014184397163, "grad_norm": 0.2041914314031601, "learning_rate": 2.665929369103724e-06, "log_odds_chosen": 0.522678017616272, "log_odds_ratio": -0.47281134128570557, "logits/chosen": -0.233540341258049, "logits/rejected": -0.7462192177772522, "logps/chosen": -1.59604811668396, "logps/rejected": -2.027956247329712, "loss": 1.6332, "nll_loss": 1.5858947038650513, "rewards/accuracies": 1.0, "rewards/chosen": -0.15960480272769928, "rewards/margins": 0.04319081827998161, "rewards/rejected": -0.2027956247329712, "step": 576 }, { "epoch": 0.9093774625689519, "grad_norm": 0.19025495648384094, "learning_rate": 2.6607434286225427e-06, "log_odds_chosen": 0.7371255159378052, "log_odds_ratio": -0.4003181755542755, "logits/chosen": -0.22684240341186523, "logits/rejected": -0.97023606300354, "logps/chosen": -1.481331706047058, "logps/rejected": -2.0959441661834717, "loss": 1.5353, "nll_loss": 1.4952728748321533, "rewards/accuracies": 1.0, "rewards/chosen": -0.14813315868377686, "rewards/margins": 0.06146124005317688, "rewards/rejected": -0.20959442853927612, "step": 577 }, { "epoch": 0.9109535066981875, "grad_norm": 0.18634669482707977, "learning_rate": 2.6555524965403533e-06, "log_odds_chosen": 0.9257493615150452, "log_odds_ratio": -0.3365446925163269, "logits/chosen": -0.3423171639442444, "logits/rejected": -1.0061731338500977, "logps/chosen": -1.4069764614105225, "logps/rejected": -2.1728689670562744, "loss": 1.4653, "nll_loss": 1.4316625595092773, "rewards/accuracies": 1.0, "rewards/chosen": -0.1406976580619812, "rewards/margins": 0.07658925652503967, "rewards/rejected": -0.21728689968585968, "step": 578 }, { "epoch": 0.9125295508274232, "grad_norm": 0.19038228690624237, "learning_rate": 2.6503566120721683e-06, "log_odds_chosen": 0.8884246349334717, "log_odds_ratio": -0.34692028164863586, "logits/chosen": -0.2838938534259796, "logits/rejected": -1.040396809577942, "logps/chosen": -1.4974538087844849, "logps/rejected": -2.242021083831787, "loss": 1.5458, "nll_loss": 1.511068344116211, "rewards/accuracies": 1.0, "rewards/chosen": -0.149745374917984, "rewards/margins": 0.07445673644542694, "rewards/rejected": -0.22420212626457214, "step": 579 }, { "epoch": 0.9141055949566588, "grad_norm": 0.1914753019809723, "learning_rate": 2.6451558144704126e-06, "log_odds_chosen": 0.8355820775032043, "log_odds_ratio": -0.3655070662498474, "logits/chosen": -0.39890602231025696, "logits/rejected": -1.0061347484588623, "logps/chosen": -1.4560400247573853, "logps/rejected": -2.148988723754883, "loss": 1.4946, "nll_loss": 1.4580916166305542, "rewards/accuracies": 1.0, "rewards/chosen": -0.14560401439666748, "rewards/margins": 0.06929486244916916, "rewards/rejected": -0.21489885449409485, "step": 580 }, { "epoch": 0.9156816390858944, "grad_norm": 0.19239261746406555, "learning_rate": 2.6399501430246286e-06, "log_odds_chosen": 0.7336123585700989, "log_odds_ratio": -0.39277443289756775, "logits/chosen": -0.2979855239391327, "logits/rejected": -0.8990359902381897, "logps/chosen": -1.4309515953063965, "logps/rejected": -2.029104232788086, "loss": 1.4751, "nll_loss": 1.4358205795288086, "rewards/accuracies": 1.0, "rewards/chosen": -0.14309516549110413, "rewards/margins": 0.05981525778770447, "rewards/rejected": -0.2029104232788086, "step": 581 }, { "epoch": 0.91725768321513, "grad_norm": 0.18860149383544922, "learning_rate": 2.634739637061177e-06, "log_odds_chosen": 0.5968453288078308, "log_odds_ratio": -0.44231024384498596, "logits/chosen": -0.31671106815338135, "logits/rejected": -0.9569072723388672, "logps/chosen": -1.490100622177124, "logps/rejected": -1.979933261871338, "loss": 1.5415, "nll_loss": 1.4972805976867676, "rewards/accuracies": 1.0, "rewards/chosen": -0.1490100622177124, "rewards/margins": 0.04898326098918915, "rewards/rejected": -0.19799332320690155, "step": 582 }, { "epoch": 0.9188337273443656, "grad_norm": 0.19839900732040405, "learning_rate": 2.6295243359429423e-06, "log_odds_chosen": 0.8661398887634277, "log_odds_ratio": -0.35571688413619995, "logits/chosen": -0.2846633493900299, "logits/rejected": -1.100461483001709, "logps/chosen": -1.5038632154464722, "logps/rejected": -2.2323145866394043, "loss": 1.5463, "nll_loss": 1.5107214450836182, "rewards/accuracies": 1.0, "rewards/chosen": -0.15038631856441498, "rewards/margins": 0.07284514605998993, "rewards/rejected": -0.2232314795255661, "step": 583 }, { "epoch": 0.9204097714736013, "grad_norm": 0.18807195127010345, "learning_rate": 2.624304279069033e-06, "log_odds_chosen": 0.8489887118339539, "log_odds_ratio": -0.3605824112892151, "logits/chosen": -0.2581827640533447, "logits/rejected": -0.9408934712409973, "logps/chosen": -1.4321138858795166, "logps/rejected": -2.1309585571289062, "loss": 1.4999, "nll_loss": 1.4638035297393799, "rewards/accuracies": 1.0, "rewards/chosen": -0.14321140944957733, "rewards/margins": 0.06988446414470673, "rewards/rejected": -0.21309585869312286, "step": 584 }, { "epoch": 0.9219858156028369, "grad_norm": 0.20861946046352386, "learning_rate": 2.6190795058744854e-06, "log_odds_chosen": 0.8077428340911865, "log_odds_ratio": -0.38045015931129456, "logits/chosen": -0.3215436041355133, "logits/rejected": -0.9677871465682983, "logps/chosen": -1.5217182636260986, "logps/rejected": -2.20338773727417, "loss": 1.5655, "nll_loss": 1.5274369716644287, "rewards/accuracies": 1.0, "rewards/chosen": -0.15217182040214539, "rewards/margins": 0.06816694140434265, "rewards/rejected": -0.22033876180648804, "step": 585 }, { "epoch": 0.9235618597320725, "grad_norm": 0.20319119095802307, "learning_rate": 2.6138500558299664e-06, "log_odds_chosen": 0.6915637850761414, "log_odds_ratio": -0.41175147891044617, "logits/chosen": -0.307695597410202, "logits/rejected": -0.8532412648200989, "logps/chosen": -1.505434274673462, "logps/rejected": -2.0810351371765137, "loss": 1.5483, "nll_loss": 1.5071660280227661, "rewards/accuracies": 1.0, "rewards/chosen": -0.1505434364080429, "rewards/margins": 0.057560086250305176, "rewards/rejected": -0.2081035077571869, "step": 586 }, { "epoch": 0.9251379038613081, "grad_norm": 0.2038610279560089, "learning_rate": 2.608615968441472e-06, "log_odds_chosen": 0.7816276550292969, "log_odds_ratio": -0.3860572874546051, "logits/chosen": -0.34239432215690613, "logits/rejected": -1.029060959815979, "logps/chosen": -1.5441906452178955, "logps/rejected": -2.205928325653076, "loss": 1.5903, "nll_loss": 1.5516810417175293, "rewards/accuracies": 1.0, "rewards/chosen": -0.15441906452178955, "rewards/margins": 0.06617378443479538, "rewards/rejected": -0.22059285640716553, "step": 587 }, { "epoch": 0.9267139479905437, "grad_norm": 0.1958923637866974, "learning_rate": 2.6033772832500333e-06, "log_odds_chosen": 0.7172558903694153, "log_odds_ratio": -0.4018914997577667, "logits/chosen": -0.365552693605423, "logits/rejected": -0.98423832654953, "logps/chosen": -1.5432528257369995, "logps/rejected": -2.1449496746063232, "loss": 1.589, "nll_loss": 1.5487916469573975, "rewards/accuracies": 1.0, "rewards/chosen": -0.15432527661323547, "rewards/margins": 0.060169681906700134, "rewards/rejected": -0.2144949585199356, "step": 588 }, { "epoch": 0.9282899921197794, "grad_norm": 0.22085590660572052, "learning_rate": 2.5981340398314146e-06, "log_odds_chosen": 0.7815366983413696, "log_odds_ratio": -0.38727831840515137, "logits/chosen": -0.37216079235076904, "logits/rejected": -1.0728775262832642, "logps/chosen": -1.4311816692352295, "logps/rejected": -2.072625160217285, "loss": 1.4821, "nll_loss": 1.443368911743164, "rewards/accuracies": 1.0, "rewards/chosen": -0.14311817288398743, "rewards/margins": 0.06414434313774109, "rewards/rejected": -0.20726250112056732, "step": 589 }, { "epoch": 0.929866036249015, "grad_norm": 0.19818605482578278, "learning_rate": 2.592886277795815e-06, "log_odds_chosen": 0.7484084367752075, "log_odds_ratio": -0.39639464020729065, "logits/chosen": -0.32693415880203247, "logits/rejected": -1.203582525253296, "logps/chosen": -1.5525538921356201, "logps/rejected": -2.1846108436584473, "loss": 1.6056, "nll_loss": 1.5659488439559937, "rewards/accuracies": 1.0, "rewards/chosen": -0.15525537729263306, "rewards/margins": 0.06320571899414062, "rewards/rejected": -0.21846111118793488, "step": 590 }, { "epoch": 0.9314420803782506, "grad_norm": 0.19394098222255707, "learning_rate": 2.5876340367875706e-06, "log_odds_chosen": 0.9185118079185486, "log_odds_ratio": -0.3428211212158203, "logits/chosen": -0.32702964544296265, "logits/rejected": -1.2011594772338867, "logps/chosen": -1.5020562410354614, "logps/rejected": -2.2766404151916504, "loss": 1.551, "nll_loss": 1.5167332887649536, "rewards/accuracies": 1.0, "rewards/chosen": -0.15020562708377838, "rewards/margins": 0.07745840400457382, "rewards/rejected": -0.2276640385389328, "step": 591 }, { "epoch": 0.9330181245074862, "grad_norm": 0.1836569905281067, "learning_rate": 2.582377356484853e-06, "log_odds_chosen": 0.753261387348175, "log_odds_ratio": -0.38973143696784973, "logits/chosen": -0.36859768629074097, "logits/rejected": -0.9721928834915161, "logps/chosen": -1.3960907459259033, "logps/rejected": -2.0059423446655273, "loss": 1.4571, "nll_loss": 1.418078899383545, "rewards/accuracies": 1.0, "rewards/chosen": -0.13960908353328705, "rewards/margins": 0.060985155403614044, "rewards/rejected": -0.2005942463874817, "step": 592 }, { "epoch": 0.9345941686367218, "grad_norm": 0.19026999175548553, "learning_rate": 2.577116276599373e-06, "log_odds_chosen": 0.7147908210754395, "log_odds_ratio": -0.40482261776924133, "logits/chosen": -0.36179396510124207, "logits/rejected": -0.776434063911438, "logps/chosen": -1.4311368465423584, "logps/rejected": -2.014333963394165, "loss": 1.5026, "nll_loss": 1.4621307849884033, "rewards/accuracies": 1.0, "rewards/chosen": -0.14311370253562927, "rewards/margins": 0.05831972509622574, "rewards/rejected": -0.2014334350824356, "step": 593 }, { "epoch": 0.9361702127659575, "grad_norm": 0.20503179728984833, "learning_rate": 2.5718508368760737e-06, "log_odds_chosen": 0.7495462894439697, "log_odds_ratio": -0.4093177020549774, "logits/chosen": -0.3689710199832916, "logits/rejected": -0.9747545123100281, "logps/chosen": -1.4401062726974487, "logps/rejected": -2.060880184173584, "loss": 1.5021, "nll_loss": 1.4611238241195679, "rewards/accuracies": 1.0, "rewards/chosen": -0.14401061832904816, "rewards/margins": 0.06207740306854248, "rewards/rejected": -0.20608803629875183, "step": 594 }, { "epoch": 0.9377462568951931, "grad_norm": 0.19248925149440765, "learning_rate": 2.5665810770928386e-06, "log_odds_chosen": 0.865845799446106, "log_odds_ratio": -0.3555818796157837, "logits/chosen": -0.3957040309906006, "logits/rejected": -1.0820808410644531, "logps/chosen": -1.429842472076416, "logps/rejected": -2.1452674865722656, "loss": 1.4972, "nll_loss": 1.4616844654083252, "rewards/accuracies": 1.0, "rewards/chosen": -0.14298425614833832, "rewards/margins": 0.07154248654842377, "rewards/rejected": -0.21452677249908447, "step": 595 }, { "epoch": 0.9393223010244287, "grad_norm": 0.1944245547056198, "learning_rate": 2.5613070370601863e-06, "log_odds_chosen": 0.8471028804779053, "log_odds_ratio": -0.35954809188842773, "logits/chosen": -0.3246954679489136, "logits/rejected": -1.0067228078842163, "logps/chosen": -1.4850720167160034, "logps/rejected": -2.193937301635742, "loss": 1.5218, "nll_loss": 1.4858888387680054, "rewards/accuracies": 1.0, "rewards/chosen": -0.14850720763206482, "rewards/margins": 0.0708865076303482, "rewards/rejected": -0.21939371526241302, "step": 596 }, { "epoch": 0.9408983451536643, "grad_norm": 0.1972317397594452, "learning_rate": 2.556028756620969e-06, "log_odds_chosen": 0.7651692032814026, "log_odds_ratio": -0.3863201141357422, "logits/chosen": -0.35301584005355835, "logits/rejected": -1.0971697568893433, "logps/chosen": -1.5071377754211426, "logps/rejected": -2.144935369491577, "loss": 1.5368, "nll_loss": 1.498155117034912, "rewards/accuracies": 1.0, "rewards/chosen": -0.15071378648281097, "rewards/margins": 0.06377977132797241, "rewards/rejected": -0.2144935429096222, "step": 597 }, { "epoch": 0.9424743892829, "grad_norm": 0.2032039314508438, "learning_rate": 2.5507462756500747e-06, "log_odds_chosen": 0.9233656525611877, "log_odds_ratio": -0.35150372982025146, "logits/chosen": -0.2441563755273819, "logits/rejected": -0.9663263559341431, "logps/chosen": -1.5112857818603516, "logps/rejected": -2.2921390533447266, "loss": 1.5487, "nll_loss": 1.5135796070098877, "rewards/accuracies": 1.0, "rewards/chosen": -0.15112856030464172, "rewards/margins": 0.07808534055948257, "rewards/rejected": -0.2292139232158661, "step": 598 }, { "epoch": 0.9440504334121356, "grad_norm": 0.21819739043712616, "learning_rate": 2.5454596340541245e-06, "log_odds_chosen": 0.780358076095581, "log_odds_ratio": -0.3912939429283142, "logits/chosen": -0.42639774084091187, "logits/rejected": -1.0730507373809814, "logps/chosen": -1.544683814048767, "logps/rejected": -2.2039945125579834, "loss": 1.5843, "nll_loss": 1.5451691150665283, "rewards/accuracies": 1.0, "rewards/chosen": -0.15446837246418, "rewards/margins": 0.06593109667301178, "rewards/rejected": -0.22039945423603058, "step": 599 }, { "epoch": 0.9456264775413712, "grad_norm": 0.191095232963562, "learning_rate": 2.5401688717711702e-06, "log_odds_chosen": 0.7160026431083679, "log_odds_ratio": -0.40228599309921265, "logits/chosen": -0.31293508410453796, "logits/rejected": -0.9643535614013672, "logps/chosen": -1.477915644645691, "logps/rejected": -2.0708792209625244, "loss": 1.5323, "nll_loss": 1.4920848608016968, "rewards/accuracies": 1.0, "rewards/chosen": -0.1477915644645691, "rewards/margins": 0.05929635465145111, "rewards/rejected": -0.2070879191160202, "step": 600 }, { "epoch": 0.9472025216706068, "grad_norm": 0.18284882605075836, "learning_rate": 2.5348740287703937e-06, "log_odds_chosen": 0.9218278527259827, "log_odds_ratio": -0.3381814956665039, "logits/chosen": -0.3719578683376312, "logits/rejected": -0.9361214637756348, "logps/chosen": -1.381032943725586, "logps/rejected": -2.1370444297790527, "loss": 1.4457, "nll_loss": 1.4118531942367554, "rewards/accuracies": 1.0, "rewards/chosen": -0.13810329139232635, "rewards/margins": 0.07560117542743683, "rewards/rejected": -0.21370446681976318, "step": 601 }, { "epoch": 0.9487785657998424, "grad_norm": 0.20864719152450562, "learning_rate": 2.529575145051805e-06, "log_odds_chosen": 0.8491575717926025, "log_odds_ratio": -0.36945462226867676, "logits/chosen": -0.3754251003265381, "logits/rejected": -0.9714959859848022, "logps/chosen": -1.455926775932312, "logps/rejected": -2.1642541885375977, "loss": 1.5258, "nll_loss": 1.4888064861297607, "rewards/accuracies": 1.0, "rewards/chosen": -0.14559268951416016, "rewards/margins": 0.0708327367901802, "rewards/rejected": -0.21642543375492096, "step": 602 }, { "epoch": 0.950354609929078, "grad_norm": 0.20159947872161865, "learning_rate": 2.52427226064594e-06, "log_odds_chosen": 0.9014179706573486, "log_odds_ratio": -0.34767764806747437, "logits/chosen": -0.4360904395580292, "logits/rejected": -1.0693079233169556, "logps/chosen": -1.4922616481781006, "logps/rejected": -2.2479288578033447, "loss": 1.5226, "nll_loss": 1.4878350496292114, "rewards/accuracies": 1.0, "rewards/chosen": -0.14922615885734558, "rewards/margins": 0.07556671649217606, "rewards/rejected": -0.22479286789894104, "step": 603 }, { "epoch": 0.9519306540583137, "grad_norm": 0.18663589656352997, "learning_rate": 2.518965415613557e-06, "log_odds_chosen": 1.0358104705810547, "log_odds_ratio": -0.3195894658565521, "logits/chosen": -0.3785373568534851, "logits/rejected": -1.0553038120269775, "logps/chosen": -1.3895487785339355, "logps/rejected": -2.249821662902832, "loss": 1.4498, "nll_loss": 1.4178262948989868, "rewards/accuracies": 1.0, "rewards/chosen": -0.13895487785339355, "rewards/margins": 0.08602726459503174, "rewards/rejected": -0.22498217225074768, "step": 604 }, { "epoch": 0.9535066981875493, "grad_norm": 0.19644887745380402, "learning_rate": 2.513654650045336e-06, "log_odds_chosen": 0.8293303847312927, "log_odds_ratio": -0.3664936423301697, "logits/chosen": -0.38007354736328125, "logits/rejected": -1.2221169471740723, "logps/chosen": -1.4231748580932617, "logps/rejected": -2.107367753982544, "loss": 1.4711, "nll_loss": 1.4344661235809326, "rewards/accuracies": 1.0, "rewards/chosen": -0.1423174887895584, "rewards/margins": 0.06841929256916046, "rewards/rejected": -0.21073678135871887, "step": 605 }, { "epoch": 0.9550827423167849, "grad_norm": 0.1929960697889328, "learning_rate": 2.508340004061574e-06, "log_odds_chosen": 0.982408881187439, "log_odds_ratio": -0.3273788392543793, "logits/chosen": -0.402699738740921, "logits/rejected": -0.8669203519821167, "logps/chosen": -1.4364259243011475, "logps/rejected": -2.257878541946411, "loss": 1.4884, "nll_loss": 1.4556838274002075, "rewards/accuracies": 1.0, "rewards/chosen": -0.1436426043510437, "rewards/margins": 0.08214528858661652, "rewards/rejected": -0.22578787803649902, "step": 606 }, { "epoch": 0.9566587864460205, "grad_norm": 0.2228441834449768, "learning_rate": 2.503021517811882e-06, "log_odds_chosen": 0.8784299492835999, "log_odds_ratio": -0.3510138988494873, "logits/chosen": -0.45283588767051697, "logits/rejected": -1.127648115158081, "logps/chosen": -1.5121246576309204, "logps/rejected": -2.251612663269043, "loss": 1.5604, "nll_loss": 1.5253283977508545, "rewards/accuracies": 1.0, "rewards/chosen": -0.15121246874332428, "rewards/margins": 0.07394880801439285, "rewards/rejected": -0.22516128420829773, "step": 607 }, { "epoch": 0.9582348305752562, "grad_norm": 0.19003160297870636, "learning_rate": 2.497699231474885e-06, "log_odds_chosen": 0.9765473008155823, "log_odds_ratio": -0.33263981342315674, "logits/chosen": -0.41315698623657227, "logits/rejected": -1.1643365621566772, "logps/chosen": -1.4046815633773804, "logps/rejected": -2.2223408222198486, "loss": 1.4457, "nll_loss": 1.4124497175216675, "rewards/accuracies": 1.0, "rewards/chosen": -0.14046816527843475, "rewards/margins": 0.08176591247320175, "rewards/rejected": -0.2222340852022171, "step": 608 }, { "epoch": 0.9598108747044918, "grad_norm": 0.1974783092737198, "learning_rate": 2.4923731852579127e-06, "log_odds_chosen": 0.9320014119148254, "log_odds_ratio": -0.3443461060523987, "logits/chosen": -0.431832492351532, "logits/rejected": -0.9174313545227051, "logps/chosen": -1.430959939956665, "logps/rejected": -2.2114200592041016, "loss": 1.4836, "nll_loss": 1.4491451978683472, "rewards/accuracies": 1.0, "rewards/chosen": -0.1430959850549698, "rewards/margins": 0.07804602384567261, "rewards/rejected": -0.22114203870296478, "step": 609 }, { "epoch": 0.9613869188337274, "grad_norm": 0.19195294380187988, "learning_rate": 2.4870434193967017e-06, "log_odds_chosen": 1.083929181098938, "log_odds_ratio": -0.30106061697006226, "logits/chosen": -0.37555810809135437, "logits/rejected": -1.1971845626831055, "logps/chosen": -1.4301390647888184, "logps/rejected": -2.340451240539551, "loss": 1.4589, "nll_loss": 1.4287785291671753, "rewards/accuracies": 1.0, "rewards/chosen": -0.14301392436027527, "rewards/margins": 0.09103122353553772, "rewards/rejected": -0.2340451329946518, "step": 610 }, { "epoch": 0.9629629629629629, "grad_norm": 0.20741422474384308, "learning_rate": 2.481709974155086e-06, "log_odds_chosen": 1.1081900596618652, "log_odds_ratio": -0.302015095949173, "logits/chosen": -0.33150750398635864, "logits/rejected": -0.9886937737464905, "logps/chosen": -1.4514014720916748, "logps/rejected": -2.389432668685913, "loss": 1.4896, "nll_loss": 1.4593768119812012, "rewards/accuracies": 1.0, "rewards/chosen": -0.145140141248703, "rewards/margins": 0.09380312263965607, "rewards/rejected": -0.23894327878952026, "step": 611 }, { "epoch": 0.9645390070921985, "grad_norm": 0.21632716059684753, "learning_rate": 2.4763728898246983e-06, "log_odds_chosen": 1.1941622495651245, "log_odds_ratio": -0.27469301223754883, "logits/chosen": -0.24985259771347046, "logits/rejected": -1.0489330291748047, "logps/chosen": -1.3368079662322998, "logps/rejected": -2.3148045539855957, "loss": 1.3692, "nll_loss": 1.341727614402771, "rewards/accuracies": 1.0, "rewards/chosen": -0.1336808055639267, "rewards/margins": 0.09779965132474899, "rewards/rejected": -0.2314804643392563, "step": 612 }, { "epoch": 0.9661150512214342, "grad_norm": 0.1790483146905899, "learning_rate": 2.4710322067246607e-06, "log_odds_chosen": 0.9041817784309387, "log_odds_ratio": -0.34515616297721863, "logits/chosen": -0.31219547986984253, "logits/rejected": -1.0100858211517334, "logps/chosen": -1.4692482948303223, "logps/rejected": -2.2232794761657715, "loss": 1.5011, "nll_loss": 1.4666236639022827, "rewards/accuracies": 1.0, "rewards/chosen": -0.14692482352256775, "rewards/margins": 0.0754031166434288, "rewards/rejected": -0.22232794761657715, "step": 613 }, { "epoch": 0.9676910953506698, "grad_norm": 0.18665866553783417, "learning_rate": 2.465687965201283e-06, "log_odds_chosen": 0.9689115285873413, "log_odds_ratio": -0.3286559581756592, "logits/chosen": -0.3585508167743683, "logits/rejected": -1.103761911392212, "logps/chosen": -1.370980143547058, "logps/rejected": -2.169504165649414, "loss": 1.43, "nll_loss": 1.3971062898635864, "rewards/accuracies": 1.0, "rewards/chosen": -0.1370980143547058, "rewards/margins": 0.0798523798584938, "rewards/rejected": -0.2169504016637802, "step": 614 }, { "epoch": 0.9692671394799054, "grad_norm": 0.19198034703731537, "learning_rate": 2.4603402056277577e-06, "log_odds_chosen": 0.9163122177124023, "log_odds_ratio": -0.34312671422958374, "logits/chosen": -0.4962213635444641, "logits/rejected": -1.010047435760498, "logps/chosen": -1.4312962293624878, "logps/rejected": -2.1902687549591064, "loss": 1.4866, "nll_loss": 1.4523093700408936, "rewards/accuracies": 1.0, "rewards/chosen": -0.1431296020746231, "rewards/margins": 0.07589725404977798, "rewards/rejected": -0.2190268635749817, "step": 615 }, { "epoch": 0.970843183609141, "grad_norm": 0.20627878606319427, "learning_rate": 2.454988968403854e-06, "log_odds_chosen": 0.7183381915092468, "log_odds_ratio": -0.3999221622943878, "logits/chosen": -0.34786874055862427, "logits/rejected": -1.08138108253479, "logps/chosen": -1.4816724061965942, "logps/rejected": -2.0751776695251465, "loss": 1.5148, "nll_loss": 1.4748446941375732, "rewards/accuracies": 1.0, "rewards/chosen": -0.14816723763942719, "rewards/margins": 0.059350527822971344, "rewards/rejected": -0.20751777291297913, "step": 616 }, { "epoch": 0.9724192277383766, "grad_norm": 0.19011566042900085, "learning_rate": 2.4496342939556133e-06, "log_odds_chosen": 0.9232086539268494, "log_odds_ratio": -0.3415546417236328, "logits/chosen": -0.2815484404563904, "logits/rejected": -0.9943175315856934, "logps/chosen": -1.4415873289108276, "logps/rejected": -2.209988832473755, "loss": 1.4903, "nll_loss": 1.4561052322387695, "rewards/accuracies": 1.0, "rewards/chosen": -0.144158735871315, "rewards/margins": 0.07684013247489929, "rewards/rejected": -0.2209988832473755, "step": 617 }, { "epoch": 0.9739952718676123, "grad_norm": 0.20069187879562378, "learning_rate": 2.444276222735043e-06, "log_odds_chosen": 0.9212395548820496, "log_odds_ratio": -0.33747851848602295, "logits/chosen": -0.4475453794002533, "logits/rejected": -1.1382890939712524, "logps/chosen": -1.406221866607666, "logps/rejected": -2.1670939922332764, "loss": 1.4749, "nll_loss": 1.4411038160324097, "rewards/accuracies": 1.0, "rewards/chosen": -0.14062218368053436, "rewards/margins": 0.07608722895383835, "rewards/rejected": -0.21670940518379211, "step": 618 }, { "epoch": 0.9755713159968479, "grad_norm": 0.19539254903793335, "learning_rate": 2.4389147952198127e-06, "log_odds_chosen": 0.9214218258857727, "log_odds_ratio": -0.3395494818687439, "logits/chosen": -0.48484835028648376, "logits/rejected": -1.0643705129623413, "logps/chosen": -1.449306607246399, "logps/rejected": -2.216517686843872, "loss": 1.4759, "nll_loss": 1.4419844150543213, "rewards/accuracies": 1.0, "rewards/chosen": -0.1449306607246399, "rewards/margins": 0.07672111690044403, "rewards/rejected": -0.22165179252624512, "step": 619 }, { "epoch": 0.9771473601260835, "grad_norm": 0.18427987396717072, "learning_rate": 2.433550051912946e-06, "log_odds_chosen": 1.017160415649414, "log_odds_ratio": -0.3145020306110382, "logits/chosen": -0.41202792525291443, "logits/rejected": -1.1166754961013794, "logps/chosen": -1.3387302160263062, "logps/rejected": -2.168827533721924, "loss": 1.3877, "nll_loss": 1.3562980890274048, "rewards/accuracies": 1.0, "rewards/chosen": -0.13387303054332733, "rewards/margins": 0.0830097496509552, "rewards/rejected": -0.21688278019428253, "step": 620 }, { "epoch": 0.9787234042553191, "grad_norm": 0.20985229313373566, "learning_rate": 2.4281820333425167e-06, "log_odds_chosen": 0.943986713886261, "log_odds_ratio": -0.34006890654563904, "logits/chosen": -0.4079330861568451, "logits/rejected": -1.031942367553711, "logps/chosen": -1.554274082183838, "logps/rejected": -2.3601934909820557, "loss": 1.5772, "nll_loss": 1.5432261228561401, "rewards/accuracies": 1.0, "rewards/chosen": -0.15542739629745483, "rewards/margins": 0.08059194684028625, "rewards/rejected": -0.2360193431377411, "step": 621 }, { "epoch": 0.9802994483845547, "grad_norm": 0.20018957555294037, "learning_rate": 2.42281078006134e-06, "log_odds_chosen": 1.1363192796707153, "log_odds_ratio": -0.28101885318756104, "logits/chosen": -0.43212658166885376, "logits/rejected": -1.2950831651687622, "logps/chosen": -1.536520004272461, "logps/rejected": -2.5101449489593506, "loss": 1.5696, "nll_loss": 1.5414538383483887, "rewards/accuracies": 1.0, "rewards/chosen": -0.15365199744701385, "rewards/margins": 0.09736253321170807, "rewards/rejected": -0.2510145306587219, "step": 622 }, { "epoch": 0.9818754925137904, "grad_norm": 0.21111983060836792, "learning_rate": 2.4174363326466703e-06, "log_odds_chosen": 0.8958853483200073, "log_odds_ratio": -0.35368579626083374, "logits/chosen": -0.4243263900279999, "logits/rejected": -1.3082011938095093, "logps/chosen": -1.4661465883255005, "logps/rejected": -2.2221148014068604, "loss": 1.5065, "nll_loss": 1.4710960388183594, "rewards/accuracies": 1.0, "rewards/chosen": -0.146614670753479, "rewards/margins": 0.07559681683778763, "rewards/rejected": -0.22221146523952484, "step": 623 }, { "epoch": 0.983451536643026, "grad_norm": 0.20371636748313904, "learning_rate": 2.41205873169989e-06, "log_odds_chosen": 0.8425401449203491, "log_odds_ratio": -0.3650510907173157, "logits/chosen": -0.34256961941719055, "logits/rejected": -0.8869901895523071, "logps/chosen": -1.4562673568725586, "logps/rejected": -2.1548898220062256, "loss": 1.5041, "nll_loss": 1.46755850315094, "rewards/accuracies": 1.0, "rewards/chosen": -0.1456267386674881, "rewards/margins": 0.0698622465133667, "rewards/rejected": -0.2154889851808548, "step": 624 }, { "epoch": 0.9850275807722616, "grad_norm": 0.18814495205879211, "learning_rate": 2.4066780178462058e-06, "log_odds_chosen": 1.0019807815551758, "log_odds_ratio": -0.327586829662323, "logits/chosen": -0.27672079205513, "logits/rejected": -1.0097203254699707, "logps/chosen": -1.4872336387634277, "logps/rejected": -2.3390111923217773, "loss": 1.5225, "nll_loss": 1.4897515773773193, "rewards/accuracies": 1.0, "rewards/chosen": -0.14872336387634277, "rewards/margins": 0.08517776429653168, "rewards/rejected": -0.23390114307403564, "step": 625 }, { "epoch": 0.9866036249014972, "grad_norm": 0.1942882239818573, "learning_rate": 2.40129423173434e-06, "log_odds_chosen": 0.9100236892700195, "log_odds_ratio": -0.3475266098976135, "logits/chosen": -0.3510172367095947, "logits/rejected": -1.1144819259643555, "logps/chosen": -1.4395970106124878, "logps/rejected": -2.195122241973877, "loss": 1.4951, "nll_loss": 1.4603657722473145, "rewards/accuracies": 1.0, "rewards/chosen": -0.14395970106124878, "rewards/margins": 0.0755525454878807, "rewards/rejected": -0.2195122390985489, "step": 626 }, { "epoch": 0.9881796690307328, "grad_norm": 0.21359357237815857, "learning_rate": 2.3959074140362274e-06, "log_odds_chosen": 0.9286834001541138, "log_odds_ratio": -0.3380786180496216, "logits/chosen": -0.4979647696018219, "logits/rejected": -1.0843995809555054, "logps/chosen": -1.4171068668365479, "logps/rejected": -2.1853513717651367, "loss": 1.4577, "nll_loss": 1.4239323139190674, "rewards/accuracies": 1.0, "rewards/chosen": -0.14171069860458374, "rewards/margins": 0.07682443410158157, "rewards/rejected": -0.21853512525558472, "step": 627 }, { "epoch": 0.9897557131599685, "grad_norm": 0.21708841621875763, "learning_rate": 2.3905176054467007e-06, "log_odds_chosen": 1.143787145614624, "log_odds_ratio": -0.2860889434814453, "logits/chosen": -0.47657203674316406, "logits/rejected": -1.1084681749343872, "logps/chosen": -1.402820348739624, "logps/rejected": -2.365870475769043, "loss": 1.4551, "nll_loss": 1.4265071153640747, "rewards/accuracies": 1.0, "rewards/chosen": -0.1402820497751236, "rewards/margins": 0.0963050127029419, "rewards/rejected": -0.2365870326757431, "step": 628 }, { "epoch": 0.9913317572892041, "grad_norm": 0.21842071413993835, "learning_rate": 2.3851248466831905e-06, "log_odds_chosen": 0.9840641617774963, "log_odds_ratio": -0.33589670062065125, "logits/chosen": -0.3594658076763153, "logits/rejected": -1.005044937133789, "logps/chosen": -1.4061402082443237, "logps/rejected": -2.22995924949646, "loss": 1.4409, "nll_loss": 1.4073493480682373, "rewards/accuracies": 1.0, "rewards/chosen": -0.14061403274536133, "rewards/margins": 0.08238191157579422, "rewards/rejected": -0.22299592196941376, "step": 629 }, { "epoch": 0.9929078014184397, "grad_norm": 0.1944458782672882, "learning_rate": 2.379729178485412e-06, "log_odds_chosen": 0.9093501567840576, "log_odds_ratio": -0.3428385853767395, "logits/chosen": -0.3921302855014801, "logits/rejected": -1.1238219738006592, "logps/chosen": -1.5065221786499023, "logps/rejected": -2.2746124267578125, "loss": 1.5453, "nll_loss": 1.5109822750091553, "rewards/accuracies": 1.0, "rewards/chosen": -0.1506522297859192, "rewards/margins": 0.07680904865264893, "rewards/rejected": -0.22746124863624573, "step": 630 }, { "epoch": 0.9944838455476753, "grad_norm": 0.23414118587970734, "learning_rate": 2.3743306416150636e-06, "log_odds_chosen": 0.9369878172874451, "log_odds_ratio": -0.33358249068260193, "logits/chosen": -0.35220739245414734, "logits/rejected": -1.041300654411316, "logps/chosen": -1.463112235069275, "logps/rejected": -2.244133472442627, "loss": 1.5032, "nll_loss": 1.4698688983917236, "rewards/accuracies": 1.0, "rewards/chosen": -0.1463112235069275, "rewards/margins": 0.07810211926698685, "rewards/rejected": -0.22441333532333374, "step": 631 }, { "epoch": 0.996059889676911, "grad_norm": 0.19981160759925842, "learning_rate": 2.368929276855512e-06, "log_odds_chosen": 0.793098509311676, "log_odds_ratio": -0.37449538707733154, "logits/chosen": -0.3573625683784485, "logits/rejected": -1.0121304988861084, "logps/chosen": -1.565767765045166, "logps/rejected": -2.2371394634246826, "loss": 1.5824, "nll_loss": 1.5449631214141846, "rewards/accuracies": 1.0, "rewards/chosen": -0.15657676756381989, "rewards/margins": 0.06713716685771942, "rewards/rejected": -0.2237139344215393, "step": 632 }, { "epoch": 0.9976359338061466, "grad_norm": 0.20657162368297577, "learning_rate": 2.363525125011487e-06, "log_odds_chosen": 0.8742801547050476, "log_odds_ratio": -0.3614480197429657, "logits/chosen": -0.3909716010093689, "logits/rejected": -1.1499360799789429, "logps/chosen": -1.583171010017395, "logps/rejected": -2.3313608169555664, "loss": 1.6231, "nll_loss": 1.5869425535202026, "rewards/accuracies": 1.0, "rewards/chosen": -0.15831710398197174, "rewards/margins": 0.07481897622346878, "rewards/rejected": -0.23313607275485992, "step": 633 }, { "epoch": 0.9992119779353822, "grad_norm": 0.19248680770397186, "learning_rate": 2.3581182269087755e-06, "log_odds_chosen": 1.3030043840408325, "log_odds_ratio": -0.28635847568511963, "logits/chosen": -0.36139392852783203, "logits/rejected": -1.1686794757843018, "logps/chosen": -1.3925961256027222, "logps/rejected": -2.4817843437194824, "loss": 1.4414, "nll_loss": 1.4127696752548218, "rewards/accuracies": 1.0, "rewards/chosen": -0.13925962150096893, "rewards/margins": 0.10891882330179214, "rewards/rejected": -0.24817843735218048, "step": 634 }, { "epoch": 1.0, "grad_norm": 0.25399571657180786, "learning_rate": 2.3527086233939097e-06, "log_odds_chosen": 1.0740129947662354, "log_odds_ratio": -0.29763367772102356, "logits/chosen": -0.3640671372413635, "logits/rejected": -1.3394858837127686, "logps/chosen": -1.4742673635482788, "logps/rejected": -2.3840951919555664, "loss": 1.5183, "nll_loss": 1.4885731935501099, "rewards/accuracies": 1.0, "rewards/chosen": -0.14742672443389893, "rewards/margins": 0.09098279476165771, "rewards/rejected": -0.23840951919555664, "step": 635 }, { "epoch": 1.0015760441292356, "grad_norm": 0.2341020256280899, "learning_rate": 2.347296355333861e-06, "log_odds_chosen": 0.9331901669502258, "log_odds_ratio": -0.33497533202171326, "logits/chosen": -0.4703174829483032, "logits/rejected": -1.0244230031967163, "logps/chosen": -1.4870705604553223, "logps/rejected": -2.2731425762176514, "loss": 1.5256, "nll_loss": 1.4920775890350342, "rewards/accuracies": 1.0, "rewards/chosen": -0.1487070620059967, "rewards/margins": 0.07860719412565231, "rewards/rejected": -0.22731426358222961, "step": 636 }, { "epoch": 1.0031520882584712, "grad_norm": 0.20277716219425201, "learning_rate": 2.3418814636157283e-06, "log_odds_chosen": 0.8205506801605225, "log_odds_ratio": -0.3699876666069031, "logits/chosen": -0.4176909625530243, "logits/rejected": -1.1859151124954224, "logps/chosen": -1.5247273445129395, "logps/rejected": -2.211496353149414, "loss": 1.5542, "nll_loss": 1.5171836614608765, "rewards/accuracies": 1.0, "rewards/chosen": -0.15247273445129395, "rewards/margins": 0.0686768889427185, "rewards/rejected": -0.22114965319633484, "step": 637 }, { "epoch": 1.0047281323877069, "grad_norm": 0.20370414853096008, "learning_rate": 2.336463989146434e-06, "log_odds_chosen": 0.9526439309120178, "log_odds_ratio": -0.3336739242076874, "logits/chosen": -0.4233691096305847, "logits/rejected": -0.9744773507118225, "logps/chosen": -1.4175734519958496, "logps/rejected": -2.2113776206970215, "loss": 1.454, "nll_loss": 1.4206714630126953, "rewards/accuracies": 1.0, "rewards/chosen": -0.14175733923912048, "rewards/margins": 0.07938041538000107, "rewards/rejected": -0.22113776206970215, "step": 638 }, { "epoch": 1.0063041765169425, "grad_norm": 0.19846287369728088, "learning_rate": 2.3310439728524074e-06, "log_odds_chosen": 0.9999067783355713, "log_odds_ratio": -0.3160192668437958, "logits/chosen": -0.4435569941997528, "logits/rejected": -1.079360842704773, "logps/chosen": -1.4388091564178467, "logps/rejected": -2.2747817039489746, "loss": 1.487, "nll_loss": 1.4554084539413452, "rewards/accuracies": 1.0, "rewards/chosen": -0.1438809186220169, "rewards/margins": 0.08359724283218384, "rewards/rejected": -0.22747816145420074, "step": 639 }, { "epoch": 1.007880220646178, "grad_norm": 0.2016543298959732, "learning_rate": 2.325621455679286e-06, "log_odds_chosen": 1.130279541015625, "log_odds_ratio": -0.29136916995048523, "logits/chosen": -0.3651258051395416, "logits/rejected": -1.0471364259719849, "logps/chosen": -1.4179776906967163, "logps/rejected": -2.3699052333831787, "loss": 1.4515, "nll_loss": 1.4223867654800415, "rewards/accuracies": 1.0, "rewards/chosen": -0.1417977660894394, "rewards/margins": 0.09519276767969131, "rewards/rejected": -0.2369905263185501, "step": 640 }, { "epoch": 1.0094562647754137, "grad_norm": 0.20089617371559143, "learning_rate": 2.3201964785915953e-06, "log_odds_chosen": 0.8257086277008057, "log_odds_ratio": -0.3646969497203827, "logits/chosen": -0.45511627197265625, "logits/rejected": -1.0016907453536987, "logps/chosen": -1.4859669208526611, "logps/rejected": -2.174349546432495, "loss": 1.5268, "nll_loss": 1.490374207496643, "rewards/accuracies": 1.0, "rewards/chosen": -0.14859670400619507, "rewards/margins": 0.06883824616670609, "rewards/rejected": -0.21743495762348175, "step": 641 }, { "epoch": 1.0110323089046493, "grad_norm": 0.20475664734840393, "learning_rate": 2.3147690825724457e-06, "log_odds_chosen": 0.7797296643257141, "log_odds_ratio": -0.38332077860832214, "logits/chosen": -0.3707115650177002, "logits/rejected": -0.9922008514404297, "logps/chosen": -1.532851219177246, "logps/rejected": -2.191016674041748, "loss": 1.5656, "nll_loss": 1.527304768562317, "rewards/accuracies": 1.0, "rewards/chosen": -0.15328513085842133, "rewards/margins": 0.06581656634807587, "rewards/rejected": -0.2191016972064972, "step": 642 }, { "epoch": 1.012608353033885, "grad_norm": 0.207964688539505, "learning_rate": 2.3093393086232226e-06, "log_odds_chosen": 1.0434799194335938, "log_odds_ratio": -0.3049820363521576, "logits/chosen": -0.36297136545181274, "logits/rejected": -1.0211080312728882, "logps/chosen": -1.3913506269454956, "logps/rejected": -2.2583303451538086, "loss": 1.4433, "nll_loss": 1.4127908945083618, "rewards/accuracies": 1.0, "rewards/chosen": -0.13913507759571075, "rewards/margins": 0.08669795840978622, "rewards/rejected": -0.22583304345607758, "step": 643 }, { "epoch": 1.0141843971631206, "grad_norm": 0.19038182497024536, "learning_rate": 2.3039071977632748e-06, "log_odds_chosen": 1.0444650650024414, "log_odds_ratio": -0.31638944149017334, "logits/chosen": -0.44605910778045654, "logits/rejected": -1.219096302986145, "logps/chosen": -1.4533777236938477, "logps/rejected": -2.3402957916259766, "loss": 1.4867, "nll_loss": 1.4550527334213257, "rewards/accuracies": 1.0, "rewards/chosen": -0.1453377604484558, "rewards/margins": 0.0886918157339096, "rewards/rejected": -0.2340295910835266, "step": 644 }, { "epoch": 1.0157604412923562, "grad_norm": 0.20148001611232758, "learning_rate": 2.2984727910296044e-06, "log_odds_chosen": 0.8372101187705994, "log_odds_ratio": -0.36413484811782837, "logits/chosen": -0.41543513536453247, "logits/rejected": -1.1079984903335571, "logps/chosen": -1.512190341949463, "logps/rejected": -2.217898368835449, "loss": 1.5646, "nll_loss": 1.5281473398208618, "rewards/accuracies": 1.0, "rewards/chosen": -0.15121904015541077, "rewards/margins": 0.07057081162929535, "rewards/rejected": -0.22178983688354492, "step": 645 }, { "epoch": 1.0173364854215918, "grad_norm": 0.21638479828834534, "learning_rate": 2.2930361294765594e-06, "log_odds_chosen": 0.9642891883850098, "log_odds_ratio": -0.3264886736869812, "logits/chosen": -0.3254181146621704, "logits/rejected": -1.217842936515808, "logps/chosen": -1.5167113542556763, "logps/rejected": -2.335024118423462, "loss": 1.5677, "nll_loss": 1.5350638628005981, "rewards/accuracies": 1.0, "rewards/chosen": -0.1516711413860321, "rewards/margins": 0.08183128386735916, "rewards/rejected": -0.23350241780281067, "step": 646 }, { "epoch": 1.0189125295508275, "grad_norm": 0.195595845580101, "learning_rate": 2.287597254175521e-06, "log_odds_chosen": 1.0821458101272583, "log_odds_ratio": -0.3104124665260315, "logits/chosen": -0.38989654183387756, "logits/rejected": -1.0959748029708862, "logps/chosen": -1.3808361291885376, "logps/rejected": -2.283047676086426, "loss": 1.4429, "nll_loss": 1.4118428230285645, "rewards/accuracies": 1.0, "rewards/chosen": -0.13808360695838928, "rewards/margins": 0.09022115916013718, "rewards/rejected": -0.22830477356910706, "step": 647 }, { "epoch": 1.020488573680063, "grad_norm": 0.20041058957576752, "learning_rate": 2.2821562062145938e-06, "log_odds_chosen": 1.1318691968917847, "log_odds_ratio": -0.29464760422706604, "logits/chosen": -0.3787132501602173, "logits/rejected": -1.2762706279754639, "logps/chosen": -1.5364724397659302, "logps/rejected": -2.5166966915130615, "loss": 1.5488, "nll_loss": 1.519294023513794, "rewards/accuracies": 1.0, "rewards/chosen": -0.15364722907543182, "rewards/margins": 0.09802241623401642, "rewards/rejected": -0.25166964530944824, "step": 648 }, { "epoch": 1.0220646178092987, "grad_norm": 0.21906161308288574, "learning_rate": 2.2767130266982967e-06, "log_odds_chosen": 0.8292859792709351, "log_odds_ratio": -0.3726491630077362, "logits/chosen": -0.4554440379142761, "logits/rejected": -0.9412230253219604, "logps/chosen": -1.4513643980026245, "logps/rejected": -2.141998767852783, "loss": 1.4973, "nll_loss": 1.4600082635879517, "rewards/accuracies": 1.0, "rewards/chosen": -0.14513644576072693, "rewards/margins": 0.06906343996524811, "rewards/rejected": -0.21419987082481384, "step": 649 }, { "epoch": 1.0236406619385343, "grad_norm": 0.19466465711593628, "learning_rate": 2.271267756747251e-06, "log_odds_chosen": 1.0175737142562866, "log_odds_ratio": -0.3189276158809662, "logits/chosen": -0.3954612612724304, "logits/rejected": -1.086215615272522, "logps/chosen": -1.4187949895858765, "logps/rejected": -2.267597198486328, "loss": 1.4501, "nll_loss": 1.4182374477386475, "rewards/accuracies": 1.0, "rewards/chosen": -0.14187949895858765, "rewards/margins": 0.08488023281097412, "rewards/rejected": -0.22675973176956177, "step": 650 }, { "epoch": 1.02521670606777, "grad_norm": 0.1900855451822281, "learning_rate": 2.265820437497871e-06, "log_odds_chosen": 0.8398513793945312, "log_odds_ratio": -0.3705929219722748, "logits/chosen": -0.3605864346027374, "logits/rejected": -0.9096065759658813, "logps/chosen": -1.4144604206085205, "logps/rejected": -2.109245538711548, "loss": 1.4759, "nll_loss": 1.4387967586517334, "rewards/accuracies": 1.0, "rewards/chosen": -0.1414460390806198, "rewards/margins": 0.06947849690914154, "rewards/rejected": -0.21092455089092255, "step": 651 }, { "epoch": 1.0267927501970056, "grad_norm": 0.2057836800813675, "learning_rate": 2.26037111010205e-06, "log_odds_chosen": 0.8601279258728027, "log_odds_ratio": -0.35581517219543457, "logits/chosen": -0.37350016832351685, "logits/rejected": -1.0680174827575684, "logps/chosen": -1.4062732458114624, "logps/rejected": -2.112203598022461, "loss": 1.4523, "nll_loss": 1.4167579412460327, "rewards/accuracies": 1.0, "rewards/chosen": -0.14062732458114624, "rewards/margins": 0.07059301435947418, "rewards/rejected": -0.21122033894062042, "step": 652 }, { "epoch": 1.0283687943262412, "grad_norm": 0.19696998596191406, "learning_rate": 2.254919815726856e-06, "log_odds_chosen": 0.8508312702178955, "log_odds_ratio": -0.36220934987068176, "logits/chosen": -0.4131045341491699, "logits/rejected": -1.0494024753570557, "logps/chosen": -1.5010753870010376, "logps/rejected": -2.2163710594177246, "loss": 1.5548, "nll_loss": 1.51859712600708, "rewards/accuracies": 1.0, "rewards/chosen": -0.15010753273963928, "rewards/margins": 0.0715295821428299, "rewards/rejected": -0.22163712978363037, "step": 653 }, { "epoch": 1.0299448384554768, "grad_norm": 0.2088329941034317, "learning_rate": 2.2494665955542127e-06, "log_odds_chosen": 1.1874462366104126, "log_odds_ratio": -0.278756320476532, "logits/chosen": -0.5181041955947876, "logits/rejected": -1.097906470298767, "logps/chosen": -1.4152716398239136, "logps/rejected": -2.4139039516448975, "loss": 1.4532, "nll_loss": 1.4253708124160767, "rewards/accuracies": 1.0, "rewards/chosen": -0.14152716100215912, "rewards/margins": 0.09986322373151779, "rewards/rejected": -0.2413903772830963, "step": 654 }, { "epoch": 1.0315208825847124, "grad_norm": 0.22889749705791473, "learning_rate": 2.2440114907805942e-06, "log_odds_chosen": 0.9439932107925415, "log_odds_ratio": -0.3342151343822479, "logits/chosen": -0.5031033754348755, "logits/rejected": -1.068403959274292, "logps/chosen": -1.5435153245925903, "logps/rejected": -2.344334840774536, "loss": 1.5613, "nll_loss": 1.5279016494750977, "rewards/accuracies": 1.0, "rewards/chosen": -0.15435153245925903, "rewards/margins": 0.08008193969726562, "rewards/rejected": -0.23443348705768585, "step": 655 }, { "epoch": 1.033096926713948, "grad_norm": 0.21637524664402008, "learning_rate": 2.2385545426167112e-06, "log_odds_chosen": 0.9159579873085022, "log_odds_ratio": -0.3429696559906006, "logits/chosen": -0.33303484320640564, "logits/rejected": -1.233472228050232, "logps/chosen": -1.525570273399353, "logps/rejected": -2.296790361404419, "loss": 1.5694, "nll_loss": 1.5350866317749023, "rewards/accuracies": 1.0, "rewards/chosen": -0.15255701541900635, "rewards/margins": 0.0771220251917839, "rewards/rejected": -0.22967906296253204, "step": 656 }, { "epoch": 1.0346729708431837, "grad_norm": 0.2274521440267563, "learning_rate": 2.2330957922872016e-06, "log_odds_chosen": 1.2828874588012695, "log_odds_ratio": -0.2576564848423004, "logits/chosen": -0.4802379012107849, "logits/rejected": -1.2480734586715698, "logps/chosen": -1.4443159103393555, "logps/rejected": -2.5413949489593506, "loss": 1.4802, "nll_loss": 1.454451560974121, "rewards/accuracies": 1.0, "rewards/chosen": -0.14443160593509674, "rewards/margins": 0.10970787703990936, "rewards/rejected": -0.2541394829750061, "step": 657 }, { "epoch": 1.0362490149724193, "grad_norm": 0.18611596524715424, "learning_rate": 2.2276352810303166e-06, "log_odds_chosen": 1.196053147315979, "log_odds_ratio": -0.27312737703323364, "logits/chosen": -0.42611566185951233, "logits/rejected": -1.1597315073013306, "logps/chosen": -1.348031759262085, "logps/rejected": -2.3457837104797363, "loss": 1.3965, "nll_loss": 1.369180679321289, "rewards/accuracies": 1.0, "rewards/chosen": -0.1348031759262085, "rewards/margins": 0.09977522492408752, "rewards/rejected": -0.23457840085029602, "step": 658 }, { "epoch": 1.037825059101655, "grad_norm": 0.20620247721672058, "learning_rate": 2.222173050097609e-06, "log_odds_chosen": 0.979144275188446, "log_odds_ratio": -0.32210391759872437, "logits/chosen": -0.4380725622177124, "logits/rejected": -1.1677042245864868, "logps/chosen": -1.4617573022842407, "logps/rejected": -2.2830255031585693, "loss": 1.5061, "nll_loss": 1.4739348888397217, "rewards/accuracies": 1.0, "rewards/chosen": -0.14617572724819183, "rewards/margins": 0.08212681114673615, "rewards/rejected": -0.22830253839492798, "step": 659 }, { "epoch": 1.0394011032308905, "grad_norm": 0.21873502433300018, "learning_rate": 2.2167091407536272e-06, "log_odds_chosen": 0.9612632989883423, "log_odds_ratio": -0.327973872423172, "logits/chosen": -0.4479270279407501, "logits/rejected": -1.0037411451339722, "logps/chosen": -1.4820947647094727, "logps/rejected": -2.2882509231567383, "loss": 1.5329, "nll_loss": 1.5001122951507568, "rewards/accuracies": 1.0, "rewards/chosen": -0.14820946753025055, "rewards/margins": 0.08061562478542328, "rewards/rejected": -0.22882507741451263, "step": 660 }, { "epoch": 1.0409771473601261, "grad_norm": 0.20383982360363007, "learning_rate": 2.211243594275594e-06, "log_odds_chosen": 0.925430178642273, "log_odds_ratio": -0.3432849049568176, "logits/chosen": -0.4625731408596039, "logits/rejected": -0.9524559378623962, "logps/chosen": -1.505110740661621, "logps/rejected": -2.285020112991333, "loss": 1.5257, "nll_loss": 1.4913411140441895, "rewards/accuracies": 1.0, "rewards/chosen": -0.15051107108592987, "rewards/margins": 0.07799092680215836, "rewards/rejected": -0.22850200533866882, "step": 661 }, { "epoch": 1.0425531914893618, "grad_norm": 0.20724867284297943, "learning_rate": 2.2057764519531034e-06, "log_odds_chosen": 1.0101503133773804, "log_odds_ratio": -0.31730180978775024, "logits/chosen": -0.40949714183807373, "logits/rejected": -1.1010946035385132, "logps/chosen": -1.4974428415298462, "logps/rejected": -2.3540596961975098, "loss": 1.5187, "nll_loss": 1.4869714975357056, "rewards/accuracies": 1.0, "rewards/chosen": -0.14974427223205566, "rewards/margins": 0.08566167950630188, "rewards/rejected": -0.23540595173835754, "step": 662 }, { "epoch": 1.0441292356185974, "grad_norm": 0.21524253487586975, "learning_rate": 2.2003077550878053e-06, "log_odds_chosen": 1.1178256273269653, "log_odds_ratio": -0.2855745255947113, "logits/chosen": -0.434054970741272, "logits/rejected": -1.1269792318344116, "logps/chosen": -1.4879037141799927, "logps/rejected": -2.4403305053710938, "loss": 1.5034, "nll_loss": 1.4748553037643433, "rewards/accuracies": 1.0, "rewards/chosen": -0.1487903892993927, "rewards/margins": 0.09524266421794891, "rewards/rejected": -0.2440330535173416, "step": 663 }, { "epoch": 1.045705279747833, "grad_norm": 0.21574142575263977, "learning_rate": 2.1948375449930915e-06, "log_odds_chosen": 0.9753544330596924, "log_odds_ratio": -0.32818540930747986, "logits/chosen": -0.43173885345458984, "logits/rejected": -1.1743507385253906, "logps/chosen": -1.5300014019012451, "logps/rejected": -2.3634021282196045, "loss": 1.5536, "nll_loss": 1.5207420587539673, "rewards/accuracies": 1.0, "rewards/chosen": -0.153000146150589, "rewards/margins": 0.08334007114171982, "rewards/rejected": -0.23634019494056702, "step": 664 }, { "epoch": 1.0472813238770686, "grad_norm": 0.19998648762702942, "learning_rate": 2.189365862993787e-06, "log_odds_chosen": 1.0352458953857422, "log_odds_ratio": -0.31010156869888306, "logits/chosen": -0.36734890937805176, "logits/rejected": -1.0934194326400757, "logps/chosen": -1.5144354104995728, "logps/rejected": -2.396885871887207, "loss": 1.5516, "nll_loss": 1.5205800533294678, "rewards/accuracies": 1.0, "rewards/chosen": -0.15144354104995728, "rewards/margins": 0.08824507147073746, "rewards/rejected": -0.23968861997127533, "step": 665 }, { "epoch": 1.0488573680063042, "grad_norm": 0.20791448652744293, "learning_rate": 2.1838927504258354e-06, "log_odds_chosen": 1.0235421657562256, "log_odds_ratio": -0.3121985197067261, "logits/chosen": -0.40192878246307373, "logits/rejected": -1.1209992170333862, "logps/chosen": -1.4703030586242676, "logps/rejected": -2.3345675468444824, "loss": 1.501, "nll_loss": 1.4697712659835815, "rewards/accuracies": 1.0, "rewards/chosen": -0.147030308842659, "rewards/margins": 0.08642645180225372, "rewards/rejected": -0.2334567755460739, "step": 666 }, { "epoch": 1.0504334121355399, "grad_norm": 0.23455047607421875, "learning_rate": 2.178418248635988e-06, "log_odds_chosen": 0.9050512313842773, "log_odds_ratio": -0.34915196895599365, "logits/chosen": -0.42459243535995483, "logits/rejected": -0.9065911769866943, "logps/chosen": -1.537688970565796, "logps/rejected": -2.3091390132904053, "loss": 1.6026, "nll_loss": 1.567721962928772, "rewards/accuracies": 1.0, "rewards/chosen": -0.1537688970565796, "rewards/margins": 0.07714502513408661, "rewards/rejected": -0.2309139221906662, "step": 667 }, { "epoch": 1.0520094562647755, "grad_norm": 0.20443259179592133, "learning_rate": 2.1729423989814912e-06, "log_odds_chosen": 0.896743655204773, "log_odds_ratio": -0.34869539737701416, "logits/chosen": -0.4670064151287079, "logits/rejected": -1.0372049808502197, "logps/chosen": -1.4642890691757202, "logps/rejected": -2.216113567352295, "loss": 1.4891, "nll_loss": 1.4542030096054077, "rewards/accuracies": 1.0, "rewards/chosen": -0.1464288979768753, "rewards/margins": 0.0751824826002121, "rewards/rejected": -0.2216113805770874, "step": 668 }, { "epoch": 1.053585500394011, "grad_norm": 0.20364665985107422, "learning_rate": 2.167465242829774e-06, "log_odds_chosen": 0.9622216820716858, "log_odds_ratio": -0.33110612630844116, "logits/chosen": -0.45987939834594727, "logits/rejected": -1.053950548171997, "logps/chosen": -1.5159955024719238, "logps/rejected": -2.3358848094940186, "loss": 1.5487, "nll_loss": 1.515584945678711, "rewards/accuracies": 1.0, "rewards/chosen": -0.15159955620765686, "rewards/margins": 0.08198892325162888, "rewards/rejected": -0.23358847200870514, "step": 669 }, { "epoch": 1.0551615445232467, "grad_norm": 0.20203962922096252, "learning_rate": 2.1619868215581343e-06, "log_odds_chosen": 1.0443751811981201, "log_odds_ratio": -0.30652064085006714, "logits/chosen": -0.4601061940193176, "logits/rejected": -1.0360335111618042, "logps/chosen": -1.442453384399414, "logps/rejected": -2.3161885738372803, "loss": 1.4966, "nll_loss": 1.4659488201141357, "rewards/accuracies": 1.0, "rewards/chosen": -0.14424534142017365, "rewards/margins": 0.0873735174536705, "rewards/rejected": -0.23161886632442474, "step": 670 }, { "epoch": 1.0567375886524824, "grad_norm": 0.2019282579421997, "learning_rate": 2.1565071765534287e-06, "log_odds_chosen": 0.9411356449127197, "log_odds_ratio": -0.3352993130683899, "logits/chosen": -0.39819008111953735, "logits/rejected": -1.1410562992095947, "logps/chosen": -1.5189672708511353, "logps/rejected": -2.3168811798095703, "loss": 1.5487, "nll_loss": 1.515213131904602, "rewards/accuracies": 1.0, "rewards/chosen": -0.15189671516418457, "rewards/margins": 0.07979140430688858, "rewards/rejected": -0.23168812692165375, "step": 671 }, { "epoch": 1.058313632781718, "grad_norm": 0.20654666423797607, "learning_rate": 2.1510263492117574e-06, "log_odds_chosen": 0.7844110727310181, "log_odds_ratio": -0.380917489528656, "logits/chosen": -0.39263033866882324, "logits/rejected": -0.9207086563110352, "logps/chosen": -1.5072509050369263, "logps/rejected": -2.165677070617676, "loss": 1.5587, "nll_loss": 1.5206021070480347, "rewards/accuracies": 1.0, "rewards/chosen": -0.1507250815629959, "rewards/margins": 0.0658426284790039, "rewards/rejected": -0.216567724943161, "step": 672 }, { "epoch": 1.0598896769109536, "grad_norm": 0.19343319535255432, "learning_rate": 2.1455443809381535e-06, "log_odds_chosen": 1.131264090538025, "log_odds_ratio": -0.29176151752471924, "logits/chosen": -0.4694420397281647, "logits/rejected": -1.2475625276565552, "logps/chosen": -1.437273383140564, "logps/rejected": -2.392691135406494, "loss": 1.4736, "nll_loss": 1.44437837600708, "rewards/accuracies": 1.0, "rewards/chosen": -0.1437273472547531, "rewards/margins": 0.09554175287485123, "rewards/rejected": -0.23926910758018494, "step": 673 }, { "epoch": 1.0614657210401892, "grad_norm": 0.20887427031993866, "learning_rate": 2.1400613131462697e-06, "log_odds_chosen": 1.0344176292419434, "log_odds_ratio": -0.31027257442474365, "logits/chosen": -0.3992098271846771, "logits/rejected": -1.1831079721450806, "logps/chosen": -1.5804861783981323, "logps/rejected": -2.4737448692321777, "loss": 1.6, "nll_loss": 1.5689915418624878, "rewards/accuracies": 1.0, "rewards/chosen": -0.1580485999584198, "rewards/margins": 0.08932587504386902, "rewards/rejected": -0.24737448990345, "step": 674 }, { "epoch": 1.0630417651694248, "grad_norm": 0.20254096388816833, "learning_rate": 2.1345771872580628e-06, "log_odds_chosen": 1.0729328393936157, "log_odds_ratio": -0.31691911816596985, "logits/chosen": -0.44671961665153503, "logits/rejected": -1.0629268884658813, "logps/chosen": -1.4917783737182617, "logps/rejected": -2.413780689239502, "loss": 1.5157, "nll_loss": 1.4839630126953125, "rewards/accuracies": 1.0, "rewards/chosen": -0.14917783439159393, "rewards/margins": 0.09220023453235626, "rewards/rejected": -0.2413780689239502, "step": 675 }, { "epoch": 1.0646178092986605, "grad_norm": 0.22033196687698364, "learning_rate": 2.1290920447034846e-06, "log_odds_chosen": 0.9962490200996399, "log_odds_ratio": -0.3179359436035156, "logits/chosen": -0.4028359651565552, "logits/rejected": -1.0985596179962158, "logps/chosen": -1.46484375, "logps/rejected": -2.301034927368164, "loss": 1.502, "nll_loss": 1.4701924324035645, "rewards/accuracies": 1.0, "rewards/chosen": -0.146484375, "rewards/margins": 0.0836191400885582, "rewards/rejected": -0.2301035225391388, "step": 676 }, { "epoch": 1.066193853427896, "grad_norm": 0.2044062316417694, "learning_rate": 2.1236059269201683e-06, "log_odds_chosen": 0.8381149768829346, "log_odds_ratio": -0.36613181233406067, "logits/chosen": -0.4779806435108185, "logits/rejected": -1.0318169593811035, "logps/chosen": -1.5059196949005127, "logps/rejected": -2.211935520172119, "loss": 1.5274, "nll_loss": 1.4907872676849365, "rewards/accuracies": 1.0, "rewards/chosen": -0.15059196949005127, "rewards/margins": 0.07060159742832184, "rewards/rejected": -0.2211935669183731, "step": 677 }, { "epoch": 1.0677698975571317, "grad_norm": 0.2638196051120758, "learning_rate": 2.1181188753531124e-06, "log_odds_chosen": 0.9892770051956177, "log_odds_ratio": -0.32067570090293884, "logits/chosen": -0.5230172276496887, "logits/rejected": -1.1027065515518188, "logps/chosen": -1.6359044313430786, "logps/rejected": -2.4953019618988037, "loss": 1.6528, "nll_loss": 1.6207354068756104, "rewards/accuracies": 1.0, "rewards/chosen": -0.1635904610157013, "rewards/margins": 0.08593976497650146, "rewards/rejected": -0.24953021109104156, "step": 678 }, { "epoch": 1.0693459416863673, "grad_norm": 0.20186831057071686, "learning_rate": 2.112630931454371e-06, "log_odds_chosen": 1.0757050514221191, "log_odds_ratio": -0.2998811602592468, "logits/chosen": -0.42896658182144165, "logits/rejected": -1.0567528009414673, "logps/chosen": -1.4061732292175293, "logps/rejected": -2.3023366928100586, "loss": 1.4503, "nll_loss": 1.4202725887298584, "rewards/accuracies": 1.0, "rewards/chosen": -0.14061731100082397, "rewards/margins": 0.0896163359284401, "rewards/rejected": -0.23023366928100586, "step": 679 }, { "epoch": 1.070921985815603, "grad_norm": 0.19069170951843262, "learning_rate": 2.10714213668274e-06, "log_odds_chosen": 0.95136958360672, "log_odds_ratio": -0.32961148023605347, "logits/chosen": -0.4347802400588989, "logits/rejected": -0.9980989694595337, "logps/chosen": -1.453007459640503, "logps/rejected": -2.249767541885376, "loss": 1.5009, "nll_loss": 1.4679436683654785, "rewards/accuracies": 1.0, "rewards/chosen": -0.1453007459640503, "rewards/margins": 0.07967600226402283, "rewards/rejected": -0.22497676312923431, "step": 680 }, { "epoch": 1.0724980299448386, "grad_norm": 0.26213082671165466, "learning_rate": 2.1016525325034403e-06, "log_odds_chosen": 1.1897566318511963, "log_odds_ratio": -0.2947249412536621, "logits/chosen": -0.4331728518009186, "logits/rejected": -1.0856270790100098, "logps/chosen": -1.4654250144958496, "logps/rejected": -2.4852752685546875, "loss": 1.4944, "nll_loss": 1.4649194478988647, "rewards/accuracies": 1.0, "rewards/chosen": -0.1465425044298172, "rewards/margins": 0.10198501497507095, "rewards/rejected": -0.24852752685546875, "step": 681 }, { "epoch": 1.074074074074074, "grad_norm": 0.19874443113803864, "learning_rate": 2.096162160387811e-06, "log_odds_chosen": 0.8557685017585754, "log_odds_ratio": -0.3608901798725128, "logits/chosen": -0.3798831105232239, "logits/rejected": -1.0774297714233398, "logps/chosen": -1.3996827602386475, "logps/rejected": -2.094787836074829, "loss": 1.4587, "nll_loss": 1.422644019126892, "rewards/accuracies": 1.0, "rewards/chosen": -0.13996827602386475, "rewards/margins": 0.06951050460338593, "rewards/rejected": -0.20947878062725067, "step": 682 }, { "epoch": 1.0756501182033098, "grad_norm": 0.18426741659641266, "learning_rate": 2.09067106181299e-06, "log_odds_chosen": 1.0730984210968018, "log_odds_ratio": -0.30270951986312866, "logits/chosen": -0.4391212463378906, "logits/rejected": -1.0265476703643799, "logps/chosen": -1.34595787525177, "logps/rejected": -2.2293782234191895, "loss": 1.3906, "nll_loss": 1.360366940498352, "rewards/accuracies": 1.0, "rewards/chosen": -0.13459578156471252, "rewards/margins": 0.08834204822778702, "rewards/rejected": -0.22293783724308014, "step": 683 }, { "epoch": 1.0772261623325452, "grad_norm": 0.19141462445259094, "learning_rate": 2.0851792782616052e-06, "log_odds_chosen": 1.2230709791183472, "log_odds_ratio": -0.2712379992008209, "logits/chosen": -0.3514632284641266, "logits/rejected": -1.1503692865371704, "logps/chosen": -1.4395736455917358, "logps/rejected": -2.4825069904327393, "loss": 1.4782, "nll_loss": 1.451075792312622, "rewards/accuracies": 1.0, "rewards/chosen": -0.14395737648010254, "rewards/margins": 0.10429331660270691, "rewards/rejected": -0.24825069308280945, "step": 684 }, { "epoch": 1.078802206461781, "grad_norm": 0.20022742450237274, "learning_rate": 2.0796868512214576e-06, "log_odds_chosen": 1.0932321548461914, "log_odds_ratio": -0.30648404359817505, "logits/chosen": -0.5324531197547913, "logits/rejected": -1.0599697828292847, "logps/chosen": -1.432153582572937, "logps/rejected": -2.3527703285217285, "loss": 1.4856, "nll_loss": 1.4549826383590698, "rewards/accuracies": 1.0, "rewards/chosen": -0.1432153582572937, "rewards/margins": 0.09206165373325348, "rewards/rejected": -0.23527702689170837, "step": 685 }, { "epoch": 1.0803782505910164, "grad_norm": 0.19438046216964722, "learning_rate": 2.0741938221852103e-06, "log_odds_chosen": 1.0255165100097656, "log_odds_ratio": -0.31384575366973877, "logits/chosen": -0.4031597375869751, "logits/rejected": -1.1125953197479248, "logps/chosen": -1.4304150342941284, "logps/rejected": -2.29276704788208, "loss": 1.4772, "nll_loss": 1.4457709789276123, "rewards/accuracies": 1.0, "rewards/chosen": -0.1430414915084839, "rewards/margins": 0.08623519539833069, "rewards/rejected": -0.22927670180797577, "step": 686 }, { "epoch": 1.081954294720252, "grad_norm": 0.23257215321063995, "learning_rate": 2.0687002326500743e-06, "log_odds_chosen": 1.0908153057098389, "log_odds_ratio": -0.2946583032608032, "logits/chosen": -0.4947584867477417, "logits/rejected": -1.1951780319213867, "logps/chosen": -1.435703992843628, "logps/rejected": -2.3544812202453613, "loss": 1.47, "nll_loss": 1.4405105113983154, "rewards/accuracies": 1.0, "rewards/chosen": -0.1435704082250595, "rewards/margins": 0.09187769889831543, "rewards/rejected": -0.23544810712337494, "step": 687 }, { "epoch": 1.0835303388494877, "grad_norm": 0.25616809725761414, "learning_rate": 2.0632061241174942e-06, "log_odds_chosen": 1.0912388563156128, "log_odds_ratio": -0.30473601818084717, "logits/chosen": -0.48068806529045105, "logits/rejected": -1.1367725133895874, "logps/chosen": -1.4829750061035156, "logps/rejected": -2.412815809249878, "loss": 1.5099, "nll_loss": 1.479424238204956, "rewards/accuracies": 1.0, "rewards/chosen": -0.1482975035905838, "rewards/margins": 0.09298409521579742, "rewards/rejected": -0.24128161370754242, "step": 688 }, { "epoch": 1.0851063829787233, "grad_norm": 0.2075040638446808, "learning_rate": 2.0577115380928364e-06, "log_odds_chosen": 1.049268126487732, "log_odds_ratio": -0.30988502502441406, "logits/chosen": -0.4560019373893738, "logits/rejected": -1.0982509851455688, "logps/chosen": -1.4558194875717163, "logps/rejected": -2.345250129699707, "loss": 1.4804, "nll_loss": 1.449371099472046, "rewards/accuracies": 1.0, "rewards/chosen": -0.14558196067810059, "rewards/margins": 0.08894304931163788, "rewards/rejected": -0.23452502489089966, "step": 689 }, { "epoch": 1.086682427107959, "grad_norm": 0.20016005635261536, "learning_rate": 2.052216516085073e-06, "log_odds_chosen": 0.9001410007476807, "log_odds_ratio": -0.3552258610725403, "logits/chosen": -0.4590488374233246, "logits/rejected": -1.1044946908950806, "logps/chosen": -1.4092308282852173, "logps/rejected": -2.160625457763672, "loss": 1.4608, "nll_loss": 1.4252573251724243, "rewards/accuracies": 1.0, "rewards/chosen": -0.14092308282852173, "rewards/margins": 0.07513947784900665, "rewards/rejected": -0.21606257557868958, "step": 690 }, { "epoch": 1.0882584712371945, "grad_norm": 0.21634356677532196, "learning_rate": 2.0467210996064707e-06, "log_odds_chosen": 0.882314920425415, "log_odds_ratio": -0.35628363490104675, "logits/chosen": -0.5301987528800964, "logits/rejected": -0.7669265866279602, "logps/chosen": -1.4660329818725586, "logps/rejected": -2.2064380645751953, "loss": 1.4966, "nll_loss": 1.4610079526901245, "rewards/accuracies": 1.0, "rewards/chosen": -0.1466033011674881, "rewards/margins": 0.07404051721096039, "rewards/rejected": -0.2206438183784485, "step": 691 }, { "epoch": 1.0898345153664302, "grad_norm": 0.1884789764881134, "learning_rate": 2.0412253301722774e-06, "log_odds_chosen": 1.099226713180542, "log_odds_ratio": -0.2903672754764557, "logits/chosen": -0.4105028510093689, "logits/rejected": -1.2399115562438965, "logps/chosen": -1.5177373886108398, "logps/rejected": -2.458678960800171, "loss": 1.5506, "nll_loss": 1.5215407609939575, "rewards/accuracies": 1.0, "rewards/chosen": -0.15177372097969055, "rewards/margins": 0.0940941646695137, "rewards/rejected": -0.24586789309978485, "step": 692 }, { "epoch": 1.0914105594956658, "grad_norm": 0.19985778629779816, "learning_rate": 2.0357292493004044e-06, "log_odds_chosen": 1.05976402759552, "log_odds_ratio": -0.3027058243751526, "logits/chosen": -0.41368889808654785, "logits/rejected": -1.1152468919754028, "logps/chosen": -1.4381895065307617, "logps/rejected": -2.3304762840270996, "loss": 1.46, "nll_loss": 1.429701328277588, "rewards/accuracies": 1.0, "rewards/chosen": -0.1438189446926117, "rewards/margins": 0.08922870457172394, "rewards/rejected": -0.23304764926433563, "step": 693 }, { "epoch": 1.0929866036249014, "grad_norm": 0.19305787980556488, "learning_rate": 2.0302328985111193e-06, "log_odds_chosen": 1.1289278268814087, "log_odds_ratio": -0.3005900979042053, "logits/chosen": -0.35483166575431824, "logits/rejected": -1.0899856090545654, "logps/chosen": -1.399819016456604, "logps/rejected": -2.3541691303253174, "loss": 1.4489, "nll_loss": 1.4188140630722046, "rewards/accuracies": 1.0, "rewards/chosen": -0.13998189568519592, "rewards/margins": 0.09543503075838089, "rewards/rejected": -0.2354169338941574, "step": 694 }, { "epoch": 1.094562647754137, "grad_norm": 0.219266876578331, "learning_rate": 2.0247363193267256e-06, "log_odds_chosen": 1.0104608535766602, "log_odds_ratio": -0.3161815106868744, "logits/chosen": -0.3792533278465271, "logits/rejected": -1.1410810947418213, "logps/chosen": -1.5047600269317627, "logps/rejected": -2.364670515060425, "loss": 1.5311, "nll_loss": 1.4994986057281494, "rewards/accuracies": 1.0, "rewards/chosen": -0.15047602355480194, "rewards/margins": 0.08599104732275009, "rewards/rejected": -0.23646706342697144, "step": 695 }, { "epoch": 1.0961386918833727, "grad_norm": 0.21849985420703888, "learning_rate": 2.019239553271255e-06, "log_odds_chosen": 1.134856939315796, "log_odds_ratio": -0.28787127137184143, "logits/chosen": -0.4282324016094208, "logits/rejected": -1.1290134191513062, "logps/chosen": -1.443998098373413, "logps/rejected": -2.4043867588043213, "loss": 1.4818, "nll_loss": 1.4529818296432495, "rewards/accuracies": 1.0, "rewards/chosen": -0.14439982175827026, "rewards/margins": 0.09603887051343918, "rewards/rejected": -0.24043869972229004, "step": 696 }, { "epoch": 1.0977147360126083, "grad_norm": 0.20203694701194763, "learning_rate": 2.0137426418701488e-06, "log_odds_chosen": 1.1506050825119019, "log_odds_ratio": -0.287505567073822, "logits/chosen": -0.39062759280204773, "logits/rejected": -1.1472235918045044, "logps/chosen": -1.4531985521316528, "logps/rejected": -2.4368832111358643, "loss": 1.4893, "nll_loss": 1.4605103731155396, "rewards/accuracies": 1.0, "rewards/chosen": -0.145319864153862, "rewards/margins": 0.09836847335100174, "rewards/rejected": -0.24368833005428314, "step": 697 }, { "epoch": 1.099290780141844, "grad_norm": 0.1927991509437561, "learning_rate": 2.008245626649947e-06, "log_odds_chosen": 1.0805182456970215, "log_odds_ratio": -0.31285375356674194, "logits/chosen": -0.4425123333930969, "logits/rejected": -1.2885690927505493, "logps/chosen": -1.3922683000564575, "logps/rejected": -2.300920248031616, "loss": 1.4445, "nll_loss": 1.413204312324524, "rewards/accuracies": 1.0, "rewards/chosen": -0.13922683894634247, "rewards/margins": 0.09086517989635468, "rewards/rejected": -0.23009204864501953, "step": 698 }, { "epoch": 1.1008668242710795, "grad_norm": 0.21855147182941437, "learning_rate": 2.0027485491379746e-06, "log_odds_chosen": 1.1558557748794556, "log_odds_ratio": -0.28409045934677124, "logits/chosen": -0.587797224521637, "logits/rejected": -1.016740322113037, "logps/chosen": -1.5715876817703247, "logps/rejected": -2.569084644317627, "loss": 1.5936, "nll_loss": 1.5651780366897583, "rewards/accuracies": 1.0, "rewards/chosen": -0.1571587771177292, "rewards/margins": 0.09974966943264008, "rewards/rejected": -0.25690847635269165, "step": 699 }, { "epoch": 1.1024428684003151, "grad_norm": 0.21685844659805298, "learning_rate": 1.9972514508620256e-06, "log_odds_chosen": 1.043199896812439, "log_odds_ratio": -0.31403082609176636, "logits/chosen": -0.4411376118659973, "logits/rejected": -1.0698708295822144, "logps/chosen": -1.4290989637374878, "logps/rejected": -2.3050222396850586, "loss": 1.4704, "nll_loss": 1.4390466213226318, "rewards/accuracies": 1.0, "rewards/chosen": -0.1429099142551422, "rewards/margins": 0.08759227395057678, "rewards/rejected": -0.2305022031068802, "step": 700 }, { "epoch": 1.1040189125295508, "grad_norm": 0.19890421628952026, "learning_rate": 1.991754373350053e-06, "log_odds_chosen": 0.8240249156951904, "log_odds_ratio": -0.37228381633758545, "logits/chosen": -0.4788777828216553, "logits/rejected": -1.1789363622665405, "logps/chosen": -1.4875948429107666, "logps/rejected": -2.1778481006622314, "loss": 1.5327, "nll_loss": 1.495429515838623, "rewards/accuracies": 1.0, "rewards/chosen": -0.14875948429107666, "rewards/margins": 0.06902535259723663, "rewards/rejected": -0.2177848368883133, "step": 701 }, { "epoch": 1.1055949566587864, "grad_norm": 0.2003931999206543, "learning_rate": 1.986257358129852e-06, "log_odds_chosen": 1.0024278163909912, "log_odds_ratio": -0.31901293992996216, "logits/chosen": -0.4613266885280609, "logits/rejected": -1.0688016414642334, "logps/chosen": -1.387376070022583, "logps/rejected": -2.21213960647583, "loss": 1.4305, "nll_loss": 1.3986289501190186, "rewards/accuracies": 1.0, "rewards/chosen": -0.13873758912086487, "rewards/margins": 0.08247636258602142, "rewards/rejected": -0.22121396660804749, "step": 702 }, { "epoch": 1.107171000788022, "grad_norm": 0.2786200940608978, "learning_rate": 1.9807604467287453e-06, "log_odds_chosen": 0.9921194911003113, "log_odds_ratio": -0.32200291752815247, "logits/chosen": -0.5436258316040039, "logits/rejected": -1.0129154920578003, "logps/chosen": -1.506229281425476, "logps/rejected": -2.3450465202331543, "loss": 1.5304, "nll_loss": 1.4981842041015625, "rewards/accuracies": 1.0, "rewards/chosen": -0.15062293410301208, "rewards/margins": 0.08388172090053558, "rewards/rejected": -0.23450465500354767, "step": 703 }, { "epoch": 1.1087470449172576, "grad_norm": 0.2078019082546234, "learning_rate": 1.9752636806732742e-06, "log_odds_chosen": 1.015597939491272, "log_odds_ratio": -0.3124316334724426, "logits/chosen": -0.4127010703086853, "logits/rejected": -1.0869649648666382, "logps/chosen": -1.423706293106079, "logps/rejected": -2.272918701171875, "loss": 1.4518, "nll_loss": 1.4205102920532227, "rewards/accuracies": 1.0, "rewards/chosen": -0.14237064123153687, "rewards/margins": 0.08492126315832138, "rewards/rejected": -0.22729191184043884, "step": 704 }, { "epoch": 1.1103230890464932, "grad_norm": 0.21454213559627533, "learning_rate": 1.9697671014888805e-06, "log_odds_chosen": 1.0594077110290527, "log_odds_ratio": -0.30437707901000977, "logits/chosen": -0.5677601099014282, "logits/rejected": -1.0766658782958984, "logps/chosen": -1.5006600618362427, "logps/rejected": -2.4016785621643066, "loss": 1.5297, "nll_loss": 1.4992733001708984, "rewards/accuracies": 1.0, "rewards/chosen": -0.15006601810455322, "rewards/margins": 0.09010183066129684, "rewards/rejected": -0.24016784131526947, "step": 705 }, { "epoch": 1.1118991331757289, "grad_norm": 0.20227719843387604, "learning_rate": 1.9642707506995954e-06, "log_odds_chosen": 1.2080175876617432, "log_odds_ratio": -0.2692100405693054, "logits/chosen": -0.4823134243488312, "logits/rejected": -1.1231062412261963, "logps/chosen": -1.444769263267517, "logps/rejected": -2.474184513092041, "loss": 1.4765, "nll_loss": 1.449571132659912, "rewards/accuracies": 1.0, "rewards/chosen": -0.14447692036628723, "rewards/margins": 0.10294152796268463, "rewards/rejected": -0.24741844832897186, "step": 706 }, { "epoch": 1.1134751773049645, "grad_norm": 0.19623802602291107, "learning_rate": 1.9587746698277232e-06, "log_odds_chosen": 0.9517149925231934, "log_odds_ratio": -0.3393785059452057, "logits/chosen": -0.49622777104377747, "logits/rejected": -1.05003023147583, "logps/chosen": -1.4486303329467773, "logps/rejected": -2.248914957046509, "loss": 1.4823, "nll_loss": 1.4483492374420166, "rewards/accuracies": 1.0, "rewards/chosen": -0.1448630392551422, "rewards/margins": 0.0800284594297409, "rewards/rejected": -0.22489148378372192, "step": 707 }, { "epoch": 1.1150512214342, "grad_norm": 0.22997026145458221, "learning_rate": 1.953278900393529e-06, "log_odds_chosen": 1.0142403841018677, "log_odds_ratio": -0.323846697807312, "logits/chosen": -0.5470091104507446, "logits/rejected": -0.9484399557113647, "logps/chosen": -1.489638328552246, "logps/rejected": -2.3507144451141357, "loss": 1.4994, "nll_loss": 1.4669960737228394, "rewards/accuracies": 1.0, "rewards/chosen": -0.14896385371685028, "rewards/margins": 0.08610758185386658, "rewards/rejected": -0.23507143557071686, "step": 708 }, { "epoch": 1.1166272655634357, "grad_norm": 0.20863686501979828, "learning_rate": 1.9477834839149274e-06, "log_odds_chosen": 1.0503376722335815, "log_odds_ratio": -0.3066602349281311, "logits/chosen": -0.517970621585846, "logits/rejected": -1.1250596046447754, "logps/chosen": -1.39430570602417, "logps/rejected": -2.270462989807129, "loss": 1.4514, "nll_loss": 1.4207215309143066, "rewards/accuracies": 1.0, "rewards/chosen": -0.13943056762218475, "rewards/margins": 0.08761575818061829, "rewards/rejected": -0.22704632580280304, "step": 709 }, { "epoch": 1.1182033096926713, "grad_norm": 0.2264174073934555, "learning_rate": 1.942288461907164e-06, "log_odds_chosen": 1.0601511001586914, "log_odds_ratio": -0.31187596917152405, "logits/chosen": -0.513648509979248, "logits/rejected": -1.0661511421203613, "logps/chosen": -1.3969511985778809, "logps/rejected": -2.2883617877960205, "loss": 1.4501, "nll_loss": 1.418950080871582, "rewards/accuracies": 1.0, "rewards/chosen": -0.13969512283802032, "rewards/margins": 0.08914104849100113, "rewards/rejected": -0.22883617877960205, "step": 710 }, { "epoch": 1.119779353821907, "grad_norm": 0.2070910483598709, "learning_rate": 1.936793875882505e-06, "log_odds_chosen": 1.0743812322616577, "log_odds_ratio": -0.29790014028549194, "logits/chosen": -0.3282049596309662, "logits/rejected": -1.1131049394607544, "logps/chosen": -1.5509098768234253, "logps/rejected": -2.4752249717712402, "loss": 1.5966, "nll_loss": 1.566826343536377, "rewards/accuracies": 1.0, "rewards/chosen": -0.15509100258350372, "rewards/margins": 0.09243150800466537, "rewards/rejected": -0.2475224882364273, "step": 711 }, { "epoch": 1.1213553979511426, "grad_norm": 0.2101864367723465, "learning_rate": 1.931299767349926e-06, "log_odds_chosen": 1.0785924196243286, "log_odds_ratio": -0.303970605134964, "logits/chosen": -0.5170964598655701, "logits/rejected": -0.9204715490341187, "logps/chosen": -1.384275197982788, "logps/rejected": -2.27579927444458, "loss": 1.4282, "nll_loss": 1.3977988958358765, "rewards/accuracies": 1.0, "rewards/chosen": -0.1384275257587433, "rewards/margins": 0.08915239572525024, "rewards/rejected": -0.22757990658283234, "step": 712 }, { "epoch": 1.1229314420803782, "grad_norm": 0.23512229323387146, "learning_rate": 1.9258061778147895e-06, "log_odds_chosen": 0.9895341396331787, "log_odds_ratio": -0.32403087615966797, "logits/chosen": -0.5285176038742065, "logits/rejected": -1.1997897624969482, "logps/chosen": -1.490332007408142, "logps/rejected": -2.329256534576416, "loss": 1.5276, "nll_loss": 1.4951735734939575, "rewards/accuracies": 1.0, "rewards/chosen": -0.14903320372104645, "rewards/margins": 0.08389242738485336, "rewards/rejected": -0.2329256534576416, "step": 713 }, { "epoch": 1.1245074862096138, "grad_norm": 0.20936742424964905, "learning_rate": 1.9203131487785426e-06, "log_odds_chosen": 1.2472249269485474, "log_odds_ratio": -0.2569371461868286, "logits/chosen": -0.6314047574996948, "logits/rejected": -1.001774549484253, "logps/chosen": -1.3764400482177734, "logps/rejected": -2.4243812561035156, "loss": 1.4233, "nll_loss": 1.3976125717163086, "rewards/accuracies": 1.0, "rewards/chosen": -0.13764400780200958, "rewards/margins": 0.10479413717985153, "rewards/rejected": -0.24243813753128052, "step": 714 }, { "epoch": 1.1260835303388494, "grad_norm": 0.21361640095710754, "learning_rate": 1.9148207217383946e-06, "log_odds_chosen": 1.3761745691299438, "log_odds_ratio": -0.23102892935276031, "logits/chosen": -0.4612743854522705, "logits/rejected": -1.2388428449630737, "logps/chosen": -1.4838101863861084, "logps/rejected": -2.6757049560546875, "loss": 1.4935, "nll_loss": 1.470429539680481, "rewards/accuracies": 1.0, "rewards/chosen": -0.14838102459907532, "rewards/margins": 0.11918946355581284, "rewards/rejected": -0.26757049560546875, "step": 715 }, { "epoch": 1.127659574468085, "grad_norm": 0.22327671945095062, "learning_rate": 1.9093289381870094e-06, "log_odds_chosen": 0.8977928757667542, "log_odds_ratio": -0.3478083610534668, "logits/chosen": -0.48920726776123047, "logits/rejected": -1.1055833101272583, "logps/chosen": -1.480262279510498, "logps/rejected": -2.235654592514038, "loss": 1.5193, "nll_loss": 1.4845046997070312, "rewards/accuracies": 1.0, "rewards/chosen": -0.148026242852211, "rewards/margins": 0.07553920894861221, "rewards/rejected": -0.22356544435024261, "step": 716 }, { "epoch": 1.1292356185973207, "grad_norm": 0.2112058401107788, "learning_rate": 1.9038378396121895e-06, "log_odds_chosen": 1.112621545791626, "log_odds_ratio": -0.289861798286438, "logits/chosen": -0.3071185350418091, "logits/rejected": -1.344996690750122, "logps/chosen": -1.5218966007232666, "logps/rejected": -2.479416847229004, "loss": 1.5515, "nll_loss": 1.5225107669830322, "rewards/accuracies": 1.0, "rewards/chosen": -0.15218965709209442, "rewards/margins": 0.09575201570987701, "rewards/rejected": -0.24794165790081024, "step": 717 }, { "epoch": 1.1308116627265563, "grad_norm": 0.193327859044075, "learning_rate": 1.8983474674965597e-06, "log_odds_chosen": 1.0291838645935059, "log_odds_ratio": -0.3200269937515259, "logits/chosen": -0.4998539686203003, "logits/rejected": -1.2129194736480713, "logps/chosen": -1.4524807929992676, "logps/rejected": -2.3183648586273193, "loss": 1.4889, "nll_loss": 1.4568555355072021, "rewards/accuracies": 1.0, "rewards/chosen": -0.14524807035923004, "rewards/margins": 0.08658841997385025, "rewards/rejected": -0.2318364977836609, "step": 718 }, { "epoch": 1.132387706855792, "grad_norm": 0.20588983595371246, "learning_rate": 1.8928578633172603e-06, "log_odds_chosen": 1.2142523527145386, "log_odds_ratio": -0.2634655237197876, "logits/chosen": -0.4686565399169922, "logits/rejected": -1.2642706632614136, "logps/chosen": -1.4907618761062622, "logps/rejected": -2.53181791305542, "loss": 1.5268, "nll_loss": 1.5004228353500366, "rewards/accuracies": 1.0, "rewards/chosen": -0.1490761786699295, "rewards/margins": 0.1041056290268898, "rewards/rejected": -0.2531818151473999, "step": 719 }, { "epoch": 1.1339637509850276, "grad_norm": 0.20332835614681244, "learning_rate": 1.8873690685456283e-06, "log_odds_chosen": 1.136569619178772, "log_odds_ratio": -0.2866378426551819, "logits/chosen": -0.5068315267562866, "logits/rejected": -1.1299577951431274, "logps/chosen": -1.4339442253112793, "logps/rejected": -2.394634246826172, "loss": 1.466, "nll_loss": 1.437342882156372, "rewards/accuracies": 1.0, "rewards/chosen": -0.14339442551136017, "rewards/margins": 0.09606900066137314, "rewards/rejected": -0.2394634336233139, "step": 720 }, { "epoch": 1.1355397951142632, "grad_norm": 0.19959917664527893, "learning_rate": 1.8818811246468872e-06, "log_odds_chosen": 1.2784744501113892, "log_odds_ratio": -0.2632465660572052, "logits/chosen": -0.4899711608886719, "logits/rejected": -1.2248135805130005, "logps/chosen": -1.4192323684692383, "logps/rejected": -2.509288787841797, "loss": 1.4527, "nll_loss": 1.4263888597488403, "rewards/accuracies": 1.0, "rewards/chosen": -0.14192324876785278, "rewards/margins": 0.1090056374669075, "rewards/rejected": -0.2509288787841797, "step": 721 }, { "epoch": 1.1371158392434988, "grad_norm": 0.22737723588943481, "learning_rate": 1.876394073079832e-06, "log_odds_chosen": 1.2036951780319214, "log_odds_ratio": -0.2746358811855316, "logits/chosen": -0.5705820322036743, "logits/rejected": -1.0677545070648193, "logps/chosen": -1.5508770942687988, "logps/rejected": -2.5946004390716553, "loss": 1.5707, "nll_loss": 1.5432214736938477, "rewards/accuracies": 1.0, "rewards/chosen": -0.15508772432804108, "rewards/margins": 0.10437234491109848, "rewards/rejected": -0.25946006178855896, "step": 722 }, { "epoch": 1.1386918833727344, "grad_norm": 0.2092571258544922, "learning_rate": 1.8709079552965152e-06, "log_odds_chosen": 1.1382397413253784, "log_odds_ratio": -0.2936469316482544, "logits/chosen": -0.4636017382144928, "logits/rejected": -1.1149533987045288, "logps/chosen": -1.4305751323699951, "logps/rejected": -2.392271041870117, "loss": 1.465, "nll_loss": 1.4355995655059814, "rewards/accuracies": 1.0, "rewards/chosen": -0.14305752515792847, "rewards/margins": 0.09616957604885101, "rewards/rejected": -0.23922710120677948, "step": 723 }, { "epoch": 1.14026792750197, "grad_norm": 0.2043333351612091, "learning_rate": 1.8654228127419375e-06, "log_odds_chosen": 0.9879157543182373, "log_odds_ratio": -0.3219069838523865, "logits/chosen": -0.5522278547286987, "logits/rejected": -1.0981004238128662, "logps/chosen": -1.4390687942504883, "logps/rejected": -2.268620014190674, "loss": 1.4734, "nll_loss": 1.4411959648132324, "rewards/accuracies": 1.0, "rewards/chosen": -0.1439068764448166, "rewards/margins": 0.08295512199401855, "rewards/rejected": -0.22686201333999634, "step": 724 }, { "epoch": 1.1418439716312057, "grad_norm": 0.1940774768590927, "learning_rate": 1.8599386868537306e-06, "log_odds_chosen": 1.2275222539901733, "log_odds_ratio": -0.2604098916053772, "logits/chosen": -0.4882233738899231, "logits/rejected": -1.1863940954208374, "logps/chosen": -1.324068307876587, "logps/rejected": -2.340636968612671, "loss": 1.3729, "nll_loss": 1.3468197584152222, "rewards/accuracies": 1.0, "rewards/chosen": -0.1324068307876587, "rewards/margins": 0.10165688395500183, "rewards/rejected": -0.23406372964382172, "step": 725 }, { "epoch": 1.1434200157604413, "grad_norm": 0.21520903706550598, "learning_rate": 1.8544556190618464e-06, "log_odds_chosen": 1.1299086809158325, "log_odds_ratio": -0.28232723474502563, "logits/chosen": -0.5628550052642822, "logits/rejected": -1.062525987625122, "logps/chosen": -1.5172274112701416, "logps/rejected": -2.484386444091797, "loss": 1.539, "nll_loss": 1.5107759237289429, "rewards/accuracies": 1.0, "rewards/chosen": -0.1517227441072464, "rewards/margins": 0.09671590477228165, "rewards/rejected": -0.24843862652778625, "step": 726 }, { "epoch": 1.144996059889677, "grad_norm": 0.21658599376678467, "learning_rate": 1.8489736507882424e-06, "log_odds_chosen": 1.3678784370422363, "log_odds_ratio": -0.24981451034545898, "logits/chosen": -0.4981183409690857, "logits/rejected": -1.1440948247909546, "logps/chosen": -1.4253902435302734, "logps/rejected": -2.603610038757324, "loss": 1.451, "nll_loss": 1.4260121583938599, "rewards/accuracies": 1.0, "rewards/chosen": -0.14253900945186615, "rewards/margins": 0.11782196164131165, "rewards/rejected": -0.2603609561920166, "step": 727 }, { "epoch": 1.1465721040189125, "grad_norm": 0.19922254979610443, "learning_rate": 1.8434928234465716e-06, "log_odds_chosen": 1.179607629776001, "log_odds_ratio": -0.2756151854991913, "logits/chosen": -0.5783892869949341, "logits/rejected": -1.1218026876449585, "logps/chosen": -1.3971948623657227, "logps/rejected": -2.3909451961517334, "loss": 1.4376, "nll_loss": 1.4100377559661865, "rewards/accuracies": 1.0, "rewards/chosen": -0.13971950113773346, "rewards/margins": 0.09937502443790436, "rewards/rejected": -0.23909451067447662, "step": 728 }, { "epoch": 1.1481481481481481, "grad_norm": 0.23259800672531128, "learning_rate": 1.8380131784418657e-06, "log_odds_chosen": 1.2003117799758911, "log_odds_ratio": -0.28130030632019043, "logits/chosen": -0.4541264772415161, "logits/rejected": -1.1483299732208252, "logps/chosen": -1.4475376605987549, "logps/rejected": -2.474031925201416, "loss": 1.4884, "nll_loss": 1.4602681398391724, "rewards/accuracies": 1.0, "rewards/chosen": -0.14475378394126892, "rewards/margins": 0.10264939814805984, "rewards/rejected": -0.24740318953990936, "step": 729 }, { "epoch": 1.1497241922773838, "grad_norm": 0.20956143736839294, "learning_rate": 1.8325347571702259e-06, "log_odds_chosen": 1.171805500984192, "log_odds_ratio": -0.28054097294807434, "logits/chosen": -0.5485932230949402, "logits/rejected": -1.2339439392089844, "logps/chosen": -1.5010040998458862, "logps/rejected": -2.508570909500122, "loss": 1.5144, "nll_loss": 1.4863710403442383, "rewards/accuracies": 1.0, "rewards/chosen": -0.15010042488574982, "rewards/margins": 0.10075666755437851, "rewards/rejected": -0.25085708498954773, "step": 730 }, { "epoch": 1.1513002364066194, "grad_norm": 0.22823800146579742, "learning_rate": 1.827057601018509e-06, "log_odds_chosen": 1.2394187450408936, "log_odds_ratio": -0.26493823528289795, "logits/chosen": -0.5842269062995911, "logits/rejected": -1.14989173412323, "logps/chosen": -1.3878412246704102, "logps/rejected": -2.428112506866455, "loss": 1.4369, "nll_loss": 1.410446286201477, "rewards/accuracies": 1.0, "rewards/chosen": -0.13878411054611206, "rewards/margins": 0.104027159512043, "rewards/rejected": -0.24281126260757446, "step": 731 }, { "epoch": 1.152876280535855, "grad_norm": 0.21540215611457825, "learning_rate": 1.8215817513640122e-06, "log_odds_chosen": 1.2438088655471802, "log_odds_ratio": -0.26774293184280396, "logits/chosen": -0.500880777835846, "logits/rejected": -1.1993787288665771, "logps/chosen": -1.436571478843689, "logps/rejected": -2.493516445159912, "loss": 1.4606, "nll_loss": 1.4338470697402954, "rewards/accuracies": 1.0, "rewards/chosen": -0.1436571627855301, "rewards/margins": 0.1056944876909256, "rewards/rejected": -0.2493516206741333, "step": 732 }, { "epoch": 1.1544523246650906, "grad_norm": 0.20388521254062653, "learning_rate": 1.8161072495741647e-06, "log_odds_chosen": 1.2222822904586792, "log_odds_ratio": -0.2652607560157776, "logits/chosen": -0.47571003437042236, "logits/rejected": -1.2325369119644165, "logps/chosen": -1.397996187210083, "logps/rejected": -2.426295280456543, "loss": 1.4368, "nll_loss": 1.4102247953414917, "rewards/accuracies": 1.0, "rewards/chosen": -0.13979962468147278, "rewards/margins": 0.10282988846302032, "rewards/rejected": -0.2426295131444931, "step": 733 }, { "epoch": 1.1560283687943262, "grad_norm": 0.21977047622203827, "learning_rate": 1.810634137006213e-06, "log_odds_chosen": 0.8912585377693176, "log_odds_ratio": -0.3467335104942322, "logits/chosen": -0.572501003742218, "logits/rejected": -0.9267016649246216, "logps/chosen": -1.4890767335891724, "logps/rejected": -2.238137722015381, "loss": 1.5308, "nll_loss": 1.4961200952529907, "rewards/accuracies": 1.0, "rewards/chosen": -0.14890767633914948, "rewards/margins": 0.07490610331296921, "rewards/rejected": -0.22381377220153809, "step": 734 }, { "epoch": 1.1576044129235619, "grad_norm": 0.2076544612646103, "learning_rate": 1.805162455006908e-06, "log_odds_chosen": 1.0436756610870361, "log_odds_ratio": -0.3102042078971863, "logits/chosen": -0.4995110034942627, "logits/rejected": -0.9959861040115356, "logps/chosen": -1.496568202972412, "logps/rejected": -2.384639263153076, "loss": 1.5105, "nll_loss": 1.4794573783874512, "rewards/accuracies": 1.0, "rewards/chosen": -0.14965681731700897, "rewards/margins": 0.0888071283698082, "rewards/rejected": -0.23846393823623657, "step": 735 }, { "epoch": 1.1591804570527975, "grad_norm": 0.20805534720420837, "learning_rate": 1.799692244912195e-06, "log_odds_chosen": 1.0122272968292236, "log_odds_ratio": -0.3370021879673004, "logits/chosen": -0.48991894721984863, "logits/rejected": -0.955334484577179, "logps/chosen": -1.4916082620620728, "logps/rejected": -2.357954502105713, "loss": 1.5354, "nll_loss": 1.5016999244689941, "rewards/accuracies": 1.0, "rewards/chosen": -0.14916083216667175, "rewards/margins": 0.08663463592529297, "rewards/rejected": -0.23579545319080353, "step": 736 }, { "epoch": 1.160756501182033, "grad_norm": 0.2044851928949356, "learning_rate": 1.7942235480468964e-06, "log_odds_chosen": 1.1048537492752075, "log_odds_ratio": -0.2972021698951721, "logits/chosen": -0.5370530486106873, "logits/rejected": -1.0902405977249146, "logps/chosen": -1.3856706619262695, "logps/rejected": -2.308851957321167, "loss": 1.426, "nll_loss": 1.3962414264678955, "rewards/accuracies": 1.0, "rewards/chosen": -0.13856706023216248, "rewards/margins": 0.09231814742088318, "rewards/rejected": -0.23088520765304565, "step": 737 }, { "epoch": 1.1623325453112687, "grad_norm": 0.20801378786563873, "learning_rate": 1.7887564057244062e-06, "log_odds_chosen": 1.152199625968933, "log_odds_ratio": -0.3021039664745331, "logits/chosen": -0.4954368472099304, "logits/rejected": -1.163634181022644, "logps/chosen": -1.3468817472457886, "logps/rejected": -2.3115694522857666, "loss": 1.3899, "nll_loss": 1.3597118854522705, "rewards/accuracies": 1.0, "rewards/chosen": -0.13468816876411438, "rewards/margins": 0.09646876901388168, "rewards/rejected": -0.23115693032741547, "step": 738 }, { "epoch": 1.1639085894405043, "grad_norm": 0.2156391441822052, "learning_rate": 1.7832908592463732e-06, "log_odds_chosen": 1.0522640943527222, "log_odds_ratio": -0.30799242854118347, "logits/chosen": -0.6145192384719849, "logits/rejected": -1.1441192626953125, "logps/chosen": -1.4775124788284302, "logps/rejected": -2.3586699962615967, "loss": 1.5172, "nll_loss": 1.4864474534988403, "rewards/accuracies": 1.0, "rewards/chosen": -0.14775125682353973, "rewards/margins": 0.08811572194099426, "rewards/rejected": -0.235866978764534, "step": 739 }, { "epoch": 1.16548463356974, "grad_norm": 0.2290491759777069, "learning_rate": 1.7778269499023908e-06, "log_odds_chosen": 1.137778401374817, "log_odds_ratio": -0.2878759205341339, "logits/chosen": -0.5140742063522339, "logits/rejected": -1.1521577835083008, "logps/chosen": -1.5119346380233765, "logps/rejected": -2.4900472164154053, "loss": 1.5397, "nll_loss": 1.5108823776245117, "rewards/accuracies": 1.0, "rewards/chosen": -0.15119343996047974, "rewards/margins": 0.097811259329319, "rewards/rejected": -0.24900470674037933, "step": 740 }, { "epoch": 1.1670606776989756, "grad_norm": 0.23755483329296112, "learning_rate": 1.7723647189696843e-06, "log_odds_chosen": 1.0798280239105225, "log_odds_ratio": -0.30177611112594604, "logits/chosen": -0.5057775974273682, "logits/rejected": -0.8975050449371338, "logps/chosen": -1.3979859352111816, "logps/rejected": -2.2984511852264404, "loss": 1.4494, "nll_loss": 1.4192428588867188, "rewards/accuracies": 1.0, "rewards/chosen": -0.1397985816001892, "rewards/margins": 0.09004653990268707, "rewards/rejected": -0.22984512150287628, "step": 741 }, { "epoch": 1.1686367218282112, "grad_norm": 0.21693411469459534, "learning_rate": 1.7669042077127982e-06, "log_odds_chosen": 1.2233357429504395, "log_odds_ratio": -0.2739095091819763, "logits/chosen": -0.490731805562973, "logits/rejected": -1.0928269624710083, "logps/chosen": -1.4665374755859375, "logps/rejected": -2.5132076740264893, "loss": 1.4897, "nll_loss": 1.4623292684555054, "rewards/accuracies": 1.0, "rewards/chosen": -0.14665377140045166, "rewards/margins": 0.10466702282428741, "rewards/rejected": -0.2513207793235779, "step": 742 }, { "epoch": 1.1702127659574468, "grad_norm": 0.19443337619304657, "learning_rate": 1.7614454573832886e-06, "log_odds_chosen": 1.25938081741333, "log_odds_ratio": -0.2549465298652649, "logits/chosen": -0.6110751628875732, "logits/rejected": -1.1547801494598389, "logps/chosen": -1.344724178314209, "logps/rejected": -2.3946738243103027, "loss": 1.3934, "nll_loss": 1.3679234981536865, "rewards/accuracies": 1.0, "rewards/chosen": -0.13447242975234985, "rewards/margins": 0.10499494522809982, "rewards/rejected": -0.23946736752986908, "step": 743 }, { "epoch": 1.1717888100866825, "grad_norm": 0.23334260284900665, "learning_rate": 1.7559885092194058e-06, "log_odds_chosen": 0.9362188577651978, "log_odds_ratio": -0.33437472581863403, "logits/chosen": -0.47565728425979614, "logits/rejected": -1.1559867858886719, "logps/chosen": -1.4956122636795044, "logps/rejected": -2.285295009613037, "loss": 1.5374, "nll_loss": 1.5039212703704834, "rewards/accuracies": 1.0, "rewards/chosen": -0.14956121146678925, "rewards/margins": 0.07896829396486282, "rewards/rejected": -0.22852949798107147, "step": 744 }, { "epoch": 1.173364854215918, "grad_norm": 0.20293299853801727, "learning_rate": 1.7505334044457871e-06, "log_odds_chosen": 1.15781831741333, "log_odds_ratio": -0.2767452895641327, "logits/chosen": -0.5087063312530518, "logits/rejected": -1.1647037267684937, "logps/chosen": -1.384293794631958, "logps/rejected": -2.3487789630889893, "loss": 1.4157, "nll_loss": 1.3880729675292969, "rewards/accuracies": 1.0, "rewards/chosen": -0.13842938840389252, "rewards/margins": 0.09644850343465805, "rewards/rejected": -0.23487789928913116, "step": 745 }, { "epoch": 1.1749408983451537, "grad_norm": 0.19345815479755402, "learning_rate": 1.7450801842731443e-06, "log_odds_chosen": 1.1751313209533691, "log_odds_ratio": -0.27271798253059387, "logits/chosen": -0.4331468343734741, "logits/rejected": -1.0915415287017822, "logps/chosen": -1.4927812814712524, "logps/rejected": -2.5001838207244873, "loss": 1.5216, "nll_loss": 1.4943312406539917, "rewards/accuracies": 1.0, "rewards/chosen": -0.14927814900875092, "rewards/margins": 0.10074023902416229, "rewards/rejected": -0.2500183880329132, "step": 746 }, { "epoch": 1.1765169424743893, "grad_norm": 0.20561347901821136, "learning_rate": 1.7396288898979497e-06, "log_odds_chosen": 1.3191629648208618, "log_odds_ratio": -0.2436976134777069, "logits/chosen": -0.5448561906814575, "logits/rejected": -1.2705950736999512, "logps/chosen": -1.3848819732666016, "logps/rejected": -2.502424955368042, "loss": 1.4157, "nll_loss": 1.3912888765335083, "rewards/accuracies": 1.0, "rewards/chosen": -0.13848818838596344, "rewards/margins": 0.11175429075956345, "rewards/rejected": -0.2502424716949463, "step": 747 }, { "epoch": 1.178092986603625, "grad_norm": 0.21567635238170624, "learning_rate": 1.7341795625021292e-06, "log_odds_chosen": 1.265607476234436, "log_odds_ratio": -0.2554873824119568, "logits/chosen": -0.5339645147323608, "logits/rejected": -1.3403193950653076, "logps/chosen": -1.4224684238433838, "logps/rejected": -2.4986305236816406, "loss": 1.46, "nll_loss": 1.4344130754470825, "rewards/accuracies": 1.0, "rewards/chosen": -0.14224685728549957, "rewards/margins": 0.10761621594429016, "rewards/rejected": -0.24986307322978973, "step": 748 }, { "epoch": 1.1796690307328606, "grad_norm": 0.22615347802639008, "learning_rate": 1.7287322432527485e-06, "log_odds_chosen": 1.1519222259521484, "log_odds_ratio": -0.28157109022140503, "logits/chosen": -0.5418803691864014, "logits/rejected": -1.2943555116653442, "logps/chosen": -1.5079830884933472, "logps/rejected": -2.496225595474243, "loss": 1.5326, "nll_loss": 1.5044697523117065, "rewards/accuracies": 1.0, "rewards/chosen": -0.15079830586910248, "rewards/margins": 0.09882427752017975, "rewards/rejected": -0.24962255358695984, "step": 749 }, { "epoch": 1.1812450748620962, "grad_norm": 0.2121811807155609, "learning_rate": 1.7232869733017038e-06, "log_odds_chosen": 1.1103050708770752, "log_odds_ratio": -0.2931010127067566, "logits/chosen": -0.5359842777252197, "logits/rejected": -1.2005261182785034, "logps/chosen": -1.460727334022522, "logps/rejected": -2.402257204055786, "loss": 1.4867, "nll_loss": 1.457384705543518, "rewards/accuracies": 1.0, "rewards/chosen": -0.14607274532318115, "rewards/margins": 0.09415297955274582, "rewards/rejected": -0.24022571742534637, "step": 750 }, { "epoch": 1.1828211189913318, "grad_norm": 0.22276343405246735, "learning_rate": 1.7178437937854065e-06, "log_odds_chosen": 1.445096731185913, "log_odds_ratio": -0.2155788391828537, "logits/chosen": -0.6294986009597778, "logits/rejected": -1.3958948850631714, "logps/chosen": -1.5007898807525635, "logps/rejected": -2.758064031600952, "loss": 1.5205, "nll_loss": 1.4989529848098755, "rewards/accuracies": 1.0, "rewards/chosen": -0.15007898211479187, "rewards/margins": 0.12572741508483887, "rewards/rejected": -0.2758064270019531, "step": 751 }, { "epoch": 1.1843971631205674, "grad_norm": 0.2451620101928711, "learning_rate": 1.7124027458244794e-06, "log_odds_chosen": 1.2201664447784424, "log_odds_ratio": -0.26983270049095154, "logits/chosen": -0.526127815246582, "logits/rejected": -1.054230809211731, "logps/chosen": -1.420742154121399, "logps/rejected": -2.4593186378479004, "loss": 1.4622, "nll_loss": 1.4352266788482666, "rewards/accuracies": 1.0, "rewards/chosen": -0.14207421243190765, "rewards/margins": 0.10385768115520477, "rewards/rejected": -0.24593187868595123, "step": 752 }, { "epoch": 1.185973207249803, "grad_norm": 0.202499121427536, "learning_rate": 1.7069638705234407e-06, "log_odds_chosen": 0.9306235909461975, "log_odds_ratio": -0.3378705382347107, "logits/chosen": -0.39728713035583496, "logits/rejected": -1.079502820968628, "logps/chosen": -1.4892499446868896, "logps/rejected": -2.2738990783691406, "loss": 1.5318, "nll_loss": 1.4980517625808716, "rewards/accuracies": 1.0, "rewards/chosen": -0.1489250212907791, "rewards/margins": 0.07846489548683167, "rewards/rejected": -0.22738990187644958, "step": 753 }, { "epoch": 1.1875492513790387, "grad_norm": 0.208243265748024, "learning_rate": 1.7015272089703954e-06, "log_odds_chosen": 1.1252555847167969, "log_odds_ratio": -0.2902730107307434, "logits/chosen": -0.5638817548751831, "logits/rejected": -1.2265125513076782, "logps/chosen": -1.4594535827636719, "logps/rejected": -2.4073901176452637, "loss": 1.4837, "nll_loss": 1.4547126293182373, "rewards/accuracies": 1.0, "rewards/chosen": -0.14594534039497375, "rewards/margins": 0.09479367733001709, "rewards/rejected": -0.24073903262615204, "step": 754 }, { "epoch": 1.1891252955082743, "grad_norm": 0.2076011747121811, "learning_rate": 1.6960928022367261e-06, "log_odds_chosen": 1.0964902639389038, "log_odds_ratio": -0.2991068661212921, "logits/chosen": -0.5469743013381958, "logits/rejected": -1.1416915655136108, "logps/chosen": -1.489222764968872, "logps/rejected": -2.428842067718506, "loss": 1.517, "nll_loss": 1.487074375152588, "rewards/accuracies": 1.0, "rewards/chosen": -0.14892229437828064, "rewards/margins": 0.0939619243144989, "rewards/rejected": -0.24288418889045715, "step": 755 }, { "epoch": 1.19070133963751, "grad_norm": 0.19759686291217804, "learning_rate": 1.6906606913767776e-06, "log_odds_chosen": 1.0877560377120972, "log_odds_ratio": -0.29867690801620483, "logits/chosen": -0.5584855079650879, "logits/rejected": -1.238073706626892, "logps/chosen": -1.3947066068649292, "logps/rejected": -2.3050076961517334, "loss": 1.434, "nll_loss": 1.404129981994629, "rewards/accuracies": 1.0, "rewards/chosen": -0.1394706517457962, "rewards/margins": 0.09103012084960938, "rewards/rejected": -0.23050078749656677, "step": 756 }, { "epoch": 1.1922773837667455, "grad_norm": 0.21942470967769623, "learning_rate": 1.6852309174275543e-06, "log_odds_chosen": 1.1033775806427002, "log_odds_ratio": -0.2920140027999878, "logits/chosen": -0.6717433333396912, "logits/rejected": -1.1116172075271606, "logps/chosen": -1.3147002458572388, "logps/rejected": -2.2198548316955566, "loss": 1.3521, "nll_loss": 1.3228713274002075, "rewards/accuracies": 1.0, "rewards/chosen": -0.13147002458572388, "rewards/margins": 0.09051544964313507, "rewards/rejected": -0.22198548913002014, "step": 757 }, { "epoch": 1.1938534278959811, "grad_norm": 0.21318544447422028, "learning_rate": 1.6798035214084047e-06, "log_odds_chosen": 1.1570626497268677, "log_odds_ratio": -0.2804250717163086, "logits/chosen": -0.6080818772315979, "logits/rejected": -1.2784500122070312, "logps/chosen": -1.4208025932312012, "logps/rejected": -2.398790121078491, "loss": 1.4582, "nll_loss": 1.4301108121871948, "rewards/accuracies": 1.0, "rewards/chosen": -0.14208026230335236, "rewards/margins": 0.09779875725507736, "rewards/rejected": -0.23987902700901031, "step": 758 }, { "epoch": 1.1954294720252168, "grad_norm": 0.21787188947200775, "learning_rate": 1.674378544320714e-06, "log_odds_chosen": 1.433253288269043, "log_odds_ratio": -0.22483661770820618, "logits/chosen": -0.5183004140853882, "logits/rejected": -1.2640109062194824, "logps/chosen": -1.376549243927002, "logps/rejected": -2.5912718772888184, "loss": 1.4142, "nll_loss": 1.3917633295059204, "rewards/accuracies": 1.0, "rewards/chosen": -0.13765491545200348, "rewards/margins": 0.12147226184606552, "rewards/rejected": -0.2591271996498108, "step": 759 }, { "epoch": 1.1970055161544524, "grad_norm": 0.2076132595539093, "learning_rate": 1.6689560271475922e-06, "log_odds_chosen": 1.2801876068115234, "log_odds_ratio": -0.24890056252479553, "logits/chosen": -0.5343514680862427, "logits/rejected": -1.2012145519256592, "logps/chosen": -1.3967100381851196, "logps/rejected": -2.4787793159484863, "loss": 1.4275, "nll_loss": 1.4025884866714478, "rewards/accuracies": 1.0, "rewards/chosen": -0.13967101275920868, "rewards/margins": 0.10820694267749786, "rewards/rejected": -0.24787792563438416, "step": 760 }, { "epoch": 1.198581560283688, "grad_norm": 0.19358055293560028, "learning_rate": 1.6635360108535665e-06, "log_odds_chosen": 1.2866816520690918, "log_odds_ratio": -0.2558908462524414, "logits/chosen": -0.578610360622406, "logits/rejected": -1.237229585647583, "logps/chosen": -1.349299430847168, "logps/rejected": -2.432044744491577, "loss": 1.3954, "nll_loss": 1.3698068857192993, "rewards/accuracies": 1.0, "rewards/chosen": -0.13492995500564575, "rewards/margins": 0.10827453434467316, "rewards/rejected": -0.24320447444915771, "step": 761 }, { "epoch": 1.2001576044129236, "grad_norm": 0.21084879338741302, "learning_rate": 1.6581185363842717e-06, "log_odds_chosen": 1.2801154851913452, "log_odds_ratio": -0.2593454420566559, "logits/chosen": -0.6350646615028381, "logits/rejected": -1.3286490440368652, "logps/chosen": -1.3955661058425903, "logps/rejected": -2.481843948364258, "loss": 1.4193, "nll_loss": 1.3934065103530884, "rewards/accuracies": 1.0, "rewards/chosen": -0.1395566165447235, "rewards/margins": 0.10862776637077332, "rewards/rejected": -0.24818439781665802, "step": 762 }, { "epoch": 1.2017336485421592, "grad_norm": 0.2247847020626068, "learning_rate": 1.6527036446661393e-06, "log_odds_chosen": 1.2048765420913696, "log_odds_ratio": -0.2726666331291199, "logits/chosen": -0.5534381866455078, "logits/rejected": -1.2356462478637695, "logps/chosen": -1.4167560338974, "logps/rejected": -2.437016487121582, "loss": 1.4472, "nll_loss": 1.4199172258377075, "rewards/accuracies": 1.0, "rewards/chosen": -0.14167560636997223, "rewards/margins": 0.10202603042125702, "rewards/rejected": -0.24370163679122925, "step": 763 }, { "epoch": 1.2033096926713949, "grad_norm": 0.2119828313589096, "learning_rate": 1.6472913766060901e-06, "log_odds_chosen": 1.3249211311340332, "log_odds_ratio": -0.2429380565881729, "logits/chosen": -0.6790407299995422, "logits/rejected": -1.1693089008331299, "logps/chosen": -1.3499059677124023, "logps/rejected": -2.4564337730407715, "loss": 1.3899, "nll_loss": 1.365631341934204, "rewards/accuracies": 1.0, "rewards/chosen": -0.13499058783054352, "rewards/margins": 0.11065276712179184, "rewards/rejected": -0.24564333260059357, "step": 764 }, { "epoch": 1.2048857368006305, "grad_norm": 0.20084248483181, "learning_rate": 1.6418817730912252e-06, "log_odds_chosen": 1.0923593044281006, "log_odds_ratio": -0.30785131454467773, "logits/chosen": -0.6217441558837891, "logits/rejected": -0.9932678937911987, "logps/chosen": -1.4512114524841309, "logps/rejected": -2.3775970935821533, "loss": 1.4833, "nll_loss": 1.4525004625320435, "rewards/accuracies": 1.0, "rewards/chosen": -0.14512114226818085, "rewards/margins": 0.09263855218887329, "rewards/rejected": -0.23775969445705414, "step": 765 }, { "epoch": 1.2064617809298661, "grad_norm": 0.218719944357872, "learning_rate": 1.6364748749885133e-06, "log_odds_chosen": 0.9474446773529053, "log_odds_ratio": -0.3324336111545563, "logits/chosen": -0.5795613527297974, "logits/rejected": -1.0887925624847412, "logps/chosen": -1.414801836013794, "logps/rejected": -2.2032525539398193, "loss": 1.4485, "nll_loss": 1.415292739868164, "rewards/accuracies": 1.0, "rewards/chosen": -0.1414801925420761, "rewards/margins": 0.0788450688123703, "rewards/rejected": -0.22032523155212402, "step": 766 }, { "epoch": 1.2080378250591017, "grad_norm": 0.23504634201526642, "learning_rate": 1.6310707231444883e-06, "log_odds_chosen": 1.186997652053833, "log_odds_ratio": -0.2763690650463104, "logits/chosen": -0.5859046578407288, "logits/rejected": -1.0978572368621826, "logps/chosen": -1.419836163520813, "logps/rejected": -2.4256393909454346, "loss": 1.4468, "nll_loss": 1.4191646575927734, "rewards/accuracies": 1.0, "rewards/chosen": -0.14198361337184906, "rewards/margins": 0.10058033466339111, "rewards/rejected": -0.24256394803524017, "step": 767 }, { "epoch": 1.2096138691883374, "grad_norm": 0.21579289436340332, "learning_rate": 1.625669358384936e-06, "log_odds_chosen": 0.9713581204414368, "log_odds_ratio": -0.3374493718147278, "logits/chosen": -0.500206708908081, "logits/rejected": -0.947388231754303, "logps/chosen": -1.483232021331787, "logps/rejected": -2.3049139976501465, "loss": 1.5186, "nll_loss": 1.4848817586898804, "rewards/accuracies": 1.0, "rewards/chosen": -0.1483232080936432, "rewards/margins": 0.08216820657253265, "rewards/rejected": -0.23049141466617584, "step": 768 }, { "epoch": 1.211189913317573, "grad_norm": 0.22036954760551453, "learning_rate": 1.620270821514587e-06, "log_odds_chosen": 1.328906536102295, "log_odds_ratio": -0.2527619004249573, "logits/chosen": -0.46030980348587036, "logits/rejected": -1.15547776222229, "logps/chosen": -1.508017659187317, "logps/rejected": -2.666835308074951, "loss": 1.5242, "nll_loss": 1.4988842010498047, "rewards/accuracies": 1.0, "rewards/chosen": -0.15080177783966064, "rewards/margins": 0.1158817708492279, "rewards/rejected": -0.26668351888656616, "step": 769 }, { "epoch": 1.2127659574468086, "grad_norm": 0.22563743591308594, "learning_rate": 1.6148751533168104e-06, "log_odds_chosen": 1.5124189853668213, "log_odds_ratio": -0.2152455598115921, "logits/chosen": -0.5869713425636292, "logits/rejected": -1.3768200874328613, "logps/chosen": -1.4721519947052002, "logps/rejected": -2.789640188217163, "loss": 1.4841, "nll_loss": 1.4625388383865356, "rewards/accuracies": 1.0, "rewards/chosen": -0.14721520245075226, "rewards/margins": 0.13174879550933838, "rewards/rejected": -0.27896398305892944, "step": 770 }, { "epoch": 1.2143420015760442, "grad_norm": 0.21955843269824982, "learning_rate": 1.6094823945532996e-06, "log_odds_chosen": 1.159914255142212, "log_odds_ratio": -0.28828197717666626, "logits/chosen": -0.6304282546043396, "logits/rejected": -1.2890020608901978, "logps/chosen": -1.429293155670166, "logps/rejected": -2.415302276611328, "loss": 1.4527, "nll_loss": 1.4239130020141602, "rewards/accuracies": 1.0, "rewards/chosen": -0.1429293304681778, "rewards/margins": 0.09860090911388397, "rewards/rejected": -0.24153022468090057, "step": 771 }, { "epoch": 1.2159180457052798, "grad_norm": 0.23726646602153778, "learning_rate": 1.6040925859637728e-06, "log_odds_chosen": 1.1744619607925415, "log_odds_ratio": -0.28108540177345276, "logits/chosen": -0.5889579653739929, "logits/rejected": -1.1440235376358032, "logps/chosen": -1.5084424018859863, "logps/rejected": -2.516566753387451, "loss": 1.5327, "nll_loss": 1.5046378374099731, "rewards/accuracies": 1.0, "rewards/chosen": -0.1508442461490631, "rewards/margins": 0.10081242769956589, "rewards/rejected": -0.2516566812992096, "step": 772 }, { "epoch": 1.2174940898345155, "grad_norm": 0.24190925061702728, "learning_rate": 1.5987057682656596e-06, "log_odds_chosen": 1.1003702878952026, "log_odds_ratio": -0.2898416519165039, "logits/chosen": -0.4985043704509735, "logits/rejected": -1.0609208345413208, "logps/chosen": -1.548935055732727, "logps/rejected": -2.496812343597412, "loss": 1.5807, "nll_loss": 1.5517053604125977, "rewards/accuracies": 1.0, "rewards/chosen": -0.15489351749420166, "rewards/margins": 0.09478770941495895, "rewards/rejected": -0.2496812343597412, "step": 773 }, { "epoch": 1.219070133963751, "grad_norm": 0.2459522932767868, "learning_rate": 1.593321982153795e-06, "log_odds_chosen": 1.130623459815979, "log_odds_ratio": -0.2844310402870178, "logits/chosen": -0.6087486743927002, "logits/rejected": -1.2695708274841309, "logps/chosen": -1.5679212808609009, "logps/rejected": -2.546003818511963, "loss": 1.5829, "nll_loss": 1.5545040369033813, "rewards/accuracies": 1.0, "rewards/chosen": -0.15679211914539337, "rewards/margins": 0.09780827909708023, "rewards/rejected": -0.2546004056930542, "step": 774 }, { "epoch": 1.2206461780929867, "grad_norm": 0.263910710811615, "learning_rate": 1.5879412683001106e-06, "log_odds_chosen": 1.0849615335464478, "log_odds_ratio": -0.29708150029182434, "logits/chosen": -0.6315842866897583, "logits/rejected": -0.9837450981140137, "logps/chosen": -1.3872767686843872, "logps/rejected": -2.2935962677001953, "loss": 1.431, "nll_loss": 1.4012638330459595, "rewards/accuracies": 1.0, "rewards/chosen": -0.13872767984867096, "rewards/margins": 0.09063193947076797, "rewards/rejected": -0.22935961186885834, "step": 775 }, { "epoch": 1.2222222222222223, "grad_norm": 0.21891245245933533, "learning_rate": 1.5825636673533298e-06, "log_odds_chosen": 1.5271731615066528, "log_odds_ratio": -0.20585866272449493, "logits/chosen": -0.6670053601264954, "logits/rejected": -1.418460726737976, "logps/chosen": -1.4539694786071777, "logps/rejected": -2.7789816856384277, "loss": 1.4598, "nll_loss": 1.4391790628433228, "rewards/accuracies": 1.0, "rewards/chosen": -0.14539696276187897, "rewards/margins": 0.13250122964382172, "rewards/rejected": -0.2778981924057007, "step": 776 }, { "epoch": 1.2237982663514577, "grad_norm": 0.23335790634155273, "learning_rate": 1.5771892199386598e-06, "log_odds_chosen": 1.1346737146377563, "log_odds_ratio": -0.28671741485595703, "logits/chosen": -0.5840818881988525, "logits/rejected": -1.152944803237915, "logps/chosen": -1.4742546081542969, "logps/rejected": -2.4443798065185547, "loss": 1.5161, "nll_loss": 1.487383246421814, "rewards/accuracies": 1.0, "rewards/chosen": -0.14742547273635864, "rewards/margins": 0.09701251983642578, "rewards/rejected": -0.24443799257278442, "step": 777 }, { "epoch": 1.2253743104806936, "grad_norm": 0.20319467782974243, "learning_rate": 1.5718179666574834e-06, "log_odds_chosen": 1.4909844398498535, "log_odds_ratio": -0.2130488157272339, "logits/chosen": -0.5028564929962158, "logits/rejected": -1.3472793102264404, "logps/chosen": -1.4410914182662964, "logps/rejected": -2.7312090396881104, "loss": 1.471, "nll_loss": 1.4497259855270386, "rewards/accuracies": 1.0, "rewards/chosen": -0.14410914480686188, "rewards/margins": 0.12901173532009125, "rewards/rejected": -0.2731208801269531, "step": 778 }, { "epoch": 1.226950354609929, "grad_norm": 0.22173888981342316, "learning_rate": 1.5664499480870539e-06, "log_odds_chosen": 1.4054442644119263, "log_odds_ratio": -0.22597062587738037, "logits/chosen": -0.5345644950866699, "logits/rejected": -1.3104472160339355, "logps/chosen": -1.465614914894104, "logps/rejected": -2.6791446208953857, "loss": 1.5006, "nll_loss": 1.4780365228652954, "rewards/accuracies": 1.0, "rewards/chosen": -0.14656148850917816, "rewards/margins": 0.12135298550128937, "rewards/rejected": -0.26791447401046753, "step": 779 }, { "epoch": 1.2285263987391648, "grad_norm": 0.22311998903751373, "learning_rate": 1.5610852047801875e-06, "log_odds_chosen": 1.0542051792144775, "log_odds_ratio": -0.3052675127983093, "logits/chosen": -0.624690592288971, "logits/rejected": -1.1640291213989258, "logps/chosen": -1.3995742797851562, "logps/rejected": -2.2823874950408936, "loss": 1.4228, "nll_loss": 1.3922507762908936, "rewards/accuracies": 1.0, "rewards/chosen": -0.13995742797851562, "rewards/margins": 0.08828133344650269, "rewards/rejected": -0.2282387763261795, "step": 780 }, { "epoch": 1.2301024428684002, "grad_norm": 0.21877580881118774, "learning_rate": 1.5557237772649567e-06, "log_odds_chosen": 1.0367674827575684, "log_odds_ratio": -0.30913954973220825, "logits/chosen": -0.5240108966827393, "logits/rejected": -1.2291767597198486, "logps/chosen": -1.4782272577285767, "logps/rejected": -2.355313777923584, "loss": 1.5111, "nll_loss": 1.4801530838012695, "rewards/accuracies": 1.0, "rewards/chosen": -0.14782273769378662, "rewards/margins": 0.08770866692066193, "rewards/rejected": -0.23553138971328735, "step": 781 }, { "epoch": 1.231678486997636, "grad_norm": 0.20333434641361237, "learning_rate": 1.5503657060443866e-06, "log_odds_chosen": 1.132176399230957, "log_odds_ratio": -0.28855758905410767, "logits/chosen": -0.5540459752082825, "logits/rejected": -1.2549809217453003, "logps/chosen": -1.4005221128463745, "logps/rejected": -2.3478870391845703, "loss": 1.4448, "nll_loss": 1.415934443473816, "rewards/accuracies": 1.0, "rewards/chosen": -0.1400521993637085, "rewards/margins": 0.0947365090250969, "rewards/rejected": -0.234788715839386, "step": 782 }, { "epoch": 1.2332545311268714, "grad_norm": 0.2054745852947235, "learning_rate": 1.5450110315961457e-06, "log_odds_chosen": 1.394212007522583, "log_odds_ratio": -0.23807555437088013, "logits/chosen": -0.5969313383102417, "logits/rejected": -1.2922520637512207, "logps/chosen": -1.432874083518982, "logps/rejected": -2.633666515350342, "loss": 1.4738, "nll_loss": 1.4500117301940918, "rewards/accuracies": 1.0, "rewards/chosen": -0.14328742027282715, "rewards/margins": 0.12007924914360046, "rewards/rejected": -0.2633666694164276, "step": 783 }, { "epoch": 1.2348305752561073, "grad_norm": 0.21883922815322876, "learning_rate": 1.539659794372243e-06, "log_odds_chosen": 1.3183274269104004, "log_odds_ratio": -0.26364922523498535, "logits/chosen": -0.6094043850898743, "logits/rejected": -1.3630642890930176, "logps/chosen": -1.4483036994934082, "logps/rejected": -2.5834264755249023, "loss": 1.479, "nll_loss": 1.4525939226150513, "rewards/accuracies": 1.0, "rewards/chosen": -0.1448303610086441, "rewards/margins": 0.1135122999548912, "rewards/rejected": -0.2583426833152771, "step": 784 }, { "epoch": 1.2364066193853427, "grad_norm": 0.21961982548236847, "learning_rate": 1.5343120347987172e-06, "log_odds_chosen": 1.3260589838027954, "log_odds_ratio": -0.23876364529132843, "logits/chosen": -0.5705952048301697, "logits/rejected": -1.3521499633789062, "logps/chosen": -1.4333276748657227, "logps/rejected": -2.5668129920959473, "loss": 1.478, "nll_loss": 1.4541019201278687, "rewards/accuracies": 1.0, "rewards/chosen": -0.14333274960517883, "rewards/margins": 0.1133485734462738, "rewards/rejected": -0.25668132305145264, "step": 785 }, { "epoch": 1.2379826635145785, "grad_norm": 0.21360503137111664, "learning_rate": 1.5289677932753398e-06, "log_odds_chosen": 1.0597894191741943, "log_odds_ratio": -0.3073855936527252, "logits/chosen": -0.5851569175720215, "logits/rejected": -1.1548525094985962, "logps/chosen": -1.4631526470184326, "logps/rejected": -2.3595664501190186, "loss": 1.4975, "nll_loss": 1.4667913913726807, "rewards/accuracies": 1.0, "rewards/chosen": -0.14631526172161102, "rewards/margins": 0.08964138478040695, "rewards/rejected": -0.23595665395259857, "step": 786 }, { "epoch": 1.239558707643814, "grad_norm": 0.21199171245098114, "learning_rate": 1.5236271101753017e-06, "log_odds_chosen": 1.279382348060608, "log_odds_ratio": -0.24772295355796814, "logits/chosen": -0.6358316540718079, "logits/rejected": -1.3125535249710083, "logps/chosen": -1.4651093482971191, "logps/rejected": -2.559737205505371, "loss": 1.4811, "nll_loss": 1.456289291381836, "rewards/accuracies": 1.0, "rewards/chosen": -0.14651094377040863, "rewards/margins": 0.10946279764175415, "rewards/rejected": -0.2559737265110016, "step": 787 }, { "epoch": 1.2411347517730495, "grad_norm": 0.2220183163881302, "learning_rate": 1.5182900258449135e-06, "log_odds_chosen": 1.388107419013977, "log_odds_ratio": -0.22697725892066956, "logits/chosen": -0.5791980028152466, "logits/rejected": -1.3052403926849365, "logps/chosen": -1.4486891031265259, "logps/rejected": -2.643555164337158, "loss": 1.4776, "nll_loss": 1.454932451248169, "rewards/accuracies": 1.0, "rewards/chosen": -0.1448688954114914, "rewards/margins": 0.11948662251234055, "rewards/rejected": -0.26435554027557373, "step": 788 }, { "epoch": 1.2427107959022852, "grad_norm": 0.21619641780853271, "learning_rate": 1.5129565806032986e-06, "log_odds_chosen": 1.3488595485687256, "log_odds_ratio": -0.2432767152786255, "logits/chosen": -0.6071189641952515, "logits/rejected": -1.2094124555587769, "logps/chosen": -1.3328895568847656, "logps/rejected": -2.467470407485962, "loss": 1.379, "nll_loss": 1.3546922206878662, "rewards/accuracies": 1.0, "rewards/chosen": -0.13328896462917328, "rewards/margins": 0.1134580746293068, "rewards/rejected": -0.24674703180789948, "step": 789 }, { "epoch": 1.2442868400315208, "grad_norm": 0.2596001923084259, "learning_rate": 1.507626814742087e-06, "log_odds_chosen": 1.147154688835144, "log_odds_ratio": -0.2837839126586914, "logits/chosen": -0.6274202466011047, "logits/rejected": -1.1922141313552856, "logps/chosen": -1.5136209726333618, "logps/rejected": -2.4962592124938965, "loss": 1.5492, "nll_loss": 1.52080237865448, "rewards/accuracies": 1.0, "rewards/chosen": -0.1513621062040329, "rewards/margins": 0.09826381504535675, "rewards/rejected": -0.24962592124938965, "step": 790 }, { "epoch": 1.2458628841607564, "grad_norm": 0.2129853367805481, "learning_rate": 1.502300768525115e-06, "log_odds_chosen": 1.277753233909607, "log_odds_ratio": -0.266977995634079, "logits/chosen": -0.5967295169830322, "logits/rejected": -1.2391985654830933, "logps/chosen": -1.5308218002319336, "logps/rejected": -2.6411221027374268, "loss": 1.5317, "nll_loss": 1.5049808025360107, "rewards/accuracies": 1.0, "rewards/chosen": -0.15308216214179993, "rewards/margins": 0.11103005707263947, "rewards/rejected": -0.2641122341156006, "step": 791 }, { "epoch": 1.247438928289992, "grad_norm": 0.2184506505727768, "learning_rate": 1.4969784821881177e-06, "log_odds_chosen": 1.1175075769424438, "log_odds_ratio": -0.2925605773925781, "logits/chosen": -0.5976652503013611, "logits/rejected": -1.0554449558258057, "logps/chosen": -1.3985931873321533, "logps/rejected": -2.333056926727295, "loss": 1.4526, "nll_loss": 1.423383116722107, "rewards/accuracies": 1.0, "rewards/chosen": -0.13985933363437653, "rewards/margins": 0.09344638139009476, "rewards/rejected": -0.23330573737621307, "step": 792 }, { "epoch": 1.2490149724192277, "grad_norm": 0.22777415812015533, "learning_rate": 1.4916599959384262e-06, "log_odds_chosen": 1.0911645889282227, "log_odds_ratio": -0.29374760389328003, "logits/chosen": -0.6116424798965454, "logits/rejected": -1.263769268989563, "logps/chosen": -1.4797879457473755, "logps/rejected": -2.4067490100860596, "loss": 1.5037, "nll_loss": 1.4743411540985107, "rewards/accuracies": 1.0, "rewards/chosen": -0.1479787975549698, "rewards/margins": 0.09269611537456512, "rewards/rejected": -0.2406749129295349, "step": 793 }, { "epoch": 1.2505910165484633, "grad_norm": 0.24789996445178986, "learning_rate": 1.4863453499546643e-06, "log_odds_chosen": 1.3286534547805786, "log_odds_ratio": -0.24497836828231812, "logits/chosen": -0.572461724281311, "logits/rejected": -1.1910439729690552, "logps/chosen": -1.4105679988861084, "logps/rejected": -2.5433671474456787, "loss": 1.4408, "nll_loss": 1.4162709712982178, "rewards/accuracies": 1.0, "rewards/chosen": -0.14105680584907532, "rewards/margins": 0.11327990889549255, "rewards/rejected": -0.25433671474456787, "step": 794 }, { "epoch": 1.252167060677699, "grad_norm": 0.2219705879688263, "learning_rate": 1.4810345843864427e-06, "log_odds_chosen": 1.2198662757873535, "log_odds_ratio": -0.26821431517601013, "logits/chosen": -0.6375918388366699, "logits/rejected": -1.1806565523147583, "logps/chosen": -1.4773579835891724, "logps/rejected": -2.51969051361084, "loss": 1.5094, "nll_loss": 1.482530117034912, "rewards/accuracies": 1.0, "rewards/chosen": -0.1477358043193817, "rewards/margins": 0.1042332723736763, "rewards/rejected": -0.2519690692424774, "step": 795 }, { "epoch": 1.2537431048069345, "grad_norm": 0.2212211936712265, "learning_rate": 1.4757277393540598e-06, "log_odds_chosen": 1.235634446144104, "log_odds_ratio": -0.2696811556816101, "logits/chosen": -0.5857970714569092, "logits/rejected": -1.0587821006774902, "logps/chosen": -1.455737590789795, "logps/rejected": -2.5133907794952393, "loss": 1.4956, "nll_loss": 1.468680739402771, "rewards/accuracies": 1.0, "rewards/chosen": -0.14557376503944397, "rewards/margins": 0.10576532781124115, "rewards/rejected": -0.2513391077518463, "step": 796 }, { "epoch": 1.2553191489361701, "grad_norm": 0.22049546241760254, "learning_rate": 1.4704248549481946e-06, "log_odds_chosen": 1.1177611351013184, "log_odds_ratio": -0.2930131256580353, "logits/chosen": -0.6071667671203613, "logits/rejected": -1.348658800125122, "logps/chosen": -1.5226861238479614, "logps/rejected": -2.484973907470703, "loss": 1.5459, "nll_loss": 1.5165510177612305, "rewards/accuracies": 1.0, "rewards/chosen": -0.15226861834526062, "rewards/margins": 0.09622877836227417, "rewards/rejected": -0.2484973967075348, "step": 797 }, { "epoch": 1.2568951930654058, "grad_norm": 0.20678599178791046, "learning_rate": 1.4651259712296063e-06, "log_odds_chosen": 1.398108959197998, "log_odds_ratio": -0.2260427176952362, "logits/chosen": -0.5228291749954224, "logits/rejected": -1.2606028318405151, "logps/chosen": -1.495984673500061, "logps/rejected": -2.7078545093536377, "loss": 1.5162, "nll_loss": 1.4935977458953857, "rewards/accuracies": 1.0, "rewards/chosen": -0.14959846436977386, "rewards/margins": 0.12118701636791229, "rewards/rejected": -0.27078545093536377, "step": 798 }, { "epoch": 1.2584712371946414, "grad_norm": 0.21311365067958832, "learning_rate": 1.45983112822883e-06, "log_odds_chosen": 1.1702497005462646, "log_odds_ratio": -0.2745209336280823, "logits/chosen": -0.49887627363204956, "logits/rejected": -1.0869661569595337, "logps/chosen": -1.4164506196975708, "logps/rejected": -2.4050211906433105, "loss": 1.4519, "nll_loss": 1.4244639873504639, "rewards/accuracies": 1.0, "rewards/chosen": -0.14164507389068604, "rewards/margins": 0.09885703772306442, "rewards/rejected": -0.24050211906433105, "step": 799 }, { "epoch": 1.260047281323877, "grad_norm": 0.22688302397727966, "learning_rate": 1.4545403659458756e-06, "log_odds_chosen": 1.226530909538269, "log_odds_ratio": -0.2663864493370056, "logits/chosen": -0.6176812052726746, "logits/rejected": -1.111179232597351, "logps/chosen": -1.4274733066558838, "logps/rejected": -2.4704573154449463, "loss": 1.4615, "nll_loss": 1.4348269701004028, "rewards/accuracies": 1.0, "rewards/chosen": -0.14274734258651733, "rewards/margins": 0.10429838299751282, "rewards/rejected": -0.24704572558403015, "step": 800 }, { "epoch": 1.2616233254531126, "grad_norm": 0.21282625198364258, "learning_rate": 1.4492537243499253e-06, "log_odds_chosen": 1.2974098920822144, "log_odds_ratio": -0.24690502882003784, "logits/chosen": -0.6217365264892578, "logits/rejected": -1.1915017366409302, "logps/chosen": -1.4191696643829346, "logps/rejected": -2.516692876815796, "loss": 1.4356, "nll_loss": 1.4108633995056152, "rewards/accuracies": 1.0, "rewards/chosen": -0.14191697537899017, "rewards/margins": 0.10975231230258942, "rewards/rejected": -0.2516692578792572, "step": 801 }, { "epoch": 1.2631993695823482, "grad_norm": 0.21096579730510712, "learning_rate": 1.443971243379031e-06, "log_odds_chosen": 1.2948417663574219, "log_odds_ratio": -0.2565336525440216, "logits/chosen": -0.5444697141647339, "logits/rejected": -1.1919379234313965, "logps/chosen": -1.4546860456466675, "logps/rejected": -2.562107801437378, "loss": 1.4784, "nll_loss": 1.4527922868728638, "rewards/accuracies": 1.0, "rewards/chosen": -0.145468607544899, "rewards/margins": 0.11074218153953552, "rewards/rejected": -0.2562108039855957, "step": 802 }, { "epoch": 1.2647754137115839, "grad_norm": 0.21897241473197937, "learning_rate": 1.4386929629398144e-06, "log_odds_chosen": 1.4477595090866089, "log_odds_ratio": -0.21539321541786194, "logits/chosen": -0.5912661552429199, "logits/rejected": -1.306860327720642, "logps/chosen": -1.4861226081848145, "logps/rejected": -2.7430782318115234, "loss": 1.5058, "nll_loss": 1.4842352867126465, "rewards/accuracies": 1.0, "rewards/chosen": -0.14861226081848145, "rewards/margins": 0.12569554150104523, "rewards/rejected": -0.27430781722068787, "step": 803 }, { "epoch": 1.2663514578408195, "grad_norm": 0.20164766907691956, "learning_rate": 1.4334189229071614e-06, "log_odds_chosen": 1.2101818323135376, "log_odds_ratio": -0.2757173776626587, "logits/chosen": -0.6016882061958313, "logits/rejected": -1.1531254053115845, "logps/chosen": -1.3826367855072021, "logps/rejected": -2.4040839672088623, "loss": 1.4133, "nll_loss": 1.3857529163360596, "rewards/accuracies": 1.0, "rewards/chosen": -0.13826368749141693, "rewards/margins": 0.10214471071958542, "rewards/rejected": -0.24040839076042175, "step": 804 }, { "epoch": 1.267927501970055, "grad_norm": 0.2569519877433777, "learning_rate": 1.4281491631239263e-06, "log_odds_chosen": 1.034188985824585, "log_odds_ratio": -0.3123238682746887, "logits/chosen": -0.6481592655181885, "logits/rejected": -1.2144945859909058, "logps/chosen": -1.5199673175811768, "logps/rejected": -2.4049673080444336, "loss": 1.5348, "nll_loss": 1.5035518407821655, "rewards/accuracies": 1.0, "rewards/chosen": -0.15199674665927887, "rewards/margins": 0.088500015437603, "rewards/rejected": -0.24049675464630127, "step": 805 }, { "epoch": 1.2695035460992907, "grad_norm": 0.2255389392375946, "learning_rate": 1.4228837234006272e-06, "log_odds_chosen": 1.3796045780181885, "log_odds_ratio": -0.2318534255027771, "logits/chosen": -0.6522822380065918, "logits/rejected": -1.1650481224060059, "logps/chosen": -1.389268398284912, "logps/rejected": -2.566591262817383, "loss": 1.4413, "nll_loss": 1.4181089401245117, "rewards/accuracies": 1.0, "rewards/chosen": -0.13892683386802673, "rewards/margins": 0.11773229390382767, "rewards/rejected": -0.2566591203212738, "step": 806 }, { "epoch": 1.2710795902285263, "grad_norm": 0.2232556939125061, "learning_rate": 1.4176226435151462e-06, "log_odds_chosen": 1.2342092990875244, "log_odds_ratio": -0.26956236362457275, "logits/chosen": -0.6295043230056763, "logits/rejected": -1.3596892356872559, "logps/chosen": -1.4895908832550049, "logps/rejected": -2.5549004077911377, "loss": 1.5014, "nll_loss": 1.4744771718978882, "rewards/accuracies": 1.0, "rewards/chosen": -0.14895908534526825, "rewards/margins": 0.10653094947338104, "rewards/rejected": -0.2554900348186493, "step": 807 }, { "epoch": 1.272655634357762, "grad_norm": 0.20261798799037933, "learning_rate": 1.4123659632124298e-06, "log_odds_chosen": 1.3622705936431885, "log_odds_ratio": -0.23639526963233948, "logits/chosen": -0.6416347026824951, "logits/rejected": -1.3288860321044922, "logps/chosen": -1.4526416063308716, "logps/rejected": -2.6243927478790283, "loss": 1.4835, "nll_loss": 1.4598387479782104, "rewards/accuracies": 1.0, "rewards/chosen": -0.1452641636133194, "rewards/margins": 0.11717512458562851, "rewards/rejected": -0.2624392807483673, "step": 808 }, { "epoch": 1.2742316784869976, "grad_norm": 0.2093254178762436, "learning_rate": 1.4071137222041852e-06, "log_odds_chosen": 1.363694190979004, "log_odds_ratio": -0.2363823652267456, "logits/chosen": -0.5269907712936401, "logits/rejected": -1.409184455871582, "logps/chosen": -1.5035203695297241, "logps/rejected": -2.685701608657837, "loss": 1.5248, "nll_loss": 1.5011268854141235, "rewards/accuracies": 1.0, "rewards/chosen": -0.15035203099250793, "rewards/margins": 0.11821816861629486, "rewards/rejected": -0.268570214509964, "step": 809 }, { "epoch": 1.2758077226162332, "grad_norm": 0.2128390371799469, "learning_rate": 1.4018659601685857e-06, "log_odds_chosen": 1.1988935470581055, "log_odds_ratio": -0.270069420337677, "logits/chosen": -0.5626112818717957, "logits/rejected": -1.2920762300491333, "logps/chosen": -1.489626169204712, "logps/rejected": -2.519320487976074, "loss": 1.5114, "nll_loss": 1.484366774559021, "rewards/accuracies": 1.0, "rewards/chosen": -0.1489626169204712, "rewards/margins": 0.10296941548585892, "rewards/rejected": -0.2519320249557495, "step": 810 }, { "epoch": 1.2773837667454688, "grad_norm": 0.24874334037303925, "learning_rate": 1.3966227167499667e-06, "log_odds_chosen": 1.1731008291244507, "log_odds_ratio": -0.2739385962486267, "logits/chosen": -0.6401690244674683, "logits/rejected": -1.162697434425354, "logps/chosen": -1.5324712991714478, "logps/rejected": -2.5428264141082764, "loss": 1.5491, "nll_loss": 1.5216964483261108, "rewards/accuracies": 1.0, "rewards/chosen": -0.15324713289737701, "rewards/margins": 0.10103552043437958, "rewards/rejected": -0.2542826533317566, "step": 811 }, { "epoch": 1.2789598108747045, "grad_norm": 0.21829232573509216, "learning_rate": 1.3913840315585277e-06, "log_odds_chosen": 1.1419519186019897, "log_odds_ratio": -0.277925044298172, "logits/chosen": -0.5999048948287964, "logits/rejected": -1.359046220779419, "logps/chosen": -1.4545201063156128, "logps/rejected": -2.4232535362243652, "loss": 1.4862, "nll_loss": 1.4583704471588135, "rewards/accuracies": 1.0, "rewards/chosen": -0.14545200765132904, "rewards/margins": 0.09687335044145584, "rewards/rejected": -0.24232535064220428, "step": 812 }, { "epoch": 1.28053585500394, "grad_norm": 0.2484036386013031, "learning_rate": 1.3861499441700337e-06, "log_odds_chosen": 1.4458749294281006, "log_odds_ratio": -0.2213050276041031, "logits/chosen": -0.7367854118347168, "logits/rejected": -1.3375928401947021, "logps/chosen": -1.5378611087799072, "logps/rejected": -2.8061869144439697, "loss": 1.5569, "nll_loss": 1.5347464084625244, "rewards/accuracies": 1.0, "rewards/chosen": -0.15378612279891968, "rewards/margins": 0.12683258950710297, "rewards/rejected": -0.28061872720718384, "step": 813 }, { "epoch": 1.2821118991331757, "grad_norm": 0.22845400869846344, "learning_rate": 1.3809204941255144e-06, "log_odds_chosen": 1.4313955307006836, "log_odds_ratio": -0.22132830321788788, "logits/chosen": -0.657297670841217, "logits/rejected": -1.303873896598816, "logps/chosen": -1.4194843769073486, "logps/rejected": -2.647003173828125, "loss": 1.4471, "nll_loss": 1.4249849319458008, "rewards/accuracies": 1.0, "rewards/chosen": -0.14194843173027039, "rewards/margins": 0.1227518767118454, "rewards/rejected": -0.264700323343277, "step": 814 }, { "epoch": 1.2836879432624113, "grad_norm": 0.2106010466814041, "learning_rate": 1.3756957209309667e-06, "log_odds_chosen": 1.5038448572158813, "log_odds_ratio": -0.21202105283737183, "logits/chosen": -0.639805018901825, "logits/rejected": -1.4728705883026123, "logps/chosen": -1.4671958684921265, "logps/rejected": -2.772200345993042, "loss": 1.4959, "nll_loss": 1.4747065305709839, "rewards/accuracies": 1.0, "rewards/chosen": -0.14671958982944489, "rewards/margins": 0.13050046563148499, "rewards/rejected": -0.2772200107574463, "step": 815 }, { "epoch": 1.285263987391647, "grad_norm": 0.24912430346012115, "learning_rate": 1.3704756640570575e-06, "log_odds_chosen": 1.5811009407043457, "log_odds_ratio": -0.2080104649066925, "logits/chosen": -0.6791413426399231, "logits/rejected": -1.1796469688415527, "logps/chosen": -1.4295443296432495, "logps/rejected": -2.8001911640167236, "loss": 1.4561, "nll_loss": 1.4352586269378662, "rewards/accuracies": 1.0, "rewards/chosen": -0.14295443892478943, "rewards/margins": 0.13706468045711517, "rewards/rejected": -0.2800191342830658, "step": 816 }, { "epoch": 1.2868400315208826, "grad_norm": 0.2279585599899292, "learning_rate": 1.3652603629388224e-06, "log_odds_chosen": 1.3864128589630127, "log_odds_ratio": -0.24498578906059265, "logits/chosen": -0.6164575815200806, "logits/rejected": -1.2485588788986206, "logps/chosen": -1.3957561254501343, "logps/rejected": -2.574767589569092, "loss": 1.4205, "nll_loss": 1.395990252494812, "rewards/accuracies": 1.0, "rewards/chosen": -0.13957563042640686, "rewards/margins": 0.11790116131305695, "rewards/rejected": -0.2574767768383026, "step": 817 }, { "epoch": 1.2884160756501182, "grad_norm": 0.2900654077529907, "learning_rate": 1.3600498569753715e-06, "log_odds_chosen": 1.1914547681808472, "log_odds_ratio": -0.2667907476425171, "logits/chosen": -0.4763542711734772, "logits/rejected": -1.0976914167404175, "logps/chosen": -1.4544169902801514, "logps/rejected": -2.46539044380188, "loss": 1.4881, "nll_loss": 1.4614025354385376, "rewards/accuracies": 1.0, "rewards/chosen": -0.1454417109489441, "rewards/margins": 0.10109736025333405, "rewards/rejected": -0.24653908610343933, "step": 818 }, { "epoch": 1.2899921197793538, "grad_norm": 0.2253255397081375, "learning_rate": 1.3548441855295872e-06, "log_odds_chosen": 1.212156057357788, "log_odds_ratio": -0.26494577527046204, "logits/chosen": -0.618108868598938, "logits/rejected": -1.415005087852478, "logps/chosen": -1.5244392156600952, "logps/rejected": -2.570408582687378, "loss": 1.5471, "nll_loss": 1.5205867290496826, "rewards/accuracies": 1.0, "rewards/chosen": -0.15244391560554504, "rewards/margins": 0.10459694266319275, "rewards/rejected": -0.2570408582687378, "step": 819 }, { "epoch": 1.2915681639085894, "grad_norm": 0.2325769066810608, "learning_rate": 1.3496433879278315e-06, "log_odds_chosen": 1.2832176685333252, "log_odds_ratio": -0.2645808756351471, "logits/chosen": -0.586574375629425, "logits/rejected": -1.2879778146743774, "logps/chosen": -1.5669478178024292, "logps/rejected": -2.690969944000244, "loss": 1.5776, "nll_loss": 1.5511255264282227, "rewards/accuracies": 1.0, "rewards/chosen": -0.15669478476047516, "rewards/margins": 0.11240223795175552, "rewards/rejected": -0.2690970301628113, "step": 820 }, { "epoch": 1.293144208037825, "grad_norm": 0.25114527344703674, "learning_rate": 1.3444475034596463e-06, "log_odds_chosen": 1.491232991218567, "log_odds_ratio": -0.20878681540489197, "logits/chosen": -0.6662592887878418, "logits/rejected": -1.2903168201446533, "logps/chosen": -1.4409993886947632, "logps/rejected": -2.7317893505096436, "loss": 1.4613, "nll_loss": 1.4404513835906982, "rewards/accuracies": 1.0, "rewards/chosen": -0.14409995079040527, "rewards/margins": 0.12907901406288147, "rewards/rejected": -0.27317896485328674, "step": 821 }, { "epoch": 1.2947202521670607, "grad_norm": 0.22515331208705902, "learning_rate": 1.3392565713774575e-06, "log_odds_chosen": 1.5715899467468262, "log_odds_ratio": -0.1938280314207077, "logits/chosen": -0.6638049483299255, "logits/rejected": -1.4696121215820312, "logps/chosen": -1.4550765752792358, "logps/rejected": -2.821686267852783, "loss": 1.4875, "nll_loss": 1.4680683612823486, "rewards/accuracies": 1.0, "rewards/chosen": -0.14550766348838806, "rewards/margins": 0.13666100800037384, "rewards/rejected": -0.2821686565876007, "step": 822 }, { "epoch": 1.2962962962962963, "grad_norm": 0.2394685000181198, "learning_rate": 1.3340706308962763e-06, "log_odds_chosen": 1.313214659690857, "log_odds_ratio": -0.25101637840270996, "logits/chosen": -0.6359928250312805, "logits/rejected": -1.0666664838790894, "logps/chosen": -1.4218816757202148, "logps/rejected": -2.5398035049438477, "loss": 1.4519, "nll_loss": 1.4267902374267578, "rewards/accuracies": 1.0, "rewards/chosen": -0.1421881765127182, "rewards/margins": 0.11179221421480179, "rewards/rejected": -0.2539803981781006, "step": 823 }, { "epoch": 1.297872340425532, "grad_norm": 0.2412787675857544, "learning_rate": 1.3288897211934066e-06, "log_odds_chosen": 1.3031283617019653, "log_odds_ratio": -0.254698783159256, "logits/chosen": -0.6228452324867249, "logits/rejected": -1.041465401649475, "logps/chosen": -1.4170979261398315, "logps/rejected": -2.5273680686950684, "loss": 1.4423, "nll_loss": 1.416801929473877, "rewards/accuracies": 1.0, "rewards/chosen": -0.14170978963375092, "rewards/margins": 0.11102700978517532, "rewards/rejected": -0.25273680686950684, "step": 824 }, { "epoch": 1.2994483845547675, "grad_norm": 0.21021676063537598, "learning_rate": 1.323713881408147e-06, "log_odds_chosen": 1.164210557937622, "log_odds_ratio": -0.2797536849975586, "logits/chosen": -0.5995229482650757, "logits/rejected": -1.2917746305465698, "logps/chosen": -1.469295620918274, "logps/rejected": -2.463841199874878, "loss": 1.4925, "nll_loss": 1.464564323425293, "rewards/accuracies": 1.0, "rewards/chosen": -0.1469295620918274, "rewards/margins": 0.09945456683635712, "rewards/rejected": -0.2463841289281845, "step": 825 }, { "epoch": 1.3010244286840031, "grad_norm": 0.20562396943569183, "learning_rate": 1.3185431506414943e-06, "log_odds_chosen": 1.3276572227478027, "log_odds_ratio": -0.24276313185691833, "logits/chosen": -0.6386169195175171, "logits/rejected": -1.313894510269165, "logps/chosen": -1.3626242876052856, "logps/rejected": -2.4853250980377197, "loss": 1.3922, "nll_loss": 1.3679537773132324, "rewards/accuracies": 1.0, "rewards/chosen": -0.1362624168395996, "rewards/margins": 0.11227010190486908, "rewards/rejected": -0.24853253364562988, "step": 826 }, { "epoch": 1.3026004728132388, "grad_norm": 0.24511098861694336, "learning_rate": 1.313377567955851e-06, "log_odds_chosen": 1.0994113683700562, "log_odds_ratio": -0.3038662374019623, "logits/chosen": -0.6211011409759521, "logits/rejected": -1.17819082736969, "logps/chosen": -1.4937620162963867, "logps/rejected": -2.435581684112549, "loss": 1.513, "nll_loss": 1.482654094696045, "rewards/accuracies": 1.0, "rewards/chosen": -0.14937619864940643, "rewards/margins": 0.09418196976184845, "rewards/rejected": -0.24355816841125488, "step": 827 }, { "epoch": 1.3041765169424744, "grad_norm": 0.23530146479606628, "learning_rate": 1.3082171723747257e-06, "log_odds_chosen": 1.1812208890914917, "log_odds_ratio": -0.2834025025367737, "logits/chosen": -0.7477203011512756, "logits/rejected": -1.2340842485427856, "logps/chosen": -1.4443615674972534, "logps/rejected": -2.4478816986083984, "loss": 1.4732, "nll_loss": 1.4449094533920288, "rewards/accuracies": 1.0, "rewards/chosen": -0.14443616569042206, "rewards/margins": 0.10035203397274017, "rewards/rejected": -0.24478819966316223, "step": 828 }, { "epoch": 1.30575256107171, "grad_norm": 0.2564273476600647, "learning_rate": 1.3030620028824424e-06, "log_odds_chosen": 1.152633547782898, "log_odds_ratio": -0.28096577525138855, "logits/chosen": -0.6377764940261841, "logits/rejected": -1.1922519207000732, "logps/chosen": -1.462409496307373, "logps/rejected": -2.4399070739746094, "loss": 1.4943, "nll_loss": 1.4662330150604248, "rewards/accuracies": 1.0, "rewards/chosen": -0.1462409496307373, "rewards/margins": 0.09774976968765259, "rewards/rejected": -0.2439907193183899, "step": 829 }, { "epoch": 1.3073286052009456, "grad_norm": 0.22426116466522217, "learning_rate": 1.2979120984238449e-06, "log_odds_chosen": 1.1866298913955688, "log_odds_ratio": -0.2738218307495117, "logits/chosen": -0.6474897861480713, "logits/rejected": -1.1808933019638062, "logps/chosen": -1.5059709548950195, "logps/rejected": -2.5248422622680664, "loss": 1.5311, "nll_loss": 1.5037049055099487, "rewards/accuracies": 1.0, "rewards/chosen": -0.15059709548950195, "rewards/margins": 0.10188712924718857, "rewards/rejected": -0.2524842321872711, "step": 830 }, { "epoch": 1.3089046493301812, "grad_norm": 0.2413811832666397, "learning_rate": 1.2927674979040009e-06, "log_odds_chosen": 1.2341867685317993, "log_odds_ratio": -0.25936102867126465, "logits/chosen": -0.5372585654258728, "logits/rejected": -1.1181827783584595, "logps/chosen": -1.3933722972869873, "logps/rejected": -2.4324631690979004, "loss": 1.4411, "nll_loss": 1.4151148796081543, "rewards/accuracies": 1.0, "rewards/chosen": -0.1393372267484665, "rewards/margins": 0.10390909016132355, "rewards/rejected": -0.24324631690979004, "step": 831 }, { "epoch": 1.3104806934594169, "grad_norm": 0.20916147530078888, "learning_rate": 1.2876282401879106e-06, "log_odds_chosen": 1.3919665813446045, "log_odds_ratio": -0.22659431397914886, "logits/chosen": -0.5827940702438354, "logits/rejected": -1.230362892150879, "logps/chosen": -1.4287047386169434, "logps/rejected": -2.623344659805298, "loss": 1.4609, "nll_loss": 1.4382367134094238, "rewards/accuracies": 1.0, "rewards/chosen": -0.1428704708814621, "rewards/margins": 0.1194639727473259, "rewards/rejected": -0.2623344659805298, "step": 832 }, { "epoch": 1.3120567375886525, "grad_norm": 0.2328689694404602, "learning_rate": 1.2824943641002115e-06, "log_odds_chosen": 1.4374995231628418, "log_odds_ratio": -0.21854473650455475, "logits/chosen": -0.5202988386154175, "logits/rejected": -1.4040541648864746, "logps/chosen": -1.4226146936416626, "logps/rejected": -2.65875244140625, "loss": 1.4532, "nll_loss": 1.431359887123108, "rewards/accuracies": 1.0, "rewards/chosen": -0.14226147532463074, "rewards/margins": 0.12361378967761993, "rewards/rejected": -0.2658752501010895, "step": 833 }, { "epoch": 1.313632781717888, "grad_norm": 0.26447591185569763, "learning_rate": 1.2773659084248845e-06, "log_odds_chosen": 1.588423728942871, "log_odds_ratio": -0.2112349569797516, "logits/chosen": -0.7431791424751282, "logits/rejected": -1.3696362972259521, "logps/chosen": -1.374894142150879, "logps/rejected": -2.7474727630615234, "loss": 1.4004, "nll_loss": 1.3793179988861084, "rewards/accuracies": 1.0, "rewards/chosen": -0.1374894231557846, "rewards/margins": 0.1372578740119934, "rewards/rejected": -0.2747472822666168, "step": 834 }, { "epoch": 1.3152088258471237, "grad_norm": 0.21846246719360352, "learning_rate": 1.2722429119049632e-06, "log_odds_chosen": 1.09127676486969, "log_odds_ratio": -0.3059004545211792, "logits/chosen": -0.549738347530365, "logits/rejected": -1.1896039247512817, "logps/chosen": -1.5059549808502197, "logps/rejected": -2.442054510116577, "loss": 1.5398, "nll_loss": 1.5091662406921387, "rewards/accuracies": 1.0, "rewards/chosen": -0.1505955010652542, "rewards/margins": 0.09360997378826141, "rewards/rejected": -0.24420545995235443, "step": 835 }, { "epoch": 1.3167848699763594, "grad_norm": 0.22426699101924896, "learning_rate": 1.267125413242239e-06, "log_odds_chosen": 1.3592300415039062, "log_odds_ratio": -0.24118666350841522, "logits/chosen": -0.6401463747024536, "logits/rejected": -1.1762772798538208, "logps/chosen": -1.3827054500579834, "logps/rejected": -2.538344144821167, "loss": 1.422, "nll_loss": 1.3978909254074097, "rewards/accuracies": 1.0, "rewards/chosen": -0.1382705420255661, "rewards/margins": 0.11556386947631836, "rewards/rejected": -0.25383439660072327, "step": 836 }, { "epoch": 1.318360914105595, "grad_norm": 0.24560320377349854, "learning_rate": 1.2620134510969719e-06, "log_odds_chosen": 1.2600908279418945, "log_odds_ratio": -0.2642902433872223, "logits/chosen": -0.6954754590988159, "logits/rejected": -1.0778411626815796, "logps/chosen": -1.3690192699432373, "logps/rejected": -2.4331064224243164, "loss": 1.4094, "nll_loss": 1.3830010890960693, "rewards/accuracies": 1.0, "rewards/chosen": -0.13690192997455597, "rewards/margins": 0.10640871524810791, "rewards/rejected": -0.24331064522266388, "step": 837 }, { "epoch": 1.3199369582348306, "grad_norm": 0.2238793969154358, "learning_rate": 1.2569070640875912e-06, "log_odds_chosen": 1.4926786422729492, "log_odds_ratio": -0.23559126257896423, "logits/chosen": -0.5361714959144592, "logits/rejected": -1.1999014616012573, "logps/chosen": -1.3522824048995972, "logps/rejected": -2.5953502655029297, "loss": 1.3956, "nll_loss": 1.3720366954803467, "rewards/accuracies": 1.0, "rewards/chosen": -0.1352282464504242, "rewards/margins": 0.12430679798126221, "rewards/rejected": -0.2595350444316864, "step": 838 }, { "epoch": 1.3215130023640662, "grad_norm": 0.24217304587364197, "learning_rate": 1.2518062907904136e-06, "log_odds_chosen": 1.5321965217590332, "log_odds_ratio": -0.21319562196731567, "logits/chosen": -0.6855440735816956, "logits/rejected": -1.3967851400375366, "logps/chosen": -1.4190380573272705, "logps/rejected": -2.7448883056640625, "loss": 1.4492, "nll_loss": 1.4278764724731445, "rewards/accuracies": 1.0, "rewards/chosen": -0.141903817653656, "rewards/margins": 0.13258501887321472, "rewards/rejected": -0.2744888365268707, "step": 839 }, { "epoch": 1.3230890464933018, "grad_norm": 0.22823341190814972, "learning_rate": 1.2467111697393446e-06, "log_odds_chosen": 1.3126676082611084, "log_odds_ratio": -0.2455110400915146, "logits/chosen": -0.5909042358398438, "logits/rejected": -1.4499304294586182, "logps/chosen": -1.5280629396438599, "logps/rejected": -2.6682920455932617, "loss": 1.5503, "nll_loss": 1.5257198810577393, "rewards/accuracies": 1.0, "rewards/chosen": -0.15280629694461823, "rewards/margins": 0.11402291059494019, "rewards/rejected": -0.2668291926383972, "step": 840 }, { "epoch": 1.3246650906225375, "grad_norm": 0.22689875960350037, "learning_rate": 1.2416217394255905e-06, "log_odds_chosen": 1.1709719896316528, "log_odds_ratio": -0.27490392327308655, "logits/chosen": -0.5521284937858582, "logits/rejected": -1.1081751585006714, "logps/chosen": -1.3606747388839722, "logps/rejected": -2.3374969959259033, "loss": 1.4233, "nll_loss": 1.3958425521850586, "rewards/accuracies": 1.0, "rewards/chosen": -0.1360674649477005, "rewards/margins": 0.09768223762512207, "rewards/rejected": -0.23374973237514496, "step": 841 }, { "epoch": 1.326241134751773, "grad_norm": 0.24880895018577576, "learning_rate": 1.2365380382973669e-06, "log_odds_chosen": 1.105400800704956, "log_odds_ratio": -0.2960892617702484, "logits/chosen": -0.6185301542282104, "logits/rejected": -0.9958997964859009, "logps/chosen": -1.520857810974121, "logps/rejected": -2.4716732501983643, "loss": 1.5367, "nll_loss": 1.5070462226867676, "rewards/accuracies": 1.0, "rewards/chosen": -0.1520857959985733, "rewards/margins": 0.09508156776428223, "rewards/rejected": -0.24716736376285553, "step": 842 }, { "epoch": 1.3278171788810087, "grad_norm": 0.22568634152412415, "learning_rate": 1.2314601047596061e-06, "log_odds_chosen": 1.5309821367263794, "log_odds_ratio": -0.2005751132965088, "logits/chosen": -0.5617668628692627, "logits/rejected": -1.2538548707962036, "logps/chosen": -1.4406756162643433, "logps/rejected": -2.7666633129119873, "loss": 1.4557, "nll_loss": 1.4356415271759033, "rewards/accuracies": 1.0, "rewards/chosen": -0.14406757056713104, "rewards/margins": 0.13259875774383545, "rewards/rejected": -0.2766663432121277, "step": 843 }, { "epoch": 1.3293932230102443, "grad_norm": 0.2184203863143921, "learning_rate": 1.2263879771736713e-06, "log_odds_chosen": 1.1662484407424927, "log_odds_ratio": -0.2803865075111389, "logits/chosen": -0.6450378894805908, "logits/rejected": -1.3481870889663696, "logps/chosen": -1.4607118368148804, "logps/rejected": -2.45200252532959, "loss": 1.4906, "nll_loss": 1.4625518321990967, "rewards/accuracies": 1.0, "rewards/chosen": -0.1460711807012558, "rewards/margins": 0.0991290807723999, "rewards/rejected": -0.2452002465724945, "step": 844 }, { "epoch": 1.33096926713948, "grad_norm": 0.21399280428886414, "learning_rate": 1.2213216938570642e-06, "log_odds_chosen": 1.346273422241211, "log_odds_ratio": -0.2556445002555847, "logits/chosen": -0.6238068342208862, "logits/rejected": -1.2513978481292725, "logps/chosen": -1.38446044921875, "logps/rejected": -2.5230977535247803, "loss": 1.4286, "nll_loss": 1.403060793876648, "rewards/accuracies": 1.0, "rewards/chosen": -0.13844604790210724, "rewards/margins": 0.11386376619338989, "rewards/rejected": -0.25230979919433594, "step": 845 }, { "epoch": 1.3325453112687156, "grad_norm": 0.23077259957790375, "learning_rate": 1.2162612930831354e-06, "log_odds_chosen": 1.6726126670837402, "log_odds_ratio": -0.18770115077495575, "logits/chosen": -0.679032027721405, "logits/rejected": -1.3372013568878174, "logps/chosen": -1.2837086915969849, "logps/rejected": -2.7025535106658936, "loss": 1.325, "nll_loss": 1.3062329292297363, "rewards/accuracies": 1.0, "rewards/chosen": -0.12837088108062744, "rewards/margins": 0.14188450574874878, "rewards/rejected": -0.2702553868293762, "step": 846 }, { "epoch": 1.3341213553979512, "grad_norm": 0.22375719249248505, "learning_rate": 1.2112068130807949e-06, "log_odds_chosen": 1.716221809387207, "log_odds_ratio": -0.17406882345676422, "logits/chosen": -0.6283752918243408, "logits/rejected": -1.4763813018798828, "logps/chosen": -1.4325740337371826, "logps/rejected": -2.9346535205841064, "loss": 1.4424, "nll_loss": 1.4250414371490479, "rewards/accuracies": 1.0, "rewards/chosen": -0.14325739443302155, "rewards/margins": 0.15020796656608582, "rewards/rejected": -0.29346537590026855, "step": 847 }, { "epoch": 1.3356973995271868, "grad_norm": 0.23664440214633942, "learning_rate": 1.206158292034226e-06, "log_odds_chosen": 1.4216163158416748, "log_odds_ratio": -0.2219858169555664, "logits/chosen": -0.655118465423584, "logits/rejected": -1.259392499923706, "logps/chosen": -1.450413465499878, "logps/rejected": -2.6762232780456543, "loss": 1.4754, "nll_loss": 1.4532475471496582, "rewards/accuracies": 1.0, "rewards/chosen": -0.145041361451149, "rewards/margins": 0.12258099764585495, "rewards/rejected": -0.26762235164642334, "step": 848 }, { "epoch": 1.3372734436564224, "grad_norm": 0.2171028107404709, "learning_rate": 1.2011157680825928e-06, "log_odds_chosen": 1.5037569999694824, "log_odds_ratio": -0.21429608762264252, "logits/chosen": -0.5462362766265869, "logits/rejected": -1.3398933410644531, "logps/chosen": -1.3809682130813599, "logps/rejected": -2.6717283725738525, "loss": 1.4141, "nll_loss": 1.392699122428894, "rewards/accuracies": 1.0, "rewards/chosen": -0.13809683918952942, "rewards/margins": 0.1290760040283203, "rewards/rejected": -0.26717284321784973, "step": 849 }, { "epoch": 1.338849487785658, "grad_norm": 0.23693877458572388, "learning_rate": 1.1960792793197553e-06, "log_odds_chosen": 1.3994156122207642, "log_odds_ratio": -0.23651733994483948, "logits/chosen": -0.7159205675125122, "logits/rejected": -1.3087269067764282, "logps/chosen": -1.435623288154602, "logps/rejected": -2.6384575366973877, "loss": 1.4612, "nll_loss": 1.4375219345092773, "rewards/accuracies": 1.0, "rewards/chosen": -0.14356234669685364, "rewards/margins": 0.12028342485427856, "rewards/rejected": -0.2638457715511322, "step": 850 }, { "epoch": 1.3404255319148937, "grad_norm": 0.23495543003082275, "learning_rate": 1.1910488637939824e-06, "log_odds_chosen": 1.6415536403656006, "log_odds_ratio": -0.1799456775188446, "logits/chosen": -0.7626610398292542, "logits/rejected": -1.4606178998947144, "logps/chosen": -1.4732147455215454, "logps/rejected": -2.910473108291626, "loss": 1.4756, "nll_loss": 1.4575589895248413, "rewards/accuracies": 1.0, "rewards/chosen": -0.14732146263122559, "rewards/margins": 0.14372582733631134, "rewards/rejected": -0.2910473048686981, "step": 851 }, { "epoch": 1.3420015760441293, "grad_norm": 0.23265422880649567, "learning_rate": 1.1860245595076582e-06, "log_odds_chosen": 1.137512445449829, "log_odds_ratio": -0.2833695113658905, "logits/chosen": -0.5420858860015869, "logits/rejected": -1.2606393098831177, "logps/chosen": -1.513380527496338, "logps/rejected": -2.489454746246338, "loss": 1.518, "nll_loss": 1.4896390438079834, "rewards/accuracies": 1.0, "rewards/chosen": -0.15133805572986603, "rewards/margins": 0.09760741889476776, "rewards/rejected": -0.2489454746246338, "step": 852 }, { "epoch": 1.343577620173365, "grad_norm": 0.23471572995185852, "learning_rate": 1.1810064044170027e-06, "log_odds_chosen": 1.3799176216125488, "log_odds_ratio": -0.23571422696113586, "logits/chosen": -0.677787184715271, "logits/rejected": -1.3286588191986084, "logps/chosen": -1.3944374322891235, "logps/rejected": -2.5705387592315674, "loss": 1.4269, "nll_loss": 1.4033761024475098, "rewards/accuracies": 1.0, "rewards/chosen": -0.1394437700510025, "rewards/margins": 0.11761011928319931, "rewards/rejected": -0.2570538818836212, "step": 853 }, { "epoch": 1.3451536643026005, "grad_norm": 0.2248305231332779, "learning_rate": 1.1759944364317812e-06, "log_odds_chosen": 1.399195909500122, "log_odds_ratio": -0.23164813220500946, "logits/chosen": -0.6954638361930847, "logits/rejected": -1.3272819519042969, "logps/chosen": -1.454675316810608, "logps/rejected": -2.6616015434265137, "loss": 1.4738, "nll_loss": 1.4506566524505615, "rewards/accuracies": 1.0, "rewards/chosen": -0.14546753466129303, "rewards/margins": 0.12069262564182281, "rewards/rejected": -0.26616016030311584, "step": 854 }, { "epoch": 1.3467297084318361, "grad_norm": 0.22791528701782227, "learning_rate": 1.1709886934150172e-06, "log_odds_chosen": 1.2449368238449097, "log_odds_ratio": -0.26195237040519714, "logits/chosen": -0.5790132284164429, "logits/rejected": -1.1786377429962158, "logps/chosen": -1.4745280742645264, "logps/rejected": -2.5385427474975586, "loss": 1.5088, "nll_loss": 1.4826158285140991, "rewards/accuracies": 1.0, "rewards/chosen": -0.14745281636714935, "rewards/margins": 0.1064014732837677, "rewards/rejected": -0.25385427474975586, "step": 855 }, { "epoch": 1.3483057525610718, "grad_norm": 0.2516065537929535, "learning_rate": 1.1659892131827097e-06, "log_odds_chosen": 1.4126324653625488, "log_odds_ratio": -0.2332790195941925, "logits/chosen": -0.6840938329696655, "logits/rejected": -1.4321739673614502, "logps/chosen": -1.4983186721801758, "logps/rejected": -2.729335069656372, "loss": 1.5146, "nll_loss": 1.4912675619125366, "rewards/accuracies": 1.0, "rewards/chosen": -0.1498318761587143, "rewards/margins": 0.12310166656970978, "rewards/rejected": -0.2729335129261017, "step": 856 }, { "epoch": 1.3498817966903074, "grad_norm": 0.22173132002353668, "learning_rate": 1.1609960335035423e-06, "log_odds_chosen": 1.857784628868103, "log_odds_ratio": -0.15618272125720978, "logits/chosen": -0.6507941484451294, "logits/rejected": -1.464099645614624, "logps/chosen": -1.4839560985565186, "logps/rejected": -3.1259849071502686, "loss": 1.5014, "nll_loss": 1.4857747554779053, "rewards/accuracies": 1.0, "rewards/chosen": -0.1483956128358841, "rewards/margins": 0.16420289874076843, "rewards/rejected": -0.31259849667549133, "step": 857 }, { "epoch": 1.351457840819543, "grad_norm": 0.23569224774837494, "learning_rate": 1.1560091920986028e-06, "log_odds_chosen": 1.5377146005630493, "log_odds_ratio": -0.20144188404083252, "logits/chosen": -0.6365929841995239, "logits/rejected": -1.3504419326782227, "logps/chosen": -1.4079368114471436, "logps/rejected": -2.733362913131714, "loss": 1.4403, "nll_loss": 1.4201616048812866, "rewards/accuracies": 1.0, "rewards/chosen": -0.14079368114471436, "rewards/margins": 0.13254259526729584, "rewards/rejected": -0.273336261510849, "step": 858 }, { "epoch": 1.3530338849487786, "grad_norm": 0.2383047342300415, "learning_rate": 1.1510287266410967e-06, "log_odds_chosen": 1.2335214614868164, "log_odds_ratio": -0.26210707426071167, "logits/chosen": -0.6658295392990112, "logits/rejected": -1.2150765657424927, "logps/chosen": -1.5179202556610107, "logps/rejected": -2.5849180221557617, "loss": 1.5371, "nll_loss": 1.5109120607376099, "rewards/accuracies": 1.0, "rewards/chosen": -0.1517920196056366, "rewards/margins": 0.10669977962970734, "rewards/rejected": -0.2584918141365051, "step": 859 }, { "epoch": 1.3546099290780143, "grad_norm": 0.22694183886051178, "learning_rate": 1.1460546747560616e-06, "log_odds_chosen": 1.6129558086395264, "log_odds_ratio": -0.1908552348613739, "logits/chosen": -0.6709847450256348, "logits/rejected": -1.4357986450195312, "logps/chosen": -1.449781060218811, "logps/rejected": -2.8573789596557617, "loss": 1.4608, "nll_loss": 1.4417297840118408, "rewards/accuracies": 1.0, "rewards/chosen": -0.1449781060218811, "rewards/margins": 0.14075979590415955, "rewards/rejected": -0.28573790192604065, "step": 860 }, { "epoch": 1.3561859732072499, "grad_norm": 0.22804084420204163, "learning_rate": 1.1410870740200839e-06, "log_odds_chosen": 1.4266773462295532, "log_odds_ratio": -0.23483175039291382, "logits/chosen": -0.6395347714424133, "logits/rejected": -1.3960647583007812, "logps/chosen": -1.4997472763061523, "logps/rejected": -2.74517822265625, "loss": 1.5151, "nll_loss": 1.4916000366210938, "rewards/accuracies": 1.0, "rewards/chosen": -0.1499747335910797, "rewards/margins": 0.12454307079315186, "rewards/rejected": -0.27451783418655396, "step": 861 }, { "epoch": 1.3577620173364855, "grad_norm": 0.2264339029788971, "learning_rate": 1.1361259619610138e-06, "log_odds_chosen": 1.504522442817688, "log_odds_ratio": -0.21264749765396118, "logits/chosen": -0.636001467704773, "logits/rejected": -1.4413343667984009, "logps/chosen": -1.4349030256271362, "logps/rejected": -2.731299877166748, "loss": 1.4661, "nll_loss": 1.4447886943817139, "rewards/accuracies": 1.0, "rewards/chosen": -0.14349031448364258, "rewards/margins": 0.12963968515396118, "rewards/rejected": -0.27312999963760376, "step": 862 }, { "epoch": 1.3593380614657211, "grad_norm": 0.2602965235710144, "learning_rate": 1.1311713760576834e-06, "log_odds_chosen": 1.4819672107696533, "log_odds_ratio": -0.21919040381908417, "logits/chosen": -0.774250328540802, "logits/rejected": -1.1287730932235718, "logps/chosen": -1.4118751287460327, "logps/rejected": -2.6844165325164795, "loss": 1.4224, "nll_loss": 1.4004489183425903, "rewards/accuracies": 1.0, "rewards/chosen": -0.14118753373622894, "rewards/margins": 0.12725412845611572, "rewards/rejected": -0.26844167709350586, "step": 863 }, { "epoch": 1.3609141055949567, "grad_norm": 0.22611349821090698, "learning_rate": 1.1262233537396228e-06, "log_odds_chosen": 1.6887205839157104, "log_odds_ratio": -0.1933886855840683, "logits/chosen": -0.6210219860076904, "logits/rejected": -1.3085074424743652, "logps/chosen": -1.3957865238189697, "logps/rejected": -2.8608171939849854, "loss": 1.4071, "nll_loss": 1.3877183198928833, "rewards/accuracies": 1.0, "rewards/chosen": -0.1395786553621292, "rewards/margins": 0.1465030312538147, "rewards/rejected": -0.2860816717147827, "step": 864 }, { "epoch": 1.3624901497241924, "grad_norm": 0.22818666696548462, "learning_rate": 1.1212819323867778e-06, "log_odds_chosen": 1.4383111000061035, "log_odds_ratio": -0.23275214433670044, "logits/chosen": -0.6812857389450073, "logits/rejected": -1.2923259735107422, "logps/chosen": -1.3695812225341797, "logps/rejected": -2.602672815322876, "loss": 1.4133, "nll_loss": 1.3900020122528076, "rewards/accuracies": 1.0, "rewards/chosen": -0.13695812225341797, "rewards/margins": 0.12330914288759232, "rewards/rejected": -0.2602672576904297, "step": 865 }, { "epoch": 1.364066193853428, "grad_norm": 0.23372229933738708, "learning_rate": 1.1163471493292267e-06, "log_odds_chosen": 1.4558780193328857, "log_odds_ratio": -0.2220865786075592, "logits/chosen": -0.6780717372894287, "logits/rejected": -1.3150091171264648, "logps/chosen": -1.4515866041183472, "logps/rejected": -2.7108144760131836, "loss": 1.4681, "nll_loss": 1.4459339380264282, "rewards/accuracies": 1.0, "rewards/chosen": -0.14515866339206696, "rewards/margins": 0.1259227991104126, "rewards/rejected": -0.27108144760131836, "step": 866 }, { "epoch": 1.3656422379826636, "grad_norm": 0.2135072648525238, "learning_rate": 1.1114190418468972e-06, "log_odds_chosen": 1.3662996292114258, "log_odds_ratio": -0.24186179041862488, "logits/chosen": -0.5894110202789307, "logits/rejected": -1.234513521194458, "logps/chosen": -1.4710414409637451, "logps/rejected": -2.656111478805542, "loss": 1.5074, "nll_loss": 1.4831658601760864, "rewards/accuracies": 1.0, "rewards/chosen": -0.14710412919521332, "rewards/margins": 0.1185070127248764, "rewards/rejected": -0.2656111419200897, "step": 867 }, { "epoch": 1.367218282111899, "grad_norm": 0.23583169281482697, "learning_rate": 1.106497647169288e-06, "log_odds_chosen": 1.5545036792755127, "log_odds_ratio": -0.19786261022090912, "logits/chosen": -0.5935441255569458, "logits/rejected": -1.3420860767364502, "logps/chosen": -1.3998342752456665, "logps/rejected": -2.739306926727295, "loss": 1.4158, "nll_loss": 1.396035075187683, "rewards/accuracies": 1.0, "rewards/chosen": -0.1399834305047989, "rewards/margins": 0.13394726812839508, "rewards/rejected": -0.27393069863319397, "step": 868 }, { "epoch": 1.3687943262411348, "grad_norm": 0.22831623256206512, "learning_rate": 1.1015830024751854e-06, "log_odds_chosen": 1.3860647678375244, "log_odds_ratio": -0.23448021709918976, "logits/chosen": -0.7633264064788818, "logits/rejected": -1.1685283184051514, "logps/chosen": -1.3554713726043701, "logps/rejected": -2.5315845012664795, "loss": 1.3889, "nll_loss": 1.365429162979126, "rewards/accuracies": 1.0, "rewards/chosen": -0.13554714620113373, "rewards/margins": 0.11761126667261124, "rewards/rejected": -0.25315842032432556, "step": 869 }, { "epoch": 1.3703703703703702, "grad_norm": 0.23310081660747528, "learning_rate": 1.0966751448923834e-06, "log_odds_chosen": 1.742652177810669, "log_odds_ratio": -0.1702868491411209, "logits/chosen": -0.6170627474784851, "logits/rejected": -1.3226041793823242, "logps/chosen": -1.3587743043899536, "logps/rejected": -2.8653130531311035, "loss": 1.388, "nll_loss": 1.3709335327148438, "rewards/accuracies": 1.0, "rewards/chosen": -0.13587743043899536, "rewards/margins": 0.15065383911132812, "rewards/rejected": -0.2865312695503235, "step": 870 }, { "epoch": 1.371946414499606, "grad_norm": 0.2376309186220169, "learning_rate": 1.0917741114974007e-06, "log_odds_chosen": 1.344078779220581, "log_odds_ratio": -0.23724061250686646, "logits/chosen": -0.5631985068321228, "logits/rejected": -1.3488740921020508, "logps/chosen": -1.500316858291626, "logps/rejected": -2.661327838897705, "loss": 1.5223, "nll_loss": 1.4986134767532349, "rewards/accuracies": 1.0, "rewards/chosen": -0.1500317007303238, "rewards/margins": 0.11610110849142075, "rewards/rejected": -0.26613280177116394, "step": 871 }, { "epoch": 1.3735224586288415, "grad_norm": 0.22864164412021637, "learning_rate": 1.0868799393152035e-06, "log_odds_chosen": 1.6591737270355225, "log_odds_ratio": -0.19571372866630554, "logits/chosen": -0.716070830821991, "logits/rejected": -1.3921763896942139, "logps/chosen": -1.3805607557296753, "logps/rejected": -2.815901279449463, "loss": 1.408, "nll_loss": 1.3884165287017822, "rewards/accuracies": 1.0, "rewards/chosen": -0.13805608451366425, "rewards/margins": 0.14353404939174652, "rewards/rejected": -0.28159013390541077, "step": 872 }, { "epoch": 1.3750985027580773, "grad_norm": 0.31318002939224243, "learning_rate": 1.0819926653189271e-06, "log_odds_chosen": 1.404085636138916, "log_odds_ratio": -0.23184779286384583, "logits/chosen": -0.6663385629653931, "logits/rejected": -1.15491783618927, "logps/chosen": -1.4694868326187134, "logps/rejected": -2.684523820877075, "loss": 1.4952, "nll_loss": 1.472047209739685, "rewards/accuracies": 1.0, "rewards/chosen": -0.1469486802816391, "rewards/margins": 0.12150370329618454, "rewards/rejected": -0.26845240592956543, "step": 873 }, { "epoch": 1.3766745468873127, "grad_norm": 0.21613894402980804, "learning_rate": 1.0771123264295895e-06, "log_odds_chosen": 1.4242324829101562, "log_odds_ratio": -0.22460418939590454, "logits/chosen": -0.6542535424232483, "logits/rejected": -1.3950517177581787, "logps/chosen": -1.362542748451233, "logps/rejected": -2.569579601287842, "loss": 1.3922, "nll_loss": 1.3697339296340942, "rewards/accuracies": 1.0, "rewards/chosen": -0.13625426590442657, "rewards/margins": 0.12070368975400925, "rewards/rejected": -0.2569579482078552, "step": 874 }, { "epoch": 1.3782505910165486, "grad_norm": 0.21156375110149384, "learning_rate": 1.0722389595158215e-06, "log_odds_chosen": 1.6333661079406738, "log_odds_ratio": -0.19591745734214783, "logits/chosen": -0.5586254000663757, "logits/rejected": -1.3503246307373047, "logps/chosen": -1.3475499153137207, "logps/rejected": -2.7531394958496094, "loss": 1.3833, "nll_loss": 1.363679051399231, "rewards/accuracies": 1.0, "rewards/chosen": -0.1347550004720688, "rewards/margins": 0.14055892825126648, "rewards/rejected": -0.27531394362449646, "step": 875 }, { "epoch": 1.379826635145784, "grad_norm": 0.2382674217224121, "learning_rate": 1.0673726013935827e-06, "log_odds_chosen": 1.5173158645629883, "log_odds_ratio": -0.22835971415042877, "logits/chosen": -0.5920229554176331, "logits/rejected": -1.2161948680877686, "logps/chosen": -1.364715814590454, "logps/rejected": -2.6635069847106934, "loss": 1.4015, "nll_loss": 1.3786344528198242, "rewards/accuracies": 1.0, "rewards/chosen": -0.13647159934043884, "rewards/margins": 0.12987911701202393, "rewards/rejected": -0.26635071635246277, "step": 876 }, { "epoch": 1.3814026792750198, "grad_norm": 0.22258909046649933, "learning_rate": 1.0625132888258833e-06, "log_odds_chosen": 1.3993436098098755, "log_odds_ratio": -0.23552975058555603, "logits/chosen": -0.721338152885437, "logits/rejected": -1.2683022022247314, "logps/chosen": -1.430884599685669, "logps/rejected": -2.6385984420776367, "loss": 1.455, "nll_loss": 1.431429147720337, "rewards/accuracies": 1.0, "rewards/chosen": -0.1430884748697281, "rewards/margins": 0.12077140063047409, "rewards/rejected": -0.2638598680496216, "step": 877 }, { "epoch": 1.3829787234042552, "grad_norm": 0.23172186315059662, "learning_rate": 1.057661058522509e-06, "log_odds_chosen": 1.5464884042739868, "log_odds_ratio": -0.20113787055015564, "logits/chosen": -0.669167160987854, "logits/rejected": -1.4978021383285522, "logps/chosen": -1.4551262855529785, "logps/rejected": -2.797950267791748, "loss": 1.4829, "nll_loss": 1.4627622365951538, "rewards/accuracies": 1.0, "rewards/chosen": -0.1455126404762268, "rewards/margins": 0.1342824101448059, "rewards/rejected": -0.2797950506210327, "step": 878 }, { "epoch": 1.384554767533491, "grad_norm": 0.257834792137146, "learning_rate": 1.0528159471397425e-06, "log_odds_chosen": 1.4086424112319946, "log_odds_ratio": -0.22979885339736938, "logits/chosen": -0.6474118232727051, "logits/rejected": -1.3380630016326904, "logps/chosen": -1.3984726667404175, "logps/rejected": -2.6040029525756836, "loss": 1.4336, "nll_loss": 1.410598874092102, "rewards/accuracies": 1.0, "rewards/chosen": -0.1398472785949707, "rewards/margins": 0.12055300921201706, "rewards/rejected": -0.26040029525756836, "step": 879 }, { "epoch": 1.3861308116627264, "grad_norm": 0.26112839579582214, "learning_rate": 1.0479779912800868e-06, "log_odds_chosen": 1.3672027587890625, "log_odds_ratio": -0.23483988642692566, "logits/chosen": -0.7720797061920166, "logits/rejected": -1.2881569862365723, "logps/chosen": -1.3734962940216064, "logps/rejected": -2.5312793254852295, "loss": 1.4089, "nll_loss": 1.385401964187622, "rewards/accuracies": 1.0, "rewards/chosen": -0.13734962046146393, "rewards/margins": 0.11577831953763962, "rewards/rejected": -0.25312793254852295, "step": 880 }, { "epoch": 1.3877068557919623, "grad_norm": 0.22247852385044098, "learning_rate": 1.0431472274919863e-06, "log_odds_chosen": 1.6057835817337036, "log_odds_ratio": -0.19174842536449432, "logits/chosen": -0.5732901692390442, "logits/rejected": -1.447432279586792, "logps/chosen": -1.3863584995269775, "logps/rejected": -2.7723236083984375, "loss": 1.4116, "nll_loss": 1.3924009799957275, "rewards/accuracies": 1.0, "rewards/chosen": -0.13863585889339447, "rewards/margins": 0.1385965496301651, "rewards/rejected": -0.2772323787212372, "step": 881 }, { "epoch": 1.3892828999211977, "grad_norm": 0.2083035707473755, "learning_rate": 1.0383236922695543e-06, "log_odds_chosen": 1.6839567422866821, "log_odds_ratio": -0.17856833338737488, "logits/chosen": -0.4908663034439087, "logits/rejected": -1.336336612701416, "logps/chosen": -1.2355999946594238, "logps/rejected": -2.634152889251709, "loss": 1.2841, "nll_loss": 1.2662585973739624, "rewards/accuracies": 1.0, "rewards/chosen": -0.12355998903512955, "rewards/margins": 0.1398552805185318, "rewards/rejected": -0.26341530680656433, "step": 882 }, { "epoch": 1.3908589440504335, "grad_norm": 0.3688744008541107, "learning_rate": 1.0335074220522962e-06, "log_odds_chosen": 1.6426076889038086, "log_odds_ratio": -0.1960243582725525, "logits/chosen": -0.6236636638641357, "logits/rejected": -1.1652271747589111, "logps/chosen": -1.464396595954895, "logps/rejected": -2.906247615814209, "loss": 1.488, "nll_loss": 1.4683482646942139, "rewards/accuracies": 1.0, "rewards/chosen": -0.14643965661525726, "rewards/margins": 0.1441851109266281, "rewards/rejected": -0.29062479734420776, "step": 883 }, { "epoch": 1.392434988179669, "grad_norm": 0.2636977732181549, "learning_rate": 1.0286984532248326e-06, "log_odds_chosen": 1.1682370901107788, "log_odds_ratio": -0.2839449644088745, "logits/chosen": -0.5573156476020813, "logits/rejected": -1.3760584592819214, "logps/chosen": -1.4987510442733765, "logps/rejected": -2.4986791610717773, "loss": 1.5201, "nll_loss": 1.4916810989379883, "rewards/accuracies": 1.0, "rewards/chosen": -0.14987510442733765, "rewards/margins": 0.09999283403158188, "rewards/rejected": -0.24986793100833893, "step": 884 }, { "epoch": 1.3940110323089048, "grad_norm": 0.24730992317199707, "learning_rate": 1.0238968221166269e-06, "log_odds_chosen": 1.3833285570144653, "log_odds_ratio": -0.24921075999736786, "logits/chosen": -0.6354068517684937, "logits/rejected": -1.3926112651824951, "logps/chosen": -1.452917456626892, "logps/rejected": -2.6520602703094482, "loss": 1.4854, "nll_loss": 1.4604960680007935, "rewards/accuracies": 1.0, "rewards/chosen": -0.14529173076152802, "rewards/margins": 0.11991427838802338, "rewards/rejected": -0.2652060091495514, "step": 885 }, { "epoch": 1.3955870764381402, "grad_norm": 0.23205415904521942, "learning_rate": 1.019102565001707e-06, "log_odds_chosen": 1.3424711227416992, "log_odds_ratio": -0.2470676451921463, "logits/chosen": -0.6062210202217102, "logits/rejected": -1.0933165550231934, "logps/chosen": -1.4911824464797974, "logps/rejected": -2.654860258102417, "loss": 1.5139, "nll_loss": 1.48921537399292, "rewards/accuracies": 1.0, "rewards/chosen": -0.14911825954914093, "rewards/margins": 0.11636777967214584, "rewards/rejected": -0.2654860317707062, "step": 886 }, { "epoch": 1.397163120567376, "grad_norm": 0.2643720507621765, "learning_rate": 1.0143157180983965e-06, "log_odds_chosen": 1.3510806560516357, "log_odds_ratio": -0.24315354228019714, "logits/chosen": -0.6877184510231018, "logits/rejected": -1.2413437366485596, "logps/chosen": -1.470854640007019, "logps/rejected": -2.6378273963928223, "loss": 1.4694, "nll_loss": 1.4450820684432983, "rewards/accuracies": 1.0, "rewards/chosen": -0.14708545804023743, "rewards/margins": 0.1166972815990448, "rewards/rejected": -0.2637827396392822, "step": 887 }, { "epoch": 1.3987391646966114, "grad_norm": 0.2214188426733017, "learning_rate": 1.0095363175690375e-06, "log_odds_chosen": 1.3754997253417969, "log_odds_ratio": -0.23635491728782654, "logits/chosen": -0.6993992328643799, "logits/rejected": -1.1483128070831299, "logps/chosen": -1.4644639492034912, "logps/rejected": -2.6512155532836914, "loss": 1.4749, "nll_loss": 1.4512659311294556, "rewards/accuracies": 1.0, "rewards/chosen": -0.14644640684127808, "rewards/margins": 0.11867515742778778, "rewards/rejected": -0.26512157917022705, "step": 888 }, { "epoch": 1.4003152088258473, "grad_norm": 0.2210846245288849, "learning_rate": 1.0047643995197178e-06, "log_odds_chosen": 1.4658843278884888, "log_odds_ratio": -0.22319269180297852, "logits/chosen": -0.5951950550079346, "logits/rejected": -1.27753746509552, "logps/chosen": -1.3810845613479614, "logps/rejected": -2.6306235790252686, "loss": 1.4061, "nll_loss": 1.3837357759475708, "rewards/accuracies": 1.0, "rewards/chosen": -0.13810846209526062, "rewards/margins": 0.12495388090610504, "rewards/rejected": -0.26306235790252686, "step": 889 }, { "epoch": 1.4018912529550827, "grad_norm": 0.23862753808498383, "learning_rate": 1.0000000000000004e-06, "log_odds_chosen": 1.3239901065826416, "log_odds_ratio": -0.24482491612434387, "logits/chosen": -0.5615009665489197, "logits/rejected": -1.1960052251815796, "logps/chosen": -1.428395390510559, "logps/rejected": -2.554837703704834, "loss": 1.4676, "nll_loss": 1.4431042671203613, "rewards/accuracies": 1.0, "rewards/chosen": -0.14283955097198486, "rewards/margins": 0.11264419555664062, "rewards/rejected": -0.2554837465286255, "step": 890 }, { "epoch": 1.4034672970843185, "grad_norm": 0.2373189628124237, "learning_rate": 9.952431550026459e-07, "log_odds_chosen": 1.463064193725586, "log_odds_ratio": -0.22052603960037231, "logits/chosen": -0.7203116416931152, "logits/rejected": -1.1815186738967896, "logps/chosen": -1.4511473178863525, "logps/rejected": -2.720369577407837, "loss": 1.4791, "nll_loss": 1.4570112228393555, "rewards/accuracies": 1.0, "rewards/chosen": -0.1451147347688675, "rewards/margins": 0.12692220509052277, "rewards/rejected": -0.27203693985939026, "step": 891 }, { "epoch": 1.405043341213554, "grad_norm": 0.23864883184432983, "learning_rate": 9.904939004633471e-07, "log_odds_chosen": 1.5694756507873535, "log_odds_ratio": -0.19214697182178497, "logits/chosen": -0.7610747218132019, "logits/rejected": -1.3267537355422974, "logps/chosen": -1.5084730386734009, "logps/rejected": -2.882596254348755, "loss": 1.5118, "nll_loss": 1.4925544261932373, "rewards/accuracies": 1.0, "rewards/chosen": -0.15084730088710785, "rewards/margins": 0.13741232454776764, "rewards/rejected": -0.2882596254348755, "step": 892 }, { "epoch": 1.4066193853427895, "grad_norm": 0.2424936145544052, "learning_rate": 9.857522722604536e-07, "log_odds_chosen": 1.2864803075790405, "log_odds_ratio": -0.2700284719467163, "logits/chosen": -0.5973088145256042, "logits/rejected": -1.0232847929000854, "logps/chosen": -1.3831663131713867, "logps/rejected": -2.4751455783843994, "loss": 1.4291, "nll_loss": 1.402083396911621, "rewards/accuracies": 1.0, "rewards/chosen": -0.13831663131713867, "rewards/margins": 0.1091979444026947, "rewards/rejected": -0.24751457571983337, "step": 893 }, { "epoch": 1.4081954294720251, "grad_norm": 0.25511765480041504, "learning_rate": 9.81018306214702e-07, "log_odds_chosen": 1.324715256690979, "log_odds_ratio": -0.24273060262203217, "logits/chosen": -0.6530345678329468, "logits/rejected": -1.1975769996643066, "logps/chosen": -1.3939695358276367, "logps/rejected": -2.515407085418701, "loss": 1.4415, "nll_loss": 1.4172462224960327, "rewards/accuracies": 1.0, "rewards/chosen": -0.13939696550369263, "rewards/margins": 0.11214376986026764, "rewards/rejected": -0.2515407204627991, "step": 894 }, { "epoch": 1.4097714736012608, "grad_norm": 0.2710649073123932, "learning_rate": 9.76292038088945e-07, "log_odds_chosen": 1.3045034408569336, "log_odds_ratio": -0.24552682042121887, "logits/chosen": -0.5830535888671875, "logits/rejected": -1.089822769165039, "logps/chosen": -1.4674913883209229, "logps/rejected": -2.5890750885009766, "loss": 1.4871, "nll_loss": 1.4625575542449951, "rewards/accuracies": 1.0, "rewards/chosen": -0.14674915373325348, "rewards/margins": 0.11215835809707642, "rewards/rejected": -0.2589074671268463, "step": 895 }, { "epoch": 1.4113475177304964, "grad_norm": 0.21869832277297974, "learning_rate": 9.715735035878799e-07, "log_odds_chosen": 1.5492786169052124, "log_odds_ratio": -0.20071250200271606, "logits/chosen": -0.6723061203956604, "logits/rejected": -1.2856287956237793, "logps/chosen": -1.3623251914978027, "logps/rejected": -2.681443214416504, "loss": 1.3873, "nll_loss": 1.3672643899917603, "rewards/accuracies": 1.0, "rewards/chosen": -0.13623252511024475, "rewards/margins": 0.13191181421279907, "rewards/rejected": -0.2681443393230438, "step": 896 }, { "epoch": 1.412923561859732, "grad_norm": 0.21382860839366913, "learning_rate": 9.668627383577812e-07, "log_odds_chosen": 1.3884141445159912, "log_odds_ratio": -0.25185370445251465, "logits/chosen": -0.6271622776985168, "logits/rejected": -1.3721461296081543, "logps/chosen": -1.4484784603118896, "logps/rejected": -2.6517696380615234, "loss": 1.4795, "nll_loss": 1.4542728662490845, "rewards/accuracies": 1.0, "rewards/chosen": -0.14484785497188568, "rewards/margins": 0.12032908946275711, "rewards/rejected": -0.2651769518852234, "step": 897 }, { "epoch": 1.4144996059889676, "grad_norm": 0.2329423725605011, "learning_rate": 9.621597779862307e-07, "log_odds_chosen": 1.2616857290267944, "log_odds_ratio": -0.2574716806411743, "logits/chosen": -0.7111046314239502, "logits/rejected": -1.2678980827331543, "logps/chosen": -1.4533891677856445, "logps/rejected": -2.530416488647461, "loss": 1.4769, "nll_loss": 1.451107382774353, "rewards/accuracies": 1.0, "rewards/chosen": -0.14533893764019012, "rewards/margins": 0.10770271718502045, "rewards/rejected": -0.25304165482521057, "step": 898 }, { "epoch": 1.4160756501182032, "grad_norm": 0.23720115423202515, "learning_rate": 9.57464658001848e-07, "log_odds_chosen": 1.3332675695419312, "log_odds_ratio": -0.23988232016563416, "logits/chosen": -0.6351300477981567, "logits/rejected": -1.276170015335083, "logps/chosen": -1.464157223701477, "logps/rejected": -2.611715078353882, "loss": 1.4892, "nll_loss": 1.4651670455932617, "rewards/accuracies": 1.0, "rewards/chosen": -0.14641571044921875, "rewards/margins": 0.1147557944059372, "rewards/rejected": -0.26117151975631714, "step": 899 }, { "epoch": 1.4176516942474389, "grad_norm": 0.2529568672180176, "learning_rate": 9.527774138740212e-07, "log_odds_chosen": 1.4207415580749512, "log_odds_ratio": -0.2224026918411255, "logits/chosen": -0.7809243202209473, "logits/rejected": -1.3837926387786865, "logps/chosen": -1.420925259590149, "logps/rejected": -2.6291465759277344, "loss": 1.4527, "nll_loss": 1.4304237365722656, "rewards/accuracies": 1.0, "rewards/chosen": -0.1420925110578537, "rewards/margins": 0.12082216143608093, "rewards/rejected": -0.26291465759277344, "step": 900 }, { "epoch": 1.4192277383766745, "grad_norm": 0.23738139867782593, "learning_rate": 9.480980810126411e-07, "log_odds_chosen": 1.551971673965454, "log_odds_ratio": -0.22836817800998688, "logits/chosen": -0.7299198508262634, "logits/rejected": -1.224428653717041, "logps/chosen": -1.32070791721344, "logps/rejected": -2.655421018600464, "loss": 1.3639, "nll_loss": 1.3410696983337402, "rewards/accuracies": 1.0, "rewards/chosen": -0.13207079470157623, "rewards/margins": 0.1334713250398636, "rewards/rejected": -0.2655421197414398, "step": 901 }, { "epoch": 1.42080378250591, "grad_norm": 0.24616296589374542, "learning_rate": 9.434266947678324e-07, "log_odds_chosen": 1.4568251371383667, "log_odds_ratio": -0.22730094194412231, "logits/chosen": -0.7136987447738647, "logits/rejected": -1.2868367433547974, "logps/chosen": -1.4121330976486206, "logps/rejected": -2.6679399013519287, "loss": 1.4312, "nll_loss": 1.4085018634796143, "rewards/accuracies": 1.0, "rewards/chosen": -0.1412133127450943, "rewards/margins": 0.12558068335056305, "rewards/rejected": -0.26679399609565735, "step": 902 }, { "epoch": 1.4223798266351457, "grad_norm": 0.2434018850326538, "learning_rate": 9.387632904296872e-07, "log_odds_chosen": 1.701801061630249, "log_odds_ratio": -0.18518248200416565, "logits/chosen": -0.6617938280105591, "logits/rejected": -1.3858933448791504, "logps/chosen": -1.435595989227295, "logps/rejected": -2.922405481338501, "loss": 1.4525, "nll_loss": 1.43398118019104, "rewards/accuracies": 1.0, "rewards/chosen": -0.14355960488319397, "rewards/margins": 0.1486809402704239, "rewards/rejected": -0.29224056005477905, "step": 903 }, { "epoch": 1.4239558707643813, "grad_norm": 0.22994695603847504, "learning_rate": 9.341079032279986e-07, "log_odds_chosen": 1.577636957168579, "log_odds_ratio": -0.20652614533901215, "logits/chosen": -0.5851540565490723, "logits/rejected": -1.1802699565887451, "logps/chosen": -1.3982754945755005, "logps/rejected": -2.758606195449829, "loss": 1.4294, "nll_loss": 1.4087923765182495, "rewards/accuracies": 1.0, "rewards/chosen": -0.13982754945755005, "rewards/margins": 0.1360330581665039, "rewards/rejected": -0.27586060762405396, "step": 904 }, { "epoch": 1.425531914893617, "grad_norm": 0.21848729252815247, "learning_rate": 9.294605683319919e-07, "log_odds_chosen": 1.5972235202789307, "log_odds_ratio": -0.19709482789039612, "logits/chosen": -0.7076174020767212, "logits/rejected": -1.3356118202209473, "logps/chosen": -1.2920252084732056, "logps/rejected": -2.6448347568511963, "loss": 1.3222, "nll_loss": 1.3025282621383667, "rewards/accuracies": 1.0, "rewards/chosen": -0.12920251488685608, "rewards/margins": 0.13528095185756683, "rewards/rejected": -0.2644834816455841, "step": 905 }, { "epoch": 1.4271079590228526, "grad_norm": 0.23984983563423157, "learning_rate": 9.248213208500629e-07, "log_odds_chosen": 1.3629462718963623, "log_odds_ratio": -0.2444595992565155, "logits/chosen": -0.7552050352096558, "logits/rejected": -1.293509602546692, "logps/chosen": -1.4704115390777588, "logps/rejected": -2.6470746994018555, "loss": 1.4831, "nll_loss": 1.4586949348449707, "rewards/accuracies": 1.0, "rewards/chosen": -0.14704115688800812, "rewards/margins": 0.11766630411148071, "rewards/rejected": -0.26470747590065, "step": 906 }, { "epoch": 1.4286840031520882, "grad_norm": 0.24960680305957794, "learning_rate": 9.201901958295115e-07, "log_odds_chosen": 1.6368037462234497, "log_odds_ratio": -0.19702821969985962, "logits/chosen": -0.6471220254898071, "logits/rejected": -1.2962175607681274, "logps/chosen": -1.425323486328125, "logps/rejected": -2.851389169692993, "loss": 1.4648, "nll_loss": 1.4451216459274292, "rewards/accuracies": 1.0, "rewards/chosen": -0.1425323486328125, "rewards/margins": 0.14260657131671906, "rewards/rejected": -0.28513893485069275, "step": 907 }, { "epoch": 1.4302600472813238, "grad_norm": 0.22295747697353363, "learning_rate": 9.155672282562736e-07, "log_odds_chosen": 1.4643927812576294, "log_odds_ratio": -0.2211674600839615, "logits/chosen": -0.7684674263000488, "logits/rejected": -1.2800066471099854, "logps/chosen": -1.4485883712768555, "logps/rejected": -2.7177271842956543, "loss": 1.4787, "nll_loss": 1.4565672874450684, "rewards/accuracies": 1.0, "rewards/chosen": -0.14485882222652435, "rewards/margins": 0.1269138902425766, "rewards/rejected": -0.27177271246910095, "step": 908 }, { "epoch": 1.4318360914105595, "grad_norm": 0.24412484467029572, "learning_rate": 9.109524530546622e-07, "log_odds_chosen": 1.3177937269210815, "log_odds_ratio": -0.24450668692588806, "logits/chosen": -0.7197924256324768, "logits/rejected": -1.2897931337356567, "logps/chosen": -1.4474880695343018, "logps/rejected": -2.5726189613342285, "loss": 1.4801, "nll_loss": 1.4556747674942017, "rewards/accuracies": 1.0, "rewards/chosen": -0.14474882185459137, "rewards/margins": 0.11251308023929596, "rewards/rejected": -0.25726187229156494, "step": 909 }, { "epoch": 1.433412135539795, "grad_norm": 0.2901386320590973, "learning_rate": 9.063459050871001e-07, "log_odds_chosen": 1.3122071027755737, "log_odds_ratio": -0.24745714664459229, "logits/chosen": -0.7841672301292419, "logits/rejected": -1.0461746454238892, "logps/chosen": -1.4149231910705566, "logps/rejected": -2.5323569774627686, "loss": 1.435, "nll_loss": 1.4103010892868042, "rewards/accuracies": 1.0, "rewards/chosen": -0.1414923071861267, "rewards/margins": 0.11174337565898895, "rewards/rejected": -0.25323572754859924, "step": 910 }, { "epoch": 1.4349881796690307, "grad_norm": 0.23500244319438934, "learning_rate": 9.017476191538555e-07, "log_odds_chosen": 1.5380570888519287, "log_odds_ratio": -0.2097090631723404, "logits/chosen": -0.7386124730110168, "logits/rejected": -1.096914291381836, "logps/chosen": -1.3656436204910278, "logps/rejected": -2.676828622817993, "loss": 1.4054, "nll_loss": 1.3844537734985352, "rewards/accuracies": 1.0, "rewards/chosen": -0.13656435906887054, "rewards/margins": 0.1311185210943222, "rewards/rejected": -0.26768285036087036, "step": 911 }, { "epoch": 1.4365642237982663, "grad_norm": 0.24014200270175934, "learning_rate": 8.971576299927832e-07, "log_odds_chosen": 1.595320224761963, "log_odds_ratio": -0.1951339840888977, "logits/chosen": -0.6914607882499695, "logits/rejected": -1.4646146297454834, "logps/chosen": -1.4537521600723267, "logps/rejected": -2.8430793285369873, "loss": 1.4772, "nll_loss": 1.4576513767242432, "rewards/accuracies": 1.0, "rewards/chosen": -0.14537522196769714, "rewards/margins": 0.1389327347278595, "rewards/rejected": -0.28430795669555664, "step": 912 }, { "epoch": 1.438140267927502, "grad_norm": 0.2397628277540207, "learning_rate": 8.925759722790591e-07, "log_odds_chosen": 1.4049484729766846, "log_odds_ratio": -0.2262987494468689, "logits/chosen": -0.6498869061470032, "logits/rejected": -1.2962441444396973, "logps/chosen": -1.426429271697998, "logps/rejected": -2.6309728622436523, "loss": 1.4555, "nll_loss": 1.4329168796539307, "rewards/accuracies": 1.0, "rewards/chosen": -0.14264293015003204, "rewards/margins": 0.12045436352491379, "rewards/rejected": -0.26309728622436523, "step": 913 }, { "epoch": 1.4397163120567376, "grad_norm": 0.2561264932155609, "learning_rate": 8.880026806249192e-07, "log_odds_chosen": 1.6941354274749756, "log_odds_ratio": -0.17529326677322388, "logits/chosen": -0.5849270820617676, "logits/rejected": -1.4002407789230347, "logps/chosen": -1.4796934127807617, "logps/rejected": -2.9666757583618164, "loss": 1.4962, "nll_loss": 1.4786688089370728, "rewards/accuracies": 1.0, "rewards/chosen": -0.14796936511993408, "rewards/margins": 0.14869821071624756, "rewards/rejected": -0.29666754603385925, "step": 914 }, { "epoch": 1.4412923561859732, "grad_norm": 0.27028515934944153, "learning_rate": 8.834377895793965e-07, "log_odds_chosen": 1.3969942331314087, "log_odds_ratio": -0.2436298131942749, "logits/chosen": -0.615172266960144, "logits/rejected": -1.1797115802764893, "logps/chosen": -1.5547003746032715, "logps/rejected": -2.7802202701568604, "loss": 1.5732, "nll_loss": 1.548871397972107, "rewards/accuracies": 1.0, "rewards/chosen": -0.15547004342079163, "rewards/margins": 0.12255197763442993, "rewards/rejected": -0.27802205085754395, "step": 915 }, { "epoch": 1.4428684003152088, "grad_norm": 0.2357224076986313, "learning_rate": 8.78881333628063e-07, "log_odds_chosen": 1.4083137512207031, "log_odds_ratio": -0.22135290503501892, "logits/chosen": -0.6914385557174683, "logits/rejected": -1.4413189888000488, "logps/chosen": -1.4735187292099, "logps/rejected": -2.6905767917633057, "loss": 1.488, "nll_loss": 1.4658350944519043, "rewards/accuracies": 1.0, "rewards/chosen": -0.14735187590122223, "rewards/margins": 0.12170581519603729, "rewards/rejected": -0.2690576910972595, "step": 916 }, { "epoch": 1.4444444444444444, "grad_norm": 0.224204882979393, "learning_rate": 8.743333471927672e-07, "log_odds_chosen": 1.5857540369033813, "log_odds_ratio": -0.1905209720134735, "logits/chosen": -0.7319490909576416, "logits/rejected": -1.3981595039367676, "logps/chosen": -1.4768825769424438, "logps/rejected": -2.862194538116455, "loss": 1.5006, "nll_loss": 1.4815452098846436, "rewards/accuracies": 1.0, "rewards/chosen": -0.14768825471401215, "rewards/margins": 0.13853120803833008, "rewards/rejected": -0.28621944785118103, "step": 917 }, { "epoch": 1.44602048857368, "grad_norm": 0.237936869263649, "learning_rate": 8.697938646313742e-07, "log_odds_chosen": 1.506066083908081, "log_odds_ratio": -0.20262272655963898, "logits/chosen": -0.7431957721710205, "logits/rejected": -1.20036780834198, "logps/chosen": -1.3826744556427002, "logps/rejected": -2.6691064834594727, "loss": 1.4065, "nll_loss": 1.3862018585205078, "rewards/accuracies": 1.0, "rewards/chosen": -0.13826745748519897, "rewards/margins": 0.1286431849002838, "rewards/rejected": -0.2669106423854828, "step": 918 }, { "epoch": 1.4475965327029157, "grad_norm": 0.2390269637107849, "learning_rate": 8.652629202375075e-07, "log_odds_chosen": 1.356788992881775, "log_odds_ratio": -0.24105380475521088, "logits/chosen": -0.5163722634315491, "logits/rejected": -1.3722693920135498, "logps/chosen": -1.4018796682357788, "logps/rejected": -2.5581085681915283, "loss": 1.4414, "nll_loss": 1.417291283607483, "rewards/accuracies": 1.0, "rewards/chosen": -0.14018797874450684, "rewards/margins": 0.11562289297580719, "rewards/rejected": -0.25581085681915283, "step": 919 }, { "epoch": 1.4491725768321513, "grad_norm": 0.23875784873962402, "learning_rate": 8.607405482402861e-07, "log_odds_chosen": 1.7022291421890259, "log_odds_ratio": -0.19013413786888123, "logits/chosen": -0.7322274446487427, "logits/rejected": -1.318604826927185, "logps/chosen": -1.2851026058197021, "logps/rejected": -2.739962100982666, "loss": 1.3209, "nll_loss": 1.301877498626709, "rewards/accuracies": 1.0, "rewards/chosen": -0.1285102665424347, "rewards/margins": 0.14548592269420624, "rewards/rejected": -0.2739962041378021, "step": 920 }, { "epoch": 1.450748620961387, "grad_norm": 0.23226508498191833, "learning_rate": 8.562267828040712e-07, "log_odds_chosen": 1.6082755327224731, "log_odds_ratio": -0.19235759973526, "logits/chosen": -0.738680362701416, "logits/rejected": -1.6129546165466309, "logps/chosen": -1.411106824874878, "logps/rejected": -2.8042585849761963, "loss": 1.4307, "nll_loss": 1.4114558696746826, "rewards/accuracies": 1.0, "rewards/chosen": -0.14111070334911346, "rewards/margins": 0.1393151432275772, "rewards/rejected": -0.2804258465766907, "step": 921 }, { "epoch": 1.4523246650906225, "grad_norm": 0.22625631093978882, "learning_rate": 8.517216580282048e-07, "log_odds_chosen": 1.3667917251586914, "log_odds_ratio": -0.23798587918281555, "logits/chosen": -0.579673707485199, "logits/rejected": -1.37380051612854, "logps/chosen": -1.4829236268997192, "logps/rejected": -2.663116693496704, "loss": 1.5243, "nll_loss": 1.500511646270752, "rewards/accuracies": 1.0, "rewards/chosen": -0.14829237759113312, "rewards/margins": 0.11801932007074356, "rewards/rejected": -0.2663117051124573, "step": 922 }, { "epoch": 1.4539007092198581, "grad_norm": 0.2619623839855194, "learning_rate": 8.472252079467535e-07, "log_odds_chosen": 1.3680424690246582, "log_odds_ratio": -0.23256750404834747, "logits/chosen": -0.6674222350120544, "logits/rejected": -1.3710373640060425, "logps/chosen": -1.4581406116485596, "logps/rejected": -2.6354715824127197, "loss": 1.484, "nll_loss": 1.4606943130493164, "rewards/accuracies": 1.0, "rewards/chosen": -0.14581406116485596, "rewards/margins": 0.11773312091827393, "rewards/rejected": -0.2635471820831299, "step": 923 }, { "epoch": 1.4554767533490938, "grad_norm": 0.25333184003829956, "learning_rate": 8.427374665282488e-07, "log_odds_chosen": 1.2261314392089844, "log_odds_ratio": -0.26179471611976624, "logits/chosen": -0.5632379651069641, "logits/rejected": -1.0951900482177734, "logps/chosen": -1.370491862297058, "logps/rejected": -2.3976216316223145, "loss": 1.4098, "nll_loss": 1.3836238384246826, "rewards/accuracies": 1.0, "rewards/chosen": -0.13704918324947357, "rewards/margins": 0.10271298140287399, "rewards/rejected": -0.23976217210292816, "step": 924 }, { "epoch": 1.4570527974783294, "grad_norm": 0.2828598916530609, "learning_rate": 8.382584676754336e-07, "log_odds_chosen": 1.3424285650253296, "log_odds_ratio": -0.2383449673652649, "logits/chosen": -0.7193729877471924, "logits/rejected": -1.2673097848892212, "logps/chosen": -1.455522060394287, "logps/rejected": -2.6107735633850098, "loss": 1.4823, "nll_loss": 1.4584755897521973, "rewards/accuracies": 1.0, "rewards/chosen": -0.14555218815803528, "rewards/margins": 0.11552514880895615, "rewards/rejected": -0.261077344417572, "step": 925 }, { "epoch": 1.458628841607565, "grad_norm": 0.2343621701002121, "learning_rate": 8.337882452250058e-07, "log_odds_chosen": 1.829087734222412, "log_odds_ratio": -0.1707460731267929, "logits/chosen": -0.7277917265892029, "logits/rejected": -1.4610968828201294, "logps/chosen": -1.4779551029205322, "logps/rejected": -3.1036906242370605, "loss": 1.4926, "nll_loss": 1.475477695465088, "rewards/accuracies": 1.0, "rewards/chosen": -0.14779552817344666, "rewards/margins": 0.16257351636886597, "rewards/rejected": -0.3103690445423126, "step": 926 }, { "epoch": 1.4602048857368006, "grad_norm": 0.23729124665260315, "learning_rate": 8.293268329473602e-07, "log_odds_chosen": 1.8108925819396973, "log_odds_ratio": -0.16629691421985626, "logits/chosen": -0.7638081908226013, "logits/rejected": -1.3801249265670776, "logps/chosen": -1.4157474040985107, "logps/rejected": -3.005319595336914, "loss": 1.4395, "nll_loss": 1.4228744506835938, "rewards/accuracies": 1.0, "rewards/chosen": -0.14157472550868988, "rewards/margins": 0.15895719826221466, "rewards/rejected": -0.30053192377090454, "step": 927 }, { "epoch": 1.4617809298660362, "grad_norm": 0.23078180849552155, "learning_rate": 8.248742645463367e-07, "log_odds_chosen": 1.6101691722869873, "log_odds_ratio": -0.1969299465417862, "logits/chosen": -0.6740590333938599, "logits/rejected": -1.339869737625122, "logps/chosen": -1.4835829734802246, "logps/rejected": -2.8963966369628906, "loss": 1.4914, "nll_loss": 1.471714735031128, "rewards/accuracies": 1.0, "rewards/chosen": -0.1483582854270935, "rewards/margins": 0.141281396150589, "rewards/rejected": -0.2896396815776825, "step": 928 }, { "epoch": 1.4633569739952719, "grad_norm": 0.24942730367183685, "learning_rate": 8.204305736589612e-07, "log_odds_chosen": 1.557928204536438, "log_odds_ratio": -0.20739729702472687, "logits/chosen": -0.6712103486061096, "logits/rejected": -1.2416367530822754, "logps/chosen": -1.431049108505249, "logps/rejected": -2.7830963134765625, "loss": 1.4509, "nll_loss": 1.4301552772521973, "rewards/accuracies": 1.0, "rewards/chosen": -0.1431049257516861, "rewards/margins": 0.1352047473192215, "rewards/rejected": -0.2783096730709076, "step": 929 }, { "epoch": 1.4649330181245075, "grad_norm": 0.22981353104114532, "learning_rate": 8.159957938551966e-07, "log_odds_chosen": 1.454850673675537, "log_odds_ratio": -0.22549110651016235, "logits/chosen": -0.6281288266181946, "logits/rejected": -1.2660706043243408, "logps/chosen": -1.4389593601226807, "logps/rejected": -2.687685012817383, "loss": 1.4687, "nll_loss": 1.4461032152175903, "rewards/accuracies": 1.0, "rewards/chosen": -0.1438959389925003, "rewards/margins": 0.12487256526947021, "rewards/rejected": -0.2687685191631317, "step": 930 }, { "epoch": 1.4665090622537431, "grad_norm": 0.24757270514965057, "learning_rate": 8.115699586376865e-07, "log_odds_chosen": 1.3925848007202148, "log_odds_ratio": -0.23080500960350037, "logits/chosen": -0.7412785291671753, "logits/rejected": -1.272114634513855, "logps/chosen": -1.4819211959838867, "logps/rejected": -2.6907401084899902, "loss": 1.5013, "nll_loss": 1.4782263040542603, "rewards/accuracies": 1.0, "rewards/chosen": -0.14819210767745972, "rewards/margins": 0.12088190019130707, "rewards/rejected": -0.269074022769928, "step": 931 }, { "epoch": 1.4680851063829787, "grad_norm": 0.2500283718109131, "learning_rate": 8.071531014415018e-07, "log_odds_chosen": 1.6840711832046509, "log_odds_ratio": -0.18267367780208588, "logits/chosen": -0.7013669610023499, "logits/rejected": -1.3033456802368164, "logps/chosen": -1.440954327583313, "logps/rejected": -2.9147908687591553, "loss": 1.4458, "nll_loss": 1.427519679069519, "rewards/accuracies": 1.0, "rewards/chosen": -0.14409542083740234, "rewards/margins": 0.1473836898803711, "rewards/rejected": -0.29147911071777344, "step": 932 }, { "epoch": 1.4696611505122144, "grad_norm": 0.22321783006191254, "learning_rate": 8.027452556338894e-07, "log_odds_chosen": 1.5976731777191162, "log_odds_ratio": -0.18740679323673248, "logits/chosen": -0.6387951374053955, "logits/rejected": -1.475955605506897, "logps/chosen": -1.4478017091751099, "logps/rejected": -2.8323302268981934, "loss": 1.4687, "nll_loss": 1.44996178150177, "rewards/accuracies": 1.0, "rewards/chosen": -0.14478018879890442, "rewards/margins": 0.13845285773277283, "rewards/rejected": -0.28323301672935486, "step": 933 }, { "epoch": 1.47123719464145, "grad_norm": 0.2513580322265625, "learning_rate": 7.983464545140179e-07, "log_odds_chosen": 1.6734238862991333, "log_odds_ratio": -0.17683839797973633, "logits/chosen": -0.7284808158874512, "logits/rejected": -1.3483668565750122, "logps/chosen": -1.3835601806640625, "logps/rejected": -2.8295843601226807, "loss": 1.3975, "nll_loss": 1.3798243999481201, "rewards/accuracies": 1.0, "rewards/chosen": -0.138356015086174, "rewards/margins": 0.14460240304470062, "rewards/rejected": -0.282958447933197, "step": 934 }, { "epoch": 1.4728132387706856, "grad_norm": 0.2782896161079407, "learning_rate": 7.939567313127295e-07, "log_odds_chosen": 1.5912904739379883, "log_odds_ratio": -0.18954217433929443, "logits/chosen": -0.6867378354072571, "logits/rejected": -1.248403787612915, "logps/chosen": -1.4521732330322266, "logps/rejected": -2.8360166549682617, "loss": 1.4755, "nll_loss": 1.4565839767456055, "rewards/accuracies": 1.0, "rewards/chosen": -0.14521732926368713, "rewards/margins": 0.1383843570947647, "rewards/rejected": -0.28360170125961304, "step": 935 }, { "epoch": 1.4743892828999212, "grad_norm": 0.2405909299850464, "learning_rate": 7.89576119192286e-07, "log_odds_chosen": 1.6822706460952759, "log_odds_ratio": -0.17994974553585052, "logits/chosen": -0.6468403339385986, "logits/rejected": -1.330075979232788, "logps/chosen": -1.389624834060669, "logps/rejected": -2.8466427326202393, "loss": 1.4027, "nll_loss": 1.3846580982208252, "rewards/accuracies": 1.0, "rewards/chosen": -0.1389624923467636, "rewards/margins": 0.1457017958164215, "rewards/rejected": -0.2846642732620239, "step": 936 }, { "epoch": 1.4759653270291568, "grad_norm": 0.23850460350513458, "learning_rate": 7.852046512461201e-07, "log_odds_chosen": 1.6394765377044678, "log_odds_ratio": -0.1856173276901245, "logits/chosen": -0.7022053003311157, "logits/rejected": -1.299256682395935, "logps/chosen": -1.4076240062713623, "logps/rejected": -2.82450795173645, "loss": 1.4299, "nll_loss": 1.4112954139709473, "rewards/accuracies": 1.0, "rewards/chosen": -0.14076240360736847, "rewards/margins": 0.14168839156627655, "rewards/rejected": -0.282450795173645, "step": 937 }, { "epoch": 1.4775413711583925, "grad_norm": 0.22538453340530396, "learning_rate": 7.808423604985843e-07, "log_odds_chosen": 1.6555981636047363, "log_odds_ratio": -0.19030633568763733, "logits/chosen": -0.5821723937988281, "logits/rejected": -1.2832187414169312, "logps/chosen": -1.4027788639068604, "logps/rejected": -2.835968017578125, "loss": 1.4519, "nll_loss": 1.4328638315200806, "rewards/accuracies": 1.0, "rewards/chosen": -0.1402778923511505, "rewards/margins": 0.14331893622875214, "rewards/rejected": -0.28359681367874146, "step": 938 }, { "epoch": 1.479117415287628, "grad_norm": 0.2290097177028656, "learning_rate": 7.764892799047005e-07, "log_odds_chosen": 1.7153334617614746, "log_odds_ratio": -0.17847242951393127, "logits/chosen": -0.650507390499115, "logits/rejected": -1.4185820817947388, "logps/chosen": -1.4453359842300415, "logps/rejected": -2.946399688720703, "loss": 1.4666, "nll_loss": 1.448720932006836, "rewards/accuracies": 1.0, "rewards/chosen": -0.14453358948230743, "rewards/margins": 0.15010638535022736, "rewards/rejected": -0.2946400046348572, "step": 939 }, { "epoch": 1.4806934594168637, "grad_norm": 0.22584521770477295, "learning_rate": 7.721454423499143e-07, "log_odds_chosen": 1.6934208869934082, "log_odds_ratio": -0.18340080976486206, "logits/chosen": -0.7101742625236511, "logits/rejected": -1.4766960144042969, "logps/chosen": -1.4228019714355469, "logps/rejected": -2.895491600036621, "loss": 1.4454, "nll_loss": 1.4270652532577515, "rewards/accuracies": 1.0, "rewards/chosen": -0.1422802060842514, "rewards/margins": 0.14726899564266205, "rewards/rejected": -0.28954920172691345, "step": 940 }, { "epoch": 1.4822695035460993, "grad_norm": 0.3073555827140808, "learning_rate": 7.678108806498441e-07, "log_odds_chosen": 1.712252140045166, "log_odds_ratio": -0.18345150351524353, "logits/chosen": -0.638253927230835, "logits/rejected": -1.2792203426361084, "logps/chosen": -1.3969178199768066, "logps/rejected": -2.8891093730926514, "loss": 1.4293, "nll_loss": 1.4109400510787964, "rewards/accuracies": 1.0, "rewards/chosen": -0.1396917998790741, "rewards/margins": 0.14921915531158447, "rewards/rejected": -0.2889109253883362, "step": 941 }, { "epoch": 1.483845547675335, "grad_norm": 0.2764735817909241, "learning_rate": 7.634856275500315e-07, "log_odds_chosen": 1.325348973274231, "log_odds_ratio": -0.24945427477359772, "logits/chosen": -0.6737896203994751, "logits/rejected": -1.1839401721954346, "logps/chosen": -1.536679744720459, "logps/rejected": -2.6940834522247314, "loss": 1.5576, "nll_loss": 1.5326553583145142, "rewards/accuracies": 1.0, "rewards/chosen": -0.15366797149181366, "rewards/margins": 0.11574037373065948, "rewards/rejected": -0.26940834522247314, "step": 942 }, { "epoch": 1.4854215918045706, "grad_norm": 0.21751107275485992, "learning_rate": 7.591697157256991e-07, "log_odds_chosen": 1.6585466861724854, "log_odds_ratio": -0.1918494701385498, "logits/chosen": -0.6525353789329529, "logits/rejected": -1.3311265707015991, "logps/chosen": -1.3682565689086914, "logps/rejected": -2.7965948581695557, "loss": 1.3931, "nll_loss": 1.3738960027694702, "rewards/accuracies": 1.0, "rewards/chosen": -0.13682566583156586, "rewards/margins": 0.14283384382724762, "rewards/rejected": -0.2796595096588135, "step": 943 }, { "epoch": 1.4869976359338062, "grad_norm": 0.26778918504714966, "learning_rate": 7.548631777814995e-07, "log_odds_chosen": 1.7599365711212158, "log_odds_ratio": -0.17856289446353912, "logits/chosen": -0.6652897596359253, "logits/rejected": -1.3054307699203491, "logps/chosen": -1.4152657985687256, "logps/rejected": -2.9564874172210693, "loss": 1.444, "nll_loss": 1.4261281490325928, "rewards/accuracies": 1.0, "rewards/chosen": -0.14152657985687256, "rewards/margins": 0.15412217378616333, "rewards/rejected": -0.2956487536430359, "step": 944 }, { "epoch": 1.4885736800630418, "grad_norm": 0.23051071166992188, "learning_rate": 7.505660462512689e-07, "log_odds_chosen": 1.45986008644104, "log_odds_ratio": -0.21791455149650574, "logits/chosen": -0.7278865575790405, "logits/rejected": -1.1881009340286255, "logps/chosen": -1.3439713716506958, "logps/rejected": -2.576228141784668, "loss": 1.3872, "nll_loss": 1.365444302558899, "rewards/accuracies": 1.0, "rewards/chosen": -0.13439713418483734, "rewards/margins": 0.12322570383548737, "rewards/rejected": -0.2576228380203247, "step": 945 }, { "epoch": 1.4901497241922774, "grad_norm": 0.24693530797958374, "learning_rate": 7.462783535977842e-07, "log_odds_chosen": 1.6751735210418701, "log_odds_ratio": -0.17621569335460663, "logits/chosen": -0.6944935917854309, "logits/rejected": -1.2936582565307617, "logps/chosen": -1.399308204650879, "logps/rejected": -2.8498661518096924, "loss": 1.4232, "nll_loss": 1.405564308166504, "rewards/accuracies": 1.0, "rewards/chosen": -0.1399308294057846, "rewards/margins": 0.14505577087402344, "rewards/rejected": -0.28498658537864685, "step": 946 }, { "epoch": 1.491725768321513, "grad_norm": 0.22004899382591248, "learning_rate": 7.420001322125156e-07, "log_odds_chosen": 1.1114012002944946, "log_odds_ratio": -0.2998209595680237, "logits/chosen": -0.7848727703094482, "logits/rejected": -1.1558805704116821, "logps/chosen": -1.3824275732040405, "logps/rejected": -2.318377733230591, "loss": 1.416, "nll_loss": 1.3860433101654053, "rewards/accuracies": 1.0, "rewards/chosen": -0.13824275135993958, "rewards/margins": 0.09359502792358398, "rewards/rejected": -0.23183779418468475, "step": 947 }, { "epoch": 1.4933018124507487, "grad_norm": 0.24083304405212402, "learning_rate": 7.377314144153814e-07, "log_odds_chosen": 1.2946269512176514, "log_odds_ratio": -0.24751748144626617, "logits/chosen": -0.7243244647979736, "logits/rejected": -1.3368916511535645, "logps/chosen": -1.4547210931777954, "logps/rejected": -2.5638506412506104, "loss": 1.4864, "nll_loss": 1.4616656303405762, "rewards/accuracies": 1.0, "rewards/chosen": -0.14547210931777954, "rewards/margins": 0.11091295629739761, "rewards/rejected": -0.25638505816459656, "step": 948 }, { "epoch": 1.4948778565799843, "grad_norm": 0.238874152302742, "learning_rate": 7.334722324545064e-07, "log_odds_chosen": 1.6375446319580078, "log_odds_ratio": -0.18435253202915192, "logits/chosen": -0.6372507810592651, "logits/rejected": -1.5748916864395142, "logps/chosen": -1.4091687202453613, "logps/rejected": -2.826665163040161, "loss": 1.4161, "nll_loss": 1.3977106809616089, "rewards/accuracies": 1.0, "rewards/chosen": -0.1409168839454651, "rewards/margins": 0.14174962043762207, "rewards/rejected": -0.28266650438308716, "step": 949 }, { "epoch": 1.49645390070922, "grad_norm": 0.23900504410266876, "learning_rate": 7.292226185059756e-07, "log_odds_chosen": 1.498952865600586, "log_odds_ratio": -0.21228618919849396, "logits/chosen": -0.6418887376785278, "logits/rejected": -1.1928619146347046, "logps/chosen": -1.4736647605895996, "logps/rejected": -2.7802810668945312, "loss": 1.5064, "nll_loss": 1.4851421117782593, "rewards/accuracies": 1.0, "rewards/chosen": -0.1473664790391922, "rewards/margins": 0.13066160678863525, "rewards/rejected": -0.27802810072898865, "step": 950 }, { "epoch": 1.4980299448384555, "grad_norm": 0.21942299604415894, "learning_rate": 7.249826046735927e-07, "log_odds_chosen": 1.5628658533096313, "log_odds_ratio": -0.19864961504936218, "logits/chosen": -0.6987692713737488, "logits/rejected": -1.3551541566848755, "logps/chosen": -1.4094568490982056, "logps/rejected": -2.7603650093078613, "loss": 1.4328, "nll_loss": 1.4128936529159546, "rewards/accuracies": 1.0, "rewards/chosen": -0.1409456729888916, "rewards/margins": 0.13509082794189453, "rewards/rejected": -0.27603650093078613, "step": 951 }, { "epoch": 1.4996059889676912, "grad_norm": 0.27445751428604126, "learning_rate": 7.207522229886379e-07, "log_odds_chosen": 1.6240615844726562, "log_odds_ratio": -0.19309791922569275, "logits/chosen": -0.7298609614372253, "logits/rejected": -1.1425058841705322, "logps/chosen": -1.3362138271331787, "logps/rejected": -2.7210559844970703, "loss": 1.3671, "nll_loss": 1.347779393196106, "rewards/accuracies": 1.0, "rewards/chosen": -0.13362140953540802, "rewards/margins": 0.13848420977592468, "rewards/rejected": -0.2721056044101715, "step": 952 }, { "epoch": 1.5011820330969265, "grad_norm": 0.255359023809433, "learning_rate": 7.165315054096228e-07, "log_odds_chosen": 1.5372127294540405, "log_odds_ratio": -0.2110520303249359, "logits/chosen": -0.6947832703590393, "logits/rejected": -1.3758889436721802, "logps/chosen": -1.4896736145019531, "logps/rejected": -2.8352179527282715, "loss": 1.4983, "nll_loss": 1.4772429466247559, "rewards/accuracies": 1.0, "rewards/chosen": -0.14896735548973083, "rewards/margins": 0.1345544308423996, "rewards/rejected": -0.28352177143096924, "step": 953 }, { "epoch": 1.5027580772261624, "grad_norm": 0.21928419172763824, "learning_rate": 7.123204838220533e-07, "log_odds_chosen": 1.779129981994629, "log_odds_ratio": -0.17474862933158875, "logits/chosen": -0.7586661577224731, "logits/rejected": -1.494895339012146, "logps/chosen": -1.3686449527740479, "logps/rejected": -2.9177207946777344, "loss": 1.3917, "nll_loss": 1.3742096424102783, "rewards/accuracies": 1.0, "rewards/chosen": -0.13686451315879822, "rewards/margins": 0.15490756928920746, "rewards/rejected": -0.2917720675468445, "step": 954 }, { "epoch": 1.5043341213553978, "grad_norm": 0.24408267438411713, "learning_rate": 7.081191900381862e-07, "log_odds_chosen": 1.634125828742981, "log_odds_ratio": -0.18857534229755402, "logits/chosen": -0.7132138013839722, "logits/rejected": -1.425289273262024, "logps/chosen": -1.4829065799713135, "logps/rejected": -2.9163081645965576, "loss": 1.5057, "nll_loss": 1.4868156909942627, "rewards/accuracies": 1.0, "rewards/chosen": -0.14829064905643463, "rewards/margins": 0.14334018528461456, "rewards/rejected": -0.2916308641433716, "step": 955 }, { "epoch": 1.5059101654846336, "grad_norm": 0.22805668413639069, "learning_rate": 7.039276557967895e-07, "log_odds_chosen": 1.9169952869415283, "log_odds_ratio": -0.14808149635791779, "logits/chosen": -0.6480576992034912, "logits/rejected": -1.5390878915786743, "logps/chosen": -1.3680206537246704, "logps/rejected": -3.0362184047698975, "loss": 1.3988, "nll_loss": 1.3840351104736328, "rewards/accuracies": 1.0, "rewards/chosen": -0.1368020623922348, "rewards/margins": 0.16681978106498718, "rewards/rejected": -0.3036218583583832, "step": 956 }, { "epoch": 1.507486209613869, "grad_norm": 0.22027228772640228, "learning_rate": 6.997459127629028e-07, "log_odds_chosen": 1.301336407661438, "log_odds_ratio": -0.25504469871520996, "logits/chosen": -0.6821280121803284, "logits/rejected": -1.3463248014450073, "logps/chosen": -1.445275068283081, "logps/rejected": -2.5613512992858887, "loss": 1.4868, "nll_loss": 1.4613120555877686, "rewards/accuracies": 1.0, "rewards/chosen": -0.14452750980854034, "rewards/margins": 0.11160758137702942, "rewards/rejected": -0.25613510608673096, "step": 957 }, { "epoch": 1.5090622537431049, "grad_norm": 0.24477079510688782, "learning_rate": 6.955739925275963e-07, "log_odds_chosen": 1.9386751651763916, "log_odds_ratio": -0.14761295914649963, "logits/chosen": -0.7345253229141235, "logits/rejected": -1.384972333908081, "logps/chosen": -1.3612054586410522, "logps/rejected": -3.050525426864624, "loss": 1.3896, "nll_loss": 1.3748297691345215, "rewards/accuracies": 1.0, "rewards/chosen": -0.13612055778503418, "rewards/margins": 0.1689319759607315, "rewards/rejected": -0.3050525188446045, "step": 958 }, { "epoch": 1.5106382978723403, "grad_norm": 0.25155043601989746, "learning_rate": 6.914119266077354e-07, "log_odds_chosen": 1.709695816040039, "log_odds_ratio": -0.16907371580600739, "logits/chosen": -0.6001753807067871, "logits/rejected": -1.4711008071899414, "logps/chosen": -1.492048740386963, "logps/rejected": -2.9978222846984863, "loss": 1.5165, "nll_loss": 1.4996267557144165, "rewards/accuracies": 1.0, "rewards/chosen": -0.14920487999916077, "rewards/margins": 0.1505773663520813, "rewards/rejected": -0.29978224635124207, "step": 959 }, { "epoch": 1.5122143420015761, "grad_norm": 0.25593528151512146, "learning_rate": 6.872597464457397e-07, "log_odds_chosen": 1.4520260095596313, "log_odds_ratio": -0.22392773628234863, "logits/chosen": -0.6551870107650757, "logits/rejected": -1.2438572645187378, "logps/chosen": -1.37356436252594, "logps/rejected": -2.6101818084716797, "loss": 1.3931, "nll_loss": 1.370749592781067, "rewards/accuracies": 1.0, "rewards/chosen": -0.1373564451932907, "rewards/margins": 0.12366174906492233, "rewards/rejected": -0.26101818680763245, "step": 960 }, { "epoch": 1.5137903861308115, "grad_norm": 0.24914170801639557, "learning_rate": 6.831174834093476e-07, "log_odds_chosen": 1.5355050563812256, "log_odds_ratio": -0.20510242879390717, "logits/chosen": -0.7082363367080688, "logits/rejected": -1.3170217275619507, "logps/chosen": -1.3940333127975464, "logps/rejected": -2.7147819995880127, "loss": 1.414, "nll_loss": 1.3935034275054932, "rewards/accuracies": 1.0, "rewards/chosen": -0.1394033282995224, "rewards/margins": 0.13207487761974335, "rewards/rejected": -0.27147820591926575, "step": 961 }, { "epoch": 1.5153664302600474, "grad_norm": 0.24422487616539001, "learning_rate": 6.789851687913784e-07, "log_odds_chosen": 1.3011542558670044, "log_odds_ratio": -0.26340824365615845, "logits/chosen": -0.739920437335968, "logits/rejected": -1.292670726776123, "logps/chosen": -1.3479275703430176, "logps/rejected": -2.4493749141693115, "loss": 1.3789, "nll_loss": 1.3525618314743042, "rewards/accuracies": 1.0, "rewards/chosen": -0.1347927749156952, "rewards/margins": 0.11014476418495178, "rewards/rejected": -0.24493753910064697, "step": 962 }, { "epoch": 1.5169424743892828, "grad_norm": 0.23131269216537476, "learning_rate": 6.748628338094937e-07, "log_odds_chosen": 1.7110823392868042, "log_odds_ratio": -0.1749618947505951, "logits/chosen": -0.6548817753791809, "logits/rejected": -1.267356514930725, "logps/chosen": -1.3604304790496826, "logps/rejected": -2.8328256607055664, "loss": 1.388, "nll_loss": 1.3705523014068604, "rewards/accuracies": 1.0, "rewards/chosen": -0.1360430270433426, "rewards/margins": 0.14723952114582062, "rewards/rejected": -0.2832825779914856, "step": 963 }, { "epoch": 1.5185185185185186, "grad_norm": 0.21920260787010193, "learning_rate": 6.707505096059662e-07, "log_odds_chosen": 1.701429843902588, "log_odds_ratio": -0.17971427738666534, "logits/chosen": -0.6781407594680786, "logits/rejected": -1.3067864179611206, "logps/chosen": -1.3690398931503296, "logps/rejected": -2.8425722122192383, "loss": 1.4046, "nll_loss": 1.3865872621536255, "rewards/accuracies": 1.0, "rewards/chosen": -0.13690398633480072, "rewards/margins": 0.14735324680805206, "rewards/rejected": -0.28425726294517517, "step": 964 }, { "epoch": 1.520094562647754, "grad_norm": 0.23546142876148224, "learning_rate": 6.666482272474412e-07, "log_odds_chosen": 1.6285417079925537, "log_odds_ratio": -0.18455177545547485, "logits/chosen": -0.6912217140197754, "logits/rejected": -1.2969645261764526, "logps/chosen": -1.4208413362503052, "logps/rejected": -2.8340508937835693, "loss": 1.4441, "nll_loss": 1.425673484802246, "rewards/accuracies": 1.0, "rewards/chosen": -0.14208413660526276, "rewards/margins": 0.14132097363471985, "rewards/rejected": -0.2834050953388214, "step": 965 }, { "epoch": 1.5216706067769898, "grad_norm": 0.24440234899520874, "learning_rate": 6.625560177247023e-07, "log_odds_chosen": 1.6036605834960938, "log_odds_ratio": -0.18962253630161285, "logits/chosen": -0.6972826719284058, "logits/rejected": -1.363321304321289, "logps/chosen": -1.5166881084442139, "logps/rejected": -2.922785520553589, "loss": 1.5307, "nll_loss": 1.5117499828338623, "rewards/accuracies": 1.0, "rewards/chosen": -0.15166881680488586, "rewards/margins": 0.1406097710132599, "rewards/rejected": -0.29227858781814575, "step": 966 }, { "epoch": 1.5232466509062252, "grad_norm": 0.2762506902217865, "learning_rate": 6.584739119524383e-07, "log_odds_chosen": 1.7608506679534912, "log_odds_ratio": -0.17605550587177277, "logits/chosen": -0.6674488186836243, "logits/rejected": -1.3793137073516846, "logps/chosen": -1.3838387727737427, "logps/rejected": -2.9154858589172363, "loss": 1.3923, "nll_loss": 1.3746768236160278, "rewards/accuracies": 1.0, "rewards/chosen": -0.1383838802576065, "rewards/margins": 0.15316472947597504, "rewards/rejected": -0.29154860973358154, "step": 967 }, { "epoch": 1.524822695035461, "grad_norm": 0.23789601027965546, "learning_rate": 6.544019407690077e-07, "log_odds_chosen": 1.442301630973816, "log_odds_ratio": -0.22503390908241272, "logits/chosen": -0.6969836950302124, "logits/rejected": -1.2053622007369995, "logps/chosen": -1.4793297052383423, "logps/rejected": -2.7343239784240723, "loss": 1.4936, "nll_loss": 1.4711220264434814, "rewards/accuracies": 1.0, "rewards/chosen": -0.1479329615831375, "rewards/margins": 0.125499427318573, "rewards/rejected": -0.2734324038028717, "step": 968 }, { "epoch": 1.5263987391646965, "grad_norm": 0.24980661273002625, "learning_rate": 6.503401349362083e-07, "log_odds_chosen": 1.525141954421997, "log_odds_ratio": -0.20600546896457672, "logits/chosen": -0.8025378584861755, "logits/rejected": -1.4259079694747925, "logps/chosen": -1.5078123807907104, "logps/rejected": -2.8429291248321533, "loss": 1.5256, "nll_loss": 1.5050173997879028, "rewards/accuracies": 1.0, "rewards/chosen": -0.15078124403953552, "rewards/margins": 0.13351167738437653, "rewards/rejected": -0.28429290652275085, "step": 969 }, { "epoch": 1.5279747832939323, "grad_norm": 0.24054929614067078, "learning_rate": 6.462885251390433e-07, "log_odds_chosen": 1.5378131866455078, "log_odds_ratio": -0.2006787359714508, "logits/chosen": -0.7657828330993652, "logits/rejected": -1.2990119457244873, "logps/chosen": -1.4990078210830688, "logps/rejected": -2.843885898590088, "loss": 1.5036, "nll_loss": 1.483548641204834, "rewards/accuracies": 1.0, "rewards/chosen": -0.14990077912807465, "rewards/margins": 0.1344878226518631, "rewards/rejected": -0.28438860177993774, "step": 970 }, { "epoch": 1.5295508274231677, "grad_norm": 0.2183712124824524, "learning_rate": 6.422471419854898e-07, "log_odds_chosen": 1.6890003681182861, "log_odds_ratio": -0.17618075013160706, "logits/chosen": -0.6801695823669434, "logits/rejected": -1.3422974348068237, "logps/chosen": -1.3283157348632812, "logps/rejected": -2.7713510990142822, "loss": 1.3561, "nll_loss": 1.3385310173034668, "rewards/accuracies": 1.0, "rewards/chosen": -0.13283158838748932, "rewards/margins": 0.14430353045463562, "rewards/rejected": -0.27713513374328613, "step": 971 }, { "epoch": 1.5311268715524036, "grad_norm": 0.2736135721206665, "learning_rate": 6.382160160062662e-07, "log_odds_chosen": 1.9198710918426514, "log_odds_ratio": -0.15545989573001862, "logits/chosen": -0.6708865761756897, "logits/rejected": -1.3491849899291992, "logps/chosen": -1.4164518117904663, "logps/rejected": -3.10852313041687, "loss": 1.4405, "nll_loss": 1.4249604940414429, "rewards/accuracies": 1.0, "rewards/chosen": -0.1416451781988144, "rewards/margins": 0.1692071259021759, "rewards/rejected": -0.3108523190021515, "step": 972 }, { "epoch": 1.532702915681639, "grad_norm": 0.24191005527973175, "learning_rate": 6.341951776546044e-07, "log_odds_chosen": 1.5389604568481445, "log_odds_ratio": -0.2046566754579544, "logits/chosen": -0.631176769733429, "logits/rejected": -1.3082035779953003, "logps/chosen": -1.423797607421875, "logps/rejected": -2.754075050354004, "loss": 1.4447, "nll_loss": 1.4241943359375, "rewards/accuracies": 1.0, "rewards/chosen": -0.1423797607421875, "rewards/margins": 0.13302774727344513, "rewards/rejected": -0.2754075229167938, "step": 973 }, { "epoch": 1.5342789598108748, "grad_norm": 0.24173320829868317, "learning_rate": 6.301846573060177e-07, "log_odds_chosen": 1.3617725372314453, "log_odds_ratio": -0.23609870672225952, "logits/chosen": -0.7418637275695801, "logits/rejected": -1.226946473121643, "logps/chosen": -1.377112865447998, "logps/rejected": -2.5337042808532715, "loss": 1.4131, "nll_loss": 1.3894826173782349, "rewards/accuracies": 1.0, "rewards/chosen": -0.137711301445961, "rewards/margins": 0.11565914005041122, "rewards/rejected": -0.253370463848114, "step": 974 }, { "epoch": 1.5358550039401102, "grad_norm": 0.24520975351333618, "learning_rate": 6.261844852580722e-07, "log_odds_chosen": 1.621200680732727, "log_odds_ratio": -0.20305833220481873, "logits/chosen": -0.7346788048744202, "logits/rejected": -1.091653823852539, "logps/chosen": -1.328252911567688, "logps/rejected": -2.720503568649292, "loss": 1.3618, "nll_loss": 1.341509222984314, "rewards/accuracies": 1.0, "rewards/chosen": -0.13282527029514313, "rewards/margins": 0.1392250955104828, "rewards/rejected": -0.2720503509044647, "step": 975 }, { "epoch": 1.537431048069346, "grad_norm": 0.22858092188835144, "learning_rate": 6.221946917301561e-07, "log_odds_chosen": 1.6802921295166016, "log_odds_ratio": -0.18564584851264954, "logits/chosen": -0.6427308320999146, "logits/rejected": -1.3663489818572998, "logps/chosen": -1.4192376136779785, "logps/rejected": -2.880953788757324, "loss": 1.4444, "nll_loss": 1.425816535949707, "rewards/accuracies": 1.0, "rewards/chosen": -0.14192375540733337, "rewards/margins": 0.14617159962654114, "rewards/rejected": -0.2880953848361969, "step": 976 }, { "epoch": 1.5390070921985815, "grad_norm": 0.24284838140010834, "learning_rate": 6.182153068632545e-07, "log_odds_chosen": 1.6177151203155518, "log_odds_ratio": -0.195210263133049, "logits/chosen": -0.667177677154541, "logits/rejected": -1.3496806621551514, "logps/chosen": -1.4372763633728027, "logps/rejected": -2.8486926555633545, "loss": 1.4737, "nll_loss": 1.4541935920715332, "rewards/accuracies": 1.0, "rewards/chosen": -0.14372766017913818, "rewards/margins": 0.1411416232585907, "rewards/rejected": -0.2848692536354065, "step": 977 }, { "epoch": 1.5405831363278173, "grad_norm": 0.2526456415653229, "learning_rate": 6.142463607197197e-07, "log_odds_chosen": 1.6186964511871338, "log_odds_ratio": -0.19424474239349365, "logits/chosen": -0.749358594417572, "logits/rejected": -1.4463564157485962, "logps/chosen": -1.445967197418213, "logps/rejected": -2.8594303131103516, "loss": 1.4548, "nll_loss": 1.435330867767334, "rewards/accuracies": 1.0, "rewards/chosen": -0.14459671080112457, "rewards/margins": 0.14134635031223297, "rewards/rejected": -0.28594306111335754, "step": 978 }, { "epoch": 1.5421591804570527, "grad_norm": 0.22550563514232635, "learning_rate": 6.102878832830431e-07, "log_odds_chosen": 1.5682380199432373, "log_odds_ratio": -0.19513316452503204, "logits/chosen": -0.7531631588935852, "logits/rejected": -1.506245732307434, "logps/chosen": -1.4470077753067017, "logps/rejected": -2.811253070831299, "loss": 1.4761, "nll_loss": 1.45656156539917, "rewards/accuracies": 1.0, "rewards/chosen": -0.1447007656097412, "rewards/margins": 0.13642454147338867, "rewards/rejected": -0.2811253070831299, "step": 979 }, { "epoch": 1.5437352245862885, "grad_norm": 0.23052246868610382, "learning_rate": 6.063399044576316e-07, "log_odds_chosen": 1.6534953117370605, "log_odds_ratio": -0.18321163952350616, "logits/chosen": -0.7004735469818115, "logits/rejected": -1.3356420993804932, "logps/chosen": -1.3877995014190674, "logps/rejected": -2.8147621154785156, "loss": 1.4156, "nll_loss": 1.3972423076629639, "rewards/accuracies": 1.0, "rewards/chosen": -0.13877993822097778, "rewards/margins": 0.14269626140594482, "rewards/rejected": -0.2814761996269226, "step": 980 }, { "epoch": 1.545311268715524, "grad_norm": 0.2577225863933563, "learning_rate": 6.024024540685802e-07, "log_odds_chosen": 1.6303168535232544, "log_odds_ratio": -0.1967567354440689, "logits/chosen": -0.6427086591720581, "logits/rejected": -1.2005834579467773, "logps/chosen": -1.3928618431091309, "logps/rejected": -2.8020970821380615, "loss": 1.4237, "nll_loss": 1.4040113687515259, "rewards/accuracies": 1.0, "rewards/chosen": -0.13928619027137756, "rewards/margins": 0.14092348515987396, "rewards/rejected": -0.2802096903324127, "step": 981 }, { "epoch": 1.5468873128447598, "grad_norm": 0.24495375156402588, "learning_rate": 5.984755618614443e-07, "log_odds_chosen": 1.52199125289917, "log_odds_ratio": -0.2077389657497406, "logits/chosen": -0.774683952331543, "logits/rejected": -1.4239087104797363, "logps/chosen": -1.4303879737854004, "logps/rejected": -2.745952606201172, "loss": 1.4536, "nll_loss": 1.4328104257583618, "rewards/accuracies": 1.0, "rewards/chosen": -0.1430387794971466, "rewards/margins": 0.13155648112297058, "rewards/rejected": -0.2745952606201172, "step": 982 }, { "epoch": 1.5484633569739952, "grad_norm": 0.22301128506660461, "learning_rate": 5.945592575020199e-07, "log_odds_chosen": 1.5836595296859741, "log_odds_ratio": -0.20468348264694214, "logits/chosen": -0.5643726587295532, "logits/rejected": -1.2470513582229614, "logps/chosen": -1.4009495973587036, "logps/rejected": -2.7709860801696777, "loss": 1.4364, "nll_loss": 1.4159749746322632, "rewards/accuracies": 1.0, "rewards/chosen": -0.14009495079517365, "rewards/margins": 0.13700367510318756, "rewards/rejected": -0.2770986258983612, "step": 983 }, { "epoch": 1.550039401103231, "grad_norm": 0.27008649706840515, "learning_rate": 5.90653570576116e-07, "log_odds_chosen": 1.9642605781555176, "log_odds_ratio": -0.14685845375061035, "logits/chosen": -0.7992998361587524, "logits/rejected": -1.4197301864624023, "logps/chosen": -1.4529603719711304, "logps/rejected": -3.197136878967285, "loss": 1.468, "nll_loss": 1.4533517360687256, "rewards/accuracies": 1.0, "rewards/chosen": -0.14529602229595184, "rewards/margins": 0.1744176596403122, "rewards/rejected": -0.31971368193626404, "step": 984 }, { "epoch": 1.5516154452324664, "grad_norm": 0.2333638221025467, "learning_rate": 5.867585305893315e-07, "log_odds_chosen": 1.5673391819000244, "log_odds_ratio": -0.20247569680213928, "logits/chosen": -0.7529939413070679, "logits/rejected": -1.2255744934082031, "logps/chosen": -1.3691632747650146, "logps/rejected": -2.716348648071289, "loss": 1.4075, "nll_loss": 1.3872790336608887, "rewards/accuracies": 1.0, "rewards/chosen": -0.13691633939743042, "rewards/margins": 0.13471853733062744, "rewards/rejected": -0.27163487672805786, "step": 985 }, { "epoch": 1.5531914893617023, "grad_norm": 0.225324347615242, "learning_rate": 5.828741669668337e-07, "log_odds_chosen": 1.871507167816162, "log_odds_ratio": -0.14700734615325928, "logits/chosen": -0.6826817989349365, "logits/rejected": -1.4189668893814087, "logps/chosen": -1.424090027809143, "logps/rejected": -3.067885637283325, "loss": 1.4355, "nll_loss": 1.4208197593688965, "rewards/accuracies": 1.0, "rewards/chosen": -0.14240899682044983, "rewards/margins": 0.1643795520067215, "rewards/rejected": -0.3067885637283325, "step": 986 }, { "epoch": 1.5547675334909377, "grad_norm": 0.22026561200618744, "learning_rate": 5.790005090531333e-07, "log_odds_chosen": 1.8329660892486572, "log_odds_ratio": -0.16997922956943512, "logits/chosen": -0.7399470210075378, "logits/rejected": -1.3965716361999512, "logps/chosen": -1.4371373653411865, "logps/rejected": -3.0477631092071533, "loss": 1.4553, "nll_loss": 1.438310980796814, "rewards/accuracies": 1.0, "rewards/chosen": -0.14371372759342194, "rewards/margins": 0.1610625833272934, "rewards/rejected": -0.30477631092071533, "step": 987 }, { "epoch": 1.5563435776201735, "grad_norm": 0.2416786402463913, "learning_rate": 5.751375861118666e-07, "log_odds_chosen": 1.7833058834075928, "log_odds_ratio": -0.185949444770813, "logits/chosen": -0.7473629117012024, "logits/rejected": -1.3171467781066895, "logps/chosen": -1.4520373344421387, "logps/rejected": -3.0279481410980225, "loss": 1.4579, "nll_loss": 1.4392765760421753, "rewards/accuracies": 1.0, "rewards/chosen": -0.14520373940467834, "rewards/margins": 0.1575910747051239, "rewards/rejected": -0.30279481410980225, "step": 988 }, { "epoch": 1.557919621749409, "grad_norm": 0.24890170991420746, "learning_rate": 5.712854273255707e-07, "log_odds_chosen": 1.5047317743301392, "log_odds_ratio": -0.2076880782842636, "logits/chosen": -0.6026687026023865, "logits/rejected": -1.4133058786392212, "logps/chosen": -1.43433678150177, "logps/rejected": -2.7359883785247803, "loss": 1.4727, "nll_loss": 1.4518938064575195, "rewards/accuracies": 1.0, "rewards/chosen": -0.14343367516994476, "rewards/margins": 0.13016517460346222, "rewards/rejected": -0.273598849773407, "step": 989 }, { "epoch": 1.5594956658786447, "grad_norm": 0.2281206101179123, "learning_rate": 5.674440617954659e-07, "log_odds_chosen": 1.6535592079162598, "log_odds_ratio": -0.18544477224349976, "logits/chosen": -0.7395283579826355, "logits/rejected": -1.2622352838516235, "logps/chosen": -1.3390345573425293, "logps/rejected": -2.7547459602355957, "loss": 1.3597, "nll_loss": 1.3411731719970703, "rewards/accuracies": 1.0, "rewards/chosen": -0.13390345871448517, "rewards/margins": 0.14157113432884216, "rewards/rejected": -0.2754746079444885, "step": 990 }, { "epoch": 1.5610717100078801, "grad_norm": 0.23246391117572784, "learning_rate": 5.63613518541234e-07, "log_odds_chosen": 1.6276425123214722, "log_odds_ratio": -0.20659905672073364, "logits/chosen": -0.6540700197219849, "logits/rejected": -1.309479832649231, "logps/chosen": -1.3813396692276, "logps/rejected": -2.791038751602173, "loss": 1.4206, "nll_loss": 1.3999210596084595, "rewards/accuracies": 1.0, "rewards/chosen": -0.1381339579820633, "rewards/margins": 0.140969917178154, "rewards/rejected": -0.2791039049625397, "step": 991 }, { "epoch": 1.562647754137116, "grad_norm": 0.23927409946918488, "learning_rate": 5.597938265007993e-07, "log_odds_chosen": 1.7180593013763428, "log_odds_ratio": -0.16913798451423645, "logits/chosen": -0.7266747951507568, "logits/rejected": -1.4240138530731201, "logps/chosen": -1.3688453435897827, "logps/rejected": -2.8487486839294434, "loss": 1.391, "nll_loss": 1.3741101026535034, "rewards/accuracies": 1.0, "rewards/chosen": -0.13688454031944275, "rewards/margins": 0.14799034595489502, "rewards/rejected": -0.28487491607666016, "step": 992 }, { "epoch": 1.5642237982663514, "grad_norm": 0.43743252754211426, "learning_rate": 5.559850145301106e-07, "log_odds_chosen": 1.4167253971099854, "log_odds_ratio": -0.22718274593353271, "logits/chosen": -0.6696202754974365, "logits/rejected": -1.1853585243225098, "logps/chosen": -1.4132310152053833, "logps/rejected": -2.6275668144226074, "loss": 1.4477, "nll_loss": 1.424946665763855, "rewards/accuracies": 1.0, "rewards/chosen": -0.14132308959960938, "rewards/margins": 0.1214335709810257, "rewards/rejected": -0.26275667548179626, "step": 993 }, { "epoch": 1.5657998423955872, "grad_norm": 0.23367926478385925, "learning_rate": 5.521871114029233e-07, "log_odds_chosen": 1.6394641399383545, "log_odds_ratio": -0.18021884560585022, "logits/chosen": -0.724879264831543, "logits/rejected": -1.3613380193710327, "logps/chosen": -1.3319449424743652, "logps/rejected": -2.732529401779175, "loss": 1.3756, "nll_loss": 1.3575654029846191, "rewards/accuracies": 1.0, "rewards/chosen": -0.13319450616836548, "rewards/margins": 0.1400584578514099, "rewards/rejected": -0.273252934217453, "step": 994 }, { "epoch": 1.5673758865248226, "grad_norm": 0.2905685305595398, "learning_rate": 5.484001458105823e-07, "log_odds_chosen": 1.4699556827545166, "log_odds_ratio": -0.2240980863571167, "logits/chosen": -0.7927016019821167, "logits/rejected": -1.1190454959869385, "logps/chosen": -1.4154982566833496, "logps/rejected": -2.680901050567627, "loss": 1.4401, "nll_loss": 1.41773521900177, "rewards/accuracies": 1.0, "rewards/chosen": -0.14154982566833496, "rewards/margins": 0.12654025852680206, "rewards/rejected": -0.2680900990962982, "step": 995 }, { "epoch": 1.5689519306540585, "grad_norm": 0.26202821731567383, "learning_rate": 5.446241463618027e-07, "log_odds_chosen": 2.0487170219421387, "log_odds_ratio": -0.18200302124023438, "logits/chosen": -0.7314298748970032, "logits/rejected": -1.2905378341674805, "logps/chosen": -1.435488224029541, "logps/rejected": -3.2668802738189697, "loss": 1.4596, "nll_loss": 1.4414234161376953, "rewards/accuracies": 1.0, "rewards/chosen": -0.14354883134365082, "rewards/margins": 0.18313920497894287, "rewards/rejected": -0.3266880512237549, "step": 996 }, { "epoch": 1.5705279747832939, "grad_norm": 0.23933438956737518, "learning_rate": 5.40859141582457e-07, "log_odds_chosen": 1.8784371614456177, "log_odds_ratio": -0.14710834622383118, "logits/chosen": -0.7720879316329956, "logits/rejected": -1.3841354846954346, "logps/chosen": -1.3001539707183838, "logps/rejected": -2.914290189743042, "loss": 1.3235, "nll_loss": 1.3088268041610718, "rewards/accuracies": 1.0, "rewards/chosen": -0.13001540303230286, "rewards/margins": 0.16141363978385925, "rewards/rejected": -0.2914290428161621, "step": 997 }, { "epoch": 1.5721040189125297, "grad_norm": 0.2629656195640564, "learning_rate": 5.371051599153582e-07, "log_odds_chosen": 1.3569281101226807, "log_odds_ratio": -0.2578011155128479, "logits/chosen": -0.6768915057182312, "logits/rejected": -1.0179378986358643, "logps/chosen": -1.3634341955184937, "logps/rejected": -2.512542963027954, "loss": 1.3996, "nll_loss": 1.3738161325454712, "rewards/accuracies": 1.0, "rewards/chosen": -0.13634341955184937, "rewards/margins": 0.11491090059280396, "rewards/rejected": -0.25125429034233093, "step": 998 }, { "epoch": 1.573680063041765, "grad_norm": 0.24958176910877228, "learning_rate": 5.333622297200449e-07, "log_odds_chosen": 1.4999337196350098, "log_odds_ratio": -0.20396636426448822, "logits/chosen": -0.7094676494598389, "logits/rejected": -1.2609398365020752, "logps/chosen": -1.3577684164047241, "logps/rejected": -2.626173973083496, "loss": 1.4012, "nll_loss": 1.3808488845825195, "rewards/accuracies": 1.0, "rewards/chosen": -0.1357768476009369, "rewards/margins": 0.12684056162834167, "rewards/rejected": -0.26261740922927856, "step": 999 }, { "epoch": 1.575256107171001, "grad_norm": 0.21548432111740112, "learning_rate": 5.296303792725676e-07, "log_odds_chosen": 1.7665679454803467, "log_odds_ratio": -0.16213518381118774, "logits/chosen": -0.7415461540222168, "logits/rejected": -1.456018090248108, "logps/chosen": -1.4721094369888306, "logps/rejected": -3.0248775482177734, "loss": 1.4834, "nll_loss": 1.4671509265899658, "rewards/accuracies": 1.0, "rewards/chosen": -0.1472109705209732, "rewards/margins": 0.1552768051624298, "rewards/rejected": -0.3024877607822418, "step": 1000 } ], "logging_steps": 1, "max_steps": 1270, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }